diff options
author | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
---|---|---|
committer | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
commit | 612f8f074fa1099cf70faf495d46cc647762a031 (patch) | |
tree | bd1e99c43bf15292395eadd4b7ae3f5c823545c3 /src | |
parent | f3585c841e964c98911784a187fc4f081a02a0a6 (diff) | |
download | gem5-612f8f074fa1099cf70faf495d46cc647762a031.tar.xz |
arm: Add support for ARMv8 (AArch64 & AArch32)
Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64
kernel you are restricted to AArch64 user-mode binaries. This will be addressed
in a later patch.
Note: Virtualization is only supported in AArch32 mode. This will also be fixed
in a later patch.
Contributors:
Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation)
Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation)
Mbou Eyole (AArch64 NEON, validation)
Ali Saidi (AArch64 Linux support, code integration, validation)
Edmund Grimley-Evans (AArch64 FP)
William Wang (AArch64 Linux support)
Rene De Jong (AArch64 Linux support, performance opt.)
Matt Horsnell (AArch64 MP, validation)
Matt Evans (device models, code integration, validation)
Chris Adeniyi-Jones (AArch64 syscall-emulation)
Prakash Ramrakhyani (validation)
Dam Sunwoo (validation)
Chander Sudanthi (validation)
Stephan Diestelhorst (validation)
Andreas Hansson (code integration, performance opt.)
Eric Van Hensbergen (performance opt.)
Gabe Black
Diffstat (limited to 'src')
133 files changed, 39112 insertions, 2525 deletions
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 55a589c32..eaec92f4d 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,8 +34,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Andreas Sandberg +# Giacomo Gabrielli from m5.params import * +from m5.proxy import * from m5.SimObject import SimObject class ArmISA(SimObject): @@ -43,12 +45,9 @@ class ArmISA(SimObject): cxx_class = 'ArmISA::ISA' cxx_header = "arch/arm/isa.hh" - # 0x35 Implementor is '5' from "M5" - # 0x0 Variant - # 0xf Architecture from CPUID scheme - # 0xc00 Primary part number ("c" or higher implies ARM v7) - # 0x0 Revision - midr = Param.UInt32(0x350fc000, "Main ID Register") + system = Param.System(Parent.any, "System this ISA object belongs to") + + midr = Param.UInt32(0x410fc0f0, "MIDR value") # See section B4.1.93 - B4.1.94 of the ARM ARM # @@ -56,19 +55,19 @@ class ArmISA(SimObject): # Note: ThumbEE is disabled for now since we don't support CP14 # config registers and jumping to ThumbEE vectors id_pfr0 = Param.UInt32(0x00000031, "Processor Feature Register 0") - # !Timer | !Virti | !M Profile | !TrustZone | ARMv4 - id_pfr1 = Param.UInt32(0x00000001, "Processor Feature Register 1") + # !Timer | Virti | !M Profile | TrustZone | ARMv4 + id_pfr1 = Param.UInt32(0x00001011, "Processor Feature Register 1") # See section B4.1.89 - B4.1.92 of the ARM ARM # VMSAv7 support - id_mmfr0 = Param.UInt32(0x00000003, "Memory Model Feature Register 0") + id_mmfr0 = Param.UInt32(0x10201103, "Memory Model Feature Register 0") id_mmfr1 = Param.UInt32(0x00000000, "Memory Model Feature Register 1") # no HW access | WFI stalling | ISB and DSB | # all TLB maintenance | no Harvard id_mmfr2 = Param.UInt32(0x01230000, "Memory Model Feature Register 2") # SuperSec 
| Coherent TLB | Bcast Maint | # BP Maint | Cache Maint Set/way | Cache Maint MVA - id_mmfr3 = Param.UInt32(0xF0102211, "Memory Model Feature Register 3") + id_mmfr3 = Param.UInt32(0x02102211, "Memory Model Feature Register 3") # See section B4.1.84 of ARM ARM # All values are latest for ARMv7-A profile @@ -79,5 +78,40 @@ class ArmISA(SimObject): id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4") id_isar5 = Param.UInt32(0x00000000, "Instruction Set Attribute Register 5") + fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register") + + # [31:0] is implementation defined + id_aa64afr0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 0") + # Reserved for future expansion + id_aa64afr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 1") + + # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A + id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006, + "AArch64 Debug Feature Register 0") + # Reserved for future expansion + id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Debug Feature Register 1") + + # !CRC32 | !SHA2 | !SHA1 | !AES + id_aa64isar0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 0") + # Reserved for future expansion + id_aa64isar1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 1") + + # 4K | 64K | !16K | !BigEndEL0 | !SNSMem | !BigEnd | 8b ASID | 40b PA + id_aa64mmfr0_el1 = Param.UInt64(0x0000000000f00002, + "AArch64 Memory Model Feature Register 0") + # Reserved for future expansion + id_aa64mmfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Memory Model Feature Register 1") - fpsid = Param.UInt32(0x410430A0, "Floating-point System ID Register") + # !GICv3 CP15 | AdvSIMD | FP | !EL3 | !EL2 | EL1 (AArch64) | EL0 (AArch64) + # (no AArch32/64 interprocessing support for now) + id_aa64pfr0_el1 = Param.UInt64(0x0000000000000011, + "AArch64 Processor Feature Register 0") + # 
Reserved for future expansion + id_aa64pfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Processor Feature Register 1") diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index b48c2a29d..39b7ec8ff 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -44,7 +44,8 @@ class ArmMachineType(Enum): 'RealView_PBX' : 1901, 'VExpress_ELT' : 2272, 'VExpress_CA9' : 2272, - 'VExpress_EMM' : 2272} + 'VExpress_EMM' : 2272, + 'VExpress_EMM64' : 2272} class ArmSystem(System): type = 'ArmSystem' @@ -54,6 +55,23 @@ class ArmSystem(System): boot_loader = Param.String("", "File that contains the boot loader code if any") gic_cpu_addr = Param.Addr(0, "Addres of the GIC CPU interface") flags_addr = Param.Addr(0, "Address of the flags register for MP booting") + have_security = Param.Bool(False, + "True if Security Extensions are implemented") + have_virtualization = Param.Bool(False, + "True if Virtualization Extensions are implemented") + have_lpae = Param.Bool(False, "True if LPAE is implemented") + have_generic_timer = Param.Bool(False, + "True if the Generic Timer extension is implemented") + highest_el_is_64 = Param.Bool(False, + "True if the register width of the highest implemented exception level " + "is 64 bits (ARMv8)") + reset_addr_64 = Param.UInt64(0x0, + "Reset address if the highest implemented exception level is 64 bits " + "(ARMv8)") + phys_addr_range_64 = Param.UInt8(40, + "Supported physical address range in bits when using AArch64 (ARMv8)") + have_large_asid_64 = Param.Bool(False, + "True if ASID is 16 bits in AArch64 (ARMv8)") class LinuxArmSystem(ArmSystem): type = 'LinuxArmSystem' @@ -61,8 +79,10 @@ class LinuxArmSystem(ArmSystem): load_addr_mask = 0x0fffffff machine_type = Param.ArmMachineType('RealView_PBX', "Machine id from 
http://www.arm.linux.org.uk/developer/machines/") - atags_addr = Param.Addr(0x100, - "Address where default atags structure should be written") + atags_addr = Param.Addr("Address where default atags structure should " \ + "be written") + boot_release_addr = Param.Addr(0xfff8, "Address where secondary CPUs " \ + "spin waiting boot in the loader") dtb_filename = Param.String("", "File that contains the Device Tree Blob. Don't use DTB if empty.") early_kernel_symbols = Param.Bool(False, diff --git a/src/arch/arm/ArmTLB.py b/src/arch/arm/ArmTLB.py index c70dd80c8..01ac8016a 100644 --- a/src/arch/arm/ArmTLB.py +++ b/src/arch/arm/ArmTLB.py @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -42,10 +42,12 @@ from m5.params import * from m5.proxy import * from MemObject import MemObject +# Basic stage 1 translation objects class ArmTableWalker(MemObject): type = 'ArmTableWalker' cxx_class = 'ArmISA::TableWalker' cxx_header = "arch/arm/table_walker.hh" + is_stage2 = Param.Bool(False, "Is this object for stage 2 translation?") port = MasterPort("Port for TableWalker to do walk the translation with") sys = Param.System(Parent.any, "system object parameter") num_squash_per_cycle = Param.Unsigned(2, @@ -57,3 +59,28 @@ class ArmTLB(SimObject): cxx_header = "arch/arm/tlb.hh" size = Param.Int(64, "TLB size") walker = Param.ArmTableWalker(ArmTableWalker(), "HW Table walker") + is_stage2 = Param.Bool(False, "Is this a stage 2 TLB?") + +# Stage 2 translation objects, only used when virtualisation is being used +class ArmStage2TableWalker(ArmTableWalker): + is_stage2 = True + +class ArmStage2TLB(ArmTLB): + size = 32 + walker = ArmStage2TableWalker() + is_stage2 = True + +class ArmStage2MMU(SimObject): + type = 'ArmStage2MMU' + cxx_class = 'ArmISA::Stage2MMU' + cxx_header = 'arch/arm/stage2_mmu.hh' + tlb = Param.ArmTLB("Stage 1 
TLB") + stage2_tlb = Param.ArmTLB("Stage 2 TLB") + +class ArmStage2IMMU(ArmStage2MMU): + tlb = Parent.itb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) + +class ArmStage2DMMU(ArmStage2MMU): + tlb = Parent.dtb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 8d13a9b2d..aa9ce417b 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -49,12 +49,17 @@ if env['TARGET_ISA'] == 'arm': Dir('isa/formats') Source('decoder.cc') Source('faults.cc') + Source('insts/branch64.cc') + Source('insts/data64.cc') Source('insts/macromem.cc') Source('insts/mem.cc') + Source('insts/mem64.cc') Source('insts/misc.cc') + Source('insts/misc64.cc') Source('insts/pred_inst.cc') Source('insts/static_inst.cc') Source('insts/vfp.cc') + Source('insts/fplib.cc') Source('interrupts.cc') Source('isa.cc') Source('linux/linux.cc') @@ -67,6 +72,8 @@ if env['TARGET_ISA'] == 'arm': Source('stacktrace.cc') Source('system.cc') Source('table_walker.cc') + Source('stage2_mmu.cc') + Source('stage2_lookup.cc') Source('tlb.cc') Source('utility.cc') Source('vtophys.cc') diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index e957ce0e7..940d85b8e 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. * @@ -47,9 +59,11 @@ Decoder::process() if (!emi.thumb) { emi.instBits = data; - emi.sevenAndFour = bits(data, 7) && bits(data, 4); - emi.isMisc = (bits(data, 24, 23) == 0x2 && - bits(data, 20) == 0); + if (!emi.aarch64) { + emi.sevenAndFour = bits(data, 7) && bits(data, 4); + emi.isMisc = (bits(data, 24, 23) == 0x2 && + bits(data, 20) == 0); + } consumeBytes(4); DPRINTF(Decoder, "Arm inst: %#x.\n", (uint64_t)emi); } else { @@ -112,6 +126,7 @@ Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst) data = inst; offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC; emi.thumb = pc.thumb(); + emi.aarch64 = pc.aarch64(); emi.fpscrLen = fpscrLen; emi.fpscrStride = fpscrStride; diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 72776bcfd..315a3b6ad 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. 
* diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index be1c7ecc2..f8313efd2 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,9 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #include "arch/arm/faults.hh" +#include "arch/arm/system.hh" +#include "arch/arm/utility.hh" +#include "arch/arm/insts/static_inst.hh" +#include "base/compiler.hh" #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" @@ -52,61 +58,413 @@ namespace ArmISA { -template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = -{"reset", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; +uint8_t ArmFault::shortDescFaultSources[] = { + 0x01, // AlignmentFault + 0x04, // InstructionCacheMaintenance + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x0c, // SynchExtAbtOnTranslTableWalkL1 + 0x0e, // SynchExtAbtOnTranslTableWalkL2 + 0xff, // SynchExtAbtOnTranslTableWalkL3 (INVALID) + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1c, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0xff, // SynchPtyErrOnTranslTableWalkL3 (INVALID) + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x07, // TranslationL2 + 0xff, // TranslationL3 (INVALID) + 0xff, // AccessFlagL0 (INVALID) + 0x03, // AccessFlagL1 + 0x06, // AccessFlagL2 + 0xff, // AccessFlagL3 (INVALID) + 0xff, // DomainL0 (INVALID) + 0x09, // DomainL1 + 0x0b, // DomainL2 + 0xff, // DomainL3 (INVALID) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0f, // PermissionL2 + 0xff, // PermissionL3 (INVALID) + 0x02, // DebugEvent + 0x08, // SynchronousExternalAbort + 0x10, // TLBConflictAbort + 0x19, // SynchPtyErrOnMemoryAccess + 0x16, // AsynchronousExternalAbort + 0x18, // AsynchPtyErrOnMemoryAccess + 0xff, 
// AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = -{"Undefined Instruction", 0x04, MODE_UNDEFINED, 4 ,2, false, false, - FaultStat()} ; +static_assert(sizeof(ArmFault::shortDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::shortDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = -{"Supervisor Call", 0x08, MODE_SVC, 4, 2, false, false, FaultStat()}; +uint8_t ArmFault::longDescFaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0xff, // AccessFlagL0 (INVALID) + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + 0xff, // DomainL0 (INVALID) + 0x3d, // DomainL1 + 0x3e, // DomainL2 + 0xff, // DomainL3 (RESERVED) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0x22, // DebugEvent + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0x11, // AsynchronousExternalAbort + 0x19, // AsynchPtyErrOnMemoryAccess + 0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals 
ArmFaultVals<PrefetchAbort>::vals = -{"Prefetch Abort", 0x0C, MODE_ABORT, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::longDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::longDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = -{"Data Abort", 0x10, MODE_ABORT, 8, 8, true, false, FaultStat()}; +uint8_t ArmFault::aarch64FaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0x14, // SynchExtAbtOnTranslTableWalkL0 + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0x1c, // SynchPtyErrOnTranslTableWalkL0 + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0x04, // TranslationL0 + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0x08, // AccessFlagL0 + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + // @todo: Section & Page Domain Fault in AArch64? 
+ 0xff, // DomainL0 (INVALID) + 0xff, // DomainL1 (INVALID) + 0xff, // DomainL2 (INVALID) + 0xff, // DomainL3 (INVALID) + 0x0c, // PermissionL0 + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0xff, // DebugEvent (INVALID) + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0xff, // AsynchronousExternalAbort (INVALID) + 0xff, // AsynchPtyErrOnMemoryAccess (INVALID) + 0x00, // AddressSizeL0 + 0x01, // AddressSizeL1 + 0x02, // AddressSizeL2 + 0x03, // AddressSizeL3 + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = -{"IRQ", 0x18, MODE_IRQ, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::aarch64FaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::aarch64FaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = -{"FIQ", 0x1C, MODE_FIQ, 4, 4, true, true, FaultStat()}; +// Fields: name, offset, cur{ELT,ELH}Offset, lowerEL{64,32}Offset, next mode, +// {ARM, Thumb, ARM_ELR, Thumb_ELR} PC offset, hyp trap, +// {A, F} disable, class, stat +template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = { + // Some dummy values (the reset vector has an IMPLEMENTATION DEFINED + // location in AArch64) + "Reset", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = { + "Undefined Instruction", 0x004, 0x000, 0x200, 0x400, 0x600, MODE_UNDEFINED, + 4, 2, 0, 0, true, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = { + "Supervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 4, 2, 4, 2, true, false, false, EC_SVC_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorCall>::vals = { + "Secure Monitor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_MON, 
+ 4, 4, 4, 4, false, true, true, EC_SMC_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<HypervisorCall>::vals = { + "Hypervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 4, 4, 4, 4, true, false, false, EC_HVC, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = { + "Prefetch Abort", 0x00C, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 4, 4, 0, 0, true, true, false, EC_PREFETCH_ABORT_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = { + "Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_DATA_ABORT_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualDataAbort>::vals = { + "Virtual Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<HypervisorTrap>::vals = { + // @todo: double check these values + "Hypervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = { + "IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualInterrupt>::vals = { + "Virtual IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = { + "FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualFastInterrupt>::vals = { + "Virtual FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SupervisorTrap>::vals = { + // Some dummy values (SupervisorTrap is 
AArch64-only) + "Supervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorTrap>::vals = { + // Some dummy values (SecureMonitorTrap is AArch64-only) + "Secure Monitor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_MON, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<PCAlignmentFault>::vals = { + // Some dummy values (PCAlignmentFault is AArch64-only) + "PC Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_PC_ALIGNMENT, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SPAlignmentFault>::vals = { + // Some dummy values (SPAlignmentFault is AArch64-only) + "SP Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_STACK_PTR_ALIGNMENT, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SystemError>::vals = { + // Some dummy values (SError is AArch64-only) + "SError", 0x000, 0x180, 0x380, 0x580, 0x780, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_SERROR, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = { + // Some dummy values + "Pipe Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = { + // Some dummy values + "ArmSev Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<IllegalInstSetStateFault>::vals = { + // Some dummy values (SPAlignmentFault is AArch64-only) + "Illegal Inst Set State Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_ILLEGAL_INST, FaultStat() +}; -template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = -{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, 
true, FaultStat()}; // dummy values - -template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = -{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values -Addr +Addr ArmFault::getVector(ThreadContext *tc) { - // ARM ARM B1-3 + Addr base; - SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); + // ARM ARM issue C B1.8.1 + bool haveSecurity = ArmSystem::haveSecurity(tc); // panic if SCTLR.VE because I have no idea what to do with vectored // interrupts + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); assert(!sctlr.ve); + // Check for invalid modes + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + assert(haveSecurity || cpsr.mode != MODE_MON); + assert(ArmSystem::haveVirtualization(tc) || cpsr.mode != MODE_HYP); - if (!sctlr.v) - return offset(); - return offset() + HighVecs; + switch (cpsr.mode) + { + case MODE_MON: + base = tc->readMiscReg(MISCREG_MVBAR); + break; + case MODE_HYP: + base = tc->readMiscReg(MISCREG_HVBAR); + break; + default: + if (sctlr.v) { + base = HighVecs; + } else { + base = haveSecurity ? 
tc->readMiscReg(MISCREG_VBAR) : 0; + } + break; + } + return base + offset(tc); +} +Addr +ArmFault::getVector64(ThreadContext *tc) +{ + Addr vbar; + switch (toEL) { + case EL3: + assert(ArmSystem::haveSecurity(tc)); + vbar = tc->readMiscReg(MISCREG_VBAR_EL3); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization(tc)); + // vbar = tc->readMiscReg(MISCREG_VBAR_EL2); + // break; + case EL1: + vbar = tc->readMiscReg(MISCREG_VBAR_EL1); + break; + default: + panic("Invalid target exception level"); + break; + } + return vbar + offset64(); } -void +MiscRegIndex +ArmFault::getSyndromeReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_ESR_EL1; + case EL2: + return MISCREG_ESR_EL2; + case EL3: + return MISCREG_ESR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +MiscRegIndex +ArmFault::getFaultAddrReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_FAR_EL1; + case EL2: + return MISCREG_FAR_EL2; + case EL3: + return MISCREG_FAR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +void +ArmFault::setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg) +{ + uint32_t value; + uint32_t exc_class = (uint32_t) ec(tc); + uint32_t issVal = iss(); + assert(!from64 || ArmSystem::highestELIs64(tc)); + + value = exc_class << 26; + + // HSR.IL not valid for Prefetch Aborts (0x20, 0x21) and Data Aborts (0x24, + // 0x25) for which the ISS information is not valid (ARMv7). + // @todo: ARMv8 revises AArch32 functionality: when HSR.IL is not + // valid it is treated as RES1. 
+ if (to64) { + value |= 1 << 25; + } else if ((bits(exc_class, 5, 3) != 4) || + (bits(exc_class, 2) && bits(issVal, 24))) { + if (!machInst.thumb || machInst.bigThumb) + value |= 1 << 25; + } + // Condition code valid for EC[5:4] nonzero + if (!from64 && ((bits(exc_class, 5, 4) == 0) && + (bits(exc_class, 3, 0) != 0))) { + if (!machInst.thumb) { + uint32_t cond; + ConditionCode condCode = (ConditionCode) (uint32_t) machInst.condCode; + // If its on unconditional instruction report with a cond code of + // 0xE, ie the unconditional code + cond = (condCode == COND_UC) ? COND_AL : condCode; + value |= cond << 20; + value |= 1 << 24; + } + value |= bits(issVal, 19, 0); + } else { + value |= issVal; + } + tc->setMiscReg(syndrome_reg, value); +} + +void ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) { - // ARM ARM B1.6.3 + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + + if (ArmSystem::highestELIs64(tc)) { // ARMv8 + // Determine source exception level and mode + fromMode = (OperatingMode) (uint8_t) cpsr.mode; + fromEL = opModeToEL(fromMode); + if (opModeIs64(fromMode)) + from64 = true; + + // Determine target exception level + if (ArmSystem::haveSecurity(tc) && routeToMonitor(tc)) + toEL = EL3; + else + toEL = opModeToEL(nextMode()); + if (fromEL > toEL) + toEL = fromEL; + + if (toEL == ArmSystem::highestEL(tc) || ELIs64(tc, toEL)) { + // Invoke exception handler in AArch64 state + to64 = true; + invoke64(tc, inst); + return; + } + } + + // ARMv7 (ARM ARM issue C B1.9) + + bool have_security = ArmSystem::haveSecurity(tc); + bool have_virtualization = ArmSystem::haveVirtualization(tc); + FaultBase::invoke(tc); if (!FullSystem) return; countStat()++; SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); CPSR saved_cpsr = tc->readMiscReg(MISCREG_CPSR); saved_cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); saved_cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); @@ -118,22 +476,73 @@ 
ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) saved_cpsr.it2 = it.top6; saved_cpsr.it1 = it.bottom2; - cpsr.mode = nextMode(); + // if we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) { + ArmStaticInst *armInst = reinterpret_cast<ArmStaticInst *>(inst.get()); + armInst->annotateFault(this); + } + + if (have_security && routeToMonitor(tc)) + cpsr.mode = MODE_MON; + else if (have_virtualization && routeToHyp(tc)) + cpsr.mode = MODE_HYP; + else + cpsr.mode = nextMode(); + + // Ensure Secure state if initially in Monitor mode + if (have_security && saved_cpsr.mode == MODE_MON) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + if (scr.ns) { + scr.ns = 0; + tc->setMiscRegNoEffect(MISCREG_SCR, scr); + } + } + + // some bits are set differently if we have been routed to hyp mode + if (cpsr.mode == MODE_HYP) { + SCTLR hsctlr = tc->readMiscReg(MISCREG_HSCTLR); + cpsr.t = hsctlr.te; + cpsr.e = hsctlr.ee; + if (!scr.ea) {cpsr.a = 1;} + if (!scr.fiq) {cpsr.f = 1;} + if (!scr.irq) {cpsr.i = 1;} + } else if (cpsr.mode == MODE_MON) { + // Special case handling when entering monitor mode + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + cpsr.a = 1; + cpsr.f = 1; + cpsr.i = 1; + } else { + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + + // The *Disable functions are virtual and different per fault + cpsr.a = cpsr.a | abortDisable(tc); + cpsr.f = cpsr.f | fiqDisable(tc); + cpsr.i = 1; + } cpsr.it1 = cpsr.it2 = 0; cpsr.j = 0; - - cpsr.t = sctlr.te; - cpsr.a = cpsr.a | abortDisable(); - cpsr.f = cpsr.f | fiqDisable(); - cpsr.i = 1; - cpsr.e = sctlr.ee; tc->setMiscReg(MISCREG_CPSR, cpsr); + // Make sure mailbox sets to one always tc->setMiscReg(MISCREG_SEV_MAILBOX, 1); - tc->setIntReg(INTREG_LR, curPc + - (saved_cpsr.t ? 
thumbPcOffset() : armPcOffset())); - switch (nextMode()) { + // Clear the exclusive monitor + tc->setMiscReg(MISCREG_LOCKFLAG, 0); + + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(MISCREG_ELR_HYP, curPc + + (saved_cpsr.t ? thumbPcOffset(true) : armPcOffset(true))); + } else { + tc->setIntReg(INTREG_LR, curPc + + (saved_cpsr.t ? thumbPcOffset(false) : armPcOffset(false))); + } + + switch (cpsr.mode) { case MODE_FIQ: tc->setMiscReg(MISCREG_SPSR_FIQ, saved_cpsr); break; @@ -143,12 +552,23 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) case MODE_SVC: tc->setMiscReg(MISCREG_SPSR_SVC, saved_cpsr); break; - case MODE_UNDEFINED: - tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + case MODE_MON: + assert(have_security); + tc->setMiscReg(MISCREG_SPSR_MON, saved_cpsr); break; case MODE_ABORT: tc->setMiscReg(MISCREG_SPSR_ABT, saved_cpsr); break; + case MODE_UNDEFINED: + tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + if (ec(tc) != EC_UNKNOWN) + setSyndrome(tc, MISCREG_HSR); + break; + case MODE_HYP: + assert(have_virtualization); + tc->setMiscReg(MISCREG_SPSR_HYP, saved_cpsr); + setSyndrome(tc, MISCREG_HSR); + break; default: panic("unknown Mode\n"); } @@ -161,7 +581,100 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) pc.nextThumb(pc.thumb()); pc.jazelle(cpsr.j); pc.nextJazelle(pc.jazelle()); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); + tc->pcState(pc); +} + +void +ArmFault::invoke64(ThreadContext *tc, StaticInstPtr inst) +{ + // Determine actual misc. 
register indices for ELR_ELx and SPSR_ELx + MiscRegIndex elr_idx, spsr_idx; + switch (toEL) { + case EL1: + elr_idx = MISCREG_ELR_EL1; + spsr_idx = MISCREG_SPSR_EL1; + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // elr_idx = MISCREG_ELR_EL2; + // spsr_idx = MISCREG_SPSR_EL2; + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + elr_idx = MISCREG_ELR_EL3; + spsr_idx = MISCREG_SPSR_EL3; + break; + default: + panic("Invalid target exception level"); + break; + } + + // Save process state into SPSR_ELx + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + CPSR spsr = cpsr; + spsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + spsr.c = tc->readIntReg(INTREG_CONDCODES_C); + spsr.v = tc->readIntReg(INTREG_CONDCODES_V); + if (from64) { + // Force some bitfields to 0 + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + } else { + spsr.ge = tc->readIntReg(INTREG_CONDCODES_GE); + ITSTATE it = tc->pcState().itstate(); + spsr.it2 = it.top6; + spsr.it1 = it.bottom2; + // Force some bitfields to 0 + spsr.res0_23_22 = 0; + spsr.ss = 0; + } + tc->setMiscReg(spsr_idx, spsr); + + // Save preferred return address into ELR_ELx + Addr curr_pc = tc->pcState().pc(); + Addr ret_addr = curr_pc; + if (from64) + ret_addr += armPcElrOffset(); + else + ret_addr += spsr.t ? 
thumbPcElrOffset() : armPcElrOffset(); + tc->setMiscReg(elr_idx, ret_addr); + + // Update process state + OperatingMode64 mode = 0; + mode.spX = 1; + mode.el = toEL; + mode.width = 0; + cpsr.mode = mode; + cpsr.daif = 0xf; + cpsr.il = 0; + cpsr.ss = 0; + tc->setMiscReg(MISCREG_CPSR, cpsr); + + // Set PC to start of exception handler + Addr new_pc = purifyTaggedAddr(getVector64(tc), tc, toEL); + DPRINTF(Faults, "Invoking Fault (AArch64 target EL):%s cpsr:%#x PC:%#x " + "elr:%#x newVec: %#x\n", name(), cpsr, curr_pc, ret_addr, new_pc); + PCState pc(new_pc); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); tc->pcState(pc); + + // If we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) + reinterpret_cast<ArmStaticInst *>(inst.get())->annotateFault(this); + // Save exception syndrome + if ((nextMode() != MODE_IRQ) && (nextMode() != MODE_FIQ)) + setSyndrome(tc, getSyndromeReg64()); } void @@ -171,7 +684,25 @@ Reset::invoke(ThreadContext *tc, StaticInstPtr inst) tc->getCpuPtr()->clearInterrupts(); tc->clearArchRegs(); } - ArmFault::invoke(tc, inst); + if (!ArmSystem::highestELIs64(tc)) { + ArmFault::invoke(tc, inst); + tc->setMiscReg(MISCREG_VMPIDR, + getMPIDR(dynamic_cast<ArmSystem*>(tc->getSystemPtr()), tc)); + + // Unless we have SMC code to get us there, boot in HYP! 
+ if (ArmSystem::haveVirtualization(tc) && + !ArmSystem::haveSecurity(tc)) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_HYP; + tc->setMiscReg(MISCREG_CPSR, cpsr); + } + } else { + // Advance the PC to the IMPLEMENTATION DEFINED reset value + PCState pc = ArmSystem::resetAddr64(tc); + pc.aarch64(true); + pc.nextAArch64(true); + tc->pcState(pc); + } } void @@ -196,6 +727,45 @@ UndefinedInstruction::invoke(ThreadContext *tc, StaticInstPtr inst) } } +bool +UndefinedInstruction::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +uint32_t +UndefinedInstruction::iss() const +{ + if (overrideEc == EC_INVALID) + return issRaw; + + uint32_t new_iss = 0; + uint32_t op0, op1, op2, CRn, CRm, Rt, dir; + + dir = bits(machInst, 21, 21); + op0 = bits(machInst, 20, 19); + op1 = bits(machInst, 18, 16); + CRn = bits(machInst, 15, 12); + CRm = bits(machInst, 11, 8); + op2 = bits(machInst, 7, 5); + Rt = bits(machInst, 4, 0); + + new_iss = op0 << 20 | op2 << 17 | op1 << 14 | CRn << 10 | + Rt << 5 | CRm << 1 | dir; + + return new_iss; +} + void SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) { @@ -207,7 +777,12 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) // As of now, there isn't a 32 bit thumb version of this instruction. 
assert(!machInst.bigThumb); uint32_t callNum; - callNum = tc->readIntReg(INTREG_R7); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + OperatingMode mode = (OperatingMode)(uint8_t)cpsr.mode; + if (opModeIs64(mode)) + callNum = tc->readIntReg(INTREG_X8); + else + callNum = tc->readIntReg(INTREG_R7); tc->syscall(callNum); // Advance the PC since that won't happen automatically. @@ -217,21 +792,593 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) tc->pcState(pc); } +bool +SupervisorCall::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +ExceptionClass +SupervisorCall::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SVC_64 : vals.ec); +} + +uint32_t +SupervisorCall::iss() const +{ + // Even if we have a 24 bit imm from an arm32 instruction then we only use + // the bottom 16 bits for the ISS value (it doesn't hurt for AArch64 SVC). + return issRaw & 0xFFFF; +} + +uint32_t +SecureMonitorCall::iss() const +{ + if (from64) + return bits(machInst, 20, 5); + return 0; +} + +ExceptionClass +UndefinedInstruction::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + + +HypervisorCall::HypervisorCall(ExtMachInst _machInst, uint32_t _imm) : + ArmFaultVals<HypervisorCall>(_machInst, _imm) +{} + +ExceptionClass +HypervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? 
overrideEc : vals.ec; +} + +template<class T> +FaultOffset +ArmFaultVals<T>::offset(ThreadContext *tc) +{ + bool isHypTrap = false; + + // Normally we just use the exception vector from the table at the top if + // this file, however if this exception has caused a transition to hype + // mode, and its an exception type that would only do this if it has been + // trapped then we use the hyp trap vector instead of the normal vector + if (vals.hypTrappable) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + if (cpsr.mode == MODE_HYP) { + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + isHypTrap = spsr.mode != MODE_HYP; + } + } + return isHypTrap ? 0x14 : vals.offset; +} + +// void +// SupervisorCall::setSyndrome64(ThreadContext *tc, MiscRegIndex esr_idx) +// { +// ESR esr = 0; +// esr.ec = machInst.aarch64 ? SvcAArch64 : SvcAArch32; +// esr.il = !machInst.thumb; +// if (machInst.aarch64) +// esr.imm16 = bits(machInst.instBits, 20, 5); +// else if (machInst.thumb) +// esr.imm16 = bits(machInst.instBits, 7, 0); +// else +// esr.imm16 = bits(machInst.instBits, 15, 0); +// tc->setMiscReg(esr_idx, esr); +// } + +void +SecureMonitorCall::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + if (FullSystem) { + ArmFault::invoke(tc, inst); + return; + } +} + +ExceptionClass +SecureMonitorCall::ec(ThreadContext *tc) const +{ + return (from64 ? EC_SMC_64 : vals.ec); +} + +ExceptionClass +SupervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + +ExceptionClass +SecureMonitorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SMC_64 : vals.ec); +} + template<class T> void AbortFault<T>::invoke(ThreadContext *tc, StaticInstPtr inst) { + if (tranMethod == ArmFault::UnknownTran) { + tranMethod = longDescFormatInUse(tc) ? 
ArmFault::LpaeTran + : ArmFault::VmsaTran; + + if ((tranMethod == ArmFault::VmsaTran) && this->routeToMonitor(tc)) { + // See ARM ARM B3-1416 + bool override_LPAE = false; + TTBCR ttbcr_s = tc->readMiscReg(MISCREG_TTBCR_S); + TTBCR M5_VAR_USED ttbcr_ns = tc->readMiscReg(MISCREG_TTBCR_NS); + if (ttbcr_s.eae) { + override_LPAE = true; + } else { + // Unimplemented code option, not seen in testing. May need + // extension according to the manual exceprt above. + DPRINTF(Faults, "Warning: Incomplete translation method " + "override detected.\n"); + } + if (override_LPAE) + tranMethod = ArmFault::LpaeTran; + } + } + + if (source == ArmFault::AsynchronousExternalAbort) { + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); + } + // Get effective fault source encoding + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + FSR fsr = getFsr(tc); + + // source must be determined BEFORE invoking generic routines which will + // try to set hsr etc. and are based upon source! ArmFaultVals<T>::invoke(tc, inst); + + if (cpsr.width) { // AArch32 + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(T::HFarIndex, faultAddr); + } else if (stage2) { + tc->setMiscReg(MISCREG_HPFAR, (faultAddr >> 8) & ~0xf); + tc->setMiscReg(T::HFarIndex, OVAddr); + } else { + tc->setMiscReg(T::FsrIndex, fsr); + tc->setMiscReg(T::FarIndex, faultAddr); + } + DPRINTF(Faults, "Abort Fault source=%#x fsr=%#x faultAddr=%#x "\ + "tranMethod=%#x\n", source, fsr, faultAddr, tranMethod); + } else { // AArch64 + // Set the FAR register. Nothing else to do if we are in AArch64 state + // because the syndrome register has already been set inside invoke64() + tc->setMiscReg(AbortFault<T>::getFaultAddrReg64(), faultAddr); + } +} + +template<class T> +FSR +AbortFault<T>::getFsr(ThreadContext *tc) +{ FSR fsr = 0; - fsr.fsLow = bits(status, 3, 0); - fsr.fsHigh = bits(status, 4); - fsr.domain = domain; - fsr.wnr = (write ? 
1 : 0); - fsr.ext = 0; - tc->setMiscReg(T::FsrIndex, fsr); - tc->setMiscReg(T::FarIndex, faultAddr); - DPRINTF(Faults, "Abort Fault fsr=%#x faultAddr=%#x\n", fsr, faultAddr); + if (((CPSR) tc->readMiscRegNoEffect(MISCREG_CPSR)).width) { + // AArch32 + assert(tranMethod != ArmFault::UnknownTran); + if (tranMethod == ArmFault::LpaeTran) { + srcEncoded = ArmFault::longDescFaultSources[source]; + fsr.status = srcEncoded; + fsr.lpae = 1; + } else { + srcEncoded = ArmFault::shortDescFaultSources[source]; + fsr.fsLow = bits(srcEncoded, 3, 0); + fsr.fsHigh = bits(srcEncoded, 4); + fsr.domain = static_cast<uint8_t>(domain); + } + fsr.wnr = (write ? 1 : 0); + fsr.ext = 0; + } else { + // AArch64 + srcEncoded = ArmFault::aarch64FaultSources[source]; + } + if (srcEncoded == ArmFault::FaultSourceInvalid) { + panic("Invalid fault source\n"); + } + return fsr; +} + +template<class T> +bool +AbortFault<T>::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +template<class T> +void +AbortFault<T>::annotate(ArmFault::AnnotationIDs id, uint64_t val) +{ + switch (id) + { + case ArmFault::S1PTW: + s1ptw = val; + break; + case ArmFault::OVA: + OVAddr = val; + break; + + // Just ignore unknown ID's + default: + break; + } +} + +template<class T> +uint32_t +AbortFault<T>::iss() const +{ + uint32_t val; + + val = srcEncoded & 0x3F; + val |= write << 6; + val |= s1ptw << 7; + return (val); +} + +template<class T> +bool +AbortFault<T>::isMMUFault() const +{ + // NOTE: Not relying on LL information being aligned to lowest bits here + return + (source == ArmFault::AlignmentFault) || + ((source >= ArmFault::TranslationLL) && + (source < ArmFault::TranslationLL + 4)) || + ((source >= ArmFault::AccessFlagLL) && + (source < ArmFault::AccessFlagLL + 4)) || + ((source >= ArmFault::DomainLL) && + (source < ArmFault::DomainLL + 4)) || + ((source >= ArmFault::PermissionLL) && + 
(source < ArmFault::PermissionLL + 4)); +} + +ExceptionClass +PrefetchAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (toEL == fromEL) + return EC_PREFETCH_ABORT_CURR_EL; + else + return EC_PREFETCH_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals<PrefetchAbort>::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +PrefetchAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +PrefetchAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (source == DebugEvent) && hdcr.tde && (cpsr.mode != MODE_HYP)) || + ( (source == SynchronousExternalAbort) && hcr.tge && (cpsr.mode == MODE_USER)) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +ExceptionClass +DataAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (source == ArmFault::AsynchronousExternalAbort) { + panic("Asynchronous External Abort should be handled with \ + SystemErrors (SErrors)!"); + } + if (toEL == fromEL) + return EC_DATA_ABORT_CURR_EL; + else + return EC_DATA_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + 
// the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals<DataAbort>::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +DataAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +DataAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (cpsr.mode != MODE_HYP) && ( ((source == AsynchronousExternalAbort) && hcr.amo) || + ((source == DebugEvent) && hdcr.tde) ) + ) || + ( (cpsr.mode == MODE_USER) && hcr.tge && + ((source == AlignmentFault) || + (source == SynchronousExternalAbort)) + ) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +uint32_t +DataAbort::iss() const +{ + uint32_t val; + + // Add on the data abort specific fields to the generic abort ISS value + val = AbortFault<DataAbort>::iss(); + // ISS is valid if not caused by a stage 1 page table walk, and when taken + // to AArch64 only when directed to EL2 + if (!s1ptw && (!to64 || toEL == EL2)) { + val |= isv << 24; + if (isv) { + val |= sas << 22; + val |= sse << 21; + val |= srt << 16; + // AArch64 only. 
These assignments are safe on AArch32 as well + // because these vars are initialized to false + val |= sf << 15; + val |= ar << 14; + } + } + return (val); +} + +void +DataAbort::annotate(AnnotationIDs id, uint64_t val) +{ + AbortFault<DataAbort>::annotate(id, val); + switch (id) + { + case SAS: + isv = true; + sas = val; + break; + case SSE: + isv = true; + sse = val; + break; + case SRT: + isv = true; + srt = val; + break; + case SF: + isv = true; + sf = val; + break; + case AR: + isv = true; + ar = val; + break; + // Just ignore unknown ID's + default: + break; + } +} + +void +VirtualDataAbort::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + AbortFault<VirtualDataAbort>::invoke(tc, inst); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + hcr.va = 0; + tc->setMiscRegNoEffect(MISCREG_HCR, hcr); +} + +bool +Interrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.irq; +} + +bool +Interrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. 
+ toHyp = (!scr.irq && hcr.imo && !inSecureState(scr, cpsr)) || + (cpsr.mode == MODE_HYP); + return toHyp; +} + +bool +Interrupt::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +VirtualInterrupt::VirtualInterrupt() +{} + +bool +FastInterrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.fiq; +} + +bool +FastInterrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. + toHyp = (!scr.fiq && hcr.fmo && !inSecureState(scr, cpsr)) || + (cpsr.mode == MODE_HYP); + return toHyp; +} + +bool +FastInterrupt::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +bool +FastInterrupt::fiqDisable(ThreadContext *tc) +{ + if (ArmSystem::haveVirtualization(tc)) { + return true; + } else if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.fw); + } + return true; +} + +VirtualFastInterrupt::VirtualFastInterrupt() +{} + +void +PCAlignmentFault::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + ArmFaultVals<PCAlignmentFault>::invoke(tc, inst); + assert(from64); + // Set the FAR + tc->setMiscReg(getFaultAddrReg64(), faultPC); +} + +SPAlignmentFault::SPAlignmentFault() +{} + +SystemError::SystemError() +{} + +void +SystemError::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); + ArmFault::invoke(tc, inst); +} + +bool 
+SystemError::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + assert(from64); + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + return scr.ea; +} + +bool +SystemError::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + assert(from64); + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + toHyp = (!scr.ea && hcr.amo && !inSecureState(scr, cpsr)) || + (!scr.ea && !scr.rw && !hcr.amo && !inSecureState(scr,cpsr)); + return toHyp; } void @@ -247,11 +1394,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst) { tc->pcState(pc); } -template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc, - StaticInstPtr inst); -template void AbortFault<DataAbort>::invoke(ThreadContext *tc, - StaticInstPtr inst); - void ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) { DPRINTF(Faults, "Invoking ArmSev Fault\n"); @@ -265,6 +1407,34 @@ ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) { tc->getCpuPtr()->clearInterrupt(INT_SEV, 0); } -// return via SUBS pc, lr, xxx; rfe, movs, ldm +// Instantiate all the templates to make the linker happy +template class ArmFaultVals<Reset>; +template class ArmFaultVals<UndefinedInstruction>; +template class ArmFaultVals<SupervisorCall>; +template class ArmFaultVals<SecureMonitorCall>; +template class ArmFaultVals<HypervisorCall>; +template class ArmFaultVals<PrefetchAbort>; +template class ArmFaultVals<DataAbort>; +template class ArmFaultVals<VirtualDataAbort>; +template class ArmFaultVals<HypervisorTrap>; +template class ArmFaultVals<Interrupt>; +template class ArmFaultVals<VirtualInterrupt>; +template class ArmFaultVals<FastInterrupt>; +template class ArmFaultVals<VirtualFastInterrupt>; +template class ArmFaultVals<SupervisorTrap>; +template class ArmFaultVals<SecureMonitorTrap>; +template class ArmFaultVals<PCAlignmentFault>; +template class ArmFaultVals<SPAlignmentFault>; 
+template class ArmFaultVals<SystemError>; +template class ArmFaultVals<FlushPipe>; +template class ArmFaultVals<ArmSev>; +template class AbortFault<PrefetchAbort>; +template class AbortFault<DataAbort>; +template class AbortFault<VirtualDataAbort>; + + +IllegalInstSetStateFault::IllegalInstSetStateFault() +{} + } // namespace ArmISA diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh index 9858e52ef..a5720f115 100644 --- a/src/arch/arm/faults.hh +++ b/src/arch/arm/faults.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,12 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #ifndef __ARM_FAULTS_HH__ #define __ARM_FAULTS_HH__ #include "arch/arm/miscregs.hh" +#include "arch/arm/pagetable.hh" #include "arch/arm/types.hh" #include "base/misc.hh" #include "sim/faults.hh" @@ -60,63 +63,146 @@ typedef const Addr FaultOffset; class ArmFault : public FaultBase { protected: + ExtMachInst machInst; + uint32_t issRaw; + + // Helper variables for ARMv8 exception handling + bool from64; // True if the exception is generated from the AArch64 state + bool to64; // True if the exception is taken in AArch64 state + ExceptionLevel fromEL; // Source exception level + ExceptionLevel toEL; // Target exception level + OperatingMode fromMode; // Source operating mode + Addr getVector(ThreadContext *tc); + Addr getVector64(ThreadContext *tc); public: - enum StatusEncoding + /// Generic fault source enums used to index into + /// {short/long/aarch64}DescFaultSources[] to get the actual encodings based + /// on the current register width state and the translation table format in + /// use + enum FaultSource { - // Fault Status register encodings - // ARM ARM B3.9.4 - AlignmentFault = 0x1, - DebugEvent = 0x2, - AccessFlag0 = 0x3, - InstructionCacheMaintenance = 0x4, - Translation0 = 0x5, - 
AccessFlag1 = 0x6, - Translation1 = 0x7, - SynchronousExternalAbort0 = 0x8, - Domain0 = 0x9, - SynchronousExternalAbort1 = 0x8, - Domain1 = 0xb, - TranslationTableWalkExtAbt0 = 0xc, - Permission0 = 0xd, - TranslationTableWalkExtAbt1 = 0xe, - Permission1 = 0xf, - AsynchronousExternalAbort = 0x16, - MemoryAccessAsynchronousParityError = 0x18, - MemoryAccessSynchronousParityError = 0x19, - TranslationTableWalkPrtyErr0 = 0x1c, - TranslationTableWalkPrtyErr1 = 0x1e, - - // not a real fault. This is a status code - // to allow the translation function to inform - // the memory access function not to proceed - // for a Prefetch that misses in the TLB. - PrefetchTLBMiss = 0x1f, - PrefetchUncacheable = 0x20 + AlignmentFault = 0, + InstructionCacheMaintenance, // Short-desc. format only + SynchExtAbtOnTranslTableWalkLL, + SynchPtyErrOnTranslTableWalkLL = SynchExtAbtOnTranslTableWalkLL + 4, + TranslationLL = SynchPtyErrOnTranslTableWalkLL + 4, + AccessFlagLL = TranslationLL + 4, + DomainLL = AccessFlagLL + 4, + PermissionLL = DomainLL + 4, + DebugEvent = PermissionLL + 4, + SynchronousExternalAbort, + TLBConflictAbort, // Requires LPAE + SynchPtyErrOnMemoryAccess, + AsynchronousExternalAbort, + AsynchPtyErrOnMemoryAccess, + AddressSizeLL, // AArch64 only + + // Not real faults. These are faults to allow the translation function + // to inform the memory access function not to proceed for a prefetch + // that misses in the TLB or that targets an uncacheable address + PrefetchTLBMiss = AddressSizeLL + 4, + PrefetchUncacheable, + + NumFaultSources, + FaultSourceInvalid = 0xff + }; + + /// Encodings of the fault sources when the short-desc. translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t shortDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources when the long-desc. 
translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t longDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources in AArch64 state + static uint8_t aarch64FaultSources[NumFaultSources]; + + enum AnnotationIDs + { + S1PTW, // DataAbort, PrefetchAbort: Stage 1 Page Table Walk, + OVA, // DataAbort, PrefetchAbort: stage 1 Virtual Address for stage 2 faults + SAS, // DataAbort: Syndrome Access Size + SSE, // DataAbort: Syndrome Sign Extend + SRT, // DataAbort: Syndrome Register Transfer + + // AArch64 only + SF, // DataAbort: width of the accessed register is SixtyFour + AR // DataAbort: Acquire/Release semantics + }; + + enum TranMethod + { + LpaeTran, + VmsaTran, + UnknownTran }; struct FaultVals { const FaultName name; + const FaultOffset offset; + + // Offsets used for exceptions taken in AArch64 state + const uint16_t currELTOffset; + const uint16_t currELHOffset; + const uint16_t lowerEL64Offset; + const uint16_t lowerEL32Offset; + const OperatingMode nextMode; + const uint8_t armPcOffset; const uint8_t thumbPcOffset; + // The following two values are used in place of armPcOffset and + // thumbPcOffset when the exception return address is saved into ELR + // registers (exceptions taken in HYP mode or in AArch64 state) + const uint8_t armPcElrOffset; + const uint8_t thumbPcElrOffset; + + const bool hypTrappable; const bool abortDisable; const bool fiqDisable; + + // Exception class used to appropriately set the syndrome register + // (exceptions taken in HYP mode or in AArch64 state) + const ExceptionClass ec; + FaultStat count; }; + ArmFault(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + machInst(_machInst), issRaw(_iss), from64(false), to64(false) {} + + // Returns the actual syndrome register to use based on the target + // exception level + MiscRegIndex getSyndromeReg64() const; + // Returns the actual fault address register to use based on the target + // exception level + MiscRegIndex getFaultAddrReg64() const; 
+ void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + void invoke64(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + virtual void annotate(AnnotationIDs id, uint64_t val) {} virtual FaultStat& countStat() = 0; - virtual FaultOffset offset() = 0; + virtual FaultOffset offset(ThreadContext *tc) = 0; + virtual FaultOffset offset64() = 0; virtual OperatingMode nextMode() = 0; - virtual uint8_t armPcOffset() = 0; - virtual uint8_t thumbPcOffset() = 0; - virtual bool abortDisable() = 0; - virtual bool fiqDisable() = 0; + virtual bool routeToMonitor(ThreadContext *tc) const = 0; + virtual bool routeToHyp(ThreadContext *tc) const { return false; } + virtual uint8_t armPcOffset(bool isHyp) = 0; + virtual uint8_t thumbPcOffset(bool isHyp) = 0; + virtual uint8_t armPcElrOffset() = 0; + virtual uint8_t thumbPcElrOffset() = 0; + virtual bool abortDisable(ThreadContext *tc) = 0; + virtual bool fiqDisable(ThreadContext *tc) = 0; + virtual ExceptionClass ec(ThreadContext *tc) const = 0; + virtual uint32_t iss() const = 0; + virtual bool isStage2() const { return false; } + virtual FSR getFsr(ThreadContext *tc) { return 0; } + virtual void setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg); }; template<typename T> @@ -126,14 +212,38 @@ class ArmFaultVals : public ArmFault static FaultVals vals; public: + ArmFaultVals<T>(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + ArmFault(_machInst, _iss) {} FaultName name() const { return vals.name; } - FaultStat & countStat() {return vals.count;} - FaultOffset offset() { return vals.offset; } + FaultStat & countStat() { return vals.count; } + FaultOffset offset(ThreadContext *tc); + + FaultOffset + offset64() + { + if (toEL == fromEL) { + if (opModeIsT(fromMode)) + return vals.currELTOffset; + return vals.currELHOffset; + } else { + if (from64) + return vals.lowerEL64Offset; + return vals.lowerEL32Offset; + } + } + OperatingMode nextMode() { return vals.nextMode; } - 
uint8_t armPcOffset() { return vals.armPcOffset; } - uint8_t thumbPcOffset() { return vals.thumbPcOffset; } - bool abortDisable() { return vals.abortDisable; } - bool fiqDisable() { return vals.fiqDisable; } + virtual bool routeToMonitor(ThreadContext *tc) const { return false; } + uint8_t armPcOffset(bool isHyp) { return isHyp ? vals.armPcElrOffset + : vals.armPcOffset; } + uint8_t thumbPcOffset(bool isHyp) { return isHyp ? vals.thumbPcElrOffset + : vals.thumbPcOffset; } + uint8_t armPcElrOffset() { return vals.armPcElrOffset; } + uint8_t thumbPcElrOffset() { return vals.thumbPcElrOffset; } + virtual bool abortDisable(ThreadContext* tc) { return vals.abortDisable; } + virtual bool fiqDisable(ThreadContext* tc) { return vals.fiqDisable; } + virtual ExceptionClass ec(ThreadContext *tc) const { return vals.ec; } + virtual uint32_t iss() const { return issRaw; } }; class Reset : public ArmFaultVals<Reset> @@ -146,87 +256,283 @@ class Reset : public ArmFaultVals<Reset> class UndefinedInstruction : public ArmFaultVals<UndefinedInstruction> { protected: - ExtMachInst machInst; bool unknown; const char *mnemonic; bool disabled; + ExceptionClass overrideEc; public: UndefinedInstruction(ExtMachInst _machInst, bool _unknown, const char *_mnemonic = NULL, bool _disabled = false) : - machInst(_machInst), unknown(_unknown), - mnemonic(_mnemonic), disabled(_disabled) - { - } - UndefinedInstruction() : - machInst(0), unknown(false), mnemonic("undefined"), disabled(false) + ArmFaultVals<UndefinedInstruction>(_machInst), + unknown(_unknown), mnemonic(_mnemonic), disabled(_disabled), + overrideEc(EC_INVALID) + {} + UndefinedInstruction(ExtMachInst _machInst, uint32_t _iss, ExceptionClass _overrideEc) : + ArmFaultVals<UndefinedInstruction>(_machInst, _iss), + overrideEc(_overrideEc) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; }; 
class SupervisorCall : public ArmFaultVals<SupervisorCall> { protected: - ExtMachInst machInst; - + ExceptionClass overrideEc; public: - SupervisorCall(ExtMachInst _machInst) : machInst(_machInst) + SupervisorCall(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SupervisorCall>(_machInst, _iss), + overrideEc(_overrideEc) {} - SupervisorCall() : machInst(0) + + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SecureMonitorCall : public ArmFaultVals<SecureMonitorCall> +{ + public: + SecureMonitorCall(ExtMachInst _machInst) : + ArmFaultVals<SecureMonitorCall>(_machInst) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SupervisorTrap : public ArmFaultVals<SupervisorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SupervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SupervisorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; +}; + +class SecureMonitorTrap : public ArmFaultVals<SecureMonitorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SecureMonitorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SecureMonitorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; +}; + +class HypervisorCall : public ArmFaultVals<HypervisorCall> +{ + public: + HypervisorCall(ExtMachInst _machInst, uint32_t _imm); +}; + +class HypervisorTrap : public ArmFaultVals<HypervisorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + 
HypervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<HypervisorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; }; template <class T> class AbortFault : public ArmFaultVals<T> { protected: + /** + * The virtual address the fault occurred at. If 2 stages of + * translation are being used then this is the intermediate + * physical address that is the starting point for the second + * stage of translation. + */ Addr faultAddr; + /** + * Original virtual address. If the fault was generated on the + * second stage of translation then this variable stores the + * virtual address used in the original stage 1 translation. + */ + Addr OVAddr; bool write; - uint8_t domain; - uint8_t status; + TlbEntry::DomainType domain; + uint8_t source; + uint8_t srcEncoded; + bool stage2; + bool s1ptw; + ArmFault::TranMethod tranMethod; public: - AbortFault(Addr _faultAddr, bool _write, - uint8_t _domain, uint8_t _status) : - faultAddr(_faultAddr), write(_write), - domain(_domain), status(_status) + AbortFault(Addr _faultAddr, bool _write, TlbEntry::DomainType _domain, uint8_t _source, + bool _stage2, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + faultAddr(_faultAddr), write(_write), domain(_domain), source(_source), + stage2(_stage2), s1ptw(false), tranMethod(_tranMethod) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + + FSR getFsr(ThreadContext *tc); + bool abortDisable(ThreadContext *tc); + uint32_t iss() const; + bool isStage2() const { return stage2; } + void annotate(ArmFault::AnnotationIDs id, uint64_t val); + bool isMMUFault() const; }; class PrefetchAbort : public AbortFault<PrefetchAbort> { public: - static const MiscRegIndex FsrIndex = MISCREG_IFSR; - static const MiscRegIndex FarIndex = MISCREG_IFAR; + static const MiscRegIndex FsrIndex = MISCREG_IFSR; + static const MiscRegIndex FarIndex = MISCREG_IFAR; + 
static const MiscRegIndex HFarIndex = MISCREG_HIFAR; - PrefetchAbort(Addr _addr, uint8_t _status) : - AbortFault<PrefetchAbort>(_addr, false, 0, _status) + PrefetchAbort(Addr _addr, uint8_t _source, bool _stage2 = false, + ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault<PrefetchAbort>(_addr, false, TlbEntry::DomainType::NoAccess, + _source, _stage2, _tranMethod) {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; }; class DataAbort : public AbortFault<DataAbort> { public: - static const MiscRegIndex FsrIndex = MISCREG_DFSR; - static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; + bool isv; + uint8_t sas; + uint8_t sse; + uint8_t srt; + + // AArch64 only + bool sf; + bool ar; + + DataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, uint8_t _source, + bool _stage2 = false, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault<DataAbort>(_addr, _write, _domain, _source, _stage2, + _tranMethod), + isv(false), sas (0), sse(0), srt(0), sf(false), ar(false) + {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + uint32_t iss() const; + void annotate(AnnotationIDs id, uint64_t val); +}; + +class VirtualDataAbort : public AbortFault<VirtualDataAbort> +{ + public: + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; - DataAbort(Addr _addr, uint8_t _domain, bool _write, uint8_t _status) : - AbortFault<DataAbort>(_addr, _write, _domain, _status) + 
VirtualDataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, + uint8_t _source) : + AbortFault<VirtualDataAbort>(_addr, _write, _domain, _source, false) {} + + void invoke(ThreadContext *tc, StaticInstPtr inst); }; -class Interrupt : public ArmFaultVals<Interrupt> {}; -class FastInterrupt : public ArmFaultVals<FastInterrupt> {}; +class Interrupt : public ArmFaultVals<Interrupt> +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); +}; + +class VirtualInterrupt : public ArmFaultVals<VirtualInterrupt> +{ + public: + VirtualInterrupt(); +}; + +class FastInterrupt : public ArmFaultVals<FastInterrupt> +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); + bool fiqDisable(ThreadContext *tc); +}; + +class VirtualFastInterrupt : public ArmFaultVals<VirtualFastInterrupt> +{ + public: + VirtualFastInterrupt(); +}; + +/// PC alignment fault (AArch64 only) +class PCAlignmentFault : public ArmFaultVals<PCAlignmentFault> +{ + protected: + /// The unaligned value of the PC + Addr faultPC; + public: + PCAlignmentFault(Addr _faultPC) : faultPC(_faultPC) + {} + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); +}; + +/// Stack pointer alignment fault (AArch64 only) +class SPAlignmentFault : public ArmFaultVals<SPAlignmentFault> +{ + public: + SPAlignmentFault(); +}; + +/// System error (AArch64 only) +class SystemError : public ArmFaultVals<SystemError> +{ + public: + SystemError(); + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; +}; // A fault that flushes the pipe, excluding the faulting instructions class FlushPipe : public ArmFaultVals<FlushPipe> @@ -246,6 +552,13 @@ class ArmSev : public ArmFaultVals<ArmSev> StaticInstPtr 
inst = StaticInst::nullStaticInstPtr); }; +/// Illegal Instruction Set State fault (AArch64 only) +class IllegalInstSetStateFault : public ArmFaultVals<IllegalInstSetStateFault> +{ + public: + IllegalInstSetStateFault(); +}; + } // namespace ArmISA #endif // __ARM_FAULTS_HH__ diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc new file mode 100644 index 000000000..49ba3402a --- /dev/null +++ b/src/arch/arm/insts/branch64.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/branch64.hh" + +namespace ArmISA +{ + +ArmISA::PCState +BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm2); + pcs.advance(); + return pcs; +} + +std::string +BranchImmCond64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false, true, condCode); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImm64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchReg64::generateDisassembly( + Addr pc, const SymbolTable 
*symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchRet64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (op1 != INTREG_X30) + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchEret64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +BranchImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImmImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%#x, ", imm1); + printTarget(ss, pc + imm2, symtab); + return ss.str(); +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh new file mode 100644 index 000000000..48881e0c2 --- /dev/null +++ b/src/arch/arm/insts/branch64.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__ +#define __ARCH_ARM_INSTS_BRANCH64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ +// Branch to a target computed with an immediate +class BranchImm64 : public ArmStaticInst +{ + protected: + int64_t imm; + + public: + BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Conditionally Branch to a target computed with an immediate +class BranchImmCond64 : public BranchImm64 +{ + protected: + ConditionCode condCode; + + public: + BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, ConditionCode _condCode) : + BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with a register +class BranchReg64 : public ArmStaticInst +{ + protected: + IntRegIndex op1; + + public: + BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Ret instruction +class BranchRet64 : public BranchReg64 +{ + public: + BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + BranchReg64(mnem, _machInst, __opClass, _op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Eret instruction +class BranchEret64 : public ArmStaticInst +{ + public: + BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : 
+ ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with an immediate and a register +class BranchImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm; + IntRegIndex op1; + + public: + BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with two immediates +class BranchImmImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm1; + int64_t imm2; + IntRegIndex op1; + + public: + BranchImmImmReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + imm1(_imm1), imm2(_imm2), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_BRANCH64_HH__ diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc new file mode 100644 index 000000000..f65219870 --- /dev/null +++ b/src/arch/arm/insts/data64.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed 
hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/data64.hh" + +namespace ArmISA +{ + +std::string +DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1, + INTREG_ZERO, INTREG_ZERO, 0, LSL, imm); + return ss.str(); +} + +std::string +DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, shiftType, 0); + return ss.str(); +} + +std::string +DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, LSL, 0); + return ss.str(); +} + +std::string +DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + return ss.str(); +} + +std::string +DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + 
printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + return ss.str(); +} + +std::string +DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printReg(ss, op3); + return ss.str(); +} + +std::string +DataXCondCompImmOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm, defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +} diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh new file mode 100644 index 000000000..8c0677b3d --- /dev/null +++ b/src/arch/arm/insts/data64.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 
2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_DATA64_HH__ +#define __ARCH_ARM_INSTS_DATA64_HH__ + +#include "arch/arm/insts/static_inst.hh" +#include "base/trace.hh" + +namespace ArmISA +{ + +class DataXImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXImmOnlyOp : public ArmStaticInst +{ + protected: + IntRegIndex dest; + uint64_t imm; + + DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXSRegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + int32_t shiftAmt; + ArmShiftType shiftType; + + DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + shiftAmt(_shiftAmt), shiftType(_shiftType) + {} + + std::string 
generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXERegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ArmExtendType extendType; + int32_t shiftAmt; + + DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + extendType(_extendType), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + + DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1Reg2ImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm1, imm2; + + DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + + DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + 
IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + + DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX3RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2, op3; + + DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), op3(_op3) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompImmOp : public ArmStaticInst +{ + protected: + IntRegIndex op1; + uint64_t imm; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, uint64_t _imm, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompRegOp : public ArmStaticInst +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), 
defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondSelOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_PREDINST_HH__ diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc new file mode 100644 index 000000000..1f44eed09 --- /dev/null +++ b/src/arch/arm/insts/fplib.cc @@ -0,0 +1,3086 @@ +/* +* Copyright (c) 2012-2013 ARM Limited +* All rights reserved +* +* The license below extends only to copyright in the software and shall +* not be construed as granting a license to any other intellectual +* property including but not limited to intellectual property relating +* to a hardware implementation of the functionality of the software +* licensed hereunder. You may use the software subject to the license +* terms below provided that you ensure that this notice is replicated +* unmodified and in its entirety in all distributions of the software, +* modified or unmodified, in source code or in binary form. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +* Authors: Edmund Grimley Evans +* Thomas Grocutt +*/ + +#include <stdint.h> + +#include <cassert> + +#include "fplib.hh" + +namespace ArmISA +{ + +#define FPLIB_RN 0 +#define FPLIB_RP 1 +#define FPLIB_RM 2 +#define FPLIB_RZ 3 +#define FPLIB_FZ 4 +#define FPLIB_DN 8 +#define FPLIB_AHP 16 + +#define FPLIB_IDC 128 // Input Denormal +#define FPLIB_IXC 16 // Inexact +#define FPLIB_UFC 8 // Underflow +#define FPLIB_OFC 4 // Overflow +#define FPLIB_DZC 2 // Division by Zero +#define FPLIB_IOC 1 // Invalid Operation + +static inline uint16_t +lsl16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x << shift : 0; +} + +static inline uint16_t +lsr16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x >> shift : 0; +} + +static inline uint32_t +lsl32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x << shift : 0; +} + +static inline uint32_t +lsr32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x >> shift : 0; +} + +static inline uint64_t +lsl64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? x << shift : 0; +} + +static inline uint64_t +lsr64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? 
// Shift the 128-bit value x1:x0 (x1 is the high word) left by 'shift' bits
// and store the result in *r1:*r0. Shifts of 128 or more yield zero.
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        // Must be handled separately: the general case below would compute
        // x0 >> 64, and shifting a 64-bit value by 64 is undefined.
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}

// Shift the 128-bit value x1:x0 (x1 is the high word) right by 'shift' bits
// and store the result in *r1:*r0. Shifts of 128 or more yield zero.
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        // Must be handled separately: the general case below would compute
        // x1 << 64, and shifting a 64-bit value by 64 is undefined.
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}

// Multiply a by b and store the product in *x1:*x0. Each operand is split
// into two 31-bit limbs, so only bits 0..61 of a and b take part; bits 62
// and 63 are ignored.
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    // Middle partial product obtained from a single extra multiplication.
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    // Propagate carries between the 31-bit columns.
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}

// Multiply the 64-bit value a by the 32-bit value b and store the (up to
// 96-bit) product in *x1:*x0.
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}

// Full 64x64 -> 128-bit multiplication; the product is stored in *x1:*x0.
static inline void
mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint64_t a0 = (uint32_t)a;
    uint64_t a1 = a >> 32;
    uint64_t b0 = (uint32_t)b;
    uint64_t b1 = b >> 32;
    uint64_t t1 = (a0 * b0 >> 32) + a1 * b0;
    uint64_t t2 = a0 * b1;
    // Carry out of bits 32..63 of the product, then the high contributions.
    uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32;
    x += t1 >> 32;
    x += t2 >> 32;
    x += a1 * b1;
    *x0 = a * b;
    *x1 = x;
}

// 128-bit addition: *x1:*x0 = a1:a0 + b1:b0 (wraps modulo 2^128).
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    // (*x0 < a0) is the carry out of the low word.
    *x1 = a1 + b1 + (*x0 < a0);
}

// 128-bit subtraction: *x1:*x0 = a1:a0 - b1:b0 (wraps modulo 2^128).
static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    // (*x0 > a0) is the borrow out of the low word.
    *x1 = a1 - b1 - (*x0 > a0);
}

// Three-way unsigned comparison of a1:a0 with b1:b0.
// Returns -1, 0 or 1 for less than, equal and greater than respectively.
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}

// Shift mnt left until bit 15 is set, subtracting the shift count from *exp.
// A zero mantissa returns 0 and leaves *exp untouched.
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    // Binary search for the leading one: try shifts of 8, 4, 2, 1.
    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

// Shift mnt left until bit 31 is set, subtracting the shift count from *exp.
// A zero mantissa returns 0 and leaves *exp untouched.
static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

// Shift mnt left until bit 63 is set, subtracting the shift count from *exp.
// A zero mantissa returns 0 and leaves *exp untouched.
static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

// Normalise the 128-bit mantissa *mnt1:*mnt0 in place so that bit 63 of
// *mnt1 is set, subtracting the shift count from *exp. A zero mantissa
// leaves all three outputs untouched.
static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    // Empty high word: move the low word up by a whole 64 bits first.
    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    // shift is always in 1..32 here, so (64 - shift) is a valid shift count.
    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}

// Assemble a half-precision bit pattern; only the low 10 bits of mnt are
// used.
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

// Assemble a single-precision bit pattern; only the low 23 bits of mnt are
// used.
static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

// Assemble a double-precision bit pattern; only the low 52 bits of mnt are
// used.
static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}

// Zero with the given sign (exponent and fraction fields both zero).
static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

// Largest finite value with the given sign: the highest non-special
// exponent and an all-ones fraction (the -1 is masked by fpNN_pack).
static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

// Infinity with the given sign: all-ones exponent, zero fraction.
static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

// Default NaN: positive sign, all-ones exponent, only the top fraction bit
// set.
static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}
+ } +} + +static inline void +fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, + int *flags) +{ + *sgn = x >> 31; + *exp = x >> 23 & 255; + *mnt = x & (((uint32_t)1 << 23) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint32_t)1 << 23; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline void +fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, + int *flags) +{ + *sgn = x >> 63; + *exp = x >> 52 & 2047; + *mnt = x & (((uint64_t)1 << 52) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint64_t)1 << 52; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline uint32_t +fp32_process_NaN(uint32_t a, int mode, int *flags) +{ + if (!(a >> 22 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint32_t)1 << 22; + } + return mode & FPLIB_DN ? fp32_defaultNaN() : a; +} + +static inline uint64_t +fp64_process_NaN(uint64_t a, int mode, int *flags) +{ + if (!(a >> 51 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint64_t)1 << 51; + } + return mode & FPLIB_DN ? 
fp64_defaultNaN() : a; +} + +static uint32_t +fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + + return 0; +} + +static uint32_t +fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + int c_exp = c >> 23 & 255; + uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) + return fp32_process_NaN(c, mode, flags); + + // Handle 
quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt) + return fp32_process_NaN(c, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + int c_exp = c >> 52 & 2047; + uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) + return fp64_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt) + return fp64_process_NaN(c, mode, flags); + + return 0; +} + +static uint16_t +fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint16_t int_mant; // mantissa for result, less than (1 << 11) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // There is no flush to zero in this case! 
+ + // The bottom 5 bits of mnt are orred together: + mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr16(mnt, 3 - exp); + error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 10) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 11) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (!(mode & FPLIB_AHP)) { + if (biased_exp >= 31) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp16_infinity(sgn); + } else { + return fp16_max_normal(sgn); + } + } + } else { + if (biased_exp >= 32) { + *flags |= FPLIB_IOC; + return fp16_pack(sgn, 31, -1); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp16_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint32_t int_mant; // mantissa for result, less than (1 << 24) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp32_zero(sgn); + } + + // The bottom 8 bits of mnt are orred together: + mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); + + if (exp > 0) { + 
biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr32(mnt, 3 - exp); + error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 23) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 24) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 255) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp32_infinity(sgn); + } else { + return fp32_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp32_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) +{ + return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static uint64_t +fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint64_t int_mant; // mantissa for result, less than (1 << 52) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp64_zero(sgn); + } + + // The bottom 11 bits of mnt are orred together: + mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr64(mnt, 
3 - exp); + error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint64_t)1 << 52) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint64_t)1 << 53) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 2047) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp64_infinity(sgn); + } else { + return fp64_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp64_pack(sgn, biased_exp, int_mant); +} + +static uint64_t +fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) +{ + return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static int +fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, 
flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static int +fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return 
a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static uint32_t +fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x, x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255) { + return fp32_infinity(a_sgn); + } else if (b_exp == 255) { + return fp32_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp32_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr32(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr32(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + 
x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); +} + +static uint64_t +fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x, x_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047) { + return fp64_infinity(a_sgn); + } else if (b_exp == 2047) { + return fp64_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp64_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr64(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr64(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); +} + +static uint32_t +fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { + *flags |= 
FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255 || b_exp == 255) { + return fp32_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp32_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 110; + x_mnt = (uint64_t)a_mnt * b_mnt; + x_mnt = fp64_normalise(x_mnt, &x_exp); + + // Convert to 32 bits, collapsing error into bottom bit: + x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x; + uint64_t x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047 || b_exp == 2047) { + return fp64_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp64_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 1000; + mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + + // Convert to 64 bits, collapsing error into bottom bit: + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); +} + +static uint32_t +fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint32_t a_mnt, b_mnt, c_mnt, x; + uint64_t x_mnt, y_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp32_process_NaNs3(a, b, c, mode, flags); + + 
// Quiet NaN added to product of zero and infinity: + if (a_exp == 255 && (a_mnt >> 22 & 1) && + ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || + (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { + x = fp32_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 255 && !c_mnt) || + (c_exp == 255 && !b_mnt) || + (a_exp == 255 && (b_exp == 255 || c_exp == 255) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255) + return fp32_infinity(a_sgn); + if (b_exp == 255 || c_exp == 255) + return fp32_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp32_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 13; + x_mnt = (uint64_t)a_mnt << 27; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 113; + y_mnt = (uint64_t)b_mnt * c_mnt << 3; + if (!y_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + y_mnt = (lsr64(y_mnt, x_exp - y_exp) | + !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); + y_exp = x_exp; + } else { + x_mnt = (lsr64(x_mnt, y_exp - x_exp) | + !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + x_mnt = x_mnt + y_mnt; + } else if (x_mnt >= y_mnt) { + x_mnt = x_mnt - y_mnt; + } else { + x_sgn ^= 1; + x_mnt = y_mnt - x_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); +} + +static uint64_t +fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint64_t a_mnt, b_mnt, c_mnt, x; + uint64_t x0_mnt, x1_mnt, y0_mnt, 
y1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp64_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 2047 && (a_mnt >> 51 & 1) && + ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || + (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { + x = fp64_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 2047 && !c_mnt) || + (c_exp == 2047 && !b_mnt) || + (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047) + return fp64_infinity(a_sgn); + if (b_exp == 2047 || c_exp == 2047) + return fp64_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp64_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 11; + x0_mnt = 0; + x1_mnt = a_mnt; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 1003; + mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); + if (!y0_mnt && !y1_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + uint64_t t0, t1; + lsl128(&t0, &t1, y0_mnt, y1_mnt, + x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); + lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); + y0_mnt |= !!(t0 | t1); + y_exp = x_exp; + } else { + uint64_t t0, t1; + lsl128(&t0, &t1, x0_mnt, x1_mnt, + y_exp - x_exp < 128 ? 
128 - (y_exp - x_exp) : 0); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); + x0_mnt |= !!(t0 | t1); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { + sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else { + x_sgn ^= 1; + sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); + } + + if (!x0_mnt && !x1_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + // Normalise and convert to 64 bits, collapsing error into bottom bit: + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); +} + +static uint32_t +fp32_div(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255 || !b_mnt) { + if (a_exp != 255) + *flags |= FPLIB_DZC; + return fp32_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 255) + return fp32_zero(a_sgn ^ b_sgn); + + // Divide, setting bottom bit if inexact: + a_mnt = fp32_normalise(a_mnt, &a_exp); + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 172; + x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; + x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_div(uint64_t a, uint64_t b, int mode, int 
*flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; + uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047 || !b_mnt) { + if (a_exp != 2047) + *flags |= FPLIB_DZC; + return fp64_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 2047) + return fp64_zero(a_sgn ^ b_sgn); + + // Find reciprocal of divisor with Newton-Raphson: + a_mnt = fp64_normalise(a_mnt, &a_exp); + b_mnt = fp64_normalise(b_mnt, &b_exp); + x_mnt = ~(uint64_t)0 / (b_mnt >> 31); + mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); + sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); + mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); + + // Multiply by dividend: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 1031; + mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); + x_mnt = x1_mnt; + + // This is an underestimate, so try adding one: + mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough + c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); + if (c <= 0) { + ++x_mnt; + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static void +set_fpscr0(FPSCR &fpscr, int flags) +{ + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + fpscr.ufc = 1; + } + if (flags & FPLIB_IXC) { + fpscr.ixc = 1; + } +} + +static uint32_t +fp32_sqrt(uint32_t a, int mode, 
int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp; + uint32_t a_mnt, x, x_mnt; + uint64_t t0, t1; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 255 && (uint32_t)(a_mnt << 9)) + return fp32_process_NaN(a, mode, flags); + + // Handle infinities and zeroes: + if (!a_mnt) { + return fp32_zero(a_sgn); + } + if (a_exp == 255 && !a_sgn) { + return fp32_infinity(a_sgn); + } + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + + a_mnt = fp32_normalise(a_mnt, &a_exp); + if (!(a_exp & 1)) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); + + x_sgn = 0; + x_exp = (a_exp + 147) >> 1; + x_mnt = ((x - (1 << 5)) >> 6) + 1; + t1 = (uint64_t)x_mnt * x_mnt; + t0 = (uint64_t)a_mnt << 19; + if (t1 > t0) { + --x_mnt; + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); +} + +static uint64_t +fp64_sqrt(uint64_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp, c; + uint64_t a_mnt, x_mnt, r, x0, x1; + uint32_t x; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { + return fp64_process_NaN(a, mode, flags); + } + + // Handle infinities and zeroes: + if (!a_mnt) + return fp64_zero(a_sgn); + if (a_exp == 2047 && !a_sgn) + return fp64_infinity(a_sgn); + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + + a_mnt = fp64_normalise(a_mnt, &a_exp); + if (a_exp & 1) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = 
((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((a_mnt / x) >> 2) + (x >> 1); + + // r = 1 / x; // 32-bit accuracy + r = ((uint64_t)1 << 62) / x; + + // r = r * (2 - x * r); // 64-bit accuracy + mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); + lsr128(&x0, &x1, x0, x1, 31); + + // x = (x + a * r) / 2; // 64-bit accuracy + mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); + lsl128(&x0, &x1, x0, x1, 5); + lsr128(&x0, &x1, x0, x1, 56); + + x0 = ((uint64_t)x << 31) + (x0 >> 1); + + x_sgn = 0; + x_exp = (a_exp + 1053) >> 1; + x_mnt = x0; + x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; + mul62x62(&x0, &x1, x_mnt, x_mnt); + lsl128(&x0, &x1, x0, x1, 19); + c = cmp128(x0, x1, 0, a_mnt); + if (c > 0) + --x_mnt; + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static int +modeConv(FPSCR fpscr) +{ + return (((int) fpscr) >> 22) & 0xF; +} + +static void +set_fpscr(FPSCR &fpscr, int flags) +{ + // translate back to FPSCR + bool underflow = false; + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + underflow = true; //xx Why is this required? 
+ fpscr.ufc = 1; + } + if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { + fpscr.ixc = 1; + } +} + +template <> +bool +fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +uint32_t +fplibAbs(uint32_t op) +{ + return op & ~((uint32_t)1 << 31); +} + +template <> +uint64_t +fplibAbs(uint64_t op) +{ + return op & ~((uint64_t)1 << 63); +} + +template <> +uint32_t +fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +int +fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = 
modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint32_t mnt1, mnt2; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9))) { + result = 3; + if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +int +fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint64_t mnt1, mnt2; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { + result = 3; + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 
8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint16_t +fp16_FPConvertNaN_32(uint32_t op) +{ + return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); +} + +static uint16_t +fp16_FPConvertNaN_64(uint64_t op) +{ + return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); +} + +static uint32_t +fp32_FPConvertNaN_16(uint16_t op) +{ + return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); +} + +static uint32_t +fp32_FPConvertNaN_64(uint64_t op) +{ + return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); +} + +static uint64_t +fp64_FPConvertNaN_16(uint16_t op) +{ + return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); +} + +static uint64_t +fp64_FPConvertNaN_32(uint32_t op) +{ + return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); +} + +static uint32_t +fp32_FPOnePointFive(int sgn) +{ + return fp32_pack(sgn, 127, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPOnePointFive(int sgn) +{ + return fp64_pack(sgn, 1023, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPThree(int sgn) +{ + return fp32_pack(sgn, 128, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPThree(int sgn) +{ + return fp64_pack(sgn, 1024, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPTwo(int sgn) +{ + return fp32_pack(sgn, 128, 0); +} + +static uint64_t +fp64_FPTwo(int sgn) +{ + return fp64_pack(sgn, 1024, 0); +} + +template <> +uint16_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1) || alt_hp) { + flags |= FPLIB_IOC; + 
} + } else if (exp == 255) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 127 + 15, + mnt >> 7 | !!(uint32_t)(mnt << 25), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint16_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 1023 + 15, + mnt >> 36 | !!(uint64_t)(mnt << 28), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result 
= fp32_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + result = fp32_round_(sgn, exp - 1023 + 127, + mnt >> 20 | !!(uint64_t)(mnt << 44), + rounding, mode, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack 
floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp32_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint32_t +fp32_repack(int sgn, int exp, uint32_t mnt) +{ + return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); +} + +static uint64_t +fp64_repack(int sgn, int exp, uint64_t mnt) +{ + return fp64_pack(sgn, mnt >> 52 ? 
exp : 0, mnt); +} + +static void +fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) + *op1 = fp32_infinity(sgn); + if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) + *op2 = fp32_infinity(sgn); +} + +static void +fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) + *op1 = fp64_infinity(sgn); + if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) + *op2 = fp64_infinity(sgn); +} + +template <> +uint32_t +fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? 
+ fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 1); + return fplibMax<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 1); + return fplibMax<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? 
+ fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 0); + return fplibMin<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 0); + return fplibMin<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp32_zero(sgn1 ^ sgn2); + } else { + result = fp32_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && 
!mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp64_zero(sgn1 ^ sgn2); + } else { + result = fp64_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibNeg(uint32_t op) +{ + return op ^ (uint32_t)1 << 31; +} + +template <> +uint64_t +fplibNeg(uint64_t op) +{ + return op ^ (uint64_t)1 << 63; +} + +static const uint8_t recip_sqrt_estimate[256] = { + 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, + 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, + 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, + 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, + 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, + 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, + 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, + 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, + 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, + 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, + 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, + 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, + 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, + 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, + 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 +}; + +template <> +uint32_t +fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) 
{ + result = fp32_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp32_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 255) { + result = fp32_zero(0); + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; + result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp64_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 2047) { + result = fp32_zero(0); + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; + result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPOnePointFive(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t 
+fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPOnePointFive(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_zero(sgn); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint32_t)(op << 1) >> 22)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + 
overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 253) { + result = fp32_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + int result_exp = 253 - exp; + uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; + fraction <<= 15; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp32_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_zero(sgn); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint64_t)(op << 1) >> 51)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? 
fp64_infinity(sgn) : fp64_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 2045) { + result = fp64_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + int result_exp = 2045 - exp; + uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; + fraction <<= 44; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp64_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecpX(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp32_pack(sgn, 254, 0); + } else { // Infinities and normals + result = fp32_pack(sgn, exp ^ 255, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecpX(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, 
&mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp64_pack(sgn, 2046, 0); + } else { // Infinities and normals + result = fp64_pack(sgn, exp ^ 2047, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else if (exp >= 150) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); + int err = exp < 118 ? 
1 : + (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp32_zero(sgn); + } else { + exp = 150; + mnt = fp32_normalise(x, &exp); + result = fp32_pack(sgn, exp + 8, mnt >> 8); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else if (exp >= 1075) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); + int err = exp < 1011 ? 
1 : + (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp64_zero(sgn); + } else { + exp = 1075; + mnt = fp64_normalise(x, &exp); + result = fp64_pack(sgn, exp + 11, mnt >> 11); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibSqrt(uint32_t op, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSqrt(uint64_t op, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint64_t +FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x; + int err; + + if (exp > 1023 + 63) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + x = lsr64(mnt << 11, 1023 + 63 - exp); + err = (exp > 1023 + 63 - 2 ? 
0 : + (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | + !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); + + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + if (err) { + *flags = FPLIB_IXC; + } + + return sgn ? -x : x; +} + +static uint32_t +FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); + if (u ? x >= (uint64_t)1 << 32 : + !(x < (uint64_t)1 << 31 || + (uint64_t)-x <= (uint64_t)1 << 31)) { + *flags = FPLIB_IOC; + x = ((uint32_t)!u << 31) - !sgn; + } + return x; +} + +template <> +uint32_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 
12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint32_t +fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 190 - fbits; + uint64_t x_mnt = x_sgn ? 
-a : a; + + // Handle zero: + if (!x_mnt) { + return fp32_zero(0); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 1024 + 62 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp64_zero(0); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); +} + +template <> +uint32_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint32_t res = fp32_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +template <> +uint64_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint64_t res = fp64_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +} diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh new file mode 100644 index 000000000..6263687fc --- /dev/null +++ b/src/arch/arm/insts/fplib.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Edmund Grimley Evans + * Thomas Grocutt + */ + +/** + * @file + * Floating-point library code, which will gradually replace vfp.hh. For + * portability, this library does not use floating-point data types. 
Currently, + * C's standard integer types are used in the API, though this could be changed + * to something like class Fp32 { uint32_t x; }, etc. + */ + +#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ +#define __ARCH_ARM_INSTS_FPLIB_HH__ + +#include <stdint.h> + +#include "arch/arm/miscregs.hh" + +namespace ArmISA +{ + +enum FPRounding { + FPRounding_TIEEVEN = 0, + FPRounding_POSINF = 1, + FPRounding_NEGINF = 2, + FPRounding_ZERO = 3, + FPRounding_TIEAWAY = 4, + FPRounding_ODD = 5 +}; + +static inline FPRounding +FPCRRounding(FPSCR &fpscr) +{ + return (FPRounding)((uint32_t)fpscr >> 22 & 3); +} + +/** Floating-point absolute value. */ +template <class T> +T fplibAbs(T op); +/** Floating-point add. */ +template <class T> +T fplibAdd(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare (quiet and signaling). */ +template <class T> +int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); +/** Floating-point compare equal. */ +template <class T> +bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than or equal. */ +template <class T> +bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than. */ +template <class T> +bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert precision. */ +template <class T1, class T2> +T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); +/** Floating-point division. */ +template <class T> +T fplibDiv(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum. */ +template <class T> +T fplibMax(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum number. */ +template <class T> +T fplibMaxNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum. */ +template <class T> +T fplibMin(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum number. */ +template <class T> +T fplibMinNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply. */ +template <class T> +T fplibMul(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply-add. 
*/ +template <class T> +T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply extended. */ +template <class T> +T fplibMulX(T op1, T op2, FPSCR &fpscr); +/** Floating-point negate. */ +template <class T> +T fplibNeg(T op); +/** Floating-point reciprocal square root estimate. */ +template <class T> +T fplibRSqrtEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal square root step. */ +template <class T> +T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal estimate. */ +template <class T> +T fplibRecipEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal step. */ +template <class T> +T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal exponent. */ +template <class T> +T fplibRecpX(T op, FPSCR &fpscr); +/** Floating-point convert to integer. */ +template <class T> +T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); +/** Floating-point square root. */ +template <class T> +T fplibSqrt(T op, FPSCR &fpscr); +/** Floating-point subtract. */ +template <class T> +T fplibSub(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert to fixed-point. */ +template <class T1, class T2> +T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); +/** Floating-point convert from fixed-point. */ +template <class T> +T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); + +/* Function specializations... 
*/ +template <> +uint32_t fplibAbs(uint32_t op); +template <> +uint64_t fplibAbs(uint64_t op); +template <> +uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); +template <> +int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR 
&fpscr); +template <> +uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, + FPSCR &fpscr); +template <> +uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, + FPSCR &fpscr); +template <> +uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibNeg(uint32_t op); +template <> +uint64_t fplibNeg(uint64_t op); +template <> +uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); +template<> +uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t 
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +} + +#endif diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 26a916fc7..42cb98a7c 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,7 +43,9 @@ #include <sstream> #include "arch/arm/insts/macromem.hh" + #include "arch/arm/generated/decoder.hh" +#include "arch/arm/insts/neon64_mem.hh" using namespace std; using namespace ArmISAInst; @@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, } } +PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, + bool signExt, bool exclusive, bool acrel, + int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : + PredMacroOp(mnem, machInst, __opClass) +{ + bool writeback = (mode != AddrMd_Offset); + numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + microOps = new StaticInstPtr[numMicroops]; + + StaticInstPtr *uop = microOps; + + bool post = (mode == AddrMd_PostIndex); + + rn = makeSP(rn); + + *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 
0 : imm); + + if (fp) { + if (size == 16) { + if (load) { + *++uop = new MicroLdrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } + } else if (size == 8) { + if (load) { + *++uop = new MicroLdrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } else { + if (size == 8) { + if (load) { + *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } + } else if (size == 4) 
{ + if (load) { + if (signExt) { + *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } else { + *++uop = new MicroStrDXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } + + if (writeback) { + *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + post ? imm : 0); + } + + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } +} + +BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + 
+BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, IntRegIndex offset, + ArmExtendType type, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } else { + microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex dest, + int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); + microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, 
unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : @@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, if (deinterleave) numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; uint32_t noAlign = TLB::MustBeOne; @@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (loadSize) { @@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t noAlign = TLB::MustBeOne; - RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; unsigned uopIdx = 0; if (interleave) { @@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (elems) { @@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, microOps[numMicroops - 1]->setLastMicroop(); } +VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 
1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for (int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroDeintNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; ++i) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 
2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroIntNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 
1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for (; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64)
+    if (wb) {
+        if (rm != ((RegIndex) INTREG_X31)) {
+            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+                                                      UXTX, 0);
+        } else {
+            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+                                                   totNumBytes);
+        }
+    }
+
+    for(int i = 0; i < numMarshalMicroops; ++i) {
+        microOps[uopIdx++] = new MicroUnpackNeon64(
+            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+            numStructElems, index, i /* step */, replicate);
+    }
+
+    assert(uopIdx == numMicroops);
+
+    for (int i = 0; i < numMicroops - 1; i++) {
+        microOps[i]->setDelayedCommit();
+    }
+    microOps[numMicroops - 1]->setLastMicroop();
+}
+
+/**
+ * Build the micro-op sequence for a store handled by VstSingleOp64.
+ *
+ * The sequence is, in order: one or two MicroPackNeon64 micro-ops, one
+ * MicroNeonStore64 micro-op per (up to) 16 bytes transferred, and, when
+ * @p wb is set, a single add micro-op that post-increments the base register.
+ *
+ * @param rn base register; converted with makeSP() before use
+ * @param vd vector register index passed to the pack micro-ops
+ * @param rm post-increment register, or INTREG_X31 to post-increment by the
+ *           total number of bytes transferred
+ * @param eSize log2 of the element size in bytes (bytes = 1 << eSize)
+ * @param numStructElems number of elements; numStructElems * element size
+ *                       must not exceed 64 bytes (asserted)
+ * @param wb true to append the base-register writeback micro-op
+ */
+VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
+                             OpClass __opClass, RegIndex rn, RegIndex vd,
+                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
+                             uint8_t numStructElems, uint8_t index, bool wb,
+                             bool replicate) :
+    PredMacroOp(mnem, machInst, __opClass),
+    // The parameters shadow the members of the same name, so the members
+    // have to be initialized explicitly or they stay uninitialized.
+    eSize(eSize), dataSize(dataSize), numStructElems(numStructElems),
+    index(index), wb(wb), replicate(replicate)
+{
+    RegIndex vx = NumFloatV8ArchRegs / 4;
+    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
+    bool baseIsSP = isSP((IntRegIndex) rnsp);
+
+    // The optional writeback add accounts for one micro-op.
+    numMicroops = wb ? 1 : 0;
+
+    int eSizeBytes = 1 << eSize;
+    int totNumBytes = numStructElems * eSizeBytes;
+    assert(totNumBytes <= 64);
+
+    // The guiding principle here is that no more than 16 bytes can be
+    // transferred at a time
+    int numMemMicroops = totNumBytes / 16;
+    int residuum = totNumBytes % 16;
+    if (residuum)
+        ++numMemMicroops;
+    numMicroops += numMemMicroops;
+
+    int numMarshalMicroops = totNumBytes > 32 ?
2 : 1;
+    numMicroops += numMarshalMicroops;
+
+    microOps = new StaticInstPtr[numMicroops];
+    unsigned uopIdx = 0;
+
+    // Pack micro-ops come first; each one targets the register pair
+    // starting at vx + 2 * i.
+    for(int i = 0; i < numMarshalMicroops; ++i) {
+        microOps[uopIdx++] = new MicroPackNeon64(
+            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+            numStructElems, index, i /* step */, replicate);
+    }
+
+    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
+        TLB::AllowUnaligned;
+
+    // All stores but the last move a full 16 bytes; the last one moves the
+    // remainder (or 16 bytes when the total is a multiple of 16).
+    int i = 0;
+    for(; i < numMemMicroops - 1; ++i) {
+        microOps[uopIdx++] = new MicroNeonStore64(
+            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
+            baseIsSP, 16 /* accSize */, eSize);
+    }
+    microOps[uopIdx++] = new MicroNeonStore64(
+        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
+        residuum ? residuum : 16 /* accSize */, eSize);
+
+    // Writeback microop: the post-increment amount is encoded in "Rm": a
+    // 64-bit general register OR as '11111' for an immediate value equal to
+    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
+    if (wb) {
+        if (rm != ((RegIndex) INTREG_X31)) {
+            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+                                                      UXTX, 0);
+        } else {
+            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+                                                   totNumBytes);
+        }
+    }
+
+    assert(uopIdx == numMicroops);
+
+    // Every micro-op except the final one is marked as delayed-commit.
+    for (int i = 0; i < numMicroops - 1; i++) {
+        microOps[i]->setDelayedCommit();
+    }
+    microOps[numMicroops - 1]->setLastMicroop();
+}
+
 MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                              OpClass __opClass, IntRegIndex rn,
                              RegIndex vd, bool single, bool up,
@@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
     // to be functionally identical except that fldmx is deprecated. For now
     // we'll assume they're otherwise interchangable.
     int count = (single ?
offset : (offset / 2));
-    if (count == 0 || count > NumFloatArchRegs)
+    if (count == 0 || count > NumFloatV7ArchRegs)
         warn_once("Bad offset field for VFP load/store multiple.\n");
     if (count == 0) {
         // Force there to be at least one microop so the macroop makes sense.
         writeback = true;
     }
-    if (count > NumFloatArchRegs)
-        count = NumFloatArchRegs;
+    if (count > NumFloatV7ArchRegs)
+        count = NumFloatV7ArchRegs;

     numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
     microOps = new StaticInstPtr[numMicroops];
@@ -934,6 +1421,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 }

 std::string
+MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{ // Prints the mnemonic, then ura, urb and "#imm", comma-separated.
+    std::stringstream ss;
+    printMnemonic(ss);
+    printReg(ss, ura);
+    ss << ", ";
+    printReg(ss, urb);
+    ss << ", ";
+    ccprintf(ss, "#%d", imm);
+    return ss.str();
+}
+
+std::string
 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 {
     std::stringstream ss;
@@ -943,6 +1443,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 }

 std::string
+MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{ // Prints the mnemonic, ura, urb, then urc via printExtendOperand().
+    std::stringstream ss;
+    printMnemonic(ss);
+    printReg(ss, ura);
+    ccprintf(ss, ", ");
+    printReg(ss, urb);
+    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
+    return ss.str();
+}
+
+std::string
 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 {
     std::stringstream ss;
diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh
index 4933a1e7c..fc8e3e1b7 100644
--- a/src/arch/arm/insts/macromem.hh
+++ b/src/arch/arm/insts/macromem.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -85,6 +85,27 @@ class MicroOp : public PredOp
     }
 };

+class MicroOpX : public ArmStaticInst
+{ // Micro-op base that derives directly from ArmStaticInst.
+  protected:
+    MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass)
+            : ArmStaticInst(mnem, machInst, __opClass)
+    {}
+    // Last micro-op: uEnd(); other micro-op: uAdvance(); else: advance().
+  public:
+    void
+    advancePC(PCState &pcState) const
+    {
+        if (flags[IsLastMicroop]) {
+            pcState.uEnd();
+        } else if (flags[IsMicroop]) {
+            pcState.uAdvance();
+        } else {
+            pcState.advance();
+        }
+    }
+};
+
 /**
  * Microops for Neon loads/stores
  */
@@ -136,6 +157,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp
 };

 /**
+ * Microops for AArch64 NEON load/store (de)interleaving
+ */
+class MicroNeonMixOp64 : public MicroOp
+{
+  protected:
+    RegIndex dest, op1;
+    uint8_t eSize, dataSize, numStructElems, numRegs, step;
+
+    MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                     RegIndex _dest, RegIndex _op1, uint8_t _eSize,
+                     uint8_t _dataSize, uint8_t _numStructElems,
+                     uint8_t _numRegs, uint8_t _step)
+        : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1),
+          eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems),
+          numRegs(_numRegs), step(_step)
+    {
+    }
+};
+// Like MicroNeonMixOp64, but with a lane and a replicate flag, no numRegs.
+class MicroNeonMixLaneOp64 : public MicroOp
+{
+  protected:
+    RegIndex dest, op1;
+    uint8_t eSize, dataSize, numStructElems, lane, step;
+    bool replicate;
+
+    MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst,
+                         OpClass __opClass, RegIndex _dest, RegIndex _op1,
+                         uint8_t _eSize, uint8_t _dataSize,
+                         uint8_t _numStructElems, uint8_t _lane, uint8_t _step,
+                         bool _replicate = false)
+        : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1),
+          eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems),
+          lane(_lane), step(_step), replicate(_replicate)
+    {
+    }
+};
+
+/**
+ * Base classes for microcoded AArch64 NEON memory instructions.
+ */
+class VldMultOp64 : public PredMacroOp
+{
+  protected:
+    uint8_t eSize, dataSize, numStructElems, numRegs;
+    bool wb;
+
+    VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+                uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs,
+                bool wb);
+};
+// Store counterpart of VldMultOp64: same members and constructor signature.
+class VstMultOp64 : public PredMacroOp
+{
+  protected:
+    uint8_t eSize, dataSize, numStructElems, numRegs;
+    bool wb;
+
+    VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+                uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs,
+                bool wb);
+};
+// Takes an element index and a replicate flag in place of numRegs.
+class VldSingleOp64 : public PredMacroOp
+{
+  protected:
+    uint8_t eSize, dataSize, numStructElems, index;
+    bool wb, replicate;
+
+    VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+                  uint8_t dataSize, uint8_t numStructElems, uint8_t index,
+                  bool wb, bool replicate = false);
+};
+// Store counterpart of VldSingleOp64: same members and constructor signature.
+class VstSingleOp64 : public PredMacroOp
+{
+  protected:
+    uint8_t eSize, dataSize, numStructElems, index;
+    bool wb, replicate;
+
+    VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+                  uint8_t dataSize, uint8_t numStructElems, uint8_t index,
+                  bool wb, bool replicate = false);
+};
+
+/**
  * Microops of the form
  * PC = IntRegA
  * CPSR = IntRegB
@@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp
 {
   protected:
     RegIndex ura, urb;
-    uint32_t imm;
+    int32_t imm;

     MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
-                  RegIndex _ura, RegIndex _urb, uint32_t _imm)
+                  RegIndex _ura, RegIndex _urb, int32_t _imm)
             : MicroOp(mnem, machInst, __opClass),
               ura(_ura), urb(_urb), imm(_imm)
     {
@@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp
     std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
 };

+class MicroIntImmXOp : public MicroOpX
+{ // Same fields as MicroIntImmOp, with a 64-bit immediate and MicroOpX base.
+  protected:
+    RegIndex ura, urb;
+    int64_t imm;
+
+    MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                   RegIndex _ura, RegIndex _urb, int64_t _imm)
+            : MicroOpX(mnem, machInst, __opClass),
+              ura(_ura), urb(_urb), imm(_imm)
+    {
+    }
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
 /**
  * Microops of the form IntRegA = IntRegB op IntRegC
  */
@@ -210,6 +337,25 @@ class MicroIntOp : public MicroOp
     std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
 };

+class MicroIntRegXOp : public MicroOp
+{ // Registers ura, urb, urc plus an extend type and a shift amount.
+  protected:
+    RegIndex ura, urb, urc;
+    ArmExtendType type;
+    uint32_t shiftAmt;
+
+    MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                   RegIndex _ura, RegIndex _urb, RegIndex _urc,
+                   ArmExtendType _type, uint32_t _shiftAmt)
+            : MicroOp(mnem, machInst, __opClass),
+              ura(_ura), urb(_urb), urc(_urc),
+              type(_type), shiftAmt(_shiftAmt)
+    {
+    }
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
 /**
  * Microops of the form IntRegA = IntRegB op shifted IntRegC
  */
@@ -261,6 +407,61 @@ class MacroMemOp : public PredMacroOp
 };

 /**
+ * Base class for pair load/store instructions.
+ */
+class PairMemOp : public PredMacroOp
+{
+  public:
+    enum AddrMode {
+        AddrMd_Offset,
+        AddrMd_PreIndex,
+        AddrMd_PostIndex
+    };
+
+  protected:
+    PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+              uint32_t size, bool fp, bool load, bool noAlloc, bool signExt,
+              bool exclusive, bool acrel, int64_t imm, AddrMode mode,
+              IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2);
+};
+// Constructor takes a load flag, destination, base register and immediate.
+class BigFpMemImmOp : public PredMacroOp
+{
+  protected:
+    BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+// Same constructor signature as BigFpMemImmOp.
+class BigFpMemPostOp : public PredMacroOp
+{
+  protected:
+    BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                   bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+// Same constructor signature as BigFpMemImmOp.
+class BigFpMemPreOp : public PredMacroOp
+{
+  protected:
+    BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+// Adds an offset register and extend type to the BigFpMemImmOp signature.
+class BigFpMemRegOp : public PredMacroOp
+{
+  protected:
+    BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  bool load, IntRegIndex dest, IntRegIndex base,
+                  IntRegIndex offset, ArmExtendType type, int64_t imm);
+};
+// Constructor takes only a destination register and an immediate.
+class BigFpMemLitOp : public PredMacroOp
+{
+  protected:
+    BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+                  IntRegIndex dest, int64_t imm);
+};
+
+/**
  * Base classes for microcoded integer memory instructions.
*/ class VldMultOp : public PredMacroOp diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc index 552803b6a..15702ff83 100644 --- a/src/arch/arm/insts/mem.cc +++ b/src/arch/arm/insts/mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const case MODE_ABORT: ss << "abort"; break; + case MODE_HYP: + ss << "hyp"; + break; case MODE_UNDEFINED: ss << "undefined"; break; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc new file mode 100644 index 000000000..4d1fdd302 --- /dev/null +++ b/src/arch/arm/insts/mem64.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/mem64.hh"
+#include "arch/arm/tlb.hh"
+#include "base/loader/symtab.hh"
+#include "mem/request.hh"
+
+using namespace std;
+
+namespace ArmISA
+{
+
+/// Prints the mnemonic followed by ", [" base "]".
+std::string
+SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, ", [");
+    printReg(ss, base);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+
+
+/// Prints the common prefix "mnemonic dest, [base"; the caller appends the
+/// addressing-mode specific remainder, including the closing bracket.
+void
+Memory64::startDisassembly(std::ostream &os) const
+{
+    printMnemonic(os, "", false);
+    printReg(os, dest);
+    ccprintf(os, ", [");
+    printReg(os, base);
+}
+
+/// Adjusts the access flags for exclusive and acquire/release accesses.
+/// @param exclusive sets Request::LLSC; when false, unaligned accesses are
+///                  allowed instead
+/// @param acrel marks the instruction as a memory, write and read barrier
+void
+Memory64::setExcAcRel(bool exclusive, bool acrel)
+{
+    if (exclusive)
+        memAccessFlags |= Request::LLSC;
+    else
+        memAccessFlags |= ArmISA::TLB::AllowUnaligned;
+    if (acrel) {
+        flags[IsMemBarrier] = true;
+        flags[IsWriteBarrier] = true;
+        flags[IsReadBarrier] = true;
+    }
+}
+
+/// Offset form: "... [base]" or "... [base, #imm]" when imm is non-zero.
+std::string
+MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    startDisassembly(ss);
+    if (imm)
+        ccprintf(ss, ", #%d", imm);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Two-destination offset form: "dest, dest2, [base]" with an optional
+/// ", #imm" inside the brackets.
+std::string
+MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, dest);
+    ccprintf(ss, ", ");
+    printReg(ss, dest2);
+    ccprintf(ss, ", [");
+    printReg(ss, base);
+    if (imm)
+        ccprintf(ss, ", #%d", imm);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Same as MemoryDImm64 but with the result register printed first.
+std::string
+MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, result);
+    ccprintf(ss, ", ");
+    printReg(ss, dest);
+    ccprintf(ss, ", ");
+    printReg(ss, dest2);
+    ccprintf(ss, ", [");
+    printReg(ss, base);
+    if (imm)
+        ccprintf(ss, ", #%d", imm);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Pre-index form: "... [base, #imm]!" (the immediate is always printed).
+std::string
+MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    startDisassembly(ss);
+    ccprintf(ss, ", #%d]!", imm);
+    return ss.str();
+}
+
+/// Post-index form: "... [base]" followed by ", #imm" when imm is non-zero.
+std::string
+MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    startDisassembly(ss);
+    // Close the bracket exactly once; the immediate, if any, goes after it.
+    // Printing "], #imm" and then another "]" left a stray trailing bracket.
+    ccprintf(ss, "]");
+    if (imm)
+        ccprintf(ss, ", #%d", imm);
+    return ss.str();
+}
+
+/// Register-offset form: the offset operand is printed by
+/// printExtendOperand() before the closing bracket.
+std::string
+MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    startDisassembly(ss);
+    printExtendOperand(false, ss, offset, type, shiftAmt);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Plain form: "... [base]".
+std::string
+MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    startDisassembly(ss);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Prints "dest, result, [base]".
+std::string
+MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, dest);
+    ccprintf(ss, ", ");
+    printReg(ss, result);
+    ccprintf(ss, ", [");
+    printReg(ss, base);
+    ccprintf(ss, "]");
+    return ss.str();
+}
+
+/// Prints dest followed by ", #" and the value pc + imm.
+std::string
+MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, dest);
+    ccprintf(ss, ", #%d", pc + imm);
+    return ss.str();
+}
+}
diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh
new file mode 100644
index 000000000..21c1e1ea8
--- /dev/null
+++ b/src/arch/arm/insts/mem64.hh
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Gabe Black
+ */
+#ifndef __ARCH_ARM_MEM64_HH__
+#define __ARCH_ARM_MEM64_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+
+namespace ArmISA
+{
+
+/// Static instruction holding a base register, a destination register and
+/// a 64-bit immediate.
+class SysDC64 : public ArmStaticInst
+{
+  protected:
+    IntRegIndex base;
+    IntRegIndex dest;
+    uint64_t imm;
+
+    SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+            IntRegIndex _base, IntRegIndex _dest, uint64_t _imm)
+        : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest),
+        imm(_imm)
+    {}
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// Base for instructions that may be executed either stand-alone or as a
+/// micro-op: advancePC() picks the PC update from the instruction flags.
+class MightBeMicro64 : public ArmStaticInst
+{
+  protected:
+    MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
+        : ArmStaticInst(mnem, _machInst, __opClass)
+    {}
+
+    /// Last micro-op: uEnd(); other micro-op: uAdvance(); else: advance().
+    void
+    advancePC(PCState &pcState) const
+    {
+        if (flags[IsLastMicroop]) {
+            pcState.uEnd();
+        } else if (flags[IsMicroop]) {
+            pcState.uAdvance();
+        } else {
+            pcState.advance();
+        }
+    }
+};
+
+/// Common base of the 64-bit memory instruction classes below: holds the
+/// destination and base registers, the access flags and an optional
+/// micro-op array.
+class Memory64 : public MightBeMicro64
+{
+  public:
+    enum AddrMode {
+        AddrMd_Offset,
+        AddrMd_PreIndex,
+        AddrMd_PostIndex
+    };
+
+  protected:
+
+    IntRegIndex dest;
+    IntRegIndex base;
+    /// True if the base register is SP (used for SP alignment checking).
+    bool baseIsSP;
+    static const unsigned numMicroops = 3;
+
+    /// Micro-op array released with delete [] in the destructor; NULL
+    /// until something assigns it.
+    StaticInstPtr *uops;
+
+    Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+             IntRegIndex _dest, IntRegIndex _base)
+    : MightBeMicro64(mnem, _machInst, __opClass),
+      dest(_dest), base(_base), uops(NULL), memAccessFlags(0)
+    {
+        baseIsSP = isSP(_base);
+    }
+
+    virtual
+    ~Memory64()
+    {
+        delete [] uops;
+    }
+
+    /// Returns micro-op number microPC; asserts that the array exists and
+    /// that the index is below numMicroops.
+    StaticInstPtr
+    fetchMicroop(MicroPC microPC) const
+    {
+        assert(uops != NULL && microPC < numMicroops);
+        return uops[microPC];
+    }
+
+    void startDisassembly(std::ostream &os) const;
+
+    /// Request flags for the access. Starts at zero so that setExcAcRel(),
+    /// which only ORs bits in, never reads an indeterminate value.
+    unsigned memAccessFlags;
+
+    void setExcAcRel(bool exclusive, bool acrel);
+};
+
+/// Memory64 plus a signed 64-bit immediate.
+class MemoryImm64 : public Memory64
+{
+  protected:
+    int64_t imm;
+
+    MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                IntRegIndex _dest, IntRegIndex _base, int64_t _imm)
+        : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// MemoryImm64 plus a second destination register.
+class MemoryDImm64 : public MemoryImm64
+{
+  protected:
+    IntRegIndex dest2;
+
+    MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base,
+                int64_t _imm)
+        : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm),
+          dest2(_dest2)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// MemoryDImm64 plus a result register.
+/// NOTE(review): _imm is int32_t here although MemoryDImm64 takes int64_t.
+class MemoryDImmEx64 : public MemoryDImm64
+{
+  protected:
+    IntRegIndex result;
+
+    MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                 IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2,
+                 IntRegIndex _base, int32_t _imm)
+        : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2,
+                     _base, _imm), result(_result)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// MemoryImm64 with its own (pre-index) disassembly.
+class MemoryPreIndex64 : public MemoryImm64
+{
+  protected:
+    MemoryPreIndex64(const char *mnem, ExtMachInst _machInst,
+                     OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+                     int64_t _imm)
+        : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// MemoryImm64 with its own (post-index) disassembly.
+class MemoryPostIndex64 : public MemoryImm64
+{
+  protected:
+    MemoryPostIndex64(const char *mnem, ExtMachInst _machInst,
+                      OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+                      int64_t _imm)
+        : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// Memory64 plus an offset register, an extend type and a shift amount.
+class MemoryReg64 : public Memory64
+{
+  protected:
+    IntRegIndex offset;
+    ArmExtendType type;
+    uint64_t shiftAmt;
+
+    MemoryReg64(const char *mnem, ExtMachInst _machInst,
+                OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+                IntRegIndex _offset, ArmExtendType _type,
+                uint64_t _shiftAmt)
+        : Memory64(mnem, _machInst, __opClass, _dest, _base),
+          offset(_offset), type(_type), shiftAmt(_shiftAmt)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// Memory64 with no additional operands.
+class MemoryRaw64 : public Memory64
+{
+  protected:
+    MemoryRaw64(const char *mnem, ExtMachInst _machInst,
+                OpClass __opClass, IntRegIndex _dest, IntRegIndex _base)
+        : Memory64(mnem, _machInst, __opClass, _dest, _base)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// Memory64 plus a result register.
+class MemoryEx64 : public Memory64
+{
+  protected:
+    IntRegIndex result;
+
+    MemoryEx64(const char *mnem, ExtMachInst _machInst,
+               OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+               IntRegIndex _result)
+        : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+/// Memory64 with INTREG_ZERO as base and an immediate that the
+/// disassembly adds to the pc.
+class MemoryLiteral64 : public Memory64
+{
+  protected:
+    int64_t imm;
+
+    MemoryLiteral64(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, IntRegIndex _dest, int64_t _imm)
+        : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+}
+
+#endif //__ARCH_ARM_MEM64_HH__
diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc
index 6320bb6da..efc334c4b 100644
--- a/src/arch/arm/insts/misc.cc
+++ b/src/arch/arm/insts/misc.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -146,6 +146,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 }

 std::string
+MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    // Prints the mnemonic, then dest, dest2 and op1, comma-separated.
+    std::stringstream ss;
+    printMnemonic(ss);
+    printReg(ss, dest);
+    ss << ", ";
+    printReg(ss, dest2);
+    ss << ", ";
+    printReg(ss, op1);
+    return ss.str();
+}
+
+std::string
+McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    // Prints the mnemonic, then dest, op1 and op2, comma-separated.
+    std::stringstream ss;
+    printMnemonic(ss);
+    printReg(ss, dest);
+    ss << ", ";
+    printReg(ss, op1);
+    ss << ", ";
+    printReg(ss, op2);
+    return ss.str();
+}
+
+std::string
 ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 {
     std::stringstream ss;
@@ -230,6 +256,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 }

 std::string
+RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    // Prints the mnemonic, dest, then both immediates as "#imm1, #imm2".
+    std::stringstream ss;
+    printMnemonic(ss);
+    printReg(ss, dest);
+    ccprintf(ss, ", #%d, #%d", imm1, imm2);
+    return ss.str();
+}
+
+std::string
 RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 {
     std::stringstream ss;
diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh
index c9e114f85..3d947a272 100644
--- a/src/arch/arm/insts/misc.hh
+++ b/src/arch/arm/insts/misc.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -94,6 +94,42 @@ class MsrRegOp :
public MsrBase
     std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
 };

+class MrrcOp : public PredOp
+{ // One source register (op1), two destinations and a 32-bit immediate.
+  protected:
+    IntRegIndex op1;
+    IntRegIndex dest;
+    IntRegIndex dest2;
+    uint32_t imm;
+
+    MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+           IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2,
+           uint32_t _imm) :
+        PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest),
+        dest2(_dest2), imm(_imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+// Mirror of MrrcOp: two source registers (op1, op2) and one destination.
+class McrrOp : public PredOp
+{
+  protected:
+    IntRegIndex op1;
+    IntRegIndex op2;
+    IntRegIndex dest;
+    uint32_t imm;
+
+    McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+           IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest,
+           uint32_t _imm) :
+        PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2),
+        dest(_dest), imm(_imm)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
 class ImmOp : public PredOp
 {
   protected:
@@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp
     std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
 };

+class RegImmImmOp : public PredOp
+{ // One destination register and two 64-bit immediates.
+  protected:
+    IntRegIndex dest;
+    IntRegIndex op1; // NOTE(review): never set by the constructor below.
+    uint64_t imm1;
+    uint64_t imm2;
+
+    RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) :
+        PredOp(mnem, _machInst, __opClass),
+        dest(_dest), imm1(_imm1), imm2(_imm2)
+    {}
+
+    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
 class RegRegImmImmOp : public PredOp
 {
   protected:
diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc
new file mode 100644
index 000000000..3553020da
--- /dev/null
+++ b/src/arch/arm/insts/misc64.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a
license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/misc64.hh"
+// Prints the mnemonic, dest, op1 and then "#imm1, #imm2".
+std::string
+RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, dest);
+    ss << ", ";
+    printReg(ss, op1);
+    ccprintf(ss, ", #%d, #%d", imm1, imm2);
+    return ss.str();
+}
+// Prints the mnemonic, dest, op1, op2 and then "#imm".
+std::string
+RegRegRegImmOp64::generateDisassembly(
+    Addr pc, const SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printReg(ss, dest);
+    ss << ", ";
+    printReg(ss, op1);
+    ss << ", ";
+    printReg(ss, op2);
+    ccprintf(ss, ", #%d", imm);
+    return ss.str();
+}
+// Prints the fixed text "unknown" followed by the raw machInst value in hex.
+std::string
+UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    return csprintf("%-10s (inst %#08x)", "unknown", machInst);
+}
diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh
new file mode 100644
index 000000000..5a0e18224
--- /dev/null
+++ b/src/arch/arm/insts/misc64.hh
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ARM_INSTS_MISC64_HH__ +#define __ARCH_ARM_INSTS_MISC64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +class RegRegImmImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class RegRegRegImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + uint64_t imm; + + RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class UnknownOp64 : public ArmStaticInst +{ + protected: + + UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +#endif diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh new file mode 100644 index 000000000..01ce1b624 --- /dev/null +++ b/src/arch/arm/insts/neon64_mem.hh @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Mbou Eyole + * Giacomo Gabrielli + */ + +/// @file +/// Utility functions and datatypes used by AArch64 NEON memory instructions. 
+ +#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__ +#define __ARCH_ARM_INSTS_NEON64_MEM_HH__ + +namespace ArmISA +{ + +typedef uint64_t XReg; + +/// 128-bit NEON vector register. +struct VReg { + XReg hi; + XReg lo; +}; + +/// Write a single NEON vector element leaving the others untouched. +inline void +writeVecElem(VReg *dest, XReg src, int index, int eSize) +{ + // eSize must be in the range [0, 3] (checked before it is used as a shift count): + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize >= 0 && eSize <= 3); + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(index >= 0 && lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + XReg sMask = maskBits; + maskBits = sMask << shiftAmt; + + if (lsbPos < 64) { + dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt); + } else { + dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt); + } +} + +/// Read a single NEON vector element.
+inline XReg +readVecElem(VReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + XReg data; + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + if (lsbPos < 64) { + data = (src.lo >> shiftAmt) & maskBits; + } else { + data = (src.hi >> shiftAmt) & maskBits; + } + return data; +} + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__ diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index c441d1f32..c5e2ab386 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm) } static inline uint64_t -simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) +simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, + bool isAarch64 = false) { uint64_t bigData = data; immValid = true; @@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) } break; case 0xf: - if (!op) { - uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20); - bigData = (bits(bigData, 5, 0) << 19) | - (bVal << 25) | (bits(bigData, 7) << 31); - bigData |= (bigData << 32); - break; + { + uint64_t bVal = 0; + if (!op) { + bVal = bits(bigData, 6) ? (0x1F) : (0x20); + bigData = (bits(bigData, 5, 0) << 19) | + (bVal << 25) | (bits(bigData, 7) << 31); + bigData |= (bigData << 32); + break; + } else if (isAarch64) { + bVal = bits(bigData, 6) ? 
(0x0FF) : (0x100); + bigData = (bits(bigData, 5, 0) << 48) | + (bVal << 54) | (bits(bigData, 7) << 63); + break; + } } // Fall through, immediate encoding is invalid. default: @@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst /// Constructor PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - ArmStaticInst(mnem, _machInst, __opClass), - condCode(machInst.itstateMask ? - (ConditionCode)(uint8_t)machInst.itstateCond : - (ConditionCode)(unsigned)machInst.condCode) + ArmStaticInst(mnem, _machInst, __opClass) { + if (machInst.aarch64) + condCode = COND_UC; + else if (machInst.itstateMask) + condCode = (ConditionCode)(uint8_t)machInst.itstateCond; + else + condCode = (ConditionCode)(unsigned)machInst.condCode; } }; diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 2a8dee162..260c29a84 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt, return 0; } +int64_t +ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const +{ + shiftAmt = shiftAmt % width; + ArmShiftType shiftType; + shiftType = (ArmShiftType)type; + + switch (shiftType) + { + case LSL: + return base << shiftAmt; + case LSR: + if (shiftAmt == 0) + return base; + else + return (base & mask(width)) >> shiftAmt; + case ASR: + if (shiftAmt == 0) { + return base; + } else { + int sign_bit = bits(base, intWidth - 1); + base >>= shiftAmt; + base = sign_bit ? 
(base | ~mask(intWidth - shiftAmt)) : base; + return base & mask(intWidth); + } + case ROR: + if (shiftAmt == 0) + return base; + else + return (base << (width - shiftAmt)) | (base >> shiftAmt); + default: + ccprintf(std::cerr, "Unhandled shift type\n"); + exit(1); + break; + } + return 0; +} + +int64_t +ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const +{ + bool sign_extend = false; + int len = 0; + switch (type) { + case UXTB: + len = 8; + break; + case UXTH: + len = 16; + break; + case UXTW: + len = 32; + break; + case UXTX: + len = 64; + break; + case SXTB: + len = 8; + sign_extend = true; + break; + case SXTH: + len = 16; + sign_extend = true; + break; + case SXTW: + len = 32; + sign_extend = true; + break; + case SXTX: + len = 64; + sign_extend = true; + break; + } + len = len <= width - shiftAmt ? len : width - shiftAmt; + uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt; + if (sign_extend) { + int sign_bit = bits(tmp, len + shiftAmt - 1); + tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp; + } + return tmp & mask(width); +} + // Shift Rm by Rs int32_t ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt, @@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const switch (regIdxToClass(reg, &rel_reg)) { case IntRegClass: - switch (rel_reg) { - case PCReg: - ccprintf(os, "pc"); - break; - case StackPointerReg: - ccprintf(os, "sp"); - break; - case FramePointerReg: - ccprintf(os, "fp"); - break; - case ReturnAddressReg: - ccprintf(os, "lr"); - break; - default: - ccprintf(os, "r%d", reg); - break; + if (aarch64) { + if (reg == INTREG_UREG0) + ccprintf(os, "ureg0"); + else if (reg == INTREG_SPX) + ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + else if (reg == INTREG_X31) + ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + else + ccprintf(os, "%s%d", (intWidth == 32) ? 
"w" : "x", reg); + } else { + switch (rel_reg) { + case PCReg: + ccprintf(os, "pc"); + break; + case StackPointerReg: + ccprintf(os, "sp"); + break; + case FramePointerReg: + ccprintf(os, "fp"); + break; + case ReturnAddressReg: + ccprintf(os, "lr"); + break; + default: + ccprintf(os, "r%d", reg); + break; + } } break; case FloatRegClass: @@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const void ArmStaticInst::printMnemonic(std::ostream &os, const std::string &suffix, - bool withPred) const + bool withPred, + bool withCond64, + ConditionCode cond64) const { os << " " << mnemonic; - if (withPred) { - unsigned condCode = machInst.condCode; - switch (condCode) { - case COND_EQ: - os << "eq"; - break; - case COND_NE: - os << "ne"; - break; - case COND_CS: - os << "cs"; - break; - case COND_CC: - os << "cc"; - break; - case COND_MI: - os << "mi"; - break; - case COND_PL: - os << "pl"; - break; - case COND_VS: - os << "vs"; - break; - case COND_VC: - os << "vc"; - break; - case COND_HI: - os << "hi"; - break; - case COND_LS: - os << "ls"; - break; - case COND_GE: - os << "ge"; - break; - case COND_LT: - os << "lt"; - break; - case COND_GT: - os << "gt"; - break; - case COND_LE: - os << "le"; - break; - case COND_AL: - // This one is implicit. - break; - case COND_UC: - // Unconditional. 
- break; - default: - panic("Unrecognized condition code %d.\n", condCode); - } + if (withPred && !aarch64) { + printCondition(os, machInst.condCode); + os << suffix; + } else if (withCond64) { + os << "."; + printCondition(os, cond64); os << suffix; - if (machInst.bigThumb) - os << ".w"; - os << " "; + } + if (machInst.bigThumb) + os << ".w"; + os << " "; +} + +void +ArmStaticInst::printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const +{ + Addr symbolAddr; + std::string symbol; + + if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) { + ccprintf(os, "<%s", symbol); + if (symbolAddr != target) + ccprintf(os, "+%d>", target - symbolAddr); + else + ccprintf(os, ">"); + } else { + ccprintf(os, "%#x", target); + } +} + +void +ArmStaticInst::printCondition(std::ostream &os, + unsigned code, + bool noImplicit) const +{ + switch (code) { + case COND_EQ: + os << "eq"; + break; + case COND_NE: + os << "ne"; + break; + case COND_CS: + os << "cs"; + break; + case COND_CC: + os << "cc"; + break; + case COND_MI: + os << "mi"; + break; + case COND_PL: + os << "pl"; + break; + case COND_VS: + os << "vs"; + break; + case COND_VC: + os << "vc"; + break; + case COND_HI: + os << "hi"; + break; + case COND_LS: + os << "ls"; + break; + case COND_GE: + os << "ge"; + break; + case COND_LT: + os << "lt"; + break; + case COND_GT: + os << "gt"; + break; + case COND_LE: + os << "le"; + break; + case COND_AL: + // This one is implicit. + if (noImplicit) + os << "al"; + break; + case COND_UC: + // Unconditional. 
+ if (noImplicit) + os << "uc"; + break; + default: + panic("Unrecognized condition code %d.\n", code); } } @@ -393,6 +523,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os, } void +ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const +{ + if (!firstOperand) + ccprintf(os, ", "); + printReg(os, rm); + if (type == UXTX && shiftAmt == 0) + return; + switch (type) { + case UXTB: ccprintf(os, ", UXTB"); + break; + case UXTH: ccprintf(os, ", UXTH"); + break; + case UXTW: ccprintf(os, ", UXTW"); + break; + case UXTX: ccprintf(os, ", LSL"); + break; + case SXTB: ccprintf(os, ", SXTB"); + break; + case SXTH: ccprintf(os, ", SXTH"); + break; + case SXTW: ccprintf(os, ", SXTW"); + break; + case SXTX: ccprintf(os, ", SXTX"); + break; + } + if (type == UXTX || shiftAmt) + ccprintf(os, " #%d", shiftAmt); +} + +void ArmStaticInst::printDataInst(std::ostream &os, bool withImm, bool immShift, bool s, IntRegIndex rd, IntRegIndex rn, IntRegIndex rm, IntRegIndex rs, uint32_t shiftAmt, diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index c36024ecd..aeec67ec2 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/utility.hh" +#include "arch/arm/system.hh" #include "base/trace.hh" #include "cpu/static_inst.hh" #include "sim/byteswap.hh" @@ -55,6 +56,9 @@ namespace ArmISA class ArmStaticInst : public StaticInst { protected: + bool aarch64; + uint8_t intWidth; + int32_t shift_rm_imm(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; int32_t shift_rm_rs(uint32_t base, uint32_t shamt, @@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst bool
shift_carry_rs(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; + int64_t shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const; + int64_t extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const; + template<int width> static inline bool saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false) @@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { + aarch64 = machInst.aarch64; + if (bits(machInst, 28, 24) == 0x10) + intWidth = 64; // Force 64-bit width for ADR/ADRP + else + intWidth = (aarch64 && bits(machInst, 31)) ? 64 : 32; } /// Print a register name for disassembly given the unique @@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst void printReg(std::ostream &os, int reg) const; void printMnemonic(std::ostream &os, const std::string &suffix = "", - bool withPred = true) const; + bool withPred = true, + bool withCond64 = false, + ConditionCode cond64 = COND_UC) const; + void printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const; + void printCondition(std::ostream &os, unsigned code, + bool noImplicit=false) const; void printMemSymbol(std::ostream &os, const SymbolTable *symtab, const std::string &prefix, const Addr addr, const std::string &suffix) const; void printShiftOperand(std::ostream &os, IntRegIndex rm, bool immShift, uint32_t shiftAmt, IntRegIndex rs, ArmShiftType type) const; + void printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const; void printDataInst(std::ostream &os, bool withImm) const; @@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; static inline uint32_t - cpsrWriteByInstr(CPSR cpsr, uint32_t val, - uint8_t byteMask, bool affectState, bool nmfi) + cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, 
NSACR nsacr, + uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc) { - bool privileged = (cpsr.mode != MODE_USER); + bool privileged = (cpsr.mode != MODE_USER); + bool haveVirt = ArmSystem::haveVirtualization(tc); + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool isSecure = inSecureState(scr, cpsr) || !haveSecurity; uint32_t bitMask = 0; @@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst } if (bits(byteMask, 1)) { unsigned highIdx = affectState ? 15 : 9; - unsigned lowIdx = privileged ? 8 : 9; + unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt)) + ? 8 : 9; bitMask = bitMask | mask(highIdx, lowIdx); } if (bits(byteMask, 0)) { if (privileged) { - bitMask = bitMask | mask(7, 6); - if (!badMode((OperatingMode)(val & mask(5)))) { - bitMask = bitMask | mask(5); + bitMask |= 1 << 7; + if ( (!nmfi || !((val >> 6) & 0x1)) && + (isSecure || scr.fw || haveVirt) ) { + bitMask |= 1 << 6; + } + // Now check the new mode is allowed + OperatingMode newMode = (OperatingMode) (val & mask(5)); + OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode; + if (!badMode(newMode)) { + bool validModeChange = true; + // Check for attempts to enter modes only permitted in + // Secure state from Non-secure state. These are Monitor + // mode ('10110'), and FIQ mode ('10001') if the Security + // Extensions have reserved it. 
+ if (!isSecure && newMode == MODE_MON) + validModeChange = false; + if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == 1) + validModeChange = false; + // There is no Hyp mode ('11010') in Secure state, so that + // is UNPREDICTABLE + if (scr.ns == 0 && newMode == MODE_HYP) + validModeChange = false; + // Cannot move into Hyp mode directly from a Non-secure + // PL1 mode + if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP) + validModeChange = false; + // Cannot move out of Hyp mode with this function except + // on an exception return + if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState) + validModeChange = false; + // Must not change to 64 bit when running in 32 bit mode + if (!opModeIs64(oldMode) && opModeIs64(newMode)) + validModeChange = false; + + // If we passed all of the above then set the bit mask to + // copy the mode across + if (validModeChange) { + bitMask = bitMask | mask(5); + } else { + warn_once("Illegal change to CPSR mode attempted\n"); + } } else { warn_once("Ignoring write of bad mode to CPSR.\n"); } @@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst bitMask = bitMask | (1 << 5); } - bool cpsr_f = cpsr.f; - uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask); - if (nmfi && !cpsr_f) - new_cpsr &= ~(1 << 6); - return new_cpsr; + return ((uint32_t)cpsr & ~bitMask) | (val & bitMask); } static inline uint32_t @@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst inline Fault disabledFault() const { - if (FullSystem) { - return new UndefinedInstruction(); - } else { - return new UndefinedInstruction(machInst, false, mnemonic, true); - } + return new UndefinedInstruction(machInst, false, mnemonic, true); } + + public: + virtual void + annotateFault(ArmFault *fault) {} }; } diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index ca0f58226..03fdc83fa 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + *
Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -46,6 +46,37 @@ */ std::string +FpCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +FpCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -92,6 +123,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest + FP_Reg_Base); + ss << ", "; + printReg(ss, op1 + FP_Reg_Base); + ss << ", "; + printReg(ss, op2 + FP_Reg_Base); + ss << ", "; + printReg(ss, op3 + FP_Reg_Base); + return ss.str(); +} + +std::string FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -131,24 +177,25 @@ prepFpState(uint32_t rMode) } void -finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) +finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; - if (exceptions & FeInvalid) { + if ((exceptions & FeInvalid) && mask.ioc) { fpscr.ioc = 1; } - if (exceptions & FeDivByZero) { + if ((exceptions & FeDivByZero) && mask.dzc) { fpscr.dzc = 1; } - if (exceptions & FeOverflow) { + if 
((exceptions & FeOverflow) && mask.ofc) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; - fpscr.ufc = 1; + if (mask.ufc) + fpscr.ufc = 1; } - if ((exceptions & FeInexact) && !(underflow && flush)) { + if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { fpscr.ixc = 1; } fesetround(state); @@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } -uint16_t -vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, - uint32_t rMode, bool ahp, float op) +static inline uint16_t +vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) { - uint32_t opBits = fpToBits(op); + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the operand. - bool neg = bits(opBits, 31); - uint32_t exponent = bits(opBits, 30, 23); - uint32_t oldMantissa = bits(opBits, 22, 0); - uint32_t mantissa = oldMantissa >> (23 - 10); + bool neg = bits(opBits, sBitPos); + uint32_t exponent = bits(opBits, sBitPos-1, mWidth); + uint64_t oldMantissa = bits(opBits, mWidth-1, 0); + uint32_t mantissa = oldMantissa >> (mWidth - 10); // Do the conversion. - uint32_t extra = oldMantissa & mask(23 - 10); - if (exponent == 0xff) { + uint64_t extra = oldMantissa & mask(mWidth - 10); + if (exponent == mask(eWidth)) { if (oldMantissa != 0) { // Nans. if (bits(mantissa, 9) == 0) { @@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, if (exponent == 0) { // Denormalized. - // If flush to zero is on, this shouldn't happen. assert(!flush); @@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, // We need to track the dropped bits differently since // more can be dropped by denormalizing. 
- bool topOne = bits(extra, 12); - bool restZeros = bits(extra, 11, 0) == 0; + bool topOne = bits(extra, mWidth - 10 - 1); + bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; - if (exponent <= (127 - 15)) { + if (exponent <= (eHalfRange - 15)) { // The result is too small. Denormalize. mantissa |= (1 << 10); - while (mantissa && exponent <= (127 - 15)) { + while (mantissa && exponent <= (eHalfRange - 15)) { restZeros = restZeros && !topOne; topOne = bits(mantissa, 0); mantissa = mantissa >> 1; @@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, exponent = 0; } else { // Change bias. - exponent -= (127 - 15); + exponent -= (eHalfRange - 15); } if (exponent == 0 && (inexact || fpscr.ufe)) { @@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, return result; } -float -vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +uint16_t +vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, float op) { - float junk = 0.0; + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); +} + +uint16_t +vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op) +{ + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); +} + +static inline uint64_t +vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) +{ + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the bitfields. bool neg = bits(op, 15); uint32_t exponent = bits(op, 14, 10); - uint32_t mantissa = bits(op, 9, 0); + uint64_t mantissa = bits(op, 9, 0); // Do the conversion. if (exponent == 0) { if (mantissa != 0) { // Normalize the value. 
- exponent = exponent + (127 - 15) + 1; + exponent = exponent + (eHalfRange - 15) + 1; while (mantissa < (1 << 10)) { mantissa = mantissa << 1; exponent--; } } - mantissa = mantissa << (23 - 10); + mantissa = mantissa << (mWidth - 10); } else if (exponent == 0x1f && !ahp) { // Infinities and nans. - exponent = 0xff; + exponent = mask(eWidth); if (mantissa != 0) { // Nans. - mantissa = mantissa << (23 - 10); - if (bits(mantissa, 22) == 0) { + mantissa = mantissa << (mWidth - 10); + if (bits(mantissa, mWidth-1) == 0) { // Signalling nan. fpscr.ioc = 1; - mantissa |= (1 << 22); + mantissa |= (((uint64_t) 1) << (mWidth-1)); } if (defaultNan) { - mantissa &= ~mask(22); + mantissa &= ~mask(mWidth-1); neg = false; } } } else { - exponent = exponent + (127 - 15); - mantissa = mantissa << (23 - 10); + exponent = exponent + (eHalfRange - 15); + mantissa = mantissa << (mWidth - 10); } // Reassemble the result. - uint32_t result = bits(mantissa, 22, 0); - replaceBits(result, 30, 23, exponent); - if (neg) - result |= (1 << 31); + uint64_t result = bits(mantissa, mWidth-1, 0); + replaceBits(result, sBitPos-1, mWidth, exponent); + if (neg) { + result |= (((uint64_t) 1) << sBitPos); + } + return result; +} + +double +vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +{ + double junk = 0.0; + uint64_t result; + + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); return bitsToFp(result, junk); } -uint64_t -vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero) +float +vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) { - int rmode = rzero ? 
FeRoundZero : fegetround(); - __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); - fesetround(FeRoundNearest); - val = val * powf(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - float origVal = val; - val = rintf(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } + float junk = 0.0; + uint64_t result; - if (isSigned) { - if (half) { - if ((double)val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if ((double)val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if ((double)val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if ((double)val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(16))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(32))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; 
- } - } + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); + return bitsToFp(result, junk); } float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan, float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan, return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); } -uint64_t -vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero) -{ - int rmode = rzero ? 
FeRoundZero : fegetround(); - fesetround(FeRoundNearest); - val = val * pow(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - double origVal = val; - val = rint(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } - if (isSigned) { - if (half) { - if (val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if (val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if (val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if (val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(16)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(32)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } -} double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { 
fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan, double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -976,6 +919,85 @@ template double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, double op1, double op2) const; +// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB +template <class fpType> +fpType +FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const +{ + const bool single = (sizeof(fpType) == sizeof(float)); + fpType junk = 0.0; + + if (flush && (flushToZero(op1, op2) || flushToZero(op3))) + fpscr.idc = 1; + VfpSavedState state = prepFpState(rMode); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) + : "m" (op1), "m" (op2), "m" (op3), "m" (state)); + fpType dest = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); + + int fpClass = std::fpclassify(dest); + // Get NAN behavior right. This varies between x86 and ARM. + if (fpClass == FP_NAN) { + const uint64_t qnan = + single ? 
0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool nan3 = std::isnan(op3); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); + if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { + dest = bitsToFp(qnan, junk); + } else if (signal1) { + dest = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + dest = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (signal3) { + dest = bitsToFp(fpToBits(op3) | qnan, junk); + } else if (nan1) { + dest = op1; + } else if (nan2) { + dest = op2; + } else if (nan3) { + dest = op3; + } + } else if (flush && flushToZero(dest)) { + feraiseexcept(FeUnderflow); + } else if (( + (single && (dest == bitsToFp(0x00800000, junk) || + dest == bitsToFp(0x80800000, junk))) || + (!single && + (dest == bitsToFp(ULL(0x0010000000000000), junk) || + dest == bitsToFp(ULL(0x8010000000000000), junk))) + ) && rMode != VfpRoundZero) { + /* + * Correct for the fact that underflow is detected -before- rounding + * in ARM and -after- rounding in x86. 
+ */ + fesetround(FeRoundZero); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) + : "m" (op1), "m" (op2), "m" (op3)); + fpType temp = func(op1, op2, op2); + __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); + if (flush && flushToZero(temp)) { + dest = temp; + } + } + finishVfp(fpscr, state, flush); + return dest; +} + +template +float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, + float (*func)(float, float, float), + bool flush, bool defaultNan, uint32_t rMode) const; +template +double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, + double (*func)(double, double, double), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 9babaae04..f17f90973 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -104,7 +104,8 @@ enum VfpRoundingMode VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, - VfpRoundZero = 3 + VfpRoundZero = 3, + VfpRoundAway = 4 }; static inline float bitsToFp(uint64_t, float); @@ -212,7 +213,7 @@ isSnan(fpType val) typedef int VfpSavedState; VfpSavedState prepFpState(uint32_t rMode); -void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush); +void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask); template <class fpType> fpType fixDest(FPSCR fpscr, fpType val, fpType op1); @@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val); uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op); -float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t 
rMode, bool ahp, double op); + +float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); static inline double makeDouble(uint32_t low, uint32_t high) @@ -249,19 +254,192 @@ highFromDouble(double val) return fpToBits(val) >> 32; } -uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); +static inline void +setFPExceptions(int exceptions) { + feclearexcept(FeAllExceptions); + feraiseexcept(exceptions); +} + +template <typename T> +uint64_t +vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool + useRmode = true, VfpRoundingMode roundMode = VfpRoundZero, + bool aarch64 = false) +{ + int rmode; + bool roundAwayFix = false; + + if (!useRmode) { + rmode = fegetround(); + } else { + switch (roundMode) + { + case VfpRoundNearest: + rmode = FeRoundNearest; + break; + case VfpRoundUpward: + rmode = FeRoundUpward; + break; + case VfpRoundDown: + rmode = FeRoundDown; + break; + case VfpRoundZero: + rmode = FeRoundZero; + break; + case VfpRoundAway: + // There is no equivalent rounding mode, use round down and we'll + // fix it later + rmode = FeRoundDown; + roundAwayFix = true; + break; + default: + panic("Unsupported roundMode %d\n", roundMode); + } + } + __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); + fesetround(FeRoundNearest); + val = val * pow(2.0, imm); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(rmode); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + T origVal = val; + val = rint(val); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + int exceptions = fetestexcept(FeAllExceptions); + + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + exceptions |= FeInvalid; + } + val = 0.0; + } else if (origVal != val) { + switch (rmode) { + case FeRoundNearest: + if (origVal - val > 0.5) + val += 1.0; 
+ else if (val - origVal > 0.5) + val -= 1.0; + break; + case FeRoundDown: + if (roundAwayFix) { + // The ordering on the subtraction looks a bit odd in that we + // don't do the obvious origVal - val, instead we do + // -(val - origVal). This is required to get the correct bit + // exact behaviour when very close to the 0.5 threshold. + volatile T error = val; + error -= origVal; + error = -error; + if ( (error > 0.5) || + ((error == 0.5) && (val >= 0)) ) + val += 1.0; + } else { + if (origVal < val) + val -= 1.0; + } + break; + case FeRoundUpward: + if (origVal > val) + val += 1.0; + break; + } + exceptions |= FeInexact; + } + + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + if (isSigned) { + bool outOfRange = false; + int64_t result = (int64_t) val; + uint64_t finalVal; + + if (!aarch64) { + if (width == 16) { + finalVal = (int16_t)val; + } else if (width == 32) { + finalVal =(int32_t)val; + } else if (width == 64) { + finalVal = result; + } else { + panic("Unsupported width %d\n", width); + } + + // check if value is in range + int64_t minVal = ~mask(width-1); + if ((double)val < minVal) { + outOfRange = true; + finalVal = minVal; + } + int64_t maxVal = mask(width-1); + if ((double)val > maxVal) { + outOfRange = true; + finalVal = maxVal; + } + } else { + bool isNeg = val < 0; + finalVal = result & mask(width); + // If the result is supposed to be less than 64 bits check that the + // upper bits that got thrown away are just sign extension bits + if (width != 64) { + outOfRange = ((uint64_t) result >> (width - 1)) != + (isNeg ? mask(64-width+1) : 0); + } + // If the original floating point value doesn't match the + // integer version we are also out of range. So create a saturated + // result. 
+ if (isNeg) { + outOfRange |= val < result; + if (outOfRange) { + finalVal = 1LL << (width-1); + } + } else { + outOfRange |= val > result; + if (outOfRange) { + finalVal = mask(width-1); + } + } + } + + // Raise an exception if the value was out of range + if (outOfRange) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + } + setFPExceptions(exceptions); + return finalVal; + } else { + if ((double)val < 0) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return 0; + } + + uint64_t result = ((uint64_t) val) & mask(width); + if (val > result) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return mask(width); + } + + setFPExceptions(exceptions); + return result; + } +}; + + float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); -uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); float fprSqrtEstimate(FPSCR &fpscr, float op); uint32_t unsignedRSqrtEstimate(uint32_t op); @@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp void nextIdxs(IntRegIndex &dest); }; +template <typename T> +static inline T +fpAdd(T a, T b) +{ + return a + b; +}; + +template <typename T> +static inline T +fpSub(T a, T b) +{ + return a - b; +}; + static inline float fpAddS(float a, float b) { @@ -328,6 +520,54 @@ fpDivD(double a, double b) return a / b; } +template <typename T> +static inline T +fpDiv(T a, T b) +{ + return a / b; +}; + +template <typename T> +static inline T 
+fpMulX(T a, T b) +{ + uint64_t opData; + uint32_t sign1; + uint32_t sign2; + const bool single = (sizeof(T) == sizeof(float)); + if (single) { + opData = (fpToBits(a)); + sign1 = opData>>31; + opData = (fpToBits(b)); + sign2 = opData>>31; + } else { + opData = (fpToBits(a)); + sign1 = opData>>63; + opData = (fpToBits(b)); + sign2 = opData>>63; + } + bool inf1 = (std::fpclassify(a) == FP_INFINITE); + bool inf2 = (std::fpclassify(b) == FP_INFINITE); + bool zero1 = (std::fpclassify(a) == FP_ZERO); + bool zero2 = (std::fpclassify(b) == FP_ZERO); + if ((inf1 && zero2) || (zero1 && inf2)) { + if(sign1 ^ sign2) + return (T)(-2.0); + else + return (T)(2.0); + } else { + return (a * b); + } +}; + + +template <typename T> +static inline T +fpMul(T a, T b) +{ + return a * b; +}; + static inline float fpMulS(float a, float b) { @@ -340,23 +580,140 @@ fpMulD(double a, double b) return a * b; } -static inline float -fpMaxS(float a, float b) +template <typename T> +static inline T +// @todo remove this when all calls to it have been replaced with the new fplib implementation +fpMulAdd(T op1, T op2, T addend) +{ + T result; + + if (sizeof(T) == sizeof(float)) + result = fmaf(op1, op2, addend); + else + result = fma(op1, op2, addend); + + // ARM doesn't generate signed NaNs from this operation, so fix up the result + if (std::isnan(result) && !std::isnan(op1) && + !std::isnan(op2) && !std::isnan(addend)) + { + uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1); + result = bitsToFp(fpToBits(result) & ~bitMask, op1); + } + return result; +} + +template <typename T> +static inline T +fpRIntX(T a, FPSCR &fpscr) +{ + T rVal; + + rVal = rint(a); + if (rVal != a && !std::isnan(a)) + fpscr.ixc = 1; + return (rVal); +}; + +template <typename T> +static inline T +fpMaxNum(T a, T b) { + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? 
b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (!std::signbit(a) && std::signbit(b)) return a; - return fmaxf(a, b); -} + return fmax(a, b); +}; -static inline float -fpMinS(float a, float b) +template <typename T> +static inline T +fpMax(T a, T b) { + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMaxNum<T>(a, b); +}; + +template <typename T> +static inline T +fpMinNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (std::signbit(a) && !std::signbit(b)) return a; - return fminf(a, b); -} + return fmin(a, b); +}; + +template <typename T> +static inline T +fpMin(T a, T b) +{ + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMinNum<T>(a, b); +}; + +template <typename T> +static inline T +fpRSqrts(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 1.5; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 1.5; + } + return (3.0 - (a * b)) / 2.0; +}; + +template <typename T> +static inline T +fpRecps(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 2.0; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 2.0; + } + return 2.0 - (a * b); +}; + static inline float 
fpRSqrtsS(float a, float b) @@ -400,6 +757,23 @@ fpRecpsS(float a, float b) return 2.0 - (a * b); } +template <typename T> +static inline T +roundNEven(T a) { + T val; + + val = round(a); + if (a - val == 0.5) { + if ( (((int) a) & 1) == 0 ) val += 1.0; + } + else if (a - val == -0.5) { + if ( (((int) a) & 1) == 0 ) val -= 1.0; + } + return val; +} + + + class FpOp : public PredOp { protected: @@ -457,6 +831,12 @@ class FpOp : public PredOp template <class fpType> fpType + ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const; + + template <class fpType> + fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType (*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const; @@ -478,6 +858,55 @@ class FpOp : public PredOp pcState.advance(); } } + + float + fpSqrt (FPSCR fpscr,float x) const + { + + return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode); + + } + + double + fpSqrt (FPSCR fpscr,double x) const + { + + return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode); + + } +}; + +class FpCondCompRegOp : public FpOp +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + FpOp(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class FpCondSelOp : public FpOp +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + FpOp(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const 
SymbolTable *symtab) const; }; class FpRegRegOp : public FpOp @@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class FpRegRegRegRegOp : public FpOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex op3; + + FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) : + FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2), + op3(_op3) + { + setVfpMicroFlags(mode, flags); + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class FpRegRegRegImmOp : public FpOp { protected: diff --git a/src/arch/arm/interrupts.cc b/src/arch/arm/interrupts.cc index c05ae984e..6682b75a0 100644 --- a/src/arch/arm/interrupts.cc +++ b/src/arch/arm/interrupts.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 ARM Limited + * Copyright (c) 2009, 2012-2013 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -38,9 +38,128 @@ */ #include "arch/arm/interrupts.hh" +#include "arch/arm/system.hh" ArmISA::Interrupts * ArmInterruptsParams::create() { return new ArmISA::Interrupts(this); } + +bool +ArmISA::Interrupts::takeInt(ThreadContext *tc, InterruptTypes int_type) const +{ + // Table G1-17~19 of ARM V8 ARM + InterruptMask mask; + bool highest_el_is_64 = ArmSystem::highestELIs64(tc); + + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr; + HCR hcr; + hcr = tc->readMiscReg(MISCREG_HCR); + ExceptionLevel el = (ExceptionLevel) ((uint32_t) cpsr.el); + bool cpsr_mask_bit, scr_routing_bit, scr_fwaw_bit, hcr_mask_override_bit; + + if (!highest_el_is_64) + scr = tc->readMiscReg(MISCREG_SCR); + else + scr = tc->readMiscReg(MISCREG_SCR_EL3); + + bool is_secure = inSecureState(scr, cpsr); + + switch(int_type) { + case INT_FIQ: + cpsr_mask_bit = cpsr.f; + scr_routing_bit = scr.fiq; + scr_fwaw_bit = scr.fw; + hcr_mask_override_bit = hcr.fmo; + break; + case INT_IRQ: + cpsr_mask_bit = cpsr.i; + scr_routing_bit = scr.irq; + scr_fwaw_bit = 1; + hcr_mask_override_bit = hcr.imo; + break; + case INT_ABT: + cpsr_mask_bit = cpsr.a; + scr_routing_bit = scr.ea; + scr_fwaw_bit = scr.aw; + hcr_mask_override_bit = hcr.amo; + break; + default: + panic("Unhandled interrupt type!"); + } + + if (hcr.tge) + hcr_mask_override_bit = 1; + + if (!highest_el_is_64) { + // AArch32 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!hcr_mask_override_bit) + mask = INT_MASK_M; + else { + if (!is_secure && (el == EL0 || el == EL1)) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // SCR IRQ == 1 + if ((!is_secure) && + (hcr_mask_override_bit || + (!scr_fwaw_bit && !hcr_mask_override_bit))) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // AArch64 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!scr.rw) { + // SCR RW == 0 + if (!hcr_mask_override_bit) { + if (el == EL3) + mask = INT_MASK_P; + else 
+ mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_T; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } else { + // SCR RW == 1 + if (!hcr_mask_override_bit) { + if (el == EL3 || el == EL2) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_P; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + } else { + // SCR IRQ == 1 + if (el == EL3) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + + return ((mask == INT_MASK_T) || + ((mask == INT_MASK_M) && !cpsr_mask_bit)) && + (mask != INT_MASK_P); +} + diff --git a/src/arch/arm/interrupts.hh b/src/arch/arm/interrupts.hh index 7def6ddd6..8e6c2b261 100644 --- a/src/arch/arm/interrupts.hh +++ b/src/arch/arm/interrupts.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010,2012 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ #include "arch/arm/isa_traits.hh" #include "arch/arm/miscregs.hh" #include "arch/arm/registers.hh" +#include "arch/arm/utility.hh" #include "cpu/thread_context.hh" #include "debug/Interrupt.hh" #include "params/ArmInterrupts.hh" @@ -123,31 +124,79 @@ class Interrupts : public SimObject memset(interrupts, 0, sizeof(interrupts)); } + enum InterruptMask { + INT_MASK_M, // masked (subject to PSTATE.{A,I,F} mask bit + INT_MASK_T, // taken regardless of mask + INT_MASK_P // pending + }; + + bool takeInt(ThreadContext *tc, InterruptTypes int_type) const; + bool checkInterrupts(ThreadContext *tc) const { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + + if (!(intStatus || hcr.va || hcr.vi || hcr.vf)) return false; CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); - - return ((interrupts[INT_IRQ] && !cpsr.i) || - (interrupts[INT_FIQ] && !cpsr.f) || - (interrupts[INT_ABT] && !cpsr.a) || - (interrupts[INT_RST]) || - (interrupts[INT_SEV])); + SCR 
scr = tc->readMiscReg(MISCREG_SCR); + + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + + return ((interrupts[INT_IRQ] && take_irq) || + (interrupts[INT_FIQ] && take_fiq) || + (interrupts[INT_ABT] && take_ea) || + ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) || + ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) || + (hcr.va && allowVAbort) || + (interrupts[INT_RST]) || + (interrupts[INT_SEV]) + ); } /** - * Check the raw interrupt state. * This function is used to check if a wfi operation should sleep. If there * is an interrupt pending, even if it's masked, wfi doesn't sleep. * @return any interrupts pending */ bool - checkRaw() const + checkWfiWake(HCR hcr, CPSR cpsr, SCR scr) const + { + uint64_t maskedIntStatus; + bool virtWake; + + maskedIntStatus = intStatus & ~((1 << INT_VIRT_IRQ) | + (1 << INT_VIRT_FIQ)); + virtWake = (hcr.vi || interrupts[INT_VIRT_IRQ]) && hcr.imo; + virtWake |= (hcr.vf || interrupts[INT_VIRT_FIQ]) && hcr.fmo; + virtWake |= hcr.va && hcr.amo; + virtWake &= (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + return maskedIntStatus || virtWake; + } + + uint32_t + getISR(HCR hcr, CPSR cpsr, SCR scr) { - return intStatus; + bool useHcrMux; + CPSR isr = 0; // ARM ARM states ISR reg uses same bit positions as CPSR + + useHcrMux = (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + isr.i = (useHcrMux & hcr.imo) ? (interrupts[INT_VIRT_IRQ] || hcr.vi) + : interrupts[INT_IRQ]; + isr.f = (useHcrMux & hcr.fmo) ? (interrupts[INT_VIRT_FIQ] || hcr.vf) + : interrupts[INT_FIQ]; + isr.a = (useHcrMux & hcr.amo) ? 
hcr.va : interrupts[INT_ABT]; + return isr; } /** @@ -172,22 +221,45 @@ class Interrupts : public SimObject Fault getInterrupt(ThreadContext *tc) { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); + + // Calculate a few temp vars so we can work out if there's a pending + // virtual interrupt, and if its allowed to happen + // ARM ARM Issue C section B1.9.9, B1.9.11, and B1.9.13 + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + if ( !(intStatus || (hcr.vi && allowVIrq) || (hcr.vf && allowVFiq) || + (hcr.va && allowVAbort)) ) return NoFault; - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + - if (interrupts[INT_IRQ] && !cpsr.i) + if (interrupts[INT_IRQ] && take_irq) return new Interrupt; - if (interrupts[INT_FIQ] && !cpsr.f) + if ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) + return new VirtualInterrupt; + if (interrupts[INT_FIQ] && take_fiq) return new FastInterrupt; - if (interrupts[INT_ABT] && !cpsr.a) - return new DataAbort(0, false, 0, - ArmFault::AsynchronousExternalAbort); + if ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) + return new VirtualFastInterrupt; + if (interrupts[INT_ABT] && take_ea) + return new SystemError; + if (hcr.va && allowVAbort) + return new VirtualDataAbort(0, TlbEntry::DomainType::NoAccess, false, + ArmFault::AsynchronousExternalAbort); if (interrupts[INT_RST]) - return new Reset; + return new Reset; if (interrupts[INT_SEV]) - return new ArmSev; + return new ArmSev; panic("intStatus and interrupts not in sync\n"); } diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh 
index 3fe00b765..fa18aa68d 100644 --- a/src/arch/arm/intregs.hh +++ b/src/arch/arm/intregs.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -83,6 +83,9 @@ enum IntRegIndex INTREG_R14_MON, INTREG_LR_MON = INTREG_R14_MON, + INTREG_R13_HYP, + INTREG_SP_HYP = INTREG_R13_HYP, + INTREG_R13_ABT, INTREG_SP_ABT = INTREG_R13_ABT, INTREG_R14_ABT, @@ -108,7 +111,7 @@ enum IntRegIndex INTREG_R14_FIQ, INTREG_LR_FIQ = INTREG_R14_FIQ, - INTREG_ZERO, // Dummy zero reg since there has to be one. + INTREG_ZERO, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2, @@ -117,12 +120,54 @@ enum IntRegIndex INTREG_CONDCODES_V, INTREG_CONDCODES_GE, INTREG_FPCONDCODES, + INTREG_DUMMY, // Dummy reg used to throw away int reg results + + INTREG_SP0, + INTREG_SP1, + INTREG_SP2, + INTREG_SP3, NUM_INTREGS, - NUM_ARCH_INTREGS = INTREG_PC + 1, + NUM_ARCH_INTREGS = 32, + + /* AArch64 registers */ + INTREG_X0 = 0, + INTREG_X1, + INTREG_X2, + INTREG_X3, + INTREG_X4, + INTREG_X5, + INTREG_X6, + INTREG_X7, + INTREG_X8, + INTREG_X9, + INTREG_X10, + INTREG_X11, + INTREG_X12, + INTREG_X13, + INTREG_X14, + INTREG_X15, + INTREG_X16, + INTREG_X17, + INTREG_X18, + INTREG_X19, + INTREG_X20, + INTREG_X21, + INTREG_X22, + INTREG_X23, + INTREG_X24, + INTREG_X25, + INTREG_X26, + INTREG_X27, + INTREG_X28, + INTREG_X29, + INTREG_X30, + INTREG_X31, + + INTREG_SPX = NUM_INTREGS, /* All the aliased indexes. 
*/ - + /* USR mode */ INTREG_R0_USR = INTREG_R0, INTREG_R1_USR = INTREG_R1, @@ -195,6 +240,25 @@ enum IntRegIndex INTREG_PC_ABT = INTREG_PC, INTREG_R15_ABT = INTREG_R15, + /* HYP mode */ + INTREG_R0_HYP = INTREG_R0, + INTREG_R1_HYP = INTREG_R1, + INTREG_R2_HYP = INTREG_R2, + INTREG_R3_HYP = INTREG_R3, + INTREG_R4_HYP = INTREG_R4, + INTREG_R5_HYP = INTREG_R5, + INTREG_R6_HYP = INTREG_R6, + INTREG_R7_HYP = INTREG_R7, + INTREG_R8_HYP = INTREG_R8, + INTREG_R9_HYP = INTREG_R9, + INTREG_R10_HYP = INTREG_R10, + INTREG_R11_HYP = INTREG_R11, + INTREG_R12_HYP = INTREG_R12, + INTREG_LR_HYP = INTREG_LR, + INTREG_R14_HYP = INTREG_R14, + INTREG_PC_HYP = INTREG_PC, + INTREG_R15_HYP = INTREG_R15, + /* UND mode */ INTREG_R0_UND = INTREG_R0, INTREG_R1_UND = INTREG_R1, @@ -244,11 +308,26 @@ enum IntRegIndex typedef IntRegIndex IntRegMap[NUM_ARCH_INTREGS]; +const IntRegMap IntReg64Map = { + INTREG_R0, INTREG_R1, INTREG_R2, INTREG_R3, + INTREG_R4, INTREG_R5, INTREG_R6, INTREG_R7, + INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R13_HYP, + INTREG_R14_IRQ, INTREG_R13_IRQ, INTREG_R14_SVC, INTREG_R13_SVC, + INTREG_R14_ABT, INTREG_R13_ABT, INTREG_R14_UND, INTREG_R13_UND, + INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_ZERO +}; + const IntRegMap IntRegUsrMap = { INTREG_R0_USR, INTREG_R1_USR, INTREG_R2_USR, INTREG_R3_USR, INTREG_R4_USR, INTREG_R5_USR, INTREG_R6_USR, INTREG_R7_USR, INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, - INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -258,11 +337,33 @@ INTREG_USR(unsigned 
index) return IntRegUsrMap[index]; } +const IntRegMap IntRegHypMap = { + INTREG_R0_HYP, INTREG_R1_HYP, INTREG_R2_HYP, INTREG_R3_HYP, + INTREG_R4_HYP, INTREG_R5_HYP, INTREG_R6_HYP, INTREG_R7_HYP, + INTREG_R8_HYP, INTREG_R9_HYP, INTREG_R10_HYP, INTREG_R11_HYP, + INTREG_R12_HYP, INTREG_R13_HYP, INTREG_R14_HYP, INTREG_R15_HYP, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO +}; + +static inline IntRegIndex +INTREG_HYP(unsigned index) +{ + assert(index < NUM_ARCH_INTREGS); + return IntRegHypMap[index]; +} + const IntRegMap IntRegSvcMap = { INTREG_R0_SVC, INTREG_R1_SVC, INTREG_R2_SVC, INTREG_R3_SVC, INTREG_R4_SVC, INTREG_R5_SVC, INTREG_R6_SVC, INTREG_R7_SVC, INTREG_R8_SVC, INTREG_R9_SVC, INTREG_R10_SVC, INTREG_R11_SVC, - INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC + INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -276,7 +377,11 @@ const IntRegMap IntRegMonMap = { INTREG_R0_MON, INTREG_R1_MON, INTREG_R2_MON, INTREG_R3_MON, INTREG_R4_MON, INTREG_R5_MON, INTREG_R6_MON, INTREG_R7_MON, INTREG_R8_MON, INTREG_R9_MON, INTREG_R10_MON, INTREG_R11_MON, - INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON + INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -290,7 +395,11 @@ const IntRegMap IntRegAbtMap = { INTREG_R0_ABT, INTREG_R1_ABT, INTREG_R2_ABT, INTREG_R3_ABT, 
INTREG_R4_ABT, INTREG_R5_ABT, INTREG_R6_ABT, INTREG_R7_ABT, INTREG_R8_ABT, INTREG_R9_ABT, INTREG_R10_ABT, INTREG_R11_ABT, - INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT + INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -304,7 +413,11 @@ const IntRegMap IntRegUndMap = { INTREG_R0_UND, INTREG_R1_UND, INTREG_R2_UND, INTREG_R3_UND, INTREG_R4_UND, INTREG_R5_UND, INTREG_R6_UND, INTREG_R7_UND, INTREG_R8_UND, INTREG_R9_UND, INTREG_R10_UND, INTREG_R11_UND, - INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND + INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -318,7 +431,11 @@ const IntRegMap IntRegIrqMap = { INTREG_R0_IRQ, INTREG_R1_IRQ, INTREG_R2_IRQ, INTREG_R3_IRQ, INTREG_R4_IRQ, INTREG_R5_IRQ, INTREG_R6_IRQ, INTREG_R7_IRQ, INTREG_R8_IRQ, INTREG_R9_IRQ, INTREG_R10_IRQ, INTREG_R11_IRQ, - INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ + INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -332,7 +449,11 @@ const IntRegMap IntRegFiqMap = { INTREG_R0_FIQ, INTREG_R1_FIQ, INTREG_R2_FIQ, INTREG_R3_FIQ, INTREG_R4_FIQ, INTREG_R5_FIQ, INTREG_R6_FIQ, INTREG_R7_FIQ, INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, - INTREG_R12_FIQ, INTREG_R13_FIQ, 
INTREG_R14_FIQ, INTREG_R15_FIQ + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -351,6 +472,51 @@ intRegInMode(OperatingMode mode, int reg) return mode * intRegsPerMode + reg; } +static inline int +flattenIntRegModeIndex(int reg) +{ + int mode = reg / intRegsPerMode; + reg = reg % intRegsPerMode; + switch (mode) { + case MODE_USER: + case MODE_SYSTEM: + return INTREG_USR(reg); + case MODE_FIQ: + return INTREG_FIQ(reg); + case MODE_IRQ: + return INTREG_IRQ(reg); + case MODE_SVC: + return INTREG_SVC(reg); + case MODE_MON: + return INTREG_MON(reg); + case MODE_ABORT: + return INTREG_ABT(reg); + case MODE_HYP: + return INTREG_HYP(reg); + case MODE_UNDEFINED: + return INTREG_UND(reg); + default: + panic("%d: Flattening into an unknown mode: reg:%#x mode:%#x\n", + curTick(), reg, mode); + } +} + + +static inline IntRegIndex +makeSP(IntRegIndex reg) +{ + if (reg == INTREG_X31) + reg = INTREG_SPX; + return reg; +} + + +static inline bool +isSP(IntRegIndex reg) +{ + return reg == INTREG_SPX; +} + } #endif diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 86be2803d..4f1ef91ec 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -51,12 +51,111 @@ namespace ArmISA { + +/** + * Some registers alias with others, and therefore need to be translated.
+ * For each entry: + * The first value is the misc register that is to be looked up + * the second value is the lower part of the translation + * the third the upper part + */ +const struct ISA::MiscRegInitializerEntry + ISA::MiscRegSwitch[miscRegTranslateMax] = { + {MISCREG_CSSELR_EL1, {MISCREG_CSSELR, 0}}, + {MISCREG_SCTLR_EL1, {MISCREG_SCTLR, 0}}, + {MISCREG_SCTLR_EL2, {MISCREG_HSCTLR, 0}}, + {MISCREG_ACTLR_EL1, {MISCREG_ACTLR, 0}}, + {MISCREG_ACTLR_EL2, {MISCREG_HACTLR, 0}}, + {MISCREG_CPACR_EL1, {MISCREG_CPACR, 0}}, + {MISCREG_CPTR_EL2, {MISCREG_HCPTR, 0}}, + {MISCREG_HCR_EL2, {MISCREG_HCR, 0}}, + {MISCREG_MDCR_EL2, {MISCREG_HDCR, 0}}, + {MISCREG_HSTR_EL2, {MISCREG_HSTR, 0}}, + {MISCREG_HACR_EL2, {MISCREG_HACR, 0}}, + {MISCREG_TTBR0_EL1, {MISCREG_TTBR0, 0}}, + {MISCREG_TTBR1_EL1, {MISCREG_TTBR1, 0}}, + {MISCREG_TTBR0_EL2, {MISCREG_HTTBR, 0}}, + {MISCREG_VTTBR_EL2, {MISCREG_VTTBR, 0}}, + {MISCREG_TCR_EL1, {MISCREG_TTBCR, 0}}, + {MISCREG_TCR_EL2, {MISCREG_HTCR, 0}}, + {MISCREG_VTCR_EL2, {MISCREG_VTCR, 0}}, + {MISCREG_AFSR0_EL1, {MISCREG_ADFSR, 0}}, + {MISCREG_AFSR1_EL1, {MISCREG_AIFSR, 0}}, + {MISCREG_AFSR0_EL2, {MISCREG_HADFSR, 0}}, + {MISCREG_AFSR1_EL2, {MISCREG_HAIFSR, 0}}, + {MISCREG_ESR_EL2, {MISCREG_HSR, 0}}, + {MISCREG_FAR_EL1, {MISCREG_DFAR, MISCREG_IFAR}}, + {MISCREG_FAR_EL2, {MISCREG_HDFAR, MISCREG_HIFAR}}, + {MISCREG_HPFAR_EL2, {MISCREG_HPFAR, 0}}, + {MISCREG_PAR_EL1, {MISCREG_PAR, 0}}, + {MISCREG_MAIR_EL1, {MISCREG_PRRR, MISCREG_NMRR}}, + {MISCREG_MAIR_EL2, {MISCREG_HMAIR0, MISCREG_HMAIR1}}, + {MISCREG_AMAIR_EL1, {MISCREG_AMAIR0, MISCREG_AMAIR1}}, + {MISCREG_VBAR_EL1, {MISCREG_VBAR, 0}}, + {MISCREG_VBAR_EL2, {MISCREG_HVBAR, 0}}, + {MISCREG_CONTEXTIDR_EL1, {MISCREG_CONTEXTIDR, 0}}, + {MISCREG_TPIDR_EL0, {MISCREG_TPIDRURW, 0}}, + {MISCREG_TPIDRRO_EL0, {MISCREG_TPIDRURO, 0}}, + {MISCREG_TPIDR_EL1, {MISCREG_TPIDRPRW, 0}}, + {MISCREG_TPIDR_EL2, {MISCREG_HTPIDR, 0}}, + {MISCREG_TEECR32_EL1, {MISCREG_TEECR, 0}}, + {MISCREG_CNTFRQ_EL0, {MISCREG_CNTFRQ, 0}}, 
+ {MISCREG_CNTPCT_EL0, {MISCREG_CNTPCT, 0}}, + {MISCREG_CNTVCT_EL0, {MISCREG_CNTVCT, 0}}, + {MISCREG_CNTVOFF_EL2, {MISCREG_CNTVOFF, 0}}, + {MISCREG_CNTKCTL_EL1, {MISCREG_CNTKCTL, 0}}, + {MISCREG_CNTHCTL_EL2, {MISCREG_CNTHCTL, 0}}, + {MISCREG_CNTP_TVAL_EL0, {MISCREG_CNTP_TVAL, 0}}, + {MISCREG_CNTP_CTL_EL0, {MISCREG_CNTP_CTL, 0}}, + {MISCREG_CNTP_CVAL_EL0, {MISCREG_CNTP_CVAL, 0}}, + {MISCREG_CNTV_TVAL_EL0, {MISCREG_CNTV_TVAL, 0}}, + {MISCREG_CNTV_CTL_EL0, {MISCREG_CNTV_CTL, 0}}, + {MISCREG_CNTV_CVAL_EL0, {MISCREG_CNTV_CVAL, 0}}, + {MISCREG_CNTHP_TVAL_EL2, {MISCREG_CNTHP_TVAL, 0}}, + {MISCREG_CNTHP_CTL_EL2, {MISCREG_CNTHP_CTL, 0}}, + {MISCREG_CNTHP_CVAL_EL2, {MISCREG_CNTHP_CVAL, 0}}, + {MISCREG_DACR32_EL2, {MISCREG_DACR, 0}}, + {MISCREG_IFSR32_EL2, {MISCREG_IFSR, 0}}, + {MISCREG_TEEHBR32_EL1, {MISCREG_TEEHBR, 0}}, + {MISCREG_SDER32_EL3, {MISCREG_SDER, 0}} +}; + + ISA::ISA(Params *p) - : SimObject(p) + : SimObject(p), system(NULL), lookUpMiscReg(NUM_MISCREGS, {0,0}) { SCTLR sctlr; sctlr = 0; miscRegs[MISCREG_SCTLR_RST] = sctlr; + + system = dynamic_cast<ArmSystem *>(p->system); + DPRINTFN("ISA system set to: %p %p\n", system, p->system); + + // Cache system-level properties + if (FullSystem && system) { + haveSecurity = system->haveSecurity(); + haveLPAE = system->haveLPAE(); + haveVirtualization = system->haveVirtualization(); + haveLargeAsid64 = system->haveLargeAsid64(); + physAddrRange64 = system->physAddrRange64(); + } else { + haveSecurity = haveLPAE = haveVirtualization = false; + haveLargeAsid64 = false; + physAddrRange64 = 32; // dummy value + } + + /** Fill in the miscReg translation table */ + for (uint32_t i = 0; i < miscRegTranslateMax; i++) { + struct MiscRegLUTEntry new_entry; + + uint32_t select = MiscRegSwitch[i].index; + new_entry = MiscRegSwitch[i].entry; + + lookUpMiscReg[select] = new_entry; + } + + preUnflattenMiscReg(); + clear(); } @@ -73,27 +172,42 @@ ISA::clear() SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST]; memset(miscRegs, 0, 
sizeof(miscRegs)); + + // Initialize configurable default values + miscRegs[MISCREG_MIDR] = p->midr; + miscRegs[MISCREG_MIDR_EL1] = p->midr; + miscRegs[MISCREG_VPIDR] = p->midr; + + if (FullSystem && system->highestELIs64()) { + // Initialize AArch64 state + clear64(p); + return; + } + + // Initialize AArch32 state... + CPSR cpsr = 0; cpsr.mode = MODE_USER; miscRegs[MISCREG_CPSR] = cpsr; updateRegMap(cpsr); SCTLR sctlr = 0; - sctlr.te = (bool)sctlr_rst.te; - sctlr.nmfi = (bool)sctlr_rst.nmfi; - sctlr.v = (bool)sctlr_rst.v; - sctlr.u = 1; + sctlr.te = (bool) sctlr_rst.te; + sctlr.nmfi = (bool) sctlr_rst.nmfi; + sctlr.v = (bool) sctlr_rst.v; + sctlr.u = 1; sctlr.xp = 1; sctlr.rao2 = 1; sctlr.rao3 = 1; - sctlr.rao4 = 1; - miscRegs[MISCREG_SCTLR] = sctlr; + sctlr.rao4 = 0xf; // SCTLR[6:3] + miscRegs[MISCREG_SCTLR_NS] = sctlr; miscRegs[MISCREG_SCTLR_RST] = sctlr_rst; + miscRegs[MISCREG_HCPTR] = 0; - /* Start with an event in the mailbox */ + // Start with an event in the mailbox miscRegs[MISCREG_SEV_MAILBOX] = 1; - // Separate Instruction and Data TLBs. + // Separate Instruction and Data TLBs miscRegs[MISCREG_TLBTR] = 1; MVFR0 mvfr0 = 0; @@ -119,7 +233,8 @@ ISA::clear() // Reset values of PRRR and NMRR are implementation dependent - miscRegs[MISCREG_PRRR] = + // @todo: PRRR and NMRR in secure state? 
+ miscRegs[MISCREG_PRRR_NS] = (1 << 19) | // 19 (0 << 18) | // 18 (0 << 17) | // 17 @@ -132,7 +247,7 @@ ISA::clear() (2 << 4) | // 5:4 (1 << 2) | // 3:2 0; // 1:0 - miscRegs[MISCREG_NMRR] = + miscRegs[MISCREG_NMRR_NS] = (1 << 30) | // 31:30 (0 << 26) | // 27:26 (0 << 24) | // 25:24 @@ -151,8 +266,6 @@ ISA::clear() miscRegs[MISCREG_CPACR] = 0; - // Initialize configurable default values - miscRegs[MISCREG_MIDR] = p->midr; miscRegs[MISCREG_ID_PFR0] = p->id_pfr0; miscRegs[MISCREG_ID_PFR1] = p->id_pfr1; @@ -169,27 +282,132 @@ ISA::clear() miscRegs[MISCREG_ID_ISAR4] = p->id_isar4; miscRegs[MISCREG_ID_ISAR5] = p->id_isar5; - miscRegs[MISCREG_FPSID] = p->fpsid; + if (haveLPAE) { + TTBCR ttbcr = miscRegs[MISCREG_TTBCR_NS]; + ttbcr.eae = 0; + miscRegs[MISCREG_TTBCR_NS] = ttbcr; + // Enforce consistency with system-level settings + miscRegs[MISCREG_ID_MMFR0] = (miscRegs[MISCREG_ID_MMFR0] & ~0xf) | 0x5; + } + + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_S] = sctlr; + miscRegs[MISCREG_SCR] = 0; + miscRegs[MISCREG_VBAR_S] = 0; + } else { + // we're always non-secure + miscRegs[MISCREG_SCR] = 1; + } //XXX We need to initialize the rest of the state. 
} +void +ISA::clear64(const ArmISAParams *p) +{ + CPSR cpsr = 0; + Addr rvbar = system->resetAddr64(); + switch (system->highestEL()) { + // Set initial EL to highest implemented EL using associated stack + // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset + // value + case EL3: + cpsr.mode = MODE_EL3H; + miscRegs[MISCREG_RVBAR_EL3] = rvbar; + break; + case EL2: + cpsr.mode = MODE_EL2H; + miscRegs[MISCREG_RVBAR_EL2] = rvbar; + break; + case EL1: + cpsr.mode = MODE_EL1H; + miscRegs[MISCREG_RVBAR_EL1] = rvbar; + break; + default: + panic("Invalid highest implemented exception level"); + break; + } + + // Initialize rest of CPSR + cpsr.daif = 0xf; // Mask all interrupts + cpsr.ss = 0; + cpsr.il = 0; + miscRegs[MISCREG_CPSR] = cpsr; + updateRegMap(cpsr); + + // Initialize other control registers + miscRegs[MISCREG_MPIDR_EL1] = 0x80000000; + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_EL3] = 0x30c50870; + miscRegs[MISCREG_SCR_EL3] = 0x00000030; // RES1 fields + // @todo: uncomment this to enable Virtualization + // } else if (haveVirtualization) { + // miscRegs[MISCREG_SCTLR_EL2] = 0x30c50870; + } else { + miscRegs[MISCREG_SCTLR_EL1] = 0x30c50870; + // Always non-secure + miscRegs[MISCREG_SCR_EL3] = 1; + } + + // Initialize configurable id registers + miscRegs[MISCREG_ID_AA64AFR0_EL1] = p->id_aa64afr0_el1; + miscRegs[MISCREG_ID_AA64AFR1_EL1] = p->id_aa64afr1_el1; + miscRegs[MISCREG_ID_AA64DFR0_EL1] = p->id_aa64dfr0_el1; + miscRegs[MISCREG_ID_AA64DFR1_EL1] = p->id_aa64dfr1_el1; + miscRegs[MISCREG_ID_AA64ISAR0_EL1] = p->id_aa64isar0_el1; + miscRegs[MISCREG_ID_AA64ISAR1_EL1] = p->id_aa64isar1_el1; + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = p->id_aa64mmfr0_el1; + miscRegs[MISCREG_ID_AA64MMFR1_EL1] = p->id_aa64mmfr1_el1; + miscRegs[MISCREG_ID_AA64PFR0_EL1] = p->id_aa64pfr0_el1; + miscRegs[MISCREG_ID_AA64PFR1_EL1] = p->id_aa64pfr1_el1; + + // Enforce consistency with system-level settings... 
+ + // EL3 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 15, 12, + haveSecurity ? 0x1 : 0x0); + // EL2 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8, + haveVirtualization ? 0x1 : 0x0); + // Large ASID support + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4, + haveLargeAsid64 ? 0x2 : 0x0); + // Physical address size + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 3, 0, + encodePhysAddrRange64(physAddrRange64)); +} + MiscReg ISA::readMiscRegNoEffect(int misc_reg) const { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - MiscReg val = miscRegs[flat_idx]; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + MiscReg val; + + if (lookUpMiscReg[flat_idx].lower == 0 || flat_idx == MISCREG_SPSR + || flat_idx == MISCREG_SCTLR_EL1) { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + val = miscRegs[flat_idx]; + } else + if (lookUpMiscReg[flat_idx].upper > 0) + val = ((miscRegs[lookUpMiscReg[flat_idx].lower] & mask(32)) + | (miscRegs[lookUpMiscReg[flat_idx].upper] << 32)); + else + val = miscRegs[lookUpMiscReg[flat_idx].lower]; - DPRINTF(MiscRegs, "Reading From misc reg %d (%d) : %#x\n", - misc_reg, flat_idx, val); return val; } @@ -197,33 +415,98 @@ ISA::readMiscRegNoEffect(int misc_reg) const MiscReg ISA::readMiscReg(int misc_reg, ThreadContext *tc) { - ArmSystem *arm_sys; + CPSR cpsr = 0; + PCState pc = 0; + SCR scr = 0; if (misc_reg == MISCREG_CPSR) { - CPSR cpsr = miscRegs[misc_reg]; - PCState pc = tc->pcState(); + cpsr = 
miscRegs[misc_reg]; + pc = tc->pcState(); cpsr.j = pc.jazelle() ? 1 : 0; cpsr.t = pc.thumb() ? 1 : 0; return cpsr; } - if (misc_reg >= MISCREG_CP15_UNIMP_START) - panic("Unimplemented CP15 register %s read.\n", - miscRegName[misc_reg]); - switch (misc_reg) { - case MISCREG_MPIDR: - arm_sys = dynamic_cast<ArmSystem*>(tc->getSystemPtr()); - assert(arm_sys); +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + else + panic("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + } +#endif - if (arm_sys->multiProc) { - return 0x80000000 | // multiprocessor extensions available - tc->cpuId(); + switch (unflattenMiscReg(misc_reg)) { + case MISCREG_HCR: + { + if (!haveVirtualization) + return 0; + else + return readMiscRegNoEffect(MISCREG_HCR); + } + case MISCREG_CPACR: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + // Only cp10, cp11, and ase are implemented, nothing else should + // be readable? 
(straight copy from the write code) + cpacrMask.cp10 = ones; + cpacrMask.cp11 = ones; + cpacrMask.asedis = ones; + + // Security Extensions may limit the readability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + MiscReg val = readMiscRegNoEffect(MISCREG_CPACR); + val &= cpacrMask; + DPRINTF(MiscRegs, "Reading misc reg %s: %#x\n", + miscRegName[misc_reg], val); + return val; + } + case MISCREG_MPIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return getMPIDR(system, tc); + } else { + return readMiscReg(MISCREG_VMPIDR, tc); + } + break; + case MISCREG_MPIDR_EL1: + // @todo in the absence of v8 virtualization support just return MPIDR_EL1 + return getMPIDR(system, tc) & 0xffffffff; + case MISCREG_VMPIDR: + // top bit defined as RES1 + return readMiscRegNoEffect(misc_reg) | 0x80000000; + case MISCREG_ID_AFR0: // not implemented, so alias MIDR + case MISCREG_ID_DFR0: // not implemented, so alias MIDR + case MISCREG_REVIDR: // not implemented, so alias MIDR + case MISCREG_MIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return readMiscRegNoEffect(misc_reg); } else { - return 0x80000000 | // multiprocessor extensions available - 0x40000000 | // in up system - tc->cpuId(); + return readMiscRegNoEffect(MISCREG_VPIDR); } break; + case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JMCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JIDR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_AIDR: // AUX ID set to 0 + case MISCREG_TCMTR: // No 
TCM's + return 0; + case MISCREG_CLIDR: warn_once("The clidr register always reports 0 caches.\n"); warn_once("clidr LoUIS field of 0b001 to match current " @@ -276,6 +559,75 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask; case MISCREG_FPSCR_EXC: return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_NZCV: + { + CPSR cpsr = 0; + cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); + cpsr.v = tc->readIntReg(INTREG_CONDCODES_V); + return cpsr; + } + case MISCREG_DAIF: + { + CPSR cpsr = 0; + cpsr.daif = (uint8_t) ((CPSR) miscRegs[MISCREG_CPSR]).daif; + return cpsr; + } + case MISCREG_SP_EL0: + { + return tc->readIntReg(INTREG_SP0); + } + case MISCREG_SP_EL1: + { + return tc->readIntReg(INTREG_SP1); + } + case MISCREG_SP_EL2: + { + return tc->readIntReg(INTREG_SP2); + } + case MISCREG_SPSEL: + { + return miscRegs[MISCREG_CPSR] & 0x1; + } + case MISCREG_CURRENTEL: + { + return miscRegs[MISCREG_CPSR] & 0xc; + } case MISCREG_L2CTLR: { // mostly unimplemented, just set NumCPUs field from sim and return @@ 
-289,8 +641,120 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) * Return 0 as we don't support debug architecture yet. */ return 0; - case MISCREG_DBGDSCR_INT: + case MISCREG_DBGDSCRint: return 0; + case MISCREG_ISR: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR)); + case MISCREG_ISR_EL1: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR_EL2), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR_EL3)); + case MISCREG_DCZID_EL0: + return 0x04; // DC ZVA clear 64-byte chunks + case MISCREG_HCPTR: + { + MiscReg val = readMiscRegNoEffect(misc_reg); + // The trap bit associated with CP14 is defined as RAZ + val &= ~(1 << 14); + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI + bool secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg mask = readMiscRegNoEffect(MISCREG_NSACR); + val |= (mask ^ 0x7FFF) & 0xBFFF; + } + // Set the bits for unimplemented coprocessors to RAO/WI + val |= 0x33FF; + return (val); + } + case MISCREG_HDFAR: // alias for secure DFAR + return readMiscRegNoEffect(MISCREG_DFAR_S); + case MISCREG_HIFAR: // alias for secure IFAR + return readMiscRegNoEffect(MISCREG_IFAR_S); + case MISCREG_HVBAR: // bottom bits reserved + return readMiscRegNoEffect(MISCREG_HVBAR) & 0xFFFFFFE0; + case MISCREG_SCTLR: // Some bits hardwired + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x72DD39FF) | 0x00C00818; // V8 SCTLR + case MISCREG_SCTLR_EL1: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x37DDDBFF) | 
0x30D00800; // V8 SCTLR_EL1 + case MISCREG_SCTLR_EL3: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x32CD183F) | 0x30C50830; // V8 SCTLR_EL3 + case MISCREG_HSCTLR: // FI comes from SCTLR + { + uint32_t mask = 1 << 27; + return (readMiscRegNoEffect(MISCREG_HSCTLR) & ~mask) | + (readMiscRegNoEffect(MISCREG_SCTLR) & mask); + } + case MISCREG_SCR: + { + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (cpsr.width) { + return readMiscRegNoEffect(MISCREG_SCR); + } else { + return readMiscRegNoEffect(MISCREG_SCR_EL3); + } + } + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + inform_once("Read CNTFREQ_EL0 frequency\n"); + return getSystemCounter(tc)->freq(); + case MISCREG_CNTPCT: + case MISCREG_CNTPCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + return getArchTimer(tc, tc->cpuId())->compareValue(); + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + return getArchTimer(tc, tc->cpuId())->timerValue(); + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + return getArchTimer(tc, tc->cpuId())->control(); + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. 
timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; + } return readMiscRegNoEffect(misc_reg); } @@ -300,15 +764,28 @@ ISA::setMiscRegNoEffect(int misc_reg, const MiscReg &val) { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - miscRegs[flat_idx] = val; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + + int flat_idx2 = lookUpMiscReg[flat_idx].upper; - DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", misc_reg, - flat_idx, val); + if (flat_idx2 > 0) { + miscRegs[lookUpMiscReg[flat_idx].lower] = bits(val, 31, 0); + miscRegs[flat_idx2] = bits(val, 63, 32); + DPRINTF(MiscRegs, "Writing to misc reg %d (%d:%d) : %#x\n", + misc_reg, flat_idx, flat_idx2, val); + } else { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + else if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + else + flat_idx = (lookUpMiscReg[flat_idx].lower > 0) ? 
+ lookUpMiscReg[flat_idx].lower : flat_idx; + miscRegs[flat_idx] = val; + DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", + misc_reg, flat_idx, val); + } } void @@ -317,8 +794,13 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) MiscReg newVal = val; int x; + bool secure_lookup; + bool hyp; System *sys; ThreadContext *oc; + uint8_t target_el; + uint16_t asid; + SCR scr; if (misc_reg == MISCREG_CPSR) { updateRegMap(val); @@ -346,12 +828,18 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) } else { tc->pcState(pc); } - } else if (misc_reg >= MISCREG_CP15_UNIMP_START && - misc_reg < MISCREG_CP15_END) { - panic("Unimplemented CP15 register %s wrote with %#x.\n", - miscRegName[misc_reg], val); } else { - switch (misc_reg) { +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + else + panic("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + } +#endif + switch (unflattenMiscReg(misc_reg)) { case MISCREG_CPACR: { @@ -362,7 +850,61 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) cpacrMask.cp10 = ones; cpacrMask.cp11 = ones; cpacrMask.asedis = ones; + + // Security Extensions may limit the writability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + + MiscReg old_val = readMiscRegNoEffect(MISCREG_CPACR); newVal &= cpacrMask; + newVal |= old_val & ~cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPACR_EL1: + { + const uint32_t ones = 
(uint32_t)(-1); + CPACR cpacrMask = 0; + cpacrMask.tta = ones; + cpacrMask.fpen = ones; + newVal &= cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL2: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; + cptrMask = 0; + cptrMask.res1_13_12_el2 = ones; + cptrMask.res1_9_0_el2 = ones; + newVal |= cptrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL3: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[misc_reg], newVal); } @@ -370,6 +912,11 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_CSSELR: warn_once("The csselr register isn't implemented.\n"); return; + + case MISCREG_DC_ZVA_Xt: + warn("Calling DC ZVA! Not Implemeted! 
Expect WEIRD results\n"); + return; + case MISCREG_FPSCR: { const uint32_t ones = (uint32_t)(-1); @@ -380,6 +927,12 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.ufc = ones; fpscrMask.ixc = ones; fpscrMask.idc = ones; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; fpscrMask.len = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; @@ -392,26 +945,72 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.z = ones; fpscrMask.n = ones; newVal = (newVal & (uint32_t)fpscrMask) | - (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); tc->getDecoderPtr()->setContext(newVal); } break; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_CPSR_Q: { assert(!(newVal & ~CpsrMaskQ)); - newVal = miscRegs[MISCREG_CPSR] | newVal; + newVal = readMiscRegNoEffect(MISCREG_CPSR) | newVal; misc_reg = 
MISCREG_CPSR; } break; case MISCREG_FPSCR_QC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrQcMask); misc_reg = MISCREG_FPSCR; } break; case MISCREG_FPSCR_EXC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrExcMask); misc_reg = MISCREG_FPSCR; } break; @@ -421,16 +1020,63 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) // bit 29 - valid only if fpexc[31] is 0 const uint32_t fpexcMask = 0x60000000; newVal = (newVal & fpexcMask) | - (miscRegs[MISCREG_FPEXC] & ~fpexcMask); + (readMiscRegNoEffect(MISCREG_FPEXC) & ~fpexcMask); + } + break; + case MISCREG_HCR: + { + if (!haveVirtualization) + return; + } + break; + case MISCREG_IFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.96 + const uint32_t ifsrMask = + mask(31, 13) | mask(11, 11) | mask(8, 6); + newVal = newVal & ~ifsrMask; + } + break; + case MISCREG_DFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.52 + const uint32_t dfsrMask = mask(31, 14) | mask(8, 8); + newVal = newVal & ~dfsrMask; + } + break; + case MISCREG_AMAIR0: + case MISCREG_AMAIR1: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.5 + // Valid only with LPAE + if (!haveLPAE) + return; + DPRINTF(MiscRegs, "Writing AMAIR: %#x\n", newVal); } break; + case MISCREG_SCR: + tc->getITBPtr()->invalidateMiscReg(); + tc->getDTBPtr()->invalidateMiscReg(); + break; case MISCREG_SCTLR: { DPRINTF(MiscRegs, "Writing SCTLR: %#x\n", newVal); - SCTLR sctlr = miscRegs[MISCREG_SCTLR]; + MiscRegIndex sctlr_idx; + scr = readMiscRegNoEffect(MISCREG_SCR); + if (haveSecurity && !scr.ns) { + sctlr_idx = MISCREG_SCTLR_S; + } else { + sctlr_idx = MISCREG_SCTLR_NS; + // The FI field (bit 21) is common between S/NS versions + // of the register, we store this in the secure copy of + // the reg + miscRegs[MISCREG_SCTLR_S] &= ~(1 << 21); + miscRegs[MISCREG_SCTLR_S] |= newVal & (1 << 21); + } + SCTLR sctlr = 
miscRegs[sctlr_idx]; SCTLR new_sctlr = newVal; - new_sctlr.nmfi = (bool)sctlr.nmfi; - miscRegs[MISCREG_SCTLR] = (MiscReg)new_sctlr; + new_sctlr.nmfi = ((bool)sctlr.nmfi) && !haveVirtualization; + miscRegs[sctlr_idx] = (MiscReg)new_sctlr; tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); @@ -440,6 +1086,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); + // @todo: double check this for security SCTLR other_sctlr = oc->readMiscRegNoEffect(MISCREG_SCTLR); if (!other_sctlr.c && oc->status() != ThreadContext::Halted) return; @@ -479,96 +1126,317 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_TLBTR: case MISCREG_MVFR0: case MISCREG_MVFR1: + + case MISCREG_ID_AA64AFR0_EL1: + case MISCREG_ID_AA64AFR1_EL1: + case MISCREG_ID_AA64DFR0_EL1: + case MISCREG_ID_AA64DFR1_EL1: + case MISCREG_ID_AA64ISAR0_EL1: + case MISCREG_ID_AA64ISAR1_EL1: + case MISCREG_ID_AA64MMFR0_EL1: + case MISCREG_ID_AA64MMFR1_EL1: + case MISCREG_ID_AA64PFR0_EL1: + case MISCREG_ID_AA64PFR1_EL1: // ID registers are constants. 
return; + // TLBI all entries, EL0&1 inner sharable (ignored) case MISCREG_TLBIALLIS: - case MISCREG_TLBIALL: + case MISCREG_TLBIALL: // TLBI all entries, EL0&1, + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAll(); - oc->getDTBPtr()->flushAll(); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); // If CheckerCPU is connected, need to notify it of a flush CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAll(); - checker->getDTBPtr()->flushAll(); + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); } } return; + // TLBI all entries, EL0&1, instruction side case MISCREG_ITLBIALL: - tc->getITBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI all entries, EL0&1, data side case MISCREG_DTLBIALL: - tc->getDTBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI based on VA, EL0&1 inner sharable (ignored) case MISCREG_TLBIMVAIS: case MISCREG_TLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && 
oc->getDTBPtr()); oc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); oc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { checker->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); checker->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); } } return; + // TLBI by ASID, EL0&1, inner sharable case MISCREG_TLBIASIDIS: case MISCREG_TLBIASID: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAsid(bits(newVal, 7,0)); - oc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + oc->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAsid(bits(newVal, 7,0)); - checker->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + checker->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); } } return; + // TLBI by address, EL0&1, inner sharable (ignored) case MISCREG_TLBIMVAAIS: case MISCREG_TLBIMVAA: - sys = tc->getSystemPtr(); - for (x = 0; x < sys->numContexts(); x++) { - oc = sys->getThreadContext(x); - assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushMva(mbits(newVal, 31,12)); - oc->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - - CheckerCPU *checker = oc->getCheckerCpuPtr(); - if (checker) { 
- checker->getITBPtr()->flushMva(mbits(newVal, 31,12)); - checker->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - } - } + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 0; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); + return; + // TLBI by address, EL2, hypervisor mode + case MISCREG_TLBIMVAH: + case MISCREG_TLBIMVAHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 1; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); return; + // TLBI by address and asid, EL0&1, instruction side only case MISCREG_ITLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by address and asid, EL0&1, data side only case MISCREG_DTLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by ASID, EL0&1, instruction side only case MISCREG_ITLBIASID: - tc->getITBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); return; + // TLBI by ASID, EL0&1, data side only case MISCREG_DTLBIASID: - tc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; +
tc->getDTBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); + return; + // Invalidate entire Non-secure Hyp/Non-Hyp Unified TLB + case MISCREG_TLBIALLNSNH: + case MISCREG_TLBIALLNSNHIS: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + hyp = 0; + tlbiALLN(tc, hyp, target_el); + return; + // TLBI all entries, EL2, hyp, + case MISCREG_TLBIALLH: + case MISCREG_TLBIALLHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + hyp = 1; + tlbiALLN(tc, hyp, target_el); + return; + // AArch64 TLBI: invalidate all entries EL3 + case MISCREG_TLBI_ALLE3IS: + case MISCREG_TLBI_ALLE3: + assert64(tc); + target_el = 3; + secure_lookup = true; + tlbiALL(tc, secure_lookup, target_el); + return; + // @todo: uncomment this to enable Virtualization + // case MISCREG_TLBI_ALLE2IS: + // case MISCREG_TLBI_ALLE2: + // TLBI all entries, EL0&1 + case MISCREG_TLBI_ALLE1IS: + case MISCREG_TLBI_ALLE1: + // AArch64 TLBI: invalidate all entries, stage 1, current VMID + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VMALLE1: + // AArch64 TLBI: invalidate all entries, stages 1 & 2, current VMID + case MISCREG_TLBI_VMALLS12E1IS: + case MISCREG_TLBI_VMALLS12E1: + // @todo: handle VMID and stage 2 to enable Virtualization + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiALL(tc, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by VA and ASID, stage 1, current VMID + // VAEx(IS) and VALEx(IS) are the same because TLBs only store entries + // from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + // TLBI all entries, EL0&1 + case MISCREG_TLBI_VAE3IS_Xt: + case MISCREG_TLBI_VAE3_Xt: + // TLBI by VA, EL3 regime stage 1, last level walk + case MISCREG_TLBI_VALE3IS_Xt: + case MISCREG_TLBI_VALE3_Xt: + assert64(tc); + target_el = 3; + asid = 0xbeef; // does not matter, tlbi is global + secure_lookup = 
true; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA, EL2 + case MISCREG_TLBI_VAE2IS_Xt: + case MISCREG_TLBI_VAE2_Xt: + // TLBI by VA, EL2, stage1 last level walk + case MISCREG_TLBI_VALE2IS_Xt: + case MISCREG_TLBI_VALE2_Xt: + assert64(tc); + target_el = 2; + asid = 0xbeef; // does not matter, tlbi is global + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA EL1 & 0, stage1, ASID, current VMID + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VALE1_Xt: + assert64(tc); + asid = bits(newVal, 63, 48); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by ASID, stage 1, current VMID + // @todo: handle VMID to enable Virtualization + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + asid = bits(newVal, 63, 48); + if (haveLargeAsid64) + asid &= mask(8); + oc->getITBPtr()->flushAsid(asid, secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(asid, secure_lookup, target_el); + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAsid(asid, + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(asid, + secure_lookup, target_el); + } + } + return; + // AArch64 TLBI: invalidate by VA, ASID, stage 1, current VMID + // VAAE1(IS) and VAALE1(IS) are the same because TLBs only store + // entries from the last level of translation table walks + // 
@todo: handle VMID to enable Virtualization + case MISCREG_TLBI_VAAE1IS_Xt: + case MISCREG_TLBI_VAAE1_Xt: + case MISCREG_TLBI_VAALE1IS_Xt: + case MISCREG_TLBI_VAALE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + // @todo: extra controls on TLBI broadcast? + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + oc->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + oc->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + checker->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + } + } + return; + // AArch64 TLBI: invalidate by IPA, stage 2, current VMID + case MISCREG_TLBI_IPAS2LE1IS_Xt: + case MISCREG_TLBI_IPAS2LE1_Xt: + case MISCREG_TLBI_IPAS2E1IS_Xt: + case MISCREG_TLBI_IPAS2E1_Xt: + assert64(tc); + // @todo: implement these as part of Virtualization + warn("Not doing anything for write of miscreg ITLB_IPAS2\n"); return; case MISCREG_ACTLR: warn("Not doing anything for write of miscreg ACTLR\n"); @@ -591,77 +1459,566 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) warn("Not doing anything for write to miscreg %s\n", miscRegName[misc_reg]); break; - case MISCREG_V2PCWPR: - case MISCREG_V2PCWPW: - case MISCREG_V2PCWUR: - case MISCREG_V2PCWUW: - case MISCREG_V2POWPR: - case MISCREG_V2POWPW: - case MISCREG_V2POWUR: - case MISCREG_V2POWUW: + case MISCREG_HSTR: // TJDBX, now redefined to be RES0 + { + HSTR hstrMask = 0; + hstrMask.tjdbx = 1; + newVal &= ~((uint32_t) hstrMask); + break; + } + case MISCREG_HCPTR: + { + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI.
Same applies to NSASEDIS + secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg oldValue = readMiscRegNoEffect(MISCREG_HCPTR); + MiscReg mask = (readMiscRegNoEffect(MISCREG_NSACR) ^ 0x7FFF) & 0xBFFF; + newVal = (newVal & ~mask) | (oldValue & mask); + } + break; + } + case MISCREG_HDFAR: // alias for secure DFAR + misc_reg = MISCREG_DFAR_S; + break; + case MISCREG_HIFAR: // alias for secure IFAR + misc_reg = MISCREG_IFAR_S; + break; + case MISCREG_ATS1CPR: + case MISCREG_ATS1CPW: + case MISCREG_ATS1CUR: + case MISCREG_ATS1CUW: + case MISCREG_ATS12NSOPR: + case MISCREG_ATS12NSOPW: + case MISCREG_ATS12NSOUR: + case MISCREG_ATS12NSOUW: + case MISCREG_ATS1HR: + case MISCREG_ATS1HW: { RequestPtr req = new Request; - unsigned flags; - BaseTLB::Mode mode; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; Fault fault; switch(misc_reg) { - case MISCREG_V2PCWPR: - flags = TLB::MustBeOne; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWPW: - flags = TLB::MustBeOne; - mode = BaseTLB::Write; - break; - case MISCREG_V2PCWUR: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWUW: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Write; - break; - default: - panic("Security Extensions not implemented!"); + case MISCREG_ATS1CPR: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CPW: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1CUR: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CUW: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOPR: + if (!haveSecurity) + panic("Security Extensions required for 
ATS12NSOPR"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOPW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPW"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOUR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUR"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOUW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUW"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1HR: // only really useful from secure mode. + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1HW: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; } - warn("Translating via MISCREG in atomic mode! Fix Me!\n"); - req->setVirt(0, val, 1, flags, tc->pcState().pc(), - Request::funcMasterId); - fault = tc->getDTBPtr()->translateAtomic(req, tc, mode); + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. + warn("Translating via MISCREG(%d) in functional mode! 
Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, tranType); + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + HCR hcr = readMiscRegNoEffect(MISCREG_HCR); + + MiscReg newVal; if (fault == NoFault) { - miscRegs[MISCREG_PAR] = - (req->getPaddr() & 0xfffff000) | - (tc->getDTBPtr()->getAttr() ); + Addr paddr = req->getPaddr(); + if (haveLPAE && (ttbcr.eae || tranType & TLB::HypMode || + ((tranType & TLB::S1S2NsTran) && hcr.vm) )) { + newVal = (paddr & mask(39, 12)) | + (tc->getDTBPtr()->getAttr()); + } else { + newVal = (paddr & 0xfffff000) | + (tc->getDTBPtr()->getAttr()); + } DPRINTF(MiscRegs, "MISCREG: Translated addr 0x%08x: PAR: 0x%08x\n", - val, miscRegs[MISCREG_PAR]); - } - else { + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); // Set fault bit and FSR - FSR fsr = miscRegs[MISCREG_DFSR]; - miscRegs[MISCREG_PAR] = - (fsr.ext << 6) | - (fsr.fsHigh << 5) | - (fsr.fsLow << 1) | - 0x1; // F bit + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + if (newVal) { + // LPAE - rearrange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + } else { + // VMSA - rearrange fault status + newVal |= ((fsr >> 0) & 0xf) << 1; + newVal |= ((fsr >> 10) & 0x1) << 5; + newVal |= ((fsr >> 12) & 0x1) << 6; + } + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ?
0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr 0x%08x fault fsr %#x: PAR: 0x%08x\n", + val, fsr, newVal); } + delete req; + setMiscRegNoEffect(MISCREG_PAR, newVal); return; } + case MISCREG_TTBCR: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + const uint32_t ones = (uint32_t)(-1); + TTBCR ttbcrMask = 0; + TTBCR ttbcrNew = newVal; + + // ARM DDI 0406C.b, ARMv7-32 + ttbcrMask.n = ones; // T0SZ + if (haveSecurity) { + ttbcrMask.pd0 = ones; + ttbcrMask.pd1 = ones; + } + ttbcrMask.epd0 = ones; + ttbcrMask.irgn0 = ones; + ttbcrMask.orgn0 = ones; + ttbcrMask.sh0 = ones; + ttbcrMask.ps = ones; // T1SZ + ttbcrMask.a1 = ones; + ttbcrMask.epd1 = ones; + ttbcrMask.irgn1 = ones; + ttbcrMask.orgn1 = ones; + ttbcrMask.sh1 = ones; + if (haveLPAE) + ttbcrMask.eae = ones; + + if (haveLPAE && ttbcrNew.eae) { + newVal = newVal & ttbcrMask; + } else { + newVal = (newVal & ttbcrMask) | (ttbcr & (~ttbcrMask)); + } + } + case MISCREG_TTBR0: + case MISCREG_TTBR1: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + if (haveLPAE) { + if (ttbcr.eae) { + // ARMv7 bit 63-56, 47-40 reserved, UNK/SBZP + // ARMv8 AArch32 bit 63-56 only + uint64_t ttbrMask = mask(63,56) | mask(47,40); + newVal = (newVal & (~ttbrMask)); + } + } + } case MISCREG_CONTEXTIDR: case MISCREG_PRRR: case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: case MISCREG_DACR: + case MISCREG_VTTBR: + case MISCREG_SCR_EL3: + case MISCREG_SCTLR_EL1: + case MISCREG_SCTLR_EL2: + case MISCREG_SCTLR_EL3: + case MISCREG_TCR_EL1: + case MISCREG_TCR_EL2: + case MISCREG_TCR_EL3: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TTBR0_EL2: + case MISCREG_TTBR0_EL3: tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); break; + case MISCREG_NZCV: + { + CPSR cpsr = val; + + tc->setIntReg(INTREG_CONDCODES_NZ, cpsr.nz); + tc->setIntReg(INTREG_CONDCODES_C, cpsr.c); + tc->setIntReg(INTREG_CONDCODES_V, cpsr.v); + } + break; + case MISCREG_DAIF: + { + CPSR cpsr = 
miscRegs[MISCREG_CPSR]; + cpsr.daif = (uint8_t) ((CPSR) newVal).daif; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_SP_EL0: + tc->setIntReg(INTREG_SP0, newVal); + break; + case MISCREG_SP_EL1: + tc->setIntReg(INTREG_SP1, newVal); + break; + case MISCREG_SP_EL2: + tc->setIntReg(INTREG_SP2, newVal); + break; + case MISCREG_SPSEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.sp = (uint8_t) ((CPSR) newVal).sp; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_CURRENTEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.el = (uint8_t) ((CPSR) newVal).el; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_AT_S1E1R_Xt: + case MISCREG_AT_S1E1W_Xt: + case MISCREG_AT_S1E0R_Xt: + case MISCREG_AT_S1E0W_Xt: + case MISCREG_AT_S1E2R_Xt: + case MISCREG_AT_S1E2W_Xt: + case MISCREG_AT_S12E1R_Xt: + case MISCREG_AT_S12E1W_Xt: + case MISCREG_AT_S12E0R_Xt: + case MISCREG_AT_S12E0W_Xt: + case MISCREG_AT_S1E3R_Xt: + case MISCREG_AT_S1E3W_Xt: + { + RequestPtr req = new Request; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; + Fault fault; + switch(misc_reg) { + case MISCREG_AT_S1E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E2R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E2W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = 
TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E3R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E3W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Write; + break; + } + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. + warn("Translating via MISCREG(%d) in functional mode! 
Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, + tranType); + + MiscReg newVal; + if (fault == NoFault) { + Addr paddr = req->getPaddr(); + uint64_t attr = tc->getDTBPtr()->getAttr(); + uint64_t attr1 = attr >> 56; + if (!attr1 || attr1 ==0x44) { + attr |= 0x100; + attr &= ~ uint64_t(0x80); + } + newVal = (paddr & mask(47, 12)) | attr; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x: PAR_EL1: %#xx\n", + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + // Set fault bit and FSR + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + // rearrange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x fault fsr %#x: PAR: %#x\n", + val, fsr, newVal); + } + delete req; + setMiscRegNoEffect(MISCREG_PAR_EL1, newVal); + return; + } + case MISCREG_SPSR_EL3: + case MISCREG_SPSR_EL2: + case MISCREG_SPSR_EL1: + // Force bits 23:21 to 0 + newVal = val & ~(0x7 << 21); + break; case MISCREG_L2CTLR: warn("miscreg L2CTLR (%s) written with %#x. ignored...\n", miscRegName[misc_reg], uint32_t(val)); + break; + + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + getSystemCounter(tc)->setFreq(val); + break; + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + getArchTimer(tc, tc->cpuId())->setCompareValue(val); + break; + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + getArchTimer(tc, tc->cpuId())->setTimerValue(val); + break; + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + getArchTimer(tc, tc->cpuId())->setControl(val); + break; + // PL1 phys.
timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; } } setMiscRegNoEffect(misc_reg, newVal); } +void +ISA::tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, bool secure_lookup, + uint8_t target_el) +{ + if (haveLargeAsid64) + asid &= mask(8); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + oc->getDTBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + checker->getDTBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + } + } +} + +void +ISA::tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); + + // If CheckerCPU is connected, need to notify it of a flush + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { 
+ checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); + } + } +} + +void +ISA::tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllNs(hyp, target_el); + oc->getDTBPtr()->flushAllNs(hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllNs(hyp, target_el); + checker->getDTBPtr()->flushAllNs(hyp, target_el); + } + } +} + +void +ISA::tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, bool hyp, + uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + oc->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + checker->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + } + } +} + +::GenericTimer::SystemCounter * +ISA::getSystemCounter(ThreadContext *tc) +{ + ::GenericTimer::SystemCounter *cnt = ((ArmSystem *) tc->getSystemPtr())-> + getSystemCounter(); + if (cnt == NULL) { + panic("System counter not available\n"); + } + return cnt; +} + +::GenericTimer::ArchTimer * +ISA::getArchTimer(ThreadContext *tc, int cpu_id) +{ + ::GenericTimer::ArchTimer *timer = ((ArmSystem *) tc->getSystemPtr())-> + getArchTimer(cpu_id); + if (timer == NULL) { + panic("Architected timer not available\n"); + } + return timer; +} + } ArmISA::ISA * diff --git a/src/arch/arm/isa.hh 
b/src/arch/arm/isa.hh index c747fc770..c72d5d50f 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,9 +44,11 @@ #define __ARCH_ARM_ISA_HH__ #include "arch/arm/registers.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/types.hh" #include "debug/Checkpoint.hh" +#include "dev/arm/generic_timer.hh" #include "sim/sim_object.hh" struct ArmISAParams; @@ -56,45 +58,174 @@ class EventManager; namespace ArmISA { + + /** + * At the moment there are 57 registers which need to be aliased/ + * translated with other registers in the ISA. This enum helps with that + * translation. + */ + enum translateTable { + miscRegTranslateCSSELR_EL1, + miscRegTranslateSCTLR_EL1, + miscRegTranslateSCTLR_EL2, + miscRegTranslateACTLR_EL1, + miscRegTranslateACTLR_EL2, + miscRegTranslateCPACR_EL1, + miscRegTranslateCPTR_EL2, + miscRegTranslateHCR_EL2, + miscRegTranslateMDCR_EL2, + miscRegTranslateHSTR_EL2, + miscRegTranslateHACR_EL2, + miscRegTranslateTTBR0_EL1, + miscRegTranslateTTBR1_EL1, + miscRegTranslateTTBR0_EL2, + miscRegTranslateVTTBR_EL2, + miscRegTranslateTCR_EL1, + miscRegTranslateTCR_EL2, + miscRegTranslateVTCR_EL2, + miscRegTranslateAFSR0_EL1, + miscRegTranslateAFSR1_EL1, + miscRegTranslateAFSR0_EL2, + miscRegTranslateAFSR1_EL2, + miscRegTranslateESR_EL2, + miscRegTranslateFAR_EL1, + miscRegTranslateFAR_EL2, + miscRegTranslateHPFAR_EL2, + miscRegTranslatePAR_EL1, + miscRegTranslateMAIR_EL1, + miscRegTranslateMAIR_EL2, + miscRegTranslateAMAIR_EL1, + miscRegTranslateVBAR_EL1, + miscRegTranslateVBAR_EL2, + miscRegTranslateCONTEXTIDR_EL1, + miscRegTranslateTPIDR_EL0, + miscRegTranslateTPIDRRO_EL0, + miscRegTranslateTPIDR_EL1, + miscRegTranslateTPIDR_EL2, + miscRegTranslateTEECR32_EL1, + miscRegTranslateCNTFRQ_EL0, + miscRegTranslateCNTPCT_EL0, + 
miscRegTranslateCNTVCT_EL0, + miscRegTranslateCNTVOFF_EL2, + miscRegTranslateCNTKCTL_EL1, + miscRegTranslateCNTHCTL_EL2, + miscRegTranslateCNTP_TVAL_EL0, + miscRegTranslateCNTP_CTL_EL0, + miscRegTranslateCNTP_CVAL_EL0, + miscRegTranslateCNTV_TVAL_EL0, + miscRegTranslateCNTV_CTL_EL0, + miscRegTranslateCNTV_CVAL_EL0, + miscRegTranslateCNTHP_TVAL_EL2, + miscRegTranslateCNTHP_CTL_EL2, + miscRegTranslateCNTHP_CVAL_EL2, + miscRegTranslateDACR32_EL2, + miscRegTranslateIFSR32_EL2, + miscRegTranslateTEEHBR32_EL1, + miscRegTranslateSDER32_EL3, + miscRegTranslateMax + }; + class ISA : public SimObject { protected: + // Parent system + ArmSystem *system; + + // Cached copies of system-level properties + bool haveSecurity; + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + uint8_t physAddrRange64; + + /** Register translation entry used in lookUpMiscReg */ + struct MiscRegLUTEntry { + uint32_t lower; + uint32_t upper; + }; + + struct MiscRegInitializerEntry { + uint32_t index; + struct MiscRegLUTEntry entry; + }; + + /** Register table noting all translations */ + static const struct MiscRegInitializerEntry + MiscRegSwitch[miscRegTranslateMax]; + + /** Translation table accessible via the value of the register */ + std::vector<struct MiscRegLUTEntry> lookUpMiscReg; + MiscReg miscRegs[NumMiscRegs]; const IntRegIndex *intRegMap; void updateRegMap(CPSR cpsr) { - switch (cpsr.mode) { - case MODE_USER: - case MODE_SYSTEM: - intRegMap = IntRegUsrMap; - break; - case MODE_FIQ: - intRegMap = IntRegFiqMap; - break; - case MODE_IRQ: - intRegMap = IntRegIrqMap; - break; - case MODE_SVC: - intRegMap = IntRegSvcMap; - break; - case MODE_MON: - intRegMap = IntRegMonMap; - break; - case MODE_ABORT: - intRegMap = IntRegAbtMap; - break; - case MODE_UNDEFINED: - intRegMap = IntRegUndMap; - break; - default: - panic("Unrecognized mode setting in CPSR.\n"); + if (cpsr.width == 0) { + intRegMap = IntReg64Map; + } else { + switch (cpsr.mode) { + case MODE_USER: + case MODE_SYSTEM: 
+ intRegMap = IntRegUsrMap; + break; + case MODE_FIQ: + intRegMap = IntRegFiqMap; + break; + case MODE_IRQ: + intRegMap = IntRegIrqMap; + break; + case MODE_SVC: + intRegMap = IntRegSvcMap; + break; + case MODE_MON: + intRegMap = IntRegMonMap; + break; + case MODE_ABORT: + intRegMap = IntRegAbtMap; + break; + case MODE_HYP: + intRegMap = IntRegHypMap; + break; + case MODE_UNDEFINED: + intRegMap = IntRegUndMap; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } } } + ::GenericTimer::SystemCounter * getSystemCounter(ThreadContext *tc); + ::GenericTimer::ArchTimer * getArchTimer(ThreadContext *tc, + int cpu_id); + + + private: + inline void assert32(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(cpsr.width); + } + + inline void assert64(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(!cpsr.width); + } + + void tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, + bool secure_lookup, uint8_t target_el); + + void tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el); + + void tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el); + + void tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, + bool hyp, uint8_t target_el); + public: void clear(); + void clear64(const ArmISAParams *p); MiscReg readMiscRegNoEffect(int misc_reg) const; MiscReg readMiscReg(int misc_reg, ThreadContext *tc); @@ -109,28 +240,28 @@ namespace ArmISA return intRegMap[reg]; } else if (reg < NUM_INTREGS) { return reg; - } else { - int mode = reg / intRegsPerMode; - reg = reg % intRegsPerMode; - switch (mode) { - case MODE_USER: - case MODE_SYSTEM: - return INTREG_USR(reg); - case MODE_FIQ: - return INTREG_FIQ(reg); - case MODE_IRQ: - return INTREG_IRQ(reg); - case MODE_SVC: - return INTREG_SVC(reg); - case MODE_MON: - return INTREG_MON(reg); - case MODE_ABORT: - return INTREG_ABT(reg); - case MODE_UNDEFINED: - return INTREG_UND(reg); + } else if (reg == INTREG_SPX) { + 
CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = opModeToEL( + (OperatingMode) (uint8_t) cpsr.mode); + if (!cpsr.sp && el != EL0) + return INTREG_SP0; + switch (el) { + case EL3: + return INTREG_SP3; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return INTREG_SP2; + case EL1: + return INTREG_SP1; + case EL0: + return INTREG_SP0; default: - panic("Flattening into an unknown mode.\n"); + panic("Invalid exception level"); + break; } + } else { + return flattenIntRegModeIndex(reg); } } @@ -150,47 +281,127 @@ namespace ArmISA int flattenMiscIndex(int reg) const { + int flat_idx = reg; + if (reg == MISCREG_SPSR) { - int spsr_idx = NUM_MISCREGS; CPSR cpsr = miscRegs[MISCREG_CPSR]; switch (cpsr.mode) { + case MODE_EL0T: + warn("User mode does not have SPSR\n"); + flat_idx = MISCREG_SPSR; + break; + case MODE_EL1T: + case MODE_EL1H: + flat_idx = MISCREG_SPSR_EL1; + break; + case MODE_EL2T: + case MODE_EL2H: + flat_idx = MISCREG_SPSR_EL2; + break; + case MODE_EL3T: + case MODE_EL3H: + flat_idx = MISCREG_SPSR_EL3; + break; case MODE_USER: warn("User mode does not have SPSR\n"); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; case MODE_FIQ: - spsr_idx = MISCREG_SPSR_FIQ; + flat_idx = MISCREG_SPSR_FIQ; break; case MODE_IRQ: - spsr_idx = MISCREG_SPSR_IRQ; + flat_idx = MISCREG_SPSR_IRQ; break; case MODE_SVC: - spsr_idx = MISCREG_SPSR_SVC; + flat_idx = MISCREG_SPSR_SVC; break; case MODE_MON: - spsr_idx = MISCREG_SPSR_MON; + flat_idx = MISCREG_SPSR_MON; break; case MODE_ABORT: - spsr_idx = MISCREG_SPSR_ABT; + flat_idx = MISCREG_SPSR_ABT; + break; + case MODE_HYP: + flat_idx = MISCREG_SPSR_HYP; break; case MODE_UNDEFINED: - spsr_idx = MISCREG_SPSR_UND; + flat_idx = MISCREG_SPSR_UND; break; default: warn("Trying to access SPSR in an invalid mode: %d\n", cpsr.mode); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; } - return spsr_idx; + } else if (miscRegInfo[reg][MISCREG_MUTEX]) { + // Mutually exclusive CP15 register + 
switch (reg) { + case MISCREG_PRRR_MAIR0: + case MISCREG_PRRR_MAIR0_NS: + case MISCREG_PRRR_MAIR0_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_PRRR_MAIR0; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR0 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_PRRR + + idxOffset); + } + break; + case MISCREG_NMRR_MAIR1: + case MISCREG_NMRR_MAIR1_NS: + case MISCREG_NMRR_MAIR1_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_NMRR_MAIR1; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR1 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_NMRR + + idxOffset); + } + break; + case MISCREG_PMXEVTYPER_PMCCFILTR: + { + PMSELR pmselr = miscRegs[MISCREG_PMSELR]; + if (pmselr.sel == 31) + flat_idx = flattenMiscIndex(MISCREG_PMCCFILTR); + else + flat_idx = flattenMiscIndex(MISCREG_PMXEVTYPER); + } + break; + default: + panic("Unrecognized misc. register.\n"); + break; + } + } else { + if (miscRegInfo[reg][MISCREG_BANKED]) { + bool secureReg = haveSecurity && + inSecureState(miscRegs[MISCREG_SCR], + miscRegs[MISCREG_CPSR]); + flat_idx += secureReg ? 
2 : 1; + } } - return reg; + return flat_idx; } void serialize(std::ostream &os) { DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n"); SERIALIZE_ARRAY(miscRegs, NumMiscRegs); + + SERIALIZE_SCALAR(haveSecurity); + SERIALIZE_SCALAR(haveLPAE); + SERIALIZE_SCALAR(haveVirtualization); + SERIALIZE_SCALAR(haveLargeAsid64); + SERIALIZE_SCALAR(physAddrRange64); } void unserialize(Checkpoint *cp, const std::string &section) { @@ -198,6 +409,12 @@ namespace ArmISA UNSERIALIZE_ARRAY(miscRegs, NumMiscRegs); CPSR tmp_cpsr = miscRegs[MISCREG_CPSR]; updateRegMap(tmp_cpsr); + + UNSERIALIZE_SCALAR(haveSecurity); + UNSERIALIZE_SCALAR(haveLPAE); + UNSERIALIZE_SCALAR(haveVirtualization); + UNSERIALIZE_SCALAR(haveLargeAsid64); + UNSERIALIZE_SCALAR(physAddrRange64); } void startup(ThreadContext *tc) {} diff --git a/src/arch/arm/isa/bitfields.isa b/src/arch/arm/isa/bitfields.isa index 5a8b5db6d..6006cfb2d 100644 --- a/src/arch/arm/isa/bitfields.isa +++ b/src/arch/arm/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,6 +73,7 @@ def bitfield SEVEN_AND_FOUR sevenAndFour; def bitfield THUMB thumb; def bitfield BIGTHUMB bigThumb; +def bitfield AARCH64 aarch64; // Other def bitfield COND_CODE condCode; diff --git a/src/arch/arm/isa/decoder/aarch64.isa b/src/arch/arm/isa/decoder/aarch64.isa new file mode 100644 index 000000000..a6c0fa2df --- /dev/null +++ b/src/arch/arm/isa/decoder/aarch64.isa @@ -0,0 +1,48 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The 64 bit ARM decoder +// -------------------------- +// + + +Aarch64::aarch64(); + diff --git a/src/arch/arm/isa/decoder/arm.isa b/src/arch/arm/isa/decoder/arm.isa index 4bd9d5cf4..f0c0dec18 100644 --- a/src/arch/arm/isa/decoder/arm.isa +++ b/src/arch/arm/isa/decoder/arm.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ format DataOp { 0x9: ArmBlxReg::armBlxReg(); } 0x5: ArmSatAddSub::armSatAddSub(); - 0x7: Breakpoint::bkpt(); + 0x6: ArmERet::armERet(); + 0x7: decode OPCODE_22 { + 0: Breakpoint::bkpt(); + 1: ArmSmcHyp::armSmcHyp(); + } } 0x1: ArmHalfWordMultAndMultAcc::armHalfWordMultAndMultAcc(); } @@ -105,6 +109,10 @@ format DataOp { } 0x6: decode CPNUM { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStore(); + 0xf: decode OPCODE_20 { + 0: Mcrr15::Mcrr15(); + 1: Mrrc15::Mrrc15(); + } } 0x7: decode OPCODE_24 { 0: decode OPCODE_4 { diff --git a/src/arch/arm/isa/decoder/decoder.isa b/src/arch/arm/isa/decoder/decoder.isa index cf7d17871..94685b943 100644 --- a/src/arch/arm/isa/decoder/decoder.isa +++ b/src/arch/arm/isa/decoder/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,8 +41,12 @@ // Authors: Gabe Black decode THUMB default Unknown::unknown() { -0: -##include "arm.isa" +0: decode AARCH64 { + 0: + ##include "arm.isa" + 1: + ##include "aarch64.isa" +} 1: ##include "thumb.isa" } diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa index f54cc728d..31495793e 100644 --- a/src/arch/arm/isa/decoder/thumb.isa +++ b/src/arch/arm/isa/decoder/thumb.isa @@ -95,8 +95,14 
@@ decode BIGTHUMB { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre(); 0xf: decode HTOPCODE_9_4 { 0x00: Unknown::undefined(); - 0x04: WarnUnimpl::mcrr(); // mcrr2 - 0x05: WarnUnimpl::mrrc(); // mrrc2 + 0x04: decode LTCOPROC { + 0xf: Mcrr15::Mcrr15(); + default: WarnUnimpl::mcrr(); // mcrr2 + } + 0x05: decode LTCOPROC { + 0xf: Mrrc15::Mrrc15(); + default: WarnUnimpl::mrrc(); // mrrc2 + } 0x02, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e: WarnUnimpl::stc(); // stc2 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa new file mode 100644 index 000000000..3ed70ce81 --- /dev/null +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -0,0 +1,2035 @@ +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black +// Thomas Grocutt +// Mbou Eyole +// Giacomo Gabrielli + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeDataProcImm(ExtMachInst machInst); + StaticInstPtr decodeBranchExcSys(ExtMachInst machInst); + StaticInstPtr decodeLoadsStores(ExtMachInst machInst); + StaticInstPtr decodeDataProcReg(ExtMachInst machInst); + + StaticInstPtr decodeFpAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeFp(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcImm(ExtMachInst machInst) + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + + uint8_t opc = bits(machInst, 30, 29); + bool sf = bits(machInst, 31); + bool n = bits(machInst, 22); + uint8_t immr = bits(machInst, 21, 16); + uint8_t imms = bits(machInst, 15, 10); + switch (bits(machInst, 25, 23)) { + case 0x0: + case 0x1: + { + uint64_t immlo = bits(machInst, 30, 29); + uint64_t immhi = bits(machInst, 23, 5); + uint64_t imm = (immlo << 0) | (immhi << 2); + if (bits(machInst, 31) == 0) + return new AdrXImm(machInst, rd, INTREG_ZERO, sext<21>(imm)); + else + return new AdrpXImm(machInst, rd, INTREG_ZERO, + sext<33>(imm << 12)); + } + case 0x2: + case 0x3: + { + uint32_t imm12 = bits(machInst, 21, 10); + uint8_t shift = bits(machInst, 23, 22); + uint32_t imm; + if (shift == 0x0) + imm = imm12 << 0; + else if (shift == 0x1) + imm = imm12 << 12; + else + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new AddXImm(machInst, rdsp, rnsp, imm); + case 0x1: + return new AddXImmCc(machInst, rd, rnsp, imm); + case 0x2: + return new SubXImm(machInst, rdsp, rnsp, imm); + case 0x3: + return new SubXImmCc(machInst, rd, 
rnsp, imm); + } + } + case 0x4: + { + if (!sf && n) + return new Unknown64(machInst); + // len = MSB(n:NOT(imms)), len < 1 is undefined. + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size. + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits. + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + uint8_t width = sf ? 64 : 32; + // Replicate that to fill up the immediate. + for (unsigned i = 1; i < (width / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + switch (opc) { + case 0x0: + return new AndXImm(machInst, rdsp, rn, imm); + case 0x1: + return new OrrXImm(machInst, rdsp, rn, imm); + case 0x2: + return new EorXImm(machInst, rdsp, rn, imm); + case 0x3: + return new AndXImmCc(machInst, rd, rn, imm); + } + } + case 0x5: + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + uint32_t imm16 = bits(machInst, 20, 5); + uint32_t hw = bits(machInst, 22, 21); + switch (opc) { + case 0x0: + return new Movn(machInst, rd, imm16, hw * 16); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new Movz(machInst, rd, imm16, hw * 16); + case 0x3: + return new Movk(machInst, rd, imm16, hw * 16); + } + } + case 0x6: + if ((sf != n) || (!sf && (bits(immr, 5) || bits(imms, 5)))) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Sbfm64(machInst, rd, rn, immr, imms); + case 0x1: + return new Bfm64(machInst, rd, rn, immr, imms); + case 0x2: + return new Ubfm64(machInst, rd, rn, immr, imms); + case 0x3: + return new Unknown64(machInst); + } + case 0x7: + { + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 
20, 16); + if (opc || bits(machInst, 21)) + return new Unknown64(machInst); + else + return new Extr64(machInst, rd, rn, rm, imms); + } + } + return new FailUnimplemented("Unhandled Case8", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeBranchExcSys(ExtMachInst machInst) + { + switch (bits(machInst, 30, 29)) { + case 0x0: + { + int64_t imm = sext<26>(bits(machInst, 25, 0)) << 2; + if (bits(machInst, 31) == 0) + return new B64(machInst, imm); + else + return new Bl64(machInst, imm); + } + case 0x1: + { + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + if (bits(machInst, 25) == 0) { + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + if (bits(machInst, 24) == 0) + return new Cbz64(machInst, imm, rt); + else + return new Cbnz64(machInst, imm, rt); + } else { + uint64_t bitmask = 0x1; + bitmask <<= bits(machInst, 23, 19); + int64_t imm = sext<14>(bits(machInst, 18, 5)) << 2; + if (bits(machInst, 31)) + bitmask <<= 32; + if (bits(machInst, 24) == 0) + return new Tbz64(machInst, bitmask, imm, rt); + else + return new Tbnz64(machInst, bitmask, imm, rt); + } + } + case 0x2: + // bit 30:26=10101 + if (bits(machInst, 31) == 0) { + if (bits(machInst, 25, 24) || bits(machInst, 4)) + return new Unknown64(machInst); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + ConditionCode condCode = + (ConditionCode)(uint8_t)(bits(machInst, 3, 0)); + return new BCond64(machInst, imm, condCode); + } else if (bits(machInst, 25, 24) == 0x0) { + if (bits(machInst, 4, 2)) + return new Unknown64(machInst); + uint8_t decVal = (bits(machInst, 1, 0) << 0) | + (bits(machInst, 23, 21) << 2); + switch (decVal) { + case 0x01: + return new Svc64(machInst); + case 0x02: + return new FailUnimplemented("hvc", machInst); + case 0x03: + return new Smc64(machInst); + case 0x04: + return new FailUnimplemented("brk", machInst); + case 0x08: + return new FailUnimplemented("hlt", machInst); + case 0x15: + return new FailUnimplemented("dcps1", 
machInst); + case 0x16: + return new FailUnimplemented("dcps2", machInst); + case 0x17: + return new FailUnimplemented("dcps3", machInst); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 25, 22) == 0x4) { + // bit 31:22=1101010100 + bool l = bits(machInst, 21); + uint8_t op0 = bits(machInst, 20, 19); + uint8_t op1 = bits(machInst, 18, 16); + uint8_t crn = bits(machInst, 15, 12); + uint8_t crm = bits(machInst, 11, 8); + uint8_t op2 = bits(machInst, 7, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + switch (op0) { + case 0x0: + if (rt != 0x1f || l) + return new Unknown64(machInst); + if (crn == 0x2 && op1 == 0x3) { + switch (op2) { + case 0x0: + return new NopInst(machInst); + case 0x1: + return new YieldInst(machInst); + case 0x2: + return new WfeInst(machInst); + case 0x3: + return new WfiInst(machInst); + case 0x4: + return new SevInst(machInst); + case 0x5: + return new SevlInst(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x3 && op1 == 0x3) { + switch (op2) { + case 0x2: + return new Clrex64(machInst); + case 0x4: + return new Dsb64(machInst); + case 0x5: + return new Dmb64(machInst); + case 0x6: + return new Isb64(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x4) { + // MSR immediate + switch (op1 << 3 | op2) { + case 0x5: + // SP + return new MsrSP64(machInst, + (IntRegIndex) MISCREG_SPSEL, + INTREG_ZERO, + crm & 0x1); + case 0x1e: + // DAIFSet + return new MsrDAIFSet64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + case 0x1f: + // DAIFClr + return new MsrDAIFClr64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + default: + return new Unknown64(machInst); + } + } else { + return new Unknown64(machInst); + } + break; + case 0x1: + case 0x2: + case 0x3: + { + // bit 31:22=1101010100, 20:19=11 + bool read = l; + MiscRegIndex miscReg = + decodeAArch64SysReg(op0, op1, crn, crm, op2); + if (read) { + if 
((miscReg == MISCREG_DC_CIVAC_Xt) || + (miscReg == MISCREG_DC_CVAC_Xt) || + (miscReg == MISCREG_DC_ZVA_Xt)) { + return new Unknown64(machInst); + } + } + // Check for invalid registers + if (miscReg == MISCREG_UNKNOWN) { + return new Unknown64(machInst); + } else if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (miscReg == MISCREG_NZCV) { + if (read) + return new MrsNZCV64(machInst, rt, (IntRegIndex) miscReg); + else + return new MsrNZCV64(machInst, (IntRegIndex) miscReg, rt); + } + uint32_t iss = msrMrs64IssBuild(read, op0, op1, crn, crm, op2, rt); + if (miscReg == MISCREG_DC_ZVA_Xt && !read) + return new Dczva(machInst, rt, (IntRegIndex) miscReg, iss); + + if (read) + return new Mrs64(machInst, rt, (IntRegIndex) miscReg, iss); + else + return new Msr64(machInst, (IntRegIndex) miscReg, rt, iss); + } else if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]); + return new WarnUnimplemented(read ? "mrs" : "msr", + machInst, full_mnem); + } else { + return new FailUnimplemented(csprintf("%s %s", + read ? 
"mrs" : "msr", miscRegName[miscReg]).c_str(), + machInst); + } + } + break; + } + } else if (bits(machInst, 25) == 0x1) { + uint8_t opc = bits(machInst, 24, 21); + uint8_t op2 = bits(machInst, 20, 16); + uint8_t op3 = bits(machInst, 15, 10); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + uint8_t op4 = bits(machInst, 4, 0); + if (op2 != 0x1f || op3 != 0x0 || op4 != 0x0) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Br64(machInst, rn); + case 0x1: + return new Blr64(machInst, rn); + case 0x2: + return new Ret64(machInst, rn); + case 0x4: + if (rn != 0x1f) + return new Unknown64(machInst); + return new Eret64(machInst); + case 0x5: + if (rn != 0x1f) + return new Unknown64(machInst); + return new FailUnimplemented("dret", machInst); + } + } + default: + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case7", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeLoadsStores(ExtMachInst machInst) + { + // bit 27,25=10 + switch (bits(machInst, 29, 28)) { + case 0x0: + if (bits(machInst, 26) == 0) { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rs = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + uint8_t opc = (bits(machInst, 15) << 0) | + (bits(machInst, 23, 21) << 1); + uint8_t size = bits(machInst, 31, 30); + switch (opc) { + case 0x0: + switch (size) { + case 0x0: + return new STXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STXRX64(machInst, rt, rnsp, rs); + } + case 0x1: + switch (size) { + case 0x0: + return new STLXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new 
STLXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STLXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STLXRX64(machInst, rt, rnsp, rs); + } + case 0x2: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x3: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STLXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STLXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x4: + switch (size) { + case 0x0: + return new LDXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDXRX64(machInst, rt, rnsp, rs); + } + case 0x5: + switch (size) { + case 0x0: + return new LDAXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDAXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDAXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDAXRX64(machInst, rt, rnsp, rs); + } + case 0x6: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDXPX64(machInst, rt, rt2, rnsp); + } + + case 0x7: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDAXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDAXPX64(machInst, rt, rt2, rnsp); + } + + case 0x9: + switch (size) { + case 0x0: + return new STLRB64(machInst, rt, rnsp); + case 0x1: + return new STLRH64(machInst, rt, rnsp); + case 0x2: + return new STLRW64(machInst, rt, rnsp); + case 0x3: + return new STLRX64(machInst, rt, rnsp); + } + case 0xd: + switch (size) { + case 0x0: + return new LDARB64(machInst, rt, rnsp); + case 0x1: + return new LDARH64(machInst, rt, rnsp); + case 0x2: + return new LDARW64(machInst, rt, 
rnsp); + case 0x3: + return new LDARX64(machInst, rt, rnsp); + } + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 31)) { + return new Unknown64(machInst); + } else { + return decodeNeonMem(machInst); + } + case 0x1: + { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + uint8_t switchVal = (bits(machInst, 26) << 0) | + (bits(machInst, 31, 30) << 1); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + switch (switchVal) { + case 0x0: + return new LDRWL64_LIT(machInst, rt, imm); + case 0x1: + return new LDRSFP64_LIT(machInst, rt, imm); + case 0x2: + return new LDRXL64_LIT(machInst, rt, imm); + case 0x3: + return new LDRDFP64_LIT(machInst, rt, imm); + case 0x4: + return new LDRSWL64_LIT(machInst, rt, imm); + case 0x5: + return new BigFpMemLit("ldr", machInst, rt, imm); + case 0x6: + return new PRFM64_LIT(machInst, rt, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 31, 30); + if (opc >= 3) + return new Unknown64(machInst); + uint32_t size = 0; + bool fp = bits(machInst, 26); + bool load = bits(machInst, 22); + if (fp) { + size = 4 << opc; + } else { + if ((opc == 1) && !load) + return new Unknown64(machInst); + size = (opc == 0 || opc == 1) ? 
4 : 8; + } + uint8_t type = bits(machInst, 24, 23); + int64_t imm = sext<7>(bits(machInst, 21, 15)) * size; + + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + + bool noAlloc = (type == 0); + bool signExt = !noAlloc && !fp && opc == 1; + PairMemOp::AddrMode mode; + const char *mnemonic = NULL; + switch (type) { + case 0x0: + case 0x2: + mode = PairMemOp::AddrMd_Offset; + break; + case 0x1: + mode = PairMemOp::AddrMd_PostIndex; + break; + case 0x3: + mode = PairMemOp::AddrMd_PreIndex; + break; + default: + return new Unknown64(machInst); + } + if (load) { + if (noAlloc) + mnemonic = "ldnp"; + else if (signExt) + mnemonic = "ldpsw"; + else + mnemonic = "ldp"; + } else { + if (noAlloc) + mnemonic = "stnp"; + else + mnemonic = "stp"; + } + + return new LdpStp(mnemonic, machInst, size, fp, load, noAlloc, + signExt, false, false, imm, mode, rn, rt, rt2); + } + // bit 29:27=111, 25=0 + case 0x3: + { + uint8_t switchVal = (bits(machInst, 23, 22) << 0) | + (bits(machInst, 26) << 2) | + (bits(machInst, 31, 30) << 3); + if (bits(machInst, 24) == 1) { + uint64_t imm12 = bits(machInst, 21, 10); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + switch (switchVal) { + case 0x00: + return new STRB64_IMM(machInst, rt, rnsp, imm12); + case 0x01: + return new LDRB64_IMM(machInst, rt, rnsp, imm12); + case 0x02: + return new LDRSBX64_IMM(machInst, rt, rnsp, imm12); + case 0x03: + return new LDRSBW64_IMM(machInst, rt, rnsp, imm12); + case 0x04: + return new STRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x05: + return new LDRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x06: + return new BigFpMemImm("str", machInst, false, + rt, rnsp, imm12 << 4); + case 0x07: + return new BigFpMemImm("ldr", machInst, true, + rt, rnsp, imm12 << 4); + 
case 0x08: + return new STRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x09: + return new LDRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0a: + return new LDRSHX64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0b: + return new LDRSHW64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0c: + return new STRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0d: + return new LDRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x10: + return new STRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x11: + return new LDRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x12: + return new LDRSW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x14: + return new STRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x15: + return new LDRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x18: + return new STRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x19: + return new LDRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1a: + return new PRFM64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1c: + return new STRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1d: + return new LDRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 11, 10) != 0x2) + return new Unknown64(machInst); + if (!bits(machInst, 14)) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ArmExtendType type = + (ArmExtendType)(uint32_t)bits(machInst, 15, 13); + uint8_t s = bits(machInst, 12); + switch (switchVal) { + case 0x00: + return new STRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x01: + return new LDRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x02: + return new LDRSBX64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x03: + return new LDRSBW64_REG(machInst, rt, rnsp, rm, type, 
0); + case 0x04: + return new STRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x05: + return new LDRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x6: + return new BigFpMemReg("str", machInst, false, + rt, rnsp, rm, type, s * 4); + case 0x7: + return new BigFpMemReg("ldr", machInst, true, + rt, rnsp, rm, type, s * 4); + case 0x08: + return new STRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x09: + return new LDRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0a: + return new LDRSHX64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0b: + return new LDRSHW64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0c: + return new STRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0d: + return new LDRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x10: + return new STRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x11: + return new LDRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x12: + return new LDRSW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x14: + return new STRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x15: + return new LDRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x18: + return new STRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x19: + return new LDRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1a: + return new PRFM64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1c: + return new STRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1d: + return new LDRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + default: + return new Unknown64(machInst); + } + } else { + // bit 29:27=111, 25:24=00, 21=0 + switch (bits(machInst, 11, 10)) { + case 0x0: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STURB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new 
LDURB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDURSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDURSBW64_IMM(machInst, rt, rnsp, imm); + case 0x04: + return new STURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x05: + return new LDURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemImm("stur", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemImm("ldur", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STURH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDURH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDURSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDURSHW64_IMM(machInst, rt, rnsp, imm); + case 0x0c: + return new STURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x0d: + return new LDURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STURW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDURW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDURSW64_IMM(machInst, rt, rnsp, imm); + case 0x14: + return new STURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x15: + return new LDURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STURX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDURX64_IMM(machInst, rt, rnsp, imm); + case 0x1a: + return new PRFUM64_IMM(machInst, rt, rnsp, imm); + case 0x1c: + return new STURDFP64_IMM(machInst, rt, rnsp, imm); + case 0x1d: + return new LDURDFP64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + // bit 29:27=111, 25:24=00, 21=0, 11:10=01 + case 0x1: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_POST(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_POST(machInst, rt, rnsp, imm); + case 0x02: + 
return new LDRSBX64_POST(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_POST(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_POST(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_POST(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPost("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPost("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_POST(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_POST(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_POST(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_POST(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_POST(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_POST(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_POST(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_POST(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_POST(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_POST(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_POST(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_POST(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_POST(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_POST(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_POST(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STTRB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDTRB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDTRSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDTRSBW64_IMM(machInst, rt, rnsp, imm); + case 0x08: + return new 
STTRH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDTRH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDTRSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDTRSHW64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STTRW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDTRW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDTRSW64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STTRX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDTRX64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_PRE(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_PRE(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_PRE(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_PRE(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPre("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPre("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_PRE(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_PRE(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_PRE(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_PRE(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_PRE(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_PRE(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_PRE(machInst, rt, rnsp, imm); + case 0x14: + 
return new STRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_PRE(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_PRE(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_PRE(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_PRE(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + } + } + } + } + return new FailUnimplemented("Unhandled Case1", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcReg(ExtMachInst machInst) + { + uint8_t switchVal = (bits(machInst, 28) << 1) | + (bits(machInst, 24) << 0); + switch (switchVal) { + case 0x0: + { + uint8_t switchVal = (bits(machInst, 21) << 0) | + (bits(machInst, 30, 29) << 1); + ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + uint8_t imm6 = bits(machInst, 15, 10); + bool sf = bits(machInst, 31); + if (!sf && (imm6 & 0x20)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AndXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new BicXSReg(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new OrrXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new OrnXSReg(machInst, rd, rn, rm, imm6, type); + case 0x4: + return new EorXSReg(machInst, rd, rn, rm, imm6, type); + case 0x5: + return new EonXSReg(machInst, rd, rn, rm, imm6, type); + case 0x6: + return new AndXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x7: + return new BicXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } + case 0x1: + { + uint8_t switchVal = bits(machInst, 30, 29); + if (bits(machInst, 21) == 0) { + ArmShiftType type = + (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + if (type == ROR) + return 
new Unknown64(machInst); + uint8_t imm6 = bits(machInst, 15, 10); + if (!bits(machInst, 31) && bits(imm6, 5)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (switchVal) { + case 0x0: + return new AddXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new AddXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new SubXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new SubXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } else { + if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4) + return new Unknown64(machInst); + ArmExtendType type = + (ArmExtendType)(uint8_t)bits(machInst, 15, 13); + uint8_t imm3 = bits(machInst, 12, 10); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x1: + return new AddXERegCc(machInst, rd, rnsp, rm, type, imm3); + case 0x2: + return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x3: + return new SubXERegCc(machInst, rd, rnsp, rm, type, imm3); + } + } + } + case 0x2: + { + if (bits(machInst, 21) == 1) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 22)) { + case 0x0: + { + if (bits(machInst, 15, 10)) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 30, 29); + switch (switchVal) { + case 0x0: + return new AdcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x1: + return 
new AdcXSRegCc(machInst, rd, rn, rm, 0, LSL); + case 0x2: + return new SbcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x3: + return new SbcXSRegCc(machInst, rd, rn, rm, 0, LSL); + } + } + case 0x1: + { + if ((bits(machInst, 4) == 1) || + (bits(machInst, 10) == 1) || + (bits(machInst, 29) == 0)) { + return new Unknown64(machInst); + } + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + uint8_t flags = bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + if (bits(machInst, 11) == 0) { + IntRegIndex rm = + (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnReg64(machInst, rn, rm, cond, flags); + } else { + return new CcmpReg64(machInst, rn, rm, cond, flags); + } + } else { + uint8_t imm5 = bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnImm64(machInst, rn, imm5, cond, flags); + } else { + return new CcmpImm64(machInst, rn, imm5, cond, flags); + } + } + } + case 0x2: + { + if (bits(machInst, 29) == 1 || + bits(machInst, 11) == 1) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 10) << 0) | + (bits(machInst, 30) << 1); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + switch (switchVal) { + case 0x0: + return new Csel64(machInst, rd, rn, rm, cond); + case 0x1: + return new Csinc64(machInst, rd, rn, rm, cond); + case 0x2: + return new Csinv64(machInst, rd, rn, rm, cond); + case 0x3: + return new Csneg64(machInst, rd, rn, rm, cond); + } + } + case 0x3: + if (bits(machInst, 30) == 0) { + if (bits(machInst, 29) != 0) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x2: + return new Udiv64(machInst, rd, rn, rm); + case 0x3: + return new 
Sdiv64(machInst, rd, rn, rm); + case 0x8: + return new Lslv64(machInst, rd, rn, rm); + case 0x9: + return new Lsrv64(machInst, rd, rn, rm); + case 0xa: + return new Asrv64(machInst, rd, rn, rm); + case 0xb: + return new Rorv64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } else { + if (bits(machInst, 20, 16) != 0 || + bits(machInst, 29) != 0) { + return new Unknown64(machInst); + } + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x0: + return new Rbit64(machInst, rd, rn); + case 0x1: + return new Rev1664(machInst, rd, rn); + case 0x2: + if (bits(machInst, 31) == 0) + return new Rev64(machInst, rd, rn); + else + return new Rev3264(machInst, rd, rn); + case 0x3: + if (bits(machInst, 31) != 1) + return new Unknown64(machInst); + return new Rev64(machInst, rd, rn); + case 0x4: + return new Clz64(machInst, rd, rn); + case 0x5: + return new Cls64(machInst, rd, rn); + } + } + } + } + case 0x3: + { + if (bits(machInst, 30, 29) != 0x0 || + (bits(machInst, 23, 21) != 0 && bits(machInst, 31) == 0)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex ra = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 21)) { + case 0x0: + if (bits(machInst, 15) == 0) + return new Madd64(machInst, rd, ra, rn, rm); + else + return new Msub64(machInst, rd, ra, rn, rm); + case 0x1: + if (bits(machInst, 15) == 0) + return new Smaddl64(machInst, rd, ra, rn, rm); + else + return new Smsubl64(machInst, rd, ra, rn, rm); + case 0x2: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Smulh64(machInst, rd, rn, rm); + case 0x5: + if (bits(machInst, 15) == 0) + return new Umaddl64(machInst, rd, ra, rn, rm); + else + return new Umsubl64(machInst, rd, ra, rn, rm); + case 0x6: + if (bits(machInst, 15) != 0) + return 
new Unknown64(machInst); + return new Umulh64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case2", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMD(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonIndexedElem(machInst); + } else if (bits(machInst, 23) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 22, 19)) { + return decodeNeonShiftByImm(machInst); + } else { + return decodeNeonModImm(machInst); + } + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeon3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeon3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeon2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonAcrossLanes(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 24) || + bits(machInst, 21) || + bits(machInst, 15)) { + return new Unknown64(machInst); + } else if (bits(machInst, 10) == 1) { + if (bits(machInst, 23, 22)) + return new Unknown64(machInst); + return decodeNeonCopy(machInst); + } else if (bits(machInst, 29) == 1) { + return decodeNeonExt(machInst); + } else if (bits(machInst, 11) == 1) { + return decodeNeonZipUzpTrn(machInst); + } else if (bits(machInst, 23, 22) == 0x0) { + return decodeNeonTblTbx(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case3", machInst); + } +} +}}; + + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + // bit 30=0, 28:25=1111 + decodeFp(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = 
(IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + IntRegIndex ra = (IntRegIndex)(uint32_t)bits(machInst, 14, 10); + uint8_t switchVal = (bits(machInst, 23, 21) << 1) | + (bits(machInst, 15) << 0); + switch (switchVal) { + case 0x0: // FMADD Sd = Sa + Sn*Sm + return new FMAddS(machInst, rd, rn, rm, ra); + case 0x1: // FMSUB Sd = Sa + (-Sn)*Sm + return new FMSubS(machInst, rd, rn, rm, ra); + case 0x2: // FNMADD Sd = (-Sa) + (-Sn)*Sm + return new FNMAddS(machInst, rd, rn, rm, ra); + case 0x3: // FNMSUB Sd = (-Sa) + Sn*Sm + return new FNMSubS(machInst, rd, rn, rm, ra); + case 0x4: // FMADD Dd = Da + Dn*Dm + return new FMAddD(machInst, rd, rn, rm, ra); + case 0x5: // FMSUB Dd = Da + (-Dn)*Dm + return new FMSubD(machInst, rd, rn, rm, ra); + case 0x6: // FNMADD Dd = (-Da) + (-Dn)*Dm + return new FNMAddD(machInst, rd, rn, rm, ra); + case 0x7: // FNMSUB Dd = (-Da) + Dn*Dm + return new FNMSubD(machInst, rd, rn, rm, ra); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 0) { + bool s = bits(machInst, 29); + if (s) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 20, 16); + uint8_t type = bits(machInst, 23, 22); + uint8_t scale = bits(machInst, 15, 10); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + if (bits(machInst, 18, 17) == 3 && scale != 0) + return new Unknown64(machInst); + // 30:24=0011110, 21=0 + switch (switchVal) { + case 0x00: + return new FailUnimplemented("fcvtns", machInst); + case 0x01: + return new FailUnimplemented("fcvtnu", machInst); + case 0x02: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // SCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpSW(machInst, rd, rn, scale); + case 1: // SCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpDW(machInst, rd, rn, scale); + case 4: // SCVTF Sd = convertFromInt(Xn/(2^fbits)) + 
return new FcvtSFixedFpSX(machInst, rd, rn, scale); + case 5: // SCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x03: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // UCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpSW(machInst, rd, rn, scale); + case 1: // UCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpDW(machInst, rd, rn, scale); + case 4: // UCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpSX(machInst, rd, rn, scale); + case 5: // UCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x04: + return new FailUnimplemented("fcvtas", machInst); + case 0x05: + return new FailUnimplemented("fcvtau", machInst); + case 0x08: + return new FailUnimplemented("fcvtps", machInst); + case 0x09: + return new FailUnimplemented("fcvtpu", machInst); + case 0x0e: + return new FailUnimplemented("fmov elem. 
to 64", machInst); + case 0x0f: + return new FailUnimplemented("fmov 64 bit", machInst); + case 0x10: + return new FailUnimplemented("fcvtms", machInst); + case 0x11: + return new FailUnimplemented("fcvtmu", machInst); + case 0x18: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZS Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZS Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZS Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZS Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x19: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZU Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZU Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZU Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZU Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + } + } else { + // 30=0, 28:24=11110, 21=1 + uint8_t type = bits(machInst, 23, 22); + uint8_t imm8 = bits(machInst, 20, 13); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + switch (bits(machInst, 11, 10)) { + case 0x0: + if (bits(machInst, 12) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 9, 5)) { + return new Unknown64(machInst); + } + // 31:29=000, 28:24=11110, 21=1, 12:10=100 + if (type == 0) { + // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) + // 
:imm8<5:0>:Zeros(19) + uint32_t imm = vfp_modified_imm(imm8, false); + return new FmovImmS(machInst, rd, imm); + } else if (type == 1) { + // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) + // :imm8<5:0>:Zeros(48) + uint64_t imm = vfp_modified_imm(imm8, true); + return new FmovImmD(machInst, rd, imm); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 13) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 15, 14) || + bits(machInst, 23) || + bits(machInst, 2, 0)) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 4, 3) << 0) | + (bits(machInst, 22) << 2); + IntRegIndex rm = (IntRegIndex)(uint32_t) + bits(machInst, 20, 16); + // 28:23=000111100, 21=1, 15:10=001000, 2:0=000 + switch (switchVal) { + case 0x0: + // FCMP flags = compareQuiet(Sn,Sm) + return new FCmpRegS(machInst, rn, rm); + case 0x1: + // FCMP flags = compareQuiet(Sn,0.0) + return new FCmpImmS(machInst, rn, 0); + case 0x2: + // FCMPE flags = compareSignaling(Sn,Sm) + return new FCmpERegS(machInst, rn, rm); + case 0x3: + // FCMPE flags = compareSignaling(Sn,0.0) + return new FCmpEImmS(machInst, rn, 0); + case 0x4: + // FCMP flags = compareQuiet(Dn,Dm) + return new FCmpRegD(machInst, rn, rm); + case 0x5: + // FCMP flags = compareQuiet(Dn,0.0) + return new FCmpImmD(machInst, rn, 0); + case 0x6: + // FCMPE flags = compareSignaling(Dn,Dm) + return new FCmpERegD(machInst, rn, rm); + case 0x7: + // FCMPE flags = compareSignaling(Dn,0.0) + return new FCmpEImmD(machInst, rn, 0); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 14) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t opcode = bits(machInst, 20, 15); + // Bits 31:24=00011110, 21=1, 14:10=10000 + switch (opcode) { + case 0x0: + if (type == 0) + // FMOV Sd = Sn + return new FmovRegS(machInst, rd, rn); + else if (type == 1) + // FMOV Dd = Dn + return new FmovRegD(machInst, rd, rn); + break; 
+ case 0x1: + if (type == 0) + // FABS Sd = abs(Sn) + return new FAbsS(machInst, rd, rn); + else if (type == 1) + // FABS Dd = abs(Dn) + return new FAbsD(machInst, rd, rn); + break; + case 0x2: + if (type == 0) + // FNEG Sd = -Sn + return new FNegS(machInst, rd, rn); + else if (type == 1) + // FNEG Dd = -Dn + return new FNegD(machInst, rd, rn); + break; + case 0x3: + if (type == 0) + // FSQRT Sd = sqrt(Sn) + return new FSqrtS(machInst, rd, rn); + else if (type == 1) + // FSQRT Dd = sqrt(Dn) + return new FSqrtD(machInst, rd, rn); + break; + case 0x4: + if (type == 1) + // FCVT Sd = convertFormat(Dn) + return new FcvtFpDFpS(machInst, rd, rn); + else if (type == 3) + // FCVT Sd = convertFormat(Hn) + return new FcvtFpHFpS(machInst, rd, rn); + break; + case 0x5: + if (type == 0) + // FCVT Dd = convertFormat(Sn) + return new FCvtFpSFpD(machInst, rd, rn); + else if (type == 3) + // FCVT Dd = convertFormat(Hn) + return new FcvtFpHFpD(machInst, rd, rn); + break; + case 0x7: + if (type == 0) + // FCVT Hd = convertFormat(Sn) + return new FcvtFpSFpH(machInst, rd, rn); + else if (type == 1) + // FCVT Hd = convertFormat(Dn) + return new FcvtFpDFpH(machInst, rd, rn); + break; + case 0x8: + if (type == 0) // FRINTN Sd = roundToIntegralTiesToEven(Sn) + return new FRIntNS(machInst, rd, rn); + else if (type == 1) // FRINTN Dd = roundToIntegralTiesToEven(Dn) + return new FRIntND(machInst, rd, rn); + break; + case 0x9: + if (type == 0) // FRINTP Sd = roundToIntegralTowardPlusInf(Sn) + return new FRIntPS(machInst, rd, rn); + else if (type == 1) // FRINTP Dd = roundToIntegralTowardPlusInf(Dn) + return new FRIntPD(machInst, rd, rn); + break; + case 0xa: + if (type == 0) // FRINTM Sd = roundToIntegralTowardMinusInf(Sn) + return new FRIntMS(machInst, rd, rn); + else if (type == 1) // FRINTM Dd = roundToIntegralTowardMinusInf(Dn) + return new FRIntMD(machInst, rd, rn); + break; + case 0xb: + if (type == 0) // FRINTZ Sd = roundToIntegralTowardZero(Sn) + return new FRIntZS(machInst, rd, rn); + 
else if (type == 1) // FRINTZ Dd = roundToIntegralTowardZero(Dn) + return new FRIntZD(machInst, rd, rn); + break; + case 0xc: + if (type == 0) // FRINTA Sd = roundToIntegralTiesToAway(Sn) + return new FRIntAS(machInst, rd, rn); + else if (type == 1) // FRINTA Dd = roundToIntegralTiesToAway(Dn) + return new FRIntAD(machInst, rd, rn); + break; + case 0xe: + if (type == 0) // FRINTX Sd = roundToIntegralExact(Sn) + return new FRIntXS(machInst, rd, rn); + else if (type == 1) // FRINTX Dd = roundToIntegralExact(Dn) + return new FRIntXD(machInst, rd, rn); + break; + case 0xf: + if (type == 0) // FRINTI Sd = roundToIntegral(Sn) + return new FRIntIS(machInst, rd, rn); + else if (type == 1) // FRINTI Dd = roundToIntegral(Dn) + return new FRIntID(machInst, rd, rn); + break; + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } else if (bits(machInst, 15) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t rmode = bits(machInst, 20, 19); + uint8_t switchVal1 = bits(machInst, 18, 16); + uint8_t switchVal2 = (type << 1) | bits(machInst, 31); + // 30:24=0011110, 21=1, 15:10=000000 + switch (switchVal1) { + case 0x0: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNS Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntWSN(machInst, rd, rn); + case 0x1: //FCVTPS Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntWSP(machInst, rd, rn); + case 0x2: //FCVTMS Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntWSM(machInst, rd, rn); + case 0x3: //FCVTZS Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNS Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntXSN(machInst, rd, rn); + case 0x5: //FCVTPS Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntXSP(machInst, rd, rn); + case 0x6: //FCVTMS Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntXSM(machInst, rd, 
rn); + case 0x7: //FCVTZS Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNS Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntWDN(machInst, rd, rn); + case 0x9: //FCVTPS Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntWDP(machInst, rd, rn); + case 0xA: //FCVTMS Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntWDM(machInst, rd, rn); + case 0xB: //FCVTZS Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNS Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntXDN(machInst, rd, rn); + case 0xD: //FCVTPS Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntXDP(machInst, rd, rn); + case 0xE: //FCVTMS Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntXDM(machInst, rd, rn); + case 0xF: //FCVTZS Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x1: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNU Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntWSN(machInst, rd, rn); + case 0x1: //FCVTPU Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntWSP(machInst, rd, rn); + case 0x2: //FCVTMU Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntWSM(machInst, rd, rn); + case 0x3: //FCVTZU Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNU Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntXSN(machInst, rd, rn); + case 0x5: //FCVTPU Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntXSP(machInst, rd, rn); + case 0x6: //FCVTMU Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntXSM(machInst, rd, rn); + case 0x7: //FCVTZU Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNU Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntWDN(machInst, rd, 
rn); + case 0x9: //FCVTPU Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntWDP(machInst, rd, rn); + case 0xA: //FCVTMU Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntWDM(machInst, rd, rn); + case 0xB: //FCVTZU Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNU Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntXDN(machInst, rd, rn); + case 0xD: //FCVTPU Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntXDP(machInst, rd, rn); + case 0xE: //FCVTMU Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntXDM(machInst, rd, rn); + case 0xF: //FCVTZU Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x2: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // SCVTF Sd = convertFromInt(Wn) + return new FcvtWSIntFpS(machInst, rd, rn); + case 1: // SCVTF Sd = convertFromInt(Xn) + return new FcvtXSIntFpS(machInst, rd, rn); + case 2: // SCVTF Dd = convertFromInt(Wn) + return new FcvtWSIntFpD(machInst, rd, rn); + case 3: // SCVTF Dd = convertFromInt(Xn) + return new FcvtXSIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x3: + switch (switchVal2) { + case 0: // UCVTF Sd = convertFromInt(Wn) + return new FcvtWUIntFpS(machInst, rd, rn); + case 1: // UCVTF Sd = convertFromInt(Xn) + return new FcvtXUIntFpS(machInst, rd, rn); + case 2: // UCVTF Dd = convertFromInt(Wn) + return new FcvtWUIntFpD(machInst, rd, rn); + case 3: // UCVTF Dd = convertFromInt(Xn) + return new FcvtXUIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x4: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // FCVTAS Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntWSA(machInst, rd, rn); + case 1: // FCVTAS Xd = convertToIntExactTiesToAway(Sn) + return new 
FcvtFpSIntXSA(machInst, rd, rn); + case 2: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntWDA(machInst, rd, rn); + case 3: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x5: + switch (switchVal2) { + case 0: // FCVTAU Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntWSA(machInst, rd, rn); + case 1: // FCVTAU Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntXSA(machInst, rd, rn); + case 2: // FCVTAU Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntWDA(machInst, rd, rn); + case 3: // FCVTAU Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x06: + switch (switchVal2) { + case 0: // FMOV Wd = Sn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovURegCoreX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + case 0x07: + switch (switchVal2) { + case 0: // FMOV Sd = Wn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovUCoreRegX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + default: // Warning! 
missing cases in switch statement above, that still need to be added + return new Unknown64(machInst); + } + } + case 0x1: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rm = (IntRegIndex)(uint32_t) bits(machInst, 20, 16); + IntRegIndex rn = (IntRegIndex)(uint32_t) bits(machInst, 9, 5); + uint8_t imm = (IntRegIndex)(uint32_t) bits(machInst, 3, 0); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + uint8_t switchVal = (bits(machInst, 4) << 0) | + (bits(machInst, 22) << 1); + // 31:23=000111100, 21=1, 11:10=01 + switch (switchVal) { + case 0x0: + // FCCMP flags = if cond the compareQuiet(Sn,Sm) else #nzcv + return new FCCmpRegS(machInst, rn, rm, cond, imm); + case 0x1: + // FCCMP flags = if cond then compareSignaling(Sn,Sm) + // else #nzcv + return new FCCmpERegS(machInst, rn, rm, cond, imm); + case 0x2: + // FCCMP flags = if cond then compareQuiet(Dn,Dm) else #nzcv + return new FCCmpRegD(machInst, rn, rm, cond, imm); + case 0x3: + // FCCMP flags = if cond then compareSignaling(Dn,Dm) + // else #nzcv + return new FCCmpERegD(machInst, rn, rm, cond, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + uint8_t switchVal = (bits(machInst, 15, 12) << 0) | + (bits(machInst, 22) << 4); + switch (switchVal) { + case 0x00: // FMUL Sd = Sn * Sm + return new FMulS(machInst, rd, rn, rm); + case 0x10: // FMUL Dd = Dn * Dm + return new FMulD(machInst, rd, rn, rm); + case 0x01: // FDIV Sd = Sn / Sm + return new FDivS(machInst, rd, rn, rm); + case 0x11: // FDIV Dd = Dn / Dm + return new FDivD(machInst, rd, rn, rm); + case 0x02: // FADD Sd = Sn + Sm 
+ return new FAddS(machInst, rd, rn, rm); + case 0x12: // FADD Dd = Dn + Dm + return new FAddD(machInst, rd, rn, rm); + case 0x03: // FSUB Sd = Sn - Sm + return new FSubS(machInst, rd, rn, rm); + case 0x13: // FSUB Dd = Dn - Dm + return new FSubD(machInst, rd, rn, rm); + case 0x04: // FMAX Sd = max(Sn, Sm) + return new FMaxS(machInst, rd, rn, rm); + case 0x14: // FMAX Dd = max(Dn, Dm) + return new FMaxD(machInst, rd, rn, rm); + case 0x05: // FMIN Sd = min(Sn, Sm) + return new FMinS(machInst, rd, rn, rm); + case 0x15: // FMIN Dd = min(Dn, Dm) + return new FMinD(machInst, rd, rn, rm); + case 0x06: // FMAXNM Sd = maxNum(Sn, Sm) + return new FMaxNMS(machInst, rd, rn, rm); + case 0x16: // FMAXNM Dd = maxNum(Dn, Dm) + return new FMaxNMD(machInst, rd, rn, rm); + case 0x07: // FMINNM Sd = minNum(Sn, Sm) + return new FMinNMS(machInst, rd, rn, rm); + case 0x17: // FMINNM Dd = minNum(Dn, Dm) + return new FMinNMD(machInst, rd, rn, rm); + case 0x08: // FNMUL Sd = -(Sn * Sm) + return new FNMulS(machInst, rd, rn, rm); + case 0x18: // FNMUL Dd = -(Dn * Dm) + return new FNMulD(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t type = bits(machInst, 23, 22); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + if (type == 0) // FCSEL Sd = if cond then Sn else Sm + return new FCSelS(machInst, rd, rn, rm, cond); + else if (type == 1) // FCSEL Dd = if cond then Dn else Dm + return new FCSelD(machInst, rd, rn, rm, cond); + else + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case4", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMDScalar(ExtMachInst 
machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonScIndexedElem(machInst); + } else if (bits(machInst, 23) == 0) { + return decodeNeonScShiftByImm(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeonSc3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeonSc3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeonSc2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonScPwise(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 23, 22) == 0 && + bits(machInst, 15) == 0 && + bits(machInst, 10) == 1) { + return decodeNeonScCopy(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case6", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeFpAdvSIMD(ExtMachInst machInst) + { + + if (bits(machInst, 28) == 0) { + if (bits(machInst, 31) == 0) { + return decodeAdvSIMD(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 30) == 0) { + return decodeFp(machInst); + } else if (bits(machInst, 31) == 0) { + return decodeAdvSIMDScalar(machInst); + } else { + return new Unknown64(machInst); + } + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeGem5Ops(ExtMachInst machInst) + { + const uint32_t m5func = bits(machInst, 23, 16); + switch (m5func) { + case 0x00: return new Arm(machInst); + case 0x01: return new Quiesce(machInst); + case 0x02: return new QuiesceNs64(machInst); + case 0x03: return new QuiesceCycles64(machInst); + case 0x04: return new QuiesceTime64(machInst); + case 0x07: return new Rpns64(machInst); + case 0x09: return new WakeCPU64(machInst); + case 0x10: return new Deprecated_ivlb(machInst); + case 0x11: return new Deprecated_ivle(machInst); + case 0x20: return new Deprecated_exit (machInst); + case 0x21: 
return new M5exit64(machInst); + case 0x31: return new Loadsymbol(machInst); + case 0x30: return new Initparam64(machInst); + case 0x40: return new Resetstats64(machInst); + case 0x41: return new Dumpstats64(machInst); + case 0x42: return new Dumpresetstats64(machInst); + case 0x43: return new M5checkpoint64(machInst); + case 0x4F: return new M5writefile64(machInst); + case 0x50: return new M5readfile64(machInst); + case 0x51: return new M5break(machInst); + case 0x52: return new M5switchcpu(machInst); + case 0x53: return new M5addsymbol64(machInst); + case 0x54: return new M5panic(machInst); + case 0x5a: return new M5workbegin64(machInst); + case 0x5b: return new M5workend64(machInst); + default: return new Unknown64(machInst); + } + } +} +}}; + +def format Aarch64() {{ + decode_block = ''' + { + using namespace Aarch64; + if (bits(machInst, 27) == 0x0) { + if (bits(machInst, 28) == 0x0) + return new Unknown64(machInst); + else if (bits(machInst, 26) == 0) + // bit 28:26=100 + return decodeDataProcImm(machInst); + else + // bit 28:26=101 + return decodeBranchExcSys(machInst); + } else if (bits(machInst, 25) == 0) { + // bit 27=1, 25=0 + return decodeLoadsStores(machInst); + } else if (bits(machInst, 26) == 0) { + // bit 27:25=101 + return decodeDataProcReg(machInst); + } else if (bits(machInst, 24) == 1 && + bits(machInst, 31, 28) == 0xF) { + return decodeGem5Ops(machInst); + } else { + // bit 27:25=111 + return decodeFpAdvSIMD(machInst); + } + } + ''' +}}; diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa index f1b17ec90..513506d31 100644 --- a/src/arch/arm/isa/formats/branch.isa +++ b/src/arch/arm/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -101,7 +101,7 @@ def format Thumb16CondBranchAndSvc() {{ return new B(machInst, 
sext<9>(bits(machInst, 7, 0) << 1), (ConditionCode)(uint32_t)bits(machInst, 11, 8)); } else if (bits(machInst, 8)) { - return new Svc(machInst); + return new Svc(machInst, bits(machInst, 7, 0)); } else { // This space will not be allocated in the future. return new Unknown(machInst); @@ -127,7 +127,7 @@ def format Thumb32BranchesAndMiscCtrl() {{ // Permanently undefined. return new Unknown(machInst); } else { - return new WarnUnimplemented("smc", machInst); + return new Smc(machInst); } } else if ((op & 0x38) != 0x38) { const uint32_t s = bits(machInst, 26); @@ -141,20 +141,26 @@ def format Thumb32BranchesAndMiscCtrl() {{ return new B(machInst, imm, (ConditionCode)(uint32_t)bits(machInst, 25, 22)); } else { + // HIGH: 12-11=10, LOW: 15-14=00, 12=0 switch (op) { case 0x38: - { - const IntRegIndex rn = - (IntRegIndex)(uint32_t)bits(machInst, 19, 16); - const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrCpsrReg(machInst, rn, byteMask); - } case 0x39: { const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 19, 16); const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrSpsrReg(machInst, rn, byteMask); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + byteMask; + return new MsrBankedReg(machInst, rn, sysM, r); + } else { + if (r) { + return new MsrSpsrReg(machInst, rn, byteMask); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } + } } case 0x3a: { @@ -196,11 +202,11 @@ def format Thumb32BranchesAndMiscCtrl() {{ case 0x2: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); default: break; } @@ -208,28 +214,44 @@ def format Thumb32BranchesAndMiscCtrl() {{ } case 0x3c: { - // On systems that don't support bxj, bxj == bx - return new BxReg(machInst, + return new BxjReg(machInst, 
(IntRegIndex)(uint32_t)bits(machInst, 19, 16), COND_UC); } case 0x3d: { const uint32_t imm32 = bits(machInst, 7, 0); - return new SubsImmPclr(machInst, INTREG_PC, INTREG_LR, - imm32, false); + if (imm32 == 0) { + return new Eret(machInst); + } else { + return new SubsImmPclr(machInst, INTREG_PC, + INTREG_LR, imm32, false); + } } case 0x3e: + case 0x3f: { + const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsCpsr(machInst, rd); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + bits(machInst, 11, 8); + return new MrsBankedReg(machInst, rd, sysM, r); + } else { + if (r) { + return new MrsSpsr(machInst, rd); + } else { + return new MrsCpsr(machInst, rd); + } + } } - case 0x3f: + case 0xfe: { - const IntRegIndex rd = - (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsSpsr(machInst, rd); + uint32_t imm16 = (bits(machInst, 19, 16) << 12) | + (bits(machInst, 11, 0) << 0); + return new Hvc(machInst, imm16); } } break; diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 90144c101..44e9c5b5e 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -44,6 +44,12 @@ //Include the basic format ##include "basic.isa" +//Include support for decoding AArch64 instructions +##include "aarch64.isa" + +//Include support for decoding AArch64 NEON instructions +##include "neon64.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 6d779e541..ccd4589a3 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 
2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -151,8 +151,7 @@ let {{ if (singleAll) { size = bits(machInst, 7, 6); bool t = bits(machInst, 5); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; if (width == 1) { regs = t ? 2 : 1; inc = 1; @@ -164,7 +163,7 @@ let {{ case 1: case 2: if (bits(machInst, 4)) - align = width * eBytes - 1; + align = size + width - 1; break; case 3: break; @@ -173,20 +172,19 @@ let {{ if (bits(machInst, 4) == 0) return new Unknown(machInst); size = 2; - align = 0xf; + align = 0x4; } else if (size == 2) { if (bits(machInst, 4)) - align = 7; + align = 0x3; } else { if (bits(machInst, 4)) - align = 4 * eBytes - 1; + align = size + 2; } break; } } else { size = bits(machInst, 11, 10); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; regs = width; unsigned indexAlign = bits(machInst, 7, 4); // If width is 1, inc is always 1. That's overridden later. @@ -219,13 +217,13 @@ let {{ break; case 2: if (bits(indexAlign, 1, 0)) - align = 3; + align = 2; break; } break; case 2: if (bits(indexAlign, 0)) - align = (2 * eBytes) - 1; + align = size + 1; break; case 3: break; @@ -234,11 +232,11 @@ let {{ case 0: case 1: if (bits(indexAlign, 0)) - align = (4 * eBytes) - 1; + align = size + 2; break; case 2: if (bits(indexAlign, 0)) - align = (4 << bits(indexAlign, 1, 0)) - 1; + align = bits(indexAlign, 1, 0) + 2; break; } break; @@ -252,9 +250,9 @@ let {{ align = bits(machInst, 5, 4); if (align == 0) { // @align wasn't specified, so alignment can be turned off. 
- align = ((1 << size) - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; } else { - align = ((4 << align) - 1); + align = align + 2; } switch (width) { case 1: @@ -588,6 +586,23 @@ let {{ } } case 0xc: + if (b) { + if (!u) { + if (bits(c, 1) == 0) { + if (q) { + return new NVfmaQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmaDFp<float>(machInst, vd, vn, vm); + } + } else { + if (q) { + return new NVfmsQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmsDFp<float>(machInst, vd, vn, vm); + } + } + } + } return new Unknown(machInst); case 0xd: if (b) { @@ -1827,7 +1842,7 @@ let {{ break; case 0x1: { - if (offset == 0 || vd + offset/2 > NumFloatArchRegs) { + if (offset == 0 || vd + offset/2 > NumFloatV7ArchRegs) { break; } switch (bits(opcode, 1, 0)) { @@ -1951,8 +1966,9 @@ let {{ } else if (a == 0x7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -1974,7 +1990,9 @@ let {{ if (specReg == MISCREG_FPSCR) { return new VmsrFpscr(machInst, (IntRegIndex)specReg, rt); } else { - return new Vmsr(machInst, (IntRegIndex)specReg, rt); + uint32_t iss = mcrMrcIssBuild(0, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmsr(machInst, (IntRegIndex)specReg, rt, iss); } } } else if (l == 0 && c == 1) { @@ -2041,8 +2059,9 @@ let {{ } else if (a == 7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -2070,7 +2089,9 @@ let {{ } else if (specReg == MISCREG_FPSCR) { return new VmrsFpscr(machInst, rt, (IntRegIndex)specReg); } else { - return new Vmrs(machInst, rt, (IntRegIndex)specReg); + uint32_t iss = 
mcrMrcIssBuild(l, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmrs(machInst, rt, (IntRegIndex)specReg, iss); } } } else { @@ -2235,6 +2256,44 @@ let {{ } } break; + case 0x9: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfnmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfnmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmsD>( + machInst, vd, vn, vm, true); + } + } + break; + case 0xa: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmsD>( + machInst, vd, vn, vm, true); + } + } + break; case 0xb: if ((opc3 & 0x1) == 0) { const uint32_t baseImm = diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index f7830eff3..abac27021 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -282,7 +282,7 @@ def format Thumb32SrsRfe() {{ } } else { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); if (!add && !wb) { return new %(srs)s(machInst, mode, diff --git a/src/arch/arm/isa/formats/misc.isa b/src/arch/arm/isa/formats/misc.isa index 00a37d17b..647f9846d 100644 --- a/src/arch/arm/isa/formats/misc.isa +++ b/src/arch/arm/isa/formats/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -36,19 +36,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. // // Authors: Gabe Black +// Giacomo Gabrielli + +def format ArmERet() {{ + decode_block = "return new Eret(machInst);" +}}; def format Svc() {{ - decode_block = "return new Svc(machInst);" + decode_block = "return new Svc(machInst, bits(machInst, 23, 0));" +}}; + +def format ArmSmcHyp() {{ + decode_block = ''' + { + if (bits(machInst, 21)) + { + return new Smc(machInst); + } else { + uint32_t imm16 = (bits(machInst, 19, 8) << 4) | + (bits(machInst, 3, 0) << 0); + return new Hvc(machInst, imm16); + } + } + ''' }}; def format ArmMsrMrs() {{ decode_block = ''' { const uint8_t byteMask = bits(machInst, 19, 16); + const uint8_t sysM = byteMask | (bits(machInst, 8) << 4); const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 3, 0); const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); const uint32_t opcode = bits(machInst, 24, 21); const bool useImm = bits(machInst, 25); + const bool r = bits(machInst, 22); + const bool isBanked = bits(machInst, 9); const uint32_t unrotated = bits(machInst, 7, 0); const uint32_t rotation = (bits(machInst, 11, 8) << 1); @@ -56,20 +79,36 @@ def format ArmMsrMrs() {{ switch (opcode) { case 0x8: - return new MrsCpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsCpsr(machInst, rd); + } case 0x9: if (useImm) { return new MsrCpsrImm(machInst, imm, byteMask); } else { - return new MsrCpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } } case 0xa: - return new MrsSpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsSpsr(machInst, rd); + } case 0xb: if (useImm) { return new MsrSpsrImm(machInst, imm, byteMask); } else { - return new MsrSpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + 
return new MsrSpsrReg(machInst, rn, byteMask); + } } default: return new Unknown(machInst); @@ -99,16 +138,17 @@ let {{ switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP14_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); default: + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); if (isRead) { - return new Mrc14(machInst, rt, (IntRegIndex)miscReg); + return new Mrc14(machInst, rt, (IntRegIndex)miscReg, iss); } else { - return new Mcr14(machInst, (IntRegIndex)miscReg, rt); + return new Mcr14(machInst, (IntRegIndex)miscReg, rt, iss); } } } @@ -123,8 +163,8 @@ def format McrMrc14() {{ let {{ header_output = ''' - StaticInstPtr - decodeMcrMrc15(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc14(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc15(ExtMachInst machInst); ''' decoder_output = ''' StaticInstPtr @@ -136,107 +176,50 @@ let {{ const uint32_t crm = bits(machInst, 3, 0); const MiscRegIndex miscReg = decodeCP15Reg(crn, opc1, crm, opc2); const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - const bool isRead = bits(machInst, 20); + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP15_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); - case MISCREG_DCCISW: - return new WarnUnimplemented( - isRead ? "mrc dccisw" : "mcr dcisw", machInst); - case MISCREG_DCCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dccimvac" : "mcr dccimvac", machInst); - case MISCREG_DCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dcimvac" : "mcr dcimvac", machInst); case MISCREG_DCCMVAC: return new FlushPipeInst( isRead ? 
"mrc dccmvac" : "mcr dccmvac", machInst); - case MISCREG_DCCMVAU: - return new WarnUnimplemented( - isRead ? "mrc dccmvau" : "mcr dccmvau", machInst); case MISCREG_CP15ISB: - return new Isb(machInst); + return new Isb(machInst, iss); case MISCREG_CP15DSB: - return new Dsb(machInst); + return new Dsb(machInst, iss); case MISCREG_CP15DMB: - return new Dmb(machInst); - case MISCREG_ICIALLUIS: - return new WarnUnimplemented( - isRead ? "mrc icialluis" : "mcr icialluis", machInst); - case MISCREG_ICIMVAU: - return new WarnUnimplemented( - isRead ? "mrc icimvau" : "mcr icimvau", machInst); - case MISCREG_BPIMVA: - return new WarnUnimplemented( - isRead ? "mrc bpimva" : "mcr bpimva", machInst); - case MISCREG_BPIALLIS: - return new WarnUnimplemented( - isRead ? "mrc bpiallis" : "mcr bpiallis", machInst); - case MISCREG_BPIALL: - return new WarnUnimplemented( - isRead ? "mrc bpiall" : "mcr bpiall", machInst); - case MISCREG_L2LATENCY: - return new WarnUnimplemented( - isRead ? "mrc l2latency" : "mcr l2latency", machInst); - case MISCREG_CRN15: - return new WarnUnimplemented( - isRead ? "mrc crn15" : "mcr crn15", machInst); - - // Write only. - case MISCREG_TLBIALLIS: - case MISCREG_TLBIMVAIS: - case MISCREG_TLBIASIDIS: - case MISCREG_TLBIMVAAIS: - case MISCREG_ITLBIALL: - case MISCREG_ITLBIMVA: - case MISCREG_ITLBIASID: - case MISCREG_DTLBIALL: - case MISCREG_DTLBIMVA: - case MISCREG_DTLBIASID: - case MISCREG_TLBIALL: - case MISCREG_TLBIMVA: - case MISCREG_TLBIASID: - case MISCREG_TLBIMVAA: - if (isRead) { - return new Unknown(machInst); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } + return new Dmb(machInst, iss); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrc" : "mcr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); - // Read only in user mode. 
- case MISCREG_TPIDRURO: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; } - // Read/write in user mode. - case MISCREG_TPIDRURW: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (isRead) + return new Mrc15(machInst, rt, (IntRegIndex)miscReg, iss); + return new Mcr15(machInst, (IntRegIndex)miscReg, rt, iss); } else { - return new Mcr15User(machInst, (IntRegIndex)miscReg, rt); - } - - // Read/write, priveleged only. - default: - if (miscReg >= MISCREG_CP15_UNIMP_START) return new FailUnimplemented(csprintf("%s %s", isRead ? 
"mrc" : "mcr", miscRegName[miscReg]).c_str(), machInst); - if (isRead) { - return new Mrc15(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); } } } @@ -248,3 +231,70 @@ def format McrMrc15() {{ return decodeMcrMrc15(machInst); ''' }}; + +let {{ + header_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst); + ''' + decoder_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst) + { + const uint32_t crm = bits(machInst, 3, 0); + const uint32_t opc1 = bits(machInst, 7, 4); + const MiscRegIndex miscReg = decodeCP15Reg64(crm, opc1); + const IntRegIndex rt = (IntRegIndex) (uint32_t) bits(machInst, 15, 12); + const IntRegIndex rt2 = (IntRegIndex) (uint32_t) bits(machInst, 19, 16); + + const bool isRead = bits(machInst, 20); + + switch (miscReg) { + case MISCREG_CP15_UNIMPL: + return new FailUnimplemented( + csprintf("miscreg crm:%d opc1:%d 64-bit %s unknown", + crm, opc1, isRead ? "read" : "write").c_str(), + machInst); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); + + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. 
+ miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; + } + + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + uint32_t iss = mcrrMrrcIssBuild(isRead, crm, rt, rt2, opc1); + + if (isRead) + return new Mrrc15(machInst, (IntRegIndex) miscReg, rt2, rt, iss); + return new Mcrr15(machInst, rt2, rt, (IntRegIndex) miscReg, iss); + } else { + return new FailUnimplemented(csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]).c_str(), + machInst); + } + } + } + ''' +}}; + +def format Mcrr15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; + +def format Mrrc15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa new file mode 100644 index 000000000..72bbd0c60 --- /dev/null +++ b/src/arch/arm/isa/formats/neon64.isa @@ -0,0 +1,2626 @@ +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +output header {{ +namespace Aarch64 +{ + // AdvSIMD three same + StaticInstPtr decodeNeon3Same(ExtMachInst machInst); + // AdvSIMD three different + StaticInstPtr decodeNeon3Diff(ExtMachInst machInst); + // AdvSIMD two-reg misc + StaticInstPtr decodeNeon2RegMisc(ExtMachInst machInst); + // AdvSIMD across lanes + StaticInstPtr decodeNeonAcrossLanes(ExtMachInst machInst); + // AdvSIMD copy + StaticInstPtr decodeNeonCopy(ExtMachInst machInst); + // AdvSIMD vector x indexed element + StaticInstPtr decodeNeonIndexedElem(ExtMachInst machInst); + // AdvSIMD modified immediate + StaticInstPtr decodeNeonModImm(ExtMachInst machInst); + // AdvSIMD shift by immediate + StaticInstPtr decodeNeonShiftByImm(ExtMachInst machInst); + // AdvSIMD TBL/TBX + StaticInstPtr decodeNeonTblTbx(ExtMachInst machInst); + // AdvSIMD ZIP/UZP/TRN + StaticInstPtr decodeNeonZipUzpTrn(ExtMachInst machInst); + // AdvSIMD EXT + StaticInstPtr decodeNeonExt(ExtMachInst machInst); + + // AdvSIMD scalar three same + StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three different + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); + // AdvSIMD scalar two-reg misc + StaticInstPtr decodeNeonSc2RegMisc(ExtMachInst machInst); + // AdvSIMD scalar pairwise + StaticInstPtr decodeNeonScPwise(ExtMachInst machInst); + // AdvSIMD scalar copy + StaticInstPtr decodeNeonScCopy(ExtMachInst machInst); + // AdvSIMD scalar x indexed element + StaticInstPtr decodeNeonScIndexedElem(ExtMachInst machInst); + // AdvSIMD scalar shift by immediate + StaticInstPtr decodeNeonScShiftByImm(ExtMachInst machInst); + + // AdvSIMD load/store + StaticInstPtr decodeNeonMem(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeNeon3Same(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + 
IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + + switch (opcode) { + case 0x00: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhaddDX, UhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShaddDX, ShaddQX>( + q, size, machInst, vd, vn, vm); + case 0x01: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqaddDX, UqaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqaddDX, SqaddQX>( + q, size, machInst, vd, vn, vm); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UrhaddDX, UrhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SrhaddDX, SrhaddQX>( + q, size, machInst, vd, vn, vm); + case 0x03: + switch (size) { + case 0x0: + if (u) { + if (q) + return new EorQX<uint64_t>(machInst, vd, vn, vm); + else + return new EorDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new AndQX<uint64_t>(machInst, vd, vn, vm); + else + return new AndDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x1: + if (u) { + if (q) + return new BslQX<uint64_t>(machInst, vd, vn, vm); + else + return new BslDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new BicQX<uint64_t>(machInst, vd, vn, vm); + else + return new BicDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x2: + if (u) { + if (q) + return new BitQX<uint64_t>(machInst, vd, vn, vm); + else + return new BitDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrrQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrrDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x3: + if (u) { + if (q) + return new BifQX<uint64_t>(machInst, vd, vn, vm); + else + 
return new BifDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrnQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrnDX<uint64_t>(machInst, vd, vn, vm); + } + } + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhsubDX, UhsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShsubDX, ShsubQX>( + q, size, machInst, vd, vn, vm); + case 0x05: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqsubDX, UqsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqsubDX, SqsubQX>( + q, size, machInst, vd, vn, vm); + case 0x06: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhiDX, CmhiQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgtDX, CmgtQX>( + q, size, machInst, vd, vn, vm); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhsDX, CmhsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgeDX, CmgeQX>( + q, size, machInst, vd, vn, vm); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UshlDX, UshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SshlDX, SshlQX>( + q, size, machInst, vd, vn, vm); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqshlDX, UqshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqshlDX, SqshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UrshlDX, UrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SrshlDX, SrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + if 
(u) + return decodeNeonUThreeXReg<UqrshlDX, UqrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqrshlDX, SqrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0c: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxDX, UmaxQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxDX, SmaxQX>( + q, size, machInst, vd, vn, vm); + case 0x0d: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminDX, UminQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminDX, SminQX>( + q, size, machInst, vd, vn, vm); + case 0x0e: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdDX, UabdQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdDX, SabdQX>( + q, size, machInst, vd, vn, vm); + case 0x0f: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabaDX, UabaQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabaDX, SabaQX>( + q, size, machInst, vd, vn, vm); + case 0x10: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<SubDX, SubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<AddDX, AddQX>( + q, size, machInst, vd, vn, vm); + case 0x11: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmeqDX, CmeqQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<CmtstDX, CmtstQX>( + q, size, machInst, vd, vn, vm); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<MlsDX, MlsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<MlaDX, MlaQX>( + q, size, machInst, vd, vn, vm); + case 0x13: + if (size == 0x3 || (size != 0x0 && bits(machInst, 29))) + return new Unknown64(machInst); + if 
(u) { + if (q) + return new PmulQX<uint8_t>(machInst, vd, vn, vm); + else + return new PmulDX<uint8_t>(machInst, vd, vn, vm); + } else { + return decodeNeonUThreeSReg<MulDX, MulQX>( + q, size, machInst, vd, vn, vm); + } + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxpDX, UmaxpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxpDX, SmaxpQX>( + q, size, machInst, vd, vn, vm); + case 0x15: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminpDX, UminpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminpDX, SminpQX>( + q, size, machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) { + if (q) + return decodeNeonSThreeHAndWReg<SqrdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqrdmulhDX>( + size, machInst, vd, vn, vm); + } else { + if (q) + return decodeNeonSThreeHAndWReg<SqdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhDX>( + size, machInst, vd, vn, vm); + } + case 0x17: + if (u || size_q == 0x6) + return new Unknown64(machInst); + else + return decodeNeonUThreeXReg<AddpDX, AddpQX>( + q, size, machInst, vd, vn, vm); + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxnmpDX, FmaxnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxnmDX, FmaxnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FminnmpDX, FminnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminnmDX, FminnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x19: + if (size < 0x2) { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlaDX, FmlaQX>( + q, size & 0x1, 
machInst, vd, vn, vm); + } else { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlsDX, FmlsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FaddpDX, FaddpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FaddDX, FaddQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FabdDX, FabdQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FsubDX, FsubQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1b: + if (size < 0x2 && sz_q != 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmulDX, FmulQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmulxDX, FmulxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + return new Unknown64(machInst); + } + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FcmgeDX, FcmgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FcmeqDX, FcmeqQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FcmgtDX, FcmgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FacgeDX, FacgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } else { + if (u) + return decodeNeonUThreeFpReg<FacgtDX, FacgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1e: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxpDX, FmaxpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxDX, FmaxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return 
decodeNeonUThreeFpReg<FminpDX, FminpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminDX, FminQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1f: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FdivDX, FdivQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FrecpsDX, FrecpsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FrsqrtsDX, FrsqrtsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon3Diff(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x0: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddlX, Uaddl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddlX, Saddl2X>( + q, size, machInst, vd, vn, vm); + case 0x1: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddwX, Uaddw2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddwX, Saddw2X>( + q, size, machInst, vd, vn, vm); + case 0x2: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsublX, Usubl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SsublX, Ssubl2X>( + q, size, machInst, vd, vn, vm); + case 0x3: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsubwX, Usubw2X>( + q, size, machInst, vd, vn, vm); 
+ else + return decodeNeonSThreeSReg<SsubwX, Ssubw2X>( + q, size, machInst, vd, vn, vm); + case 0x4: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RaddhnX, Raddhn2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<AddhnX, Addhn2X>( + q, size, machInst, vd, vn, vm); + case 0x5: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabalX, Uabal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabalX, Sabal2X>( + q, size, machInst, vd, vn, vm); + case 0x6: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RsubhnX, Rsubhn2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<SubhnX, Subhn2X>( + q, size, machInst, vd, vn, vm); + case 0x7: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdlX, Uabdl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdlX, Sabdl2X>( + q, size, machInst, vd, vn, vm); + case 0x8: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlalX, Umlal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlalX, Smlal2X>( + q, size, machInst, vd, vn, vm); + case 0x9: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmlal2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlalX>( + size, machInst, vd, vn, vm); + } + } + case 0xa: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlslX, Umlsl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlslX, Smlsl2X>( + q, size, machInst, vd, vn, vm); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return 
decodeNeonSThreeHAndWReg<Sqdmlsl2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlslX>( + size, machInst, vd, vn, vm); + } + } + case 0xc: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmullX, Umull2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmullX, Smull2X>( + q, size, machInst, vd, vn, vm); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmull2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmullX>( + size, machInst, vd, vn, vm); + } + } + case 0xe: + if (u || size != 0) { + return new Unknown64(machInst); + } else { + if (q) + return new Pmull2X<uint8_t>(machInst, vd, vn, vm); + else + return new PmullX<uint8_t>(machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon2RegMisc(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t op = (uint8_t)((bits(machInst, 12) << 1) | + bits(machInst, 29)); + uint8_t switchVal = opcode | ((u ? 
1 : 0) << 5); + + switch (switchVal) { + case 0x00: + if (op + size >= 3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<Rev64DX, Rev64QX>( + q, size, machInst, vd, vn); + case 0x01: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) + return new Rev16QX<uint8_t>(machInst, vd, vn); + else + return new Rev16DX<uint8_t>(machInst, vd, vn); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SaddlpDX, SaddlpQX>( + q, size, machInst, vd, vn); + case 0x03: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg<SuqaddDX, SuqaddQX>( + q, size, machInst, vd, vn); + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClsDX, ClsQX>( + q, size, machInst, vd, vn); + case 0x05: + if (size != 0x0) + return new Unknown64(machInst); + if (q) + return new CntQX<uint8_t>(machInst, vd, vn); + else + return new CntDX<uint8_t>(machInst, vd, vn); + case 0x06: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SadalpDX, SadalpQX>( + q, size, machInst, vd, vn); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqabsDX, SqabsQX>( + q, size, machInst, vd, vn); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgtZeroDX, CmgtZeroQX>( + q, size, machInst, vd, vn); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmeqZeroDX, CmeqZeroQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmltZeroDX, CmltZeroQX>( + q, size, machInst, vd, vn); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<AbsDX, AbsQX>( + q, size, machInst, vd, vn); + case 0x0c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return 
decodeNeonUTwoMiscFpReg<FcmgtZeroDX, FcmgtZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmeqZeroDX, FcmeqZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmltZeroDX, FcmltZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FabsDX, FabsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<XtnX, Xtn2X>( + q, size, machInst, vd, vn); + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtnX, Sqxtn2X>( + q, size, machInst, vd, vn); + case 0x16: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtn2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtn2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtnX<uint32_t>(machInst, vd, vn); + else + return new FcvtnX<uint16_t>(machInst, vd, vn); + } + case 0x17: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtl2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtl2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtlX<uint32_t>(machInst, vd, vn); + else + return new FcvtlX<uint16_t>(machInst, vd, vn); + } + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintnDX, FrintnQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintpDX, FrintpQX>( + q, size & 0x1, machInst, vd, vn); + case 0x19: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintmDX, FrintmQX>( + q, size & 0x1, machInst, vd, vn); + else 
+ return decodeNeonUTwoMiscFpReg<FrintzDX, FrintzQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnsDX, FcvtnsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpsDX, FcvtpsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmsDX, FcvtmsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzsIntDX, FcvtzsIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) { + if (sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcvtasDX, FcvtasQX>( + q, size & 0x1, machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown64(machInst); + if (q) + return new UrecpeQX<uint32_t>(machInst, vd, vn); + else + return new UrecpeDX<uint32_t>(machInst, vd, vn); + } + case 0x1d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) { + if (size & 0x1) + return new ScvtfIntDQX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntSQX<uint32_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfIntDX<uint32_t>(machInst, vd, vn); + } + } else { + return decodeNeonUTwoMiscFpReg<FrecpeDX, FrecpeQX>( + q, size & 0x1, machInst, vd, vn); + } + case 0x20: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) { + if (size & 0x1) + return new Rev32QX<uint16_t>(machInst, vd, vn); + else + return new Rev32QX<uint8_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Rev32DX<uint16_t>(machInst, vd, vn); + else + return new Rev32DX<uint8_t>(machInst, vd, vn); + } + case 0x22: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UaddlpDX, UaddlpQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x6) + return new 
Unknown64(machInst); + return decodeNeonUTwoMiscXReg<UsqaddDX, UsqaddQX>( + q, size, machInst, vd, vn); + return new Unknown64(machInst); + case 0x24: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClzDX, ClzQX>( + q, size, machInst, vd, vn); + case 0x25: + if (size == 0x0) { + if (q) + return new MvnQX<uint64_t>(machInst, vd, vn); + else + return new MvnDX<uint64_t>(machInst, vd, vn); + } else if (size == 0x1) { + if (q) + return new RbitQX<uint8_t>(machInst, vd, vn); + else + return new RbitDX<uint8_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x26: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UadalpDX, UadalpQX>( + q, size, machInst, vd, vn); + case 0x27: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqnegDX, SqnegQX>( + q, size, machInst, vd, vn); + case 0x28: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgeZeroDX, CmgeZeroQX>( + q, size, machInst, vd, vn); + case 0x29: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmleZeroDX, CmleZeroQX>( + q, size, machInst, vd, vn); + case 0x2b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<NegDX, NegQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmgeZeroDX, FcmgeZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmleZeroDX, FcmleZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2f: + if (size < 0x2 || size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FnegDX, FnegQX>( + q, size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtunX, 
Sqxtun2X>( + q, size, machInst, vd, vn); + case 0x33: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<ShllX, Shll2X>( + q, size, machInst, vd, vn); + case 0x34: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UqxtnX, Uqxtn2X>( + q, size, machInst, vd, vn); + case 0x36: + if (size != 0x1) + return new Unknown64(machInst); + if (q) + return new Fcvtxn2X<uint32_t>(machInst, vd, vn); + else + return new FcvtxnX<uint32_t>(machInst, vd, vn); + case 0x38: + if (size > 0x1 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FrintaDX, FrintaQX>( + q, size & 0x1, machInst, vd, vn); + case 0x39: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintxDX, FrintxQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintiDX, FrintiQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnuDX, FcvtnuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpuDX, FcvtpuQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmuDX, FcvtmuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzuIntDX, FcvtzuIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) { + return decodeNeonUTwoMiscFpReg<FcvtauDX, FcvtauQX>( + q, size & 0x1, machInst, vd, vn); + } else if (size == 0x2) { + if (q) + return new UrsqrteQX<uint32_t>(machInst, vd, vn); + else + return new UrsqrteDX<uint32_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x3d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<UcvtfIntDX, UcvtfIntQX>( + q, size & 0x1, machInst, vd, 
vn); + else + return decodeNeonUTwoMiscFpReg<FrsqrteDX, FrsqrteQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FsqrtDX, FsqrtQX>( + q, size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonAcrossLanes(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x03: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesLongReg<SaddlvDX, SaddlvQX, + SaddlvBQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SmaxvDX, SmaxvQX>( + q, size, machInst, vd, vn); + case 0x1a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SminvDX, SminvQX>( + q, size, machInst, vd, vn); + case 0x1b: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<AddvDX, AddvQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesLongReg<UaddlvDX, UaddlvQX, + UaddlvBQX>( + q, size, machInst, vd, vn); + case 0x2a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UmaxvDX, UmaxvQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxnmvQX<uint32_t>(machInst, 
vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminnmvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x2f: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UminvDX, UminvQX>( + q, size, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonCopy(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t imm5 = bits(machInst, 20, 16); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t imm5_pos = findLsbSet(imm5); + uint8_t index1 = 0, index2 = 0; + + if (op) { + if (!q || (imm4 & mask(imm5_pos))) + return new Unknown64(machInst); + + index1 = bits(imm5, 4, imm5_pos + 1); // dst + index2 = bits(imm4, 3, imm5_pos); // src + + switch (imm5_pos) { + case 0: + return new InsElemX<uint8_t>(machInst, vd, vn, index1, index2); + case 1: + return new InsElemX<uint16_t>(machInst, vd, vn, index1, index2); + case 2: + return new InsElemX<uint32_t>(machInst, vd, vn, index1, index2); + case 3: + return new InsElemX<uint64_t>(machInst, vd, vn, index1, index2); + default: + return new Unknown64(machInst); + } + } + + switch (imm4) { + case 0x0: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new DupElemQX<uint8_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new DupElemQX<uint16_t>(machInst, vd, vn, index1); + else + 
return new DupElemDX<uint16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new DupElemQX<uint32_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint32_t>(machInst, vd, vn, index1); + case 3: + if (q) + return new DupElemQX<uint64_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x1: + switch (imm5) { + case 0x1: + if (q) + return new DupGprWQX<uint8_t>(machInst, vd, vn); + else + return new DupGprWDX<uint8_t>(machInst, vd, vn); + case 0x2: + if (q) + return new DupGprWQX<uint16_t>(machInst, vd, vn); + else + return new DupGprWDX<uint16_t>(machInst, vd, vn); + case 0x4: + if (q) + return new DupGprWQX<uint32_t>(machInst, vd, vn); + else + return new DupGprWDX<uint32_t>(machInst, vd, vn); + case 0x8: + if (q) + return new DupGprXQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3: + index1 = imm5 >> (imm5_pos + 1); + switch (imm5_pos) { + case 0: + return new InsGprWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new InsGprWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new InsGprWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new InsGprXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + case 0x5: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new SmovXX<int8_t>(machInst, vd, vn, index1); + else + return new SmovWX<int8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new SmovXX<int16_t>(machInst, vd, vn, index1); + else + return new SmovWX<int16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new SmovXX<int32_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x7: + index1 = imm5 >> (imm5_pos + 1); + + if ((q && imm5_pos != 3) || (!q && imm5_pos >= 3)) + return new Unknown64(machInst); + + switch (imm5_pos) { + case 0: + return 
new UmovWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new UmovWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new UmovWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new UmovXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonIndexedElem(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz = size & 0x1; + uint8_t sz_q = (sz << 1) | bits(machInst, 30); + uint8_t sz_L = (sz << 1) | L; + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + switch (opcode) { + case 0x0: + if (!u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MlaElemDX, MlaElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x1: + if (!u && size >= 2 && sz_q != 0x2 && sz_L != 0x3) + return decodeNeonUThreeImmFpReg<FmlaElemDX, FmlaElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x2: + if (size == 
0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlalElemX, UmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlalElemX, SmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x3: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlalElemX, + SqdmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x4: + if (u && !(size == 0x0 || size == 0x3)) + return decodeNeonUThreeImmHAndWReg<MlsElemDX, MlsElemQX>( + q, size, machInst, vd, vn, vm, index); + else + return new Unknown64(machInst); + case 0x5: + if (!u && size >= 0x2 && sz_L != 0x3 && sz_q != 0x2) + return decodeNeonUThreeImmFpReg<FmlsElemDX, FmlsElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x6: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlslElemX, UmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlslElemX, SmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x7: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlslElemX, + SqdmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x8: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MulElemDX, MulElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x9: + if (size >= 2 && sz_q != 0x2 && sz_L != 0x3) { + if (u) + return decodeNeonUThreeImmFpReg<FmulxElemDX, FmulxElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmFpReg<FmulElemDX, FmulElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + } else { + return new Unknown64(machInst); + } + case 0xa: + if (size == 0x0 || size == 0x3) + return new 
Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmullElemX, UmullElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmullElemX, SmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmullElemX, SqdmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xc: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonModImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t abcdefgh = (bits(machInst, 18, 16) << 5) | + bits(machInst, 9, 5); + uint8_t cmode = bits(machInst, 15, 12); + uint8_t o2 = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + + if (o2 == 0x1 || (op == 0x1 && cmode == 0xf && !q)) + return new Unknown64(machInst); + + bool immValid = true; + const uint64_t bigImm = simd_modified_imm(op, cmode, abcdefgh, + immValid, + true /* isAarch64 */); + if (!immValid) { + return new Unknown(machInst); + } + + if (op) { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + switch (bits(cmode, 1, 0)) { + case 0: + case 1: + if (q) + return new 
MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + case 2: + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + case 3: + if (q) + return new FmovQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, + bigImm); + } + } + } + } else { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + if (bits(cmode, 1, 0) == 0x3) { + if (q) + return new FmovQX<uint32_t>(machInst, vd, bigImm); + else + return new FmovDX<uint32_t>(machInst, vd, bigImm); + } else { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } + } + } + return new Unknown(machInst); + } + + StaticInstPtr + decodeNeonShiftByImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) 
(uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t immh3_q = (immh3 << 1) | q; + uint8_t op_u = (bits(machInst, 12) << 1) | u; + uint8_t size = findMsbSet(immh); + int shiftAmt = 0; + + switch (opcode) { + case 0x00: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UshrDX, UshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SshrDX, SshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x02: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UsraDX, UsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SsraDX, SsraQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x04: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrshrDX, UrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrshrDX, SrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x06: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrsraDX, UrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrsraDX, SrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x08: + if (u && !(immh3_q == 0x2)) { + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonUTwoShiftXReg<SriDX, SriQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0a: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<SliDX, SliQX>( 
+ q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftXReg<ShlDX, ShlQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x0c: + if (u && !(immh3_q == 0x2 || op_u == 0x0)) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftXReg<SqshluDX, SqshluQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + if (immh3_q == 0x2 || op_u == 0x0) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<UqshlImmDX, UqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SqshlImmDX, SqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x10: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqshrunX, Sqshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<ShrnX, Shrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x11: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqrshrunX, Sqrshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<RshrnX, Rshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqshrnX, Uqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqshrnX, Sqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqrshrnX, Uqrshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqrshrnX, Sqrshrn2X>( + q, size, machInst, vd, vn, 
shiftAmt); + case 0x14: + if (immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftSReg<UshllX, Ushll2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SshllX, Sshll2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftFpReg<UcvtfFixedDX, UcvtfFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (q) { + if (size & 0x1) + return new ScvtfFixedDQX<uint64_t>(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedSQX<uint32_t>(machInst, vd, vn, + shiftAmt); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfFixedDX<uint32_t>(machInst, vd, vn, + shiftAmt); + } + } + case 0x1f: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftFpReg<FcvtzuFixedDX, FcvtzuFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftFpReg<FcvtzsFixedDX, FcvtzsFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonTblTbx(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t switchVal = bits(machInst, 14, 12); + + switch (switchVal) { + case 0x0: + if (q) + return new Tbl1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl1DX<uint8_t>(machInst, vd, vn, vm); + case 0x1: + if (q) + return new Tbx1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx1DX<uint8_t>(machInst, vd, vn, vm); + case 0x2: + if (q) + return new 
Tbl2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl2DX<uint8_t>(machInst, vd, vn, vm); + case 0x3: + if (q) + return new Tbx2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx2DX<uint8_t>(machInst, vd, vn, vm); + case 0x4: + if (q) + return new Tbl3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl3DX<uint8_t>(machInst, vd, vn, vm); + case 0x5: + if (q) + return new Tbx3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx3DX<uint8_t>(machInst, vd, vn, vm); + case 0x6: + if (q) + return new Tbl4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl4DX<uint8_t>(machInst, vd, vn, vm); + case 0x7: + if (q) + return new Tbx4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx4DX<uint8_t>(machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonZipUzpTrn(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 14, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x1: + return decodeNeonUThreeXReg<Uzp1DX, Uzp1QX>( + q, size, machInst, vd, vn, vm); + case 0x2: + return decodeNeonUThreeXReg<Trn1DX, Trn1QX>( + q, size, machInst, vd, vn, vm); + case 0x3: + return decodeNeonUThreeXReg<Zip1DX, Zip1QX>( + q, size, machInst, vd, vn, vm); + case 0x5: + return decodeNeonUThreeXReg<Uzp2DX, Uzp2QX>( + q, size, machInst, vd, vn, vm); + case 0x6: + return decodeNeonUThreeXReg<Trn2DX, Trn2QX>( + q, size, machInst, vd, vn, vm); + case 0x7: + return decodeNeonUThreeXReg<Zip2DX, Zip2QX>( + q, size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonExt(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + 
uint8_t op2 = bits(machInst, 23, 22); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (op2 != 0 || (q == 0x0 && bits(imm4, 3) == 0x1)) + return new Unknown64(machInst); + + uint8_t index = q ? imm4 : imm4 & 0x7; + + if (q) { + return new ExtQX<uint8_t>(machInst, vd, vn, vm, index); + } else { + return new ExtDX<uint8_t>(machInst, vd, vn, vm, index); + } + } + + StaticInstPtr + decodeNeonSc3Same(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + uint8_t s = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x01: + if (u) + return decodeNeonUThreeUReg<UqaddScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqaddScX>( + size, machInst, vd, vn, vm); + case 0x05: + if (u) + return decodeNeonUThreeUReg<UqsubScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqsubScX>( + size, machInst, vd, vn, vm); + case 0x06: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhiDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgtDX<int64_t>(machInst, vd, vn, vm); + case 0x07: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhsDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgeDX<int64_t>(machInst, vd, vn, vm); + case 0x08: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SshlDX<int64_t>(machInst, vd, vn, vm); + case 0x09: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return 
decodeNeonUThreeUReg<UqshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqshlScX>( + size, machInst, vd, vn, vm); + case 0x0a: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UrshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SrshlDX<int64_t>(machInst, vd, vn, vm); + case 0x0b: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg<UqrshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqrshlScX>( + size, machInst, vd, vn, vm); + case 0x10: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new SubDX<uint64_t>(machInst, vd, vn, vm); + else + return new AddDX<uint64_t>(machInst, vd, vn, vm); + case 0x11: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmeqDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmtstDX<uint64_t>(machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) + return decodeNeonSThreeHAndWReg<SqrdmulhScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhScX>( + size, machInst, vd, vn, vm); + case 0x1a: + if (!u || size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FabdScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1b: + if (u || size > 0x1) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FmulxScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeScFpReg<FcmgeScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FcmeqScX>( + size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeScFpReg<FcmgtScX>( + size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (!u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FacgeScX>( + size 
& 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FacgtScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1f: + if (u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FrecpsScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FrsqrtsScX>( + size & 0x1, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc3Diff(ExtMachInst machInst) + { + if (bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x9: + return decodeNeonSThreeHAndWReg<SqdmlalScX>(size, machInst, vd, vn, vm); + case 0xb: + return decodeNeonSThreeHAndWReg<SqdmlslScX>(size, machInst, vd, vn, vm); + case 0xd: + return decodeNeonSThreeHAndWReg<SqdmullScX>(size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc2RegMisc(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t switchVal = opcode | ((u ? 
1 : 0) << 5); + switch (switchVal) { + case 0x03: + return decodeNeonUTwoMiscUReg<SuqaddScX>(size, machInst, vd, vn); + case 0x07: + return decodeNeonSTwoMiscUReg<SqabsScX>(size, machInst, vd, vn); + case 0x08: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgtZeroDX<int64_t>(machInst, vd, vn); + case 0x09: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmeqZeroDX<int64_t>(machInst, vd, vn); + case 0x0a: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmltZeroDX<int64_t>(machInst, vd, vn); + case 0x0b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new AbsDX<int64_t>(machInst, vd, vn); + case 0x0c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgtZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmeqZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmltZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x14: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtnScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtnScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtnScX<int32_t>(machInst, vd, vn); + } + } + case 0x1a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpsScX>( + size & 0x1, machInst, vd, vn); + case 0x1b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzsIntScX>( + size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtasScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); 
+ case 0x1d: + if (size < 0x2) { + if (size & 0x1) + return new ScvtfIntScDX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntScSX<uint32_t>(machInst, vd, vn); + } else { + return decodeNeonUTwoMiscScFpReg<FrecpeScX>( + size & 0x1, machInst, vd, vn); + } + case 0x1f: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FrecpxX>( + size & 0x1, machInst, vd, vn); + case 0x23: + return decodeNeonUTwoMiscUReg<UsqaddScX>(size, machInst, vd, vn); + case 0x27: + return decodeNeonSTwoMiscUReg<SqnegScX>(size, machInst, vd, vn); + case 0x28: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgeZeroDX<int64_t>(machInst, vd, vn); + case 0x29: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmleZeroDX<int64_t>(machInst, vd, vn); + case 0x2b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new NegDX<int64_t>(machInst, vd, vn); + case 0x2c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgeZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmleZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtunScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtunScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtunScX<int32_t>(machInst, vd, vn); + } + } + case 0x34: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new UqxtnScX<uint8_t>(machInst, vd, vn); + case 0x1: + return new UqxtnScX<uint16_t>(machInst, vd, vn); + case 0x2: + return new UqxtnScX<uint32_t>(machInst, vd, vn); + } + } + case 0x36: + if (size != 0x1) { + return new Unknown64(machInst); + } else { + return new FcvtxnScX<uint32_t>(machInst, vd, vn); + } + case 0x3a: + if (size 
< 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpuScX>( + size & 0x1, machInst, vd, vn); + case 0x3b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzuIntScX>( + size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtauScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x3d: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<UcvtfIntScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FrsqrteScX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScPwise(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + if (!u) { + if (opcode == 0x1b && size == 0x3) + return new AddpScQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + + uint8_t switchVal = (opcode << 0) | (size << 5); + switch (switchVal) { + case 0x0c: + case 0x2c: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxnmpScDX, FmaxnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + case 0x2d: + return decodeNeonUTwoMiscPwiseScFpReg<FaddpScDX, FaddpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0f: + case 0x2f: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxpScDX, FmaxpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4c: + case 0x6c: + return decodeNeonUTwoMiscPwiseScFpReg<FminnmpScDX, FminnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4f: + case 0x6f: + return decodeNeonUTwoMiscPwiseScFpReg<FminpScDX, FminpScQX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + 
// Decode an instruction of the scalar "copy" group into a DupElemScX
// static instruction.
//
// Fields read from machInst:
//   bits 14:11 and bit 29  must all be zero, otherwise Unknown64
//   bits 20:16 (imm5)      lowest set bit gives the element size, the bits
//                          above it give the element index
//   bits 4:0 / 9:5         destination / source register indices
//
// Returns Unknown64 for encodings this decoder does not accept.
StaticInstPtr
decodeNeonScCopy(ExtMachInst machInst)
{
    if (bits(machInst, 14, 11) != 0 || bits(machInst, 29))
        return new Unknown64(machInst);

    uint8_t imm5 = bits(machInst, 20, 16);

    // With bits 3:0 of imm5 all clear there is no element size in 0..3.
    // Reject the encoding here so that "size + 1" below is always a valid
    // bit position inside the 5-bit field (for imm5 == 0 the result of
    // findLsbSet() is not a bit position at all).
    // NOTE(review): previously imm5 == 0x10 reached
    // decodeNeonUTwoShiftUReg() with size == 4; presumably that helper
    // also yields Unknown64 for it -- confirm.
    if ((imm5 & 0xf) == 0)
        return new Unknown64(machInst);

    IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
    IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);

    uint8_t size = findLsbSet(imm5);
    uint8_t index = bits(imm5, 4, size + 1);

    return decodeNeonUTwoShiftUReg<DupElemScX>(
        size, machInst, vd, vn, index);
}

// Decode an instruction of the scalar "indexed element" group.
//
// Fields read from machInst:
//   bit 29 (u), bits 23:22 (size), bit 21 (L), bit 20 (M),
//   bits 15:12 (opcode), bit 11 (H), bits 19:16 (low four bits of the
//   second source register), bits 4:0 / 9:5 (destination / first source).
//
// Two independent (register, index) pairs are derived for the second
// operand, one used by the integer cases and one by the FP cases; the
// opcode then selects which pair is passed on.
//
// Returns Unknown64 for every opcode/size/u combination not listed in the
// switch below.
StaticInstPtr
decodeNeonScIndexedElem(ExtMachInst machInst)
{
    uint8_t u = bits(machInst, 29);
    uint8_t size = bits(machInst, 23, 22);
    uint8_t L = bits(machInst, 21);
    uint8_t M = bits(machInst, 20);
    uint8_t opcode = bits(machInst, 15, 12);
    uint8_t H = bits(machInst, 11);
    uint8_t sz_L = bits(machInst, 22, 21);

    IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
    IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
    IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16);

    // Index and 2nd register operand for integer instructions.
    // For size == 1, M is part of the index and the register number is
    // limited to its low four bits; for sizes 2 and 3, M supplies bit 4 of
    // the register number instead.
    uint8_t index = 0;
    uint8_t vmhInt = 0;
    switch (size) {
      case 0x1:
        index = (H << 2) | (L << 1) | M;
        break;
      case 0x2:
        index = (H << 1) | L;
        vmhInt = M;
        break;
      case 0x3:
        index = H;
        vmhInt = M;
        break;
      default:
        break;
    }
    IntRegIndex vm = (IntRegIndex) (uint8_t) (vmhInt << 4 | vm_bf);

    // Index and 2nd register operand for FP instructions.
    // M always supplies bit 4 of the register number here.
    uint8_t index_fp = 0;
    if ((size & 0x1) == 0)
        index_fp = (H << 1) | L;
    else if (L == 0)
        index_fp = H;
    IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (M << 4 | vm_bf);

    // Only opcode 0x9 has a u == 1 form.
    if (u && opcode != 9)
        return new Unknown64(machInst);

    // Size checks shared by the FP cases and by the integer cases.
    const bool fpUnalloc = size < 2 || sz_L == 0x3;
    const bool intUnalloc = size == 0x0 || size == 0x3;

    switch (opcode) {
      case 0x1:
        if (fpUnalloc)
            return new Unknown64(machInst);
        return decodeNeonUThreeImmScFpReg<FmlaElemScX>(
            size & 0x1, machInst, vd, vn, vm_fp, index_fp);
      case 0x3:
        if (intUnalloc)
            return new Unknown64(machInst);
        return decodeNeonSThreeImmHAndWReg<SqdmlalElemScX>(
            size, machInst, vd, vn, vm, index);
      case 0x5:
        if (fpUnalloc)
            return new Unknown64(machInst);
        return decodeNeonUThreeImmScFpReg<FmlsElemScX>(
            size & 0x1, machInst, vd, vn, vm_fp, index_fp);
      case 0x7:
        if (intUnalloc)
            return new Unknown64(machInst);
        return decodeNeonSThreeImmHAndWReg<SqdmlslElemScX>(
            size, machInst, vd, vn, vm, index);
      case 0x9:
        if (fpUnalloc)
            return new Unknown64(machInst);
        if (u)
            return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
                size & 0x1, machInst, vd, vn, vm_fp, index_fp);
        return decodeNeonUThreeImmScFpReg<FmulElemScX>(
            size & 0x1, machInst, vd, vn, vm_fp, index_fp);
      case 0xb:
        if (intUnalloc)
            return new Unknown64(machInst);
        return decodeNeonSThreeImmHAndWReg<SqdmullElemScX>(
            size, machInst, vd, vn, vm, index);
      case 0xc:
        if (intUnalloc)
            return new Unknown64(machInst);
        return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
            size, machInst, vd, vn, vm, index);
      case 0xd:
        if (intUnalloc)
            return new Unknown64(machInst);
        return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
            size, machInst, vd, vn, vm, index);
      default:
        return new Unknown64(machInst);
    }
}

// Decode an instruction of the scalar "shift by immediate" group.
//
// Fields read from machInst:
//   bit 29 (u), bits 22:19 (immh), bits 18:16 (immb), bits 15:11 (opcode),
//   bits 4:0 / 9:5 (destination / source register indices).
//
// The element size is the position of the highest set bit of immh. Every
// case uses one of two shift amounts derived from immh:immb:
//   rShiftAmt = (8 << (size + 1)) - immh:immb
//   lShiftAmt = immh:immb - (8 << size)
//
// Returns Unknown64 when immh is zero and for every opcode/u/immh
// combination not accepted by the switch below.
StaticInstPtr
decodeNeonScShiftByImm(ExtMachInst machInst)
{
    bool u = bits(machInst, 29);
    uint8_t immh = bits(machInst, 22, 19);
    uint8_t immb = bits(machInst, 18, 16);
    uint8_t opcode = bits(machInst, 15, 11);

    // Reject immh == 0 before deriving anything from it.
    if (immh == 0x0)
        return new Unknown64(machInst);

    IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
    IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);

    uint8_t immh3 = bits(machInst, 22);
    uint8_t size = findMsbSet(immh);

    const int immhb = (immh << 3) | immb;
    const int rShiftAmt = (8 << (size + 1)) - immhb;
    const int lShiftAmt = immhb - (8 << size);

    switch (opcode) {
      case 0x00:
        if (!immh3)
            return new Unknown64(machInst);
        if (u)
            return new UshrDX<uint64_t>(machInst, vd, vn, rShiftAmt);
        return new SshrDX<int64_t>(machInst, vd, vn, rShiftAmt);
      case 0x02:
        if (!immh3)
            return new Unknown64(machInst);
        if (u)
            return new UsraDX<uint64_t>(machInst, vd, vn, rShiftAmt);
        return new SsraDX<int64_t>(machInst, vd, vn, rShiftAmt);
      case 0x04:
        if (!immh3)
            return new Unknown64(machInst);
        if (u)
            return new UrshrDX<uint64_t>(machInst, vd, vn, rShiftAmt);
        return new SrshrDX<int64_t>(machInst, vd, vn, rShiftAmt);
      case 0x06:
        if (!immh3)
            return new Unknown64(machInst);
        if (u)
            return new UrsraDX<uint64_t>(machInst, vd, vn, rShiftAmt);
        return new SrsraDX<int64_t>(machInst, vd, vn, rShiftAmt);
      case 0x08:
        // Only the u == 1 form exists for this opcode.
        if (!immh3 || !u)
            return new Unknown64(machInst);
        return new SriDX<uint64_t>(machInst, vd, vn, rShiftAmt);
      case 0x0a:
        if (!immh3)
            return new Unknown64(machInst);
        if (u)
            return new SliDX<uint64_t>(machInst, vd, vn, lShiftAmt);
        return new ShlDX<uint64_t>(machInst, vd, vn, lShiftAmt);
      case 0x0c:
        // Only the u == 1 form exists for this opcode.
        if (!u)
            return new Unknown64(machInst);
        return decodeNeonSTwoShiftUReg<SqshluScX>(
            size, machInst, vd, vn, lShiftAmt);
      case 0x0e:
        if (u)
            return decodeNeonUTwoShiftUReg<UqshlImmScX>(
                size, machInst, vd, vn, lShiftAmt);
        return decodeNeonSTwoShiftUReg<SqshlImmScX>(
            size, machInst, vd, vn, lShiftAmt);
      case 0x10:
        if (!u || immh3)
            return new Unknown64(machInst);
        return decodeNeonSTwoShiftUSReg<SqshrunScX>(
            size, machInst, vd, vn, rShiftAmt);
      case 0x11:
        if (!u || immh3)
            return new Unknown64(machInst);
        return decodeNeonSTwoShiftUSReg<SqrshrunScX>(
            size, machInst, vd, vn, rShiftAmt);
      case 0x12:
        if (immh3)
            return new Unknown64(machInst);
        if (u)
            return decodeNeonUTwoShiftUSReg<UqshrnScX>(
                size, machInst, vd, vn, rShiftAmt);
        return decodeNeonSTwoShiftUSReg<SqshrnScX>(
            size, machInst, vd, vn, rShiftAmt);
      case 0x13:
        if (immh3)
            return new Unknown64(machInst);
        if (u)
            return decodeNeonUTwoShiftUSReg<UqrshrnScX>(
                size, machInst, vd, vn, rShiftAmt);
        return decodeNeonSTwoShiftUSReg<SqrshrnScX>(
            size, machInst, vd, vn, rShiftAmt);
      case 0x1c:
        if (immh < 0x4)
            return new Unknown64(machInst);
        if (u)
            return decodeNeonUTwoShiftUFpReg<UcvtfFixedScX>(
                size & 0x1, machInst, vd, vn, rShiftAmt);
        // The signed form is selected by the low bit of size.
        if (size & 0x1)
            return new ScvtfFixedScDX<uint64_t>(machInst, vd, vn,
                                                rShiftAmt);
        return new ScvtfFixedScSX<uint32_t>(machInst, vd, vn,
                                            rShiftAmt);
      case 0x1f:
        if (immh < 0x4)
            return new Unknown64(machInst);
        if (u)
            return decodeNeonUTwoShiftUFpReg<FcvtzuFixedScX>(
                size & 0x1, machInst, vd, vn, rShiftAmt);
        return decodeNeonUTwoShiftUFpReg<FcvtzsFixedScX>(
            size & 0x1, machInst, vd, vn, rShiftAmt);
      default:
        return new Unknown64(machInst);
    }
}

// Decode an AdvSIMD load/store instruction (multiple structures or single
// structure) into VldMult64 / VstMult64 / VldSingle64 / VstSingle64.
//
// Fields common to both forms:
//   bit 30       dataSize: 128 when set, 64 otherwise
//   bits 24:23   below 0x2 selects the "multiple structures" form
//   bit 22       load (set) or store (clear)
//   bits 20:16   rm; writeback is disabled only when rm == 0 and bit 23 is
//                clear
//   bits 4:0 / 9:5   vd / rn
//
// Returns Unknown64 for "multiple structures" opcodes that are not listed.
StaticInstPtr
decodeNeonMem(ExtMachInst machInst)
{
    uint8_t dataSize = bits(machInst, 30) ? 128 : 64;
    bool multiple = bits(machInst, 24, 23) < 0x2;
    bool load = bits(machInst, 22);
    bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23));

    IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
    IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
    IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);

    if (multiple) {  // AdvSIMD load/store multiple structures
        uint8_t opcode = bits(machInst, 15, 12);
        uint8_t eSize = bits(machInst, 11, 10);

        uint8_t numStructElems = 0;
        uint8_t numRegs = 0;

        switch (opcode) {
          case 0x0:  // LD/ST4 (4 regs)
            numStructElems = 4;
            numRegs = 4;
            break;
          case 0x2:  // LD/ST1 (4 regs)
            numStructElems = 1;
            numRegs = 4;
            break;
          case 0x4:  // LD/ST3 (3 regs)
            numStructElems = 3;
            numRegs = 3;
            break;
          case 0x6:  // LD/ST1 (3 regs)
            numStructElems = 1;
            numRegs = 3;
            break;
          case 0x7:  // LD/ST1 (1 reg)
            numStructElems = 1;
            numRegs = 1;
            break;
          case 0x8:  // LD/ST2 (2 regs)
            numStructElems = 2;
            numRegs = 2;
            break;
          case 0xa:  // LD/ST1 (2 regs)
            numStructElems = 1;
            numRegs = 2;
            break;
          default:
            return new Unknown64(machInst);
        }

        if (load) {
            return new VldMult64(machInst, rn, vd, rm, eSize, dataSize,
                                 numStructElems, numRegs, wb);
        } else {
            return new VstMult64(machInst, rn, vd, rm, eSize, dataSize,
                                 numStructElems, numRegs, wb);
        }
    }

    // AdvSIMD load/store single structure
    uint8_t scale = bits(machInst, 15, 14);
    // Number of structure elements is {bit 13, bit 21} + 1.
    uint8_t numStructElems = (((uint8_t) bits(machInst, 13) << 1) |
                              (uint8_t) bits(machInst, 21)) + 1;
    uint8_t index = 0;
    bool replicate = false;

    // The lane index is assembled from bit 30, bit 12 and bits 11:10;
    // a larger scale leaves fewer of those bits in the index.
    switch (scale) {
      case 0x0:
        index = ((uint8_t) bits(machInst, 30) << 3) |
                ((uint8_t) bits(machInst, 12) << 2) |
                (uint8_t) bits(machInst, 11, 10);
        break;
      case 0x1:
        index = ((uint8_t) bits(machInst, 30) << 2) |
                ((uint8_t) bits(machInst, 12) << 1) |
                (uint8_t) bits(machInst, 11);
        break;
      case 0x2:
        if (bits(machInst, 10) == 0x0) {
            index = ((uint8_t) bits(machInst, 30) << 1) |
                    bits(machInst, 12);
        } else {
            // Bit 10 set promotes the access to the next larger scale.
            index = (uint8_t) bits(machInst, 30);
            scale = 0x3;
        }
        break;
      case 0x3:
        // Replicating form: the element size comes from bits 11:10.
        // NOTE(review): the store path below is also reached with
        // replicate == true -- confirm that combination is intended.
        scale = bits(machInst, 11, 10);
        replicate = true;
        break;
      default:
        // Not reachable for a two-bit field; kept as a safety net.
        return new Unknown64(machInst);
    }

    uint8_t eSize = scale;

    if (load) {
        return new VldSingle64(machInst, rn, vd, rm, eSize, dataSize,
                               numStructElems, index, wb, replicate);
    } else {
        return new VstSingle64(machInst, rn, vd, rm, eSize, dataSize,
                               numStructElems, index, wb, replicate);
    }
}
+} +}}; diff --git a/src/arch/arm/isa/formats/uncond.isa b/src/arch/arm/isa/formats/uncond.isa index 4a18a55bb..c376cd9ce 100644 --- a/src/arch/arm/isa/formats/uncond.isa +++ b/src/arch/arm/isa/formats/uncond.isa @@ -99,11 +99,11 @@ def format ArmUnconditional() {{ case 0x1: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); } } } else if (bits(op2, 0) == 0) { @@ -166,7 +166,7 @@ def format ArmUnconditional() {{ const uint32_t val = ((machInst >> 20) & 0x5); if (val == 0x4) { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); switch (bits(machInst, 24, 21)) { case 0x2: @@ -250,17 +250,10 @@ def format ArmUnconditional() {{ "ldc, ldc2 (immediate)", machInst); } } - if (op1 == 0xC5) { - return new WarnUnimplemented( - "mrrc, mrrc2", machInst); - } } else { if (bits(op1, 4, 3) != 0 || bits(op1, 1) == 1) { return new WarnUnimplemented( "stc, stc2", machInst); - } else if (op1 == 0xC4) { - return new
WarnUnimplemented( - "mcrr, mcrrc", machInst); } } } diff --git a/src/arch/arm/isa/formats/unimp.isa b/src/arch/arm/isa/formats/unimp.isa index 1c9a4b402..8e346112c 100644 --- a/src/arch/arm/isa/formats/unimp.isa +++ b/src/arch/arm/isa/formats/unimp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -85,6 +85,9 @@ output header {{ private: /// Have we warned on this instruction yet? mutable bool warned; + /// Full mnemonic for MRC and MCR instructions including the + /// coproc. register name + std::string fullMnemonic; public: /// Constructor @@ -96,6 +99,16 @@ output header {{ flags[IsNonSpeculative] = true; } + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst, + const std::string& _fullMnemonic) + : ArmStaticInst(_mnemonic, _machInst, No_OpClass), warned(false), + fullMnemonic(_fullMnemonic) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + %(BasicExecDeclare)s std::string @@ -147,10 +160,7 @@ output exec {{ FailUnimplemented::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } Fault @@ -158,7 +168,8 @@ output exec {{ Trace::InstRecord *traceData) const { if (!warned) { - warn("\tinstruction '%s' unimplemented\n", mnemonic); + warn("\tinstruction '%s' unimplemented\n", + fullMnemonic.size() ? 
fullMnemonic.c_str() : mnemonic); warned = true; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 5dd13d623..a2ce84345 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,10 +50,16 @@ output header {{ #include <sstream> #include "arch/arm/insts/branch.hh" +#include "arch/arm/insts/branch64.hh" +#include "arch/arm/insts/data64.hh" +#include "arch/arm/insts/fplib.hh" #include "arch/arm/insts/macromem.hh" #include "arch/arm/insts/mem.hh" +#include "arch/arm/insts/mem64.hh" #include "arch/arm/insts/misc.hh" +#include "arch/arm/insts/misc64.hh" #include "arch/arm/insts/mult.hh" +#include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/vfp.hh" @@ -63,6 +69,7 @@ output header {{ }}; output decoder {{ +#include <string> #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" #include "arch/arm/intregs.hh" diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa new file mode 100644 index 000000000..6fcf9b5d2 --- /dev/null +++ b/src/arch/arm/isa/insts/aarch64.isa @@ -0,0 +1,58 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + # Move-wide-immediate instructions. Each is an InstObjParams on the + # RegImmImmOp base; its declaration, constructor and execute templates are + # appended to header_output, decoder_output and exec_output respectively. + + # movz: the destination becomes imm1 shifted left by imm2. + movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;' + movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, []) + header_output += RegImmImmOpDeclare.subst(movzIop) + decoder_output += RegImmImmOpConstructor.subst(movzIop) + exec_output += BasicExecute.subst(movzIop) + + # movk: imm1 is inserted into bits [imm2 + 15 : imm2] of the current + # destination value (presumably leaving the other bits as they were; + # confirm against insertBits()). + movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);' + movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, []) + header_output += RegImmImmOpDeclare.subst(movkIop) + decoder_output += RegImmImmOpConstructor.subst(movkIop) + exec_output += BasicExecute.subst(movkIop) + + # movn: the destination becomes the bitwise complement of imm1 shifted + # left by imm2. + movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);' + movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, []) + header_output += RegImmImmOpDeclare.subst(movnIop) + decoder_output += RegImmImmOpConstructor.subst(movnIop) + exec_output += BasicExecute.subst(movnIop) +}}; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index e360f4581..3ee9d88e4 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,7 +48,7 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' - br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));''' instFlags = ["IsDirectControl"] if (link): bCode += ''' @@ -86,9 +86,9 @@ let {{ Name += "Imm" # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. - newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' br_tgt_code = ''' - pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + pcs.instNPC((uint32_t)(branchPC.thumb() ? 
(roundDown(branchPC.instPC(),4) + imm) : (branchPC.instPC() + imm))); ''' base = "BranchImmCond" @@ -150,7 +150,26 @@ let {{ if imm: decoder_output += BranchTarget.subst(blxIop) - #Ignore BXJ for now + bxjcode = ''' + HSTR hstr = Hstr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ); + } + IWNPC = Op1; + ''' + + bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond", + {"code": bxjcode, + "predicate_test": predicateTest, + "is_ras_pop": "op1 == INTREG_LR" }, + ["IsIndirectControl"]) + header_output += BranchRegCondDeclare.subst(bxjIop) + decoder_output += BranchRegCondConstructor.subst(bxjIop) + exec_output += PredOpExecute.subst(bxjIop) #CBNZ, CBZ. These are always unconditional as far as predicates for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa new file mode 100644 index 000000000..89cee6c22 --- /dev/null +++ b/src/arch/arm/isa/insts/branch64.isa @@ -0,0 +1,248 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black +// Giacomo Gabrielli + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # B, BL + for (mnem, link) in (("b", False), ("bl", True)): + bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsDirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImm64", bCode, instFlags) + header_output += BranchImm64Declare.subst(bIop) + decoder_output += BranchImm64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # BR, BLR + for (mnem, link) in (("br", False), ("blr", True)): + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchReg64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # B conditional + bCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) + NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), + currEL(xc->tcBase())); + else + NPC = NPC; + ''' + bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmCond64Declare.subst(bIop) + decoder_output += BranchImmCond64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # RET + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn'] + + bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += 
BasicExecute.subst(bIop) + + # ERET + bCode = '''Addr newPc; + CPSR cpsr = Cpsr; + CPSR spsr = Spsr; + + ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + switch (curr_el) { + case EL3: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3); + break; + case EL2: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2); + break; + case EL1: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1); + break; + default: + return new UndefinedInstruction(machInst, false, mnemonic); + break; + } + if (spsr.width && (newPc & mask(2))) { + // To avoid PC Alignment fault when returning to AArch32 + if (spsr.t) + newPc = newPc & ~mask(1); + else + newPc = newPc & ~mask(2); + } + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + + OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode; + bool illegal = false; + ExceptionLevel target_el; + if (badMode(mode)) { + illegal = true; + } else { + target_el = opModeToEL(mode); + if (((target_el == EL2) && + !ArmSystem::haveVirtualization(xc->tcBase())) || + (target_el > curr_el) || + (spsr.width == 1)) { + illegal = true; + } else { + bool known = true; + bool from32 = (spsr.width == 1); + bool to32 = false; + if (false) { // TODO: !haveAArch32EL + to32 = false; + } else if (!ArmSystem::highestELIs64(xc->tcBase())) { + to32 = true; + } else { + bool scr_rw, hcr_rw; + if (ArmSystem::haveSecurity(xc->tcBase())) { + SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3); + scr_rw = scr.rw; + } else { + scr_rw = true; + } + + if (ArmSystem::haveVirtualization(xc->tcBase())) { + HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2); + hcr_rw = hcr.rw; + } else { + hcr_rw = scr_rw; + } + + switch (target_el) { + case EL3: + to32 = false; + break; + case EL2: + to32 = !scr_rw; + break; + case EL1: + to32 = !scr_rw || !hcr_rw; + break; + case EL0: + if (curr_el == EL0) { + to32 = cpsr.width; + } else if (!scr_rw || !hcr_rw) { + // EL0 using AArch32 if EL1 using 
AArch32 + to32 = true; + } else { + known = false; + to32 = false; + } + } + } + if (known) + illegal = (from32 != to32); + } + } + + if (illegal) { + uint8_t old_mode = cpsr.mode; + spsr.mode = old_mode; // Preserve old mode when invalid + spsr.il = 1; + } else { + if (cpsr.width != spsr.width) + panic("AArch32/AArch64 interprocessing not supported yet"); + } + Cpsr = spsr; + + CondCodesNZ = spsr.nz; + CondCodesC = spsr.c; + CondCodesV = spsr.v; + NPC = purifyTaggedAddr(newPc, xc->tcBase(), + opModeToEL((OperatingMode) (uint8_t) spsr.mode)); + LLSCLock = 0; // Clear exclusive monitor + SevMailbox = 1; //Set Event Register + ''' + instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter'] + bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags) + header_output += BasicDeclare.subst(bIop) + decoder_output += BasicConstructor64.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # CBNZ, CBZ + for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): + code = ('NPC = (Op164 %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmReg64Declare.subst(iop) + decoder_output += BranchImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + # TBNZ, TBZ + for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")): + code = ('NPC = ((Op164 & imm1) %(test)s 0) ? 
' + 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmImmReg64Declare.subst(iop) + decoder_output += BranchImmImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa index be56554b0..881676496 100644 --- a/src/arch/arm/isa/insts/data.isa +++ b/src/arch/arm/isa/insts/data.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -257,7 +257,8 @@ let {{ CPSR old_cpsr = Cpsr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa new file mode 100644 index 000000000..77d7541ca --- /dev/null +++ b/src/arch/arm/isa/insts/data64.isa @@ -0,0 +1,465 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def createCcCode64(carry, overflow): + code = "" + code += ''' + uint16_t _iz, _in; + _in = bits(resTemp, intWidth - 1); + _iz = ((resTemp & mask(intWidth)) == 0); + CondCodesNZ = (_in << 1) | _iz; + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + if overflow and overflow != "none": + code += ''' + uint16_t _iv; + _iv = %s & 1; + CondCodesV = _iv; + DPRINTF(Arm, "(iv) = (%%d)\\n", _iv); + ''' % overflow + if carry and carry != "none": + code += ''' + uint16_t _ic; + _ic = %s & 1; + CondCodesC = _ic; + DPRINTF(Arm, "(ic) = (%%d)\\n", _ic); + ''' % carry + return code + + oldC = 'CondCodesC' + oldV = 'CondCodesV' + # Dicts of ways to set the carry flag. + carryCode64 = { + "none": "none", + "add": 'findCarry(intWidth, resTemp, Op164, secOp)', + "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + # Dict of ways to set the overflow flag. + overflowCode64 = { + "none": "none", + "add": 'findOverflow(intWidth, resTemp, Op164, secOp)', + "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + + immOp2 = "uint64_t secOp M5_VAR_USED = imm;" + sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "shiftReg64(Op264, shiftAmt, shiftType, intWidth);" + eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "extendReg64(Op264, extendType, shiftAmt, intWidth);" + + def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc, + base, templateBase): + code = ''' + uint64_t resTemp M5_VAR_USED = 0; + ''' + code + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + Name = mnem.capitalize() + suffix + iop = InstObjParams(mnem, Name, base, code) + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode) + + def subst(iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + 
"Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + if buildNonCc: + subst(iop) + if buildCc: + subst(iopCc) + + def buildXImmDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XImm"): + buildDataWork(mnem, immOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXImmOp", "DataXImm") + + def buildXSRegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XSReg"): + buildDataWork(mnem, sRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXSRegOp", "DataXSReg") + + def buildXERegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XEReg"): + buildDataWork(mnem, eRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXERegOp", "DataXEReg") + + def buildDataInst(mnem, code, flagType = "logic", + buildCc = True, buildNonCc = True): + buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc) + + buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False); + buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm", + buildCc = False); + buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;") + buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False) + buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False) + buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub") + buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add") + buildXSRegDataInst("adc", + "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add") + buildXSRegDataInst("sbc", + "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub") + buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False) + buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False) + buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;") + + def buildDataXImmInst(mnem, code, 
optArgs = []): + global header_output, decoder_output, exec_output + classNamePrefix = mnem[0].upper() + mnem[1:] + templateBase = "DataXImm" + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXRegInst(mnem, regOps, code, optArgs = [], + overrideOpClass=None): + global header_output, decoder_output, exec_output + templateBase = "DataX%dReg" % regOps + classNamePrefix = mnem[0].upper() + mnem[1:] + if overrideOpClass: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", + { 'code': code, 'op_class': overrideOpClass}, + optArgs) + else: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("smaddl", 3, + "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smsubl", 3, + "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smulh", 2, ''' + uint64_t op1H = (int32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (int32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += shiftReg64(mid1, 32, ASR, intWidth); + result += shiftReg64(mid2, 32, ASR, intWidth); + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + 
buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3", + overrideOpClass="IntMultOp") + # umulh: unsigned 64x64-bit multiply carried out on 32-bit halves; the + # destination receives the upper 64 bits of the product. + buildDataXRegInst("umulh", 2, ''' + uint64_t op1H = (uint32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (uint32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += mid1 >> 32; + result += mid2 >> 32; + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + + # Variable shifts: the shift amount is taken from the second source + # register. + buildDataXRegInst("asrv", 2, + "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)") + buildDataXRegInst("lslv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)") + buildDataXRegInst("lsrv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)") + buildDataXRegInst("rorv", 2, + "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)") + # Division: a zero divisor yields 0 rather than a fault. + buildDataXRegInst("sdiv", 2, ''' + int64_t op1 = Op164; + int64_t op2 = Op264; + if (intWidth == 32) { + op1 = sext<32>(op1); + op2 = sext<32>(op2); + } + // Division by -1 is performed as a negation. Negate in unsigned + // arithmetic so that the most negative dividend wraps to itself + // instead of triggering signed-overflow undefined behaviour. + Dest64 = op2 == -1 ? -(uint64_t)op1 : op2 ? op1 / op2 : 0; + ''', overrideOpClass="IntDivOp") + buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0", + overrideOpClass="IntDivOp") + + # cls / clz: count leading sign bits / leading zero bits within intWidth. + buildDataXRegInst("cls", 1, ''' + uint64_t op1 = Op164; + if (bits(op1, intWidth - 1)) + op1 ^= mask(intWidth); + Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1)); + ''') + buildDataXRegInst("clz", 1, ''' + Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164)); + ''') + # rbit: reverse the bit order by swapping mirrored bit pairs, working from + # the outermost pair inwards. + buildDataXRegInst("rbit", 1, ''' + uint64_t result = Op164; + uint64_t lBit = 1ULL << (intWidth - 1); + uint64_t rBit = 1ULL; + while (lBit > rBit) { + uint64_t maskBits = lBit | rBit; + uint64_t testBits = result & maskBits; + // If these bits are different, swap them by toggling them. 
+ if (testBits && testBits != maskBits) + result ^= maskBits; + lBit >>= 1; rBit <<= 1; + } + Dest64 = result; + ''') + # rev / rev16 / rev32: apply betole<>() to the whole register, to each + # 16-bit halfword, or to each 32-bit word respectively (presumably a byte + # swap on the simulated operand; confirm against betole()). + buildDataXRegInst("rev", 1, ''' + if (intWidth == 32) + Dest64 = betole<uint32_t>(Op164); + else + Dest64 = betole<uint64_t>(Op164); + ''') + buildDataXRegInst("rev16", 1, ''' + unsigned count = intWidth / 16; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint16_t hw = Op164 >> (i * 16); + result |= (uint64_t)betole<uint16_t>(hw) << (i * 16); + } + Dest64 = result; + ''') + buildDataXRegInst("rev32", 1, ''' + unsigned count = intWidth / 32; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint32_t hw = Op164 >> (i * 32); + result |= (uint64_t)betole<uint32_t>(hw) << (i * 32); + } + Dest64 = result; + ''') + + # Access and trap checks shared by the system-register move instructions. + # The two %s placeholders are filled in by the users of this template: the + # first completes the can...AArch64SysReg helper name, the second is passed + # as an argument to msrMrs64TrapToHyp. + msrMrs64EnabledCheckCode = ''' + // Check for read/write access right + if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) { + if (flat_idx == MISCREG_DAIF || + flat_idx == MISCREG_DC_ZVA_Xt || + flat_idx == MISCREG_DC_CVAC_Xt || + flat_idx == MISCREG_DC_CIVAC_Xt + ) + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + return new UndefinedInstruction(machInst, false, mnemonic); + } + + // Check for traps to supervisor (FP/SIMD regs) + if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64)) + return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP); + + bool is_vfp_neon = false; + + // Check for traps to hypervisor + if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) && + msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) { + return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + + // Check for traps to secure monitor + if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) && + msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) { + return new SecureMonitorTrap(machInst, + is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? 
EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + ''' + + buildDataXImmInst("mrs", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(op1); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + XDest = MiscOp1_ud; + ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),), + ["IsSerializeBefore"]) + + buildDataXRegInst("mrsNZCV", 1, ''' + CPSR cpsr = 0; + cpsr.nz = CondCodesNZ; + cpsr.c = CondCodesC; + cpsr.v = CondCodesV; + XDest = cpsr; + ''') + + buildDataXImmInst("msr", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + MiscDest_ud = XOp1; + ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),), + ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXRegInst("msrNZCV", 1, ''' + CPSR cpsr = XOp1; + CondCodesNZ = cpsr.nz; + CondCodesC = cpsr.c; + CondCodesV = cpsr.v; + ''') + + msrdczva_ea_code = ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + ''' + + msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false') + msrdczva_ea_code += ''' + Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne; + EA = XBase; + assert(!(Dczid & 0x10)); + uint64_t op_size = power(2, Dczid + 2); + EA &= ~(op_size - 1); + + ''' + + msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64", + { "ea_code" : msrdczva_ea_code, + "memacc_code" : ";", "use_uops" : 0, + "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']); + header_output += DCStore64Declare.subst(msrDCZVAIop); + decoder_output += DCStore64Constructor.subst(msrDCZVAIop); + exec_output += DCStore64Execute.subst(msrDCZVAIop); + exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop); + exec_output += Store64CompleteAcc.subst(msrDCZVAIop); + + + + buildDataXImmInst("msrSP", ''' + if (!canWriteAArch64SysReg( 
+ (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + MiscDest_ud = imm; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFSet", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif | imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFClr", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif & ~imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + def buildDataXCompInst(mnem, instType, suffix, code): + global header_output, decoder_output, exec_output + templateBase = "DataXCond%s" % instType + iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64", + templateBase + "Op", code) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXCondImmInst(mnem, code): + buildDataXCompInst(mnem, "CompImm", "Imm", code) + def buildDataXCondRegInst(mnem, code): + buildDataXCompInst(mnem, "CompReg", "Reg", code) + def buildDataXCondSelInst(mnem, code): + buildDataXCompInst(mnem, "Sel", "", code) + + def condCompCode(flagType, op, imm): + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + opDecl = "uint64_t secOp M5_VAR_USED = imm;" + if not imm: + opDecl = "uint64_t secOp M5_VAR_USED = Op264;" + return opDecl + ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + uint64_t resTemp = Op164 ''' + op + ''' secOp; + 
''' + ccCode + ''' + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + ''' + + buildDataXCondImmInst("ccmn", condCompCode("add", "+", True)) + buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True)) + buildDataXCondRegInst("ccmn", condCompCode("add", "+", False)) + buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False)) + + condSelCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + Dest64 = Op164; + } else { + Dest64 = %(altVal)s; + } + ''' + buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"}) + buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"}) + buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"}) + buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"}) +}}; diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa index 1ff6ef9e4..0896ea94f 100644 --- a/src/arch/arm/isa/insts/div.isa +++ b/src/arch/arm/isa/insts/div.isa @@ -40,12 +40,6 @@ let {{ sdivCode = ''' if (Op2_sw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_sw = 0; } else if (Op1_sw == INT_MIN && Op2_sw == -1) { Dest_sw = INT_MIN; @@ -63,12 +57,6 @@ let {{ udivCode = ''' if (Op2_uw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_uw = 0; } else { Dest_uw = Op1_uw / Op2_uw; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index b701995f4..60f030c3d 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -191,14 +191,17 @@ let {{ decoder_output = "" 
exec_output = "" - vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp", - { "code": vmsrEnabledCheckCode + \ - "MiscDest = Op1;", + vmsrCode = vmsrEnabledCheckCode + ''' + MiscDest = Op1; + ''' + + vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp", + { "code": vmsrCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += FpRegRegOpDeclare.subst(vmsrIop); - decoder_output += FpRegRegOpConstructor.subst(vmsrIop); + header_output += FpRegRegImmOpDeclare.subst(vmsrIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop); exec_output += PredOpExecute.subst(vmsrIop); vmsrFpscrCode = vmsrEnabledCheckCode + ''' @@ -215,14 +218,36 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); - vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp", - { "code": vmrsEnabledCheckCode + \ - "Dest = MiscOp1;", + vmrsCode = vmrsEnabledCheckCode + ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + HCR hcr = Hcr; + bool hypTrap = false; + switch(xc->tcBase()->flattenMiscIndex(op1)) { + case MISCREG_FPSID: + hypTrap = hcr.tid0; + break; + case MISCREG_MVFR0: + case MISCREG_MVFR1: + hypTrap = hcr.tid3; + break; + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP10_MRC_VMRS); + } + } + Dest = MiscOp1; + ''' + + vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp", + { "code": vmrsCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeBefore"]) - header_output += FpRegRegOpDeclare.subst(vmrsIop); - decoder_output += FpRegRegOpConstructor.subst(vmrsIop); + header_output += FpRegRegImmOpDeclare.subst(vmrsIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp", @@ -323,7 +348,7 @@ let {{ decoder_output += 
FpRegRegOpConstructor.subst(vmovRegQIop); exec_output += PredOpExecute.subst(vmovRegQIop); - vmovCoreRegBCode = vfpEnabledCheckCode + ''' + vmovCoreRegBCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub); ''' vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp", @@ -334,7 +359,7 @@ let {{ decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop); exec_output += PredOpExecute.subst(vmovCoreRegBIop); - vmovCoreRegHCode = vfpEnabledCheckCode + ''' + vmovCoreRegHCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh); ''' vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp", @@ -453,6 +478,17 @@ let {{ singleCode = singleSimpleCode + ''' FpscrExc = fpscr; ''' + singleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + float cOp1 = FpOp1; + float cOp2 = FpOp2; + float cOp3 = FpDestP0; + FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" @@ -463,6 +499,19 @@ let {{ FpDestP1_uw = dblHi(dest); FpscrExc = fpscr; ''' + doubleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); + double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw); + double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw); + double cDest = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + FpDestP0_uw = dblLow(cDest); + FpDestP1_uw = dblHi(cDest); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw), dbl(FpOp2P0_uw, FpOp2P1_uw), @@ -473,6 +522,37 @@ let {{ fpscr.fz, 
fpscr.rMode) ''' + def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr): + global header_output, decoder_output, exec_output + + code = singleTernOp % { "op": singleOp, "palam": paramStr } + sIop = InstObjParams(Name.lower() + "s", Name + "S", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + code = doubleTernOp % { "op": doubleOp, "palam": paramStr } + dIop = InstObjParams(Name.lower() + "d", Name + "D", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + + declareTempl = eval(base + "Declare"); + constructorTempl = eval(base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += PredOpExecute.subst(iop) + + buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, -cOp3" ) + buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, -cOp3" ) + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): global header_output, decoder_output, exec_output @@ -830,7 +910,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -849,7 +929,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t 
result = vfpFpDToFixed(cOp1, false, false, 0, false); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -868,7 +948,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -887,7 +967,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -907,7 +987,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -927,7 +1007,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -947,7 +1027,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0); __asm__ 
__volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -967,7 +1047,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1333,7 +1413,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1352,7 +1432,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1372,7 +1452,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1391,7 +1471,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1410,7 +1490,7 @@ let {{ FPSCR fpscr = (FPSCR) 
FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1428,7 +1508,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1447,7 +1527,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1465,7 +1545,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1485,7 +1565,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm); + FpDest_sh = vfpFpToFixed<float>(FpOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_sh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; 
@@ -1505,7 +1585,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); + uint64_t result = vfpFpToFixed<double>(cOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1526,7 +1606,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm); + FpDest_uh = vfpFpToFixed<float>(FpOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_uh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1546,7 +1626,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1566,7 +1646,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1585,7 +1665,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1605,7 +1685,7 @@ let {{ FPSCR fpscr 
= (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1624,7 +1704,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa new file mode 100644 index 000000000..95dec5062 --- /dev/null +++ b/src/arch/arm/isa/insts/fp64.isa @@ -0,0 +1,811 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Thomas Grocutt +// Edmund Grimley Evans + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + fmovImmSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", + { "code": fmovImmSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmSIop); + decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); + exec_output += BasicExecute.subst(fmovImmSIop); + + fmovImmDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = bits(imm, 63, 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", + { "code": fmovImmDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmDIop); + decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); + exec_output += BasicExecute.subst(fmovImmDIop); + + fmovRegSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", + { "code": fmovRegSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); + exec_output += BasicExecute.subst(fmovRegSIop); + + fmovRegDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = AA64FpOp1P1_uw; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", + { "code": fmovRegDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); + exec_output += BasicExecute.subst(fmovRegDIop); 
+ + fmovCoreRegWCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = WOp1_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", + { "code": fmovCoreRegWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); + exec_output += BasicExecute.subst(fmovCoreRegWIop); + + fmovCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = XOp1_ud; + AA64FpDestP1_uw = XOp1_ud >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", + { "code": fmovCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); + exec_output += BasicExecute.subst(fmovCoreRegXIop); + + fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP2_uw = XOp1_ud; + AA64FpDestP3_uw = XOp1_ud >> 32; + ''' + fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", + { "code": fmovUCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); + exec_output += BasicExecute.subst(fmovUCoreRegXIop); + + fmovRegCoreWCode = vfp64EnabledCheckCode + ''' + WDest = AA64FpOp1P0_uw; + ''' + fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp", + { "code": fmovRegCoreWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); + exec_output += BasicExecute.subst(fmovRegCoreWIop); + + fmovRegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw; + ''' + fmovRegCoreXIop = 
InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", + { "code": fmovRegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); + exec_output += BasicExecute.subst(fmovRegCoreXIop); + + fmovURegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw; + ''' + fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", + { "code": fmovURegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); + exec_output += BasicExecute.subst(fmovURegCoreXIop); +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + singleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)" + + doubleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR 
fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleBinOp = ''' + binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), + dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw), + %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode); + ''' + doubleUnaryOp = ''' + unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s, + fpscr.fz, fpscr.rMode) + ''' + + def buildTernaryFpOp(name, opClass, sOp, dOp): + global header_output, decoder_output, exec_output + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cOp3 = AA64FpOp3P0_uw; + uint32_t cDest; + ''' "cDest = " + sOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + "FpRegRegRegRegOp", + { "code": code, "op_class": opClass }, []) + + header_output += AA64FpRegRegRegRegOpDeclare.subst(iop) + decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" ) + buildTernaryFpOp("FMSub", 
"SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" ) + + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + global header_output, decoder_output, exec_output + + code = singleIntConvCode2 % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + + code = doubleIntConvCode2 % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)", + "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibSub<uint32_t>(cOp1, cOp2, fpscr)", + "fplibSub<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", + "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)", + "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibMul<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMul<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, 
cOp2, fpscr))", + "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))") + buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMin<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMin<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMax<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMax<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)") + + def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + code = singleIntConvCode % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + code = doubleIntConvCode % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", + "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)") + + def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + doubleOp = None, isIntConv = True): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + if isIntConv: + sCode = singleIntConvCode + dCode = doubleIntConvCode + else: + sCode = singleCode + dCode = doubleCode + + for code, op, suffix in [[sCode, singleOp, "S"], + [dCode, doubleOp, "D"]]: + iop = 
InstObjParams(name, Name + suffix, base, + { "code": code % { "op": op }, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", + "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", + "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)") + buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)") + buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)") + buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)") + buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)") + buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)") + buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, 
FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)") +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Creates the integer to floating point instructions, including variants for + # signed/unsigned, float/double, etc + for regL, regOpL, width in [["W", "w", 32], + ["X", "d", 64]]: + for isDouble in True, False: + for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)], + ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]: + fcvtIntFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s + ''' %(usCode) + + if isDouble: + fcvtIntFpDCode += ''' + uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' % ("true" if us == "U" else "false") + else: + fcvtIntFpDCode += ''' + uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' % ("true" if us == "U" else "false") + fcvtIntFpDCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") + mnem = "%scvtf" %(us.lower()) + fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtIntFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); + exec_output += BasicExecute.subst(fcvtIntFpDIop); + + # Generates the floating point to integer conversion instructions in various + # variants, eg signed/unsigned + def buildFpCvtIntOp(isDouble, isSigned, isXReg): + global header_output, decoder_output, exec_output + + for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"], + ["P", "FPRounding_POSINF"], + ["M", "FPRounding_NEGINF"], + ["Z", "FPRounding_ZERO"], + ["A", "FPRounding_TIEAWAY"]]: + 
fcvtFpIntCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc;''' + if isDouble: + fcvtFpIntCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + + fcvtFpIntCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true", + roundingMode) + + instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U", + "X" if isXReg else "W", + "D" if isDouble else "S", rmode) + mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u") + fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtFpIntCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); + decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); + exec_output += BasicExecute.subst(fcvtFpIntIop); + + # Now actually do the building with the different variants + for isDouble in True, False: + for isSigned in True, False: + for isXReg in True, False: + buildFpCvtIntOp(isDouble, isSigned, isXReg) + + fcvtFpSFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", + { "code": fcvtFpSFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); + exec_output += BasicExecute.subst(fcvtFpSFpDIop); + + fcvtFpDFpSCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + AA64FpDestP0_uw = 
fplibConvert<uint64_t, uint32_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", + {"code": fcvtFpDFpSCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); + exec_output += BasicExecute.subst(fcvtFpDFpSIop); + + # Half precision to single or double precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + ''' % ("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32") + if isDouble: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "FcvtFpHFp%s" %("D" if isDouble else "S") + fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); + exec_output += BasicExecute.subst(fcvtFpHFpIop); + + # single or double precision to Half precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s; + AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw", + "64" if isDouble else "32") + + instName = "FcvtFp%sFpH" %("D" if isDouble else "S") + 
fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); + exec_output += BasicExecute.subst(fcvtFpFpHIop); + + # Build the various versions of the floating point compare instructions + def buildFCmpOp(isQuiet, isDouble, isImm): + global header_output, decoder_output, exec_output + + fcmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cOp1 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32" + if isDouble else "AA64FpDestP0_uw") + if isImm: + fcmpCode += ''' + %s cOp2 = imm; + ''' % ("uint64_t" if isDouble else "uint32_t") + else: + fcmpCode += ''' + %s cOp2 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw") + fcmpCode += ''' + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("64" if isDouble else "32", "false" if isQuiet else "true") + + typeName = "Imm" if isImm else "Reg" + instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName, + "D" if isDouble else "S") + fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName, + "FpReg%sOp" %(typeName), + {"code": fcmpCode, + "op_class": "SimdFloatCmpOp"}, []) + + declareTemp = eval("FpReg%sOpDeclare" %(typeName)); + constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName)); + header_output += declareTemp.subst(fcmpIop); + decoder_output += constructorTemp.subst(fcmpIop); + exec_output += BasicExecute.subst(fcmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + for isImm in True, False: + buildFCmpOp(isQuiet, isDouble, isImm) + + # Build the various versions of the conditional 
floating point compare + # instructions + def buildFCCmpOp(isQuiet, isDouble): + global header_output, decoder_output, exec_output + + fccmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + %s cOp1 = %s; + %s cOp2 = %s; + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw", + "uint64_t" if isDouble else "uint32_t", + "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32" + if isDouble else "AA64FpOp2P0_uw", + "64" if isDouble else "32", "false" if isQuiet else "true") + + instName = "FCCmp%sReg%s" %("" if isQuiet else "E", + "D" if isDouble else "S") + fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"), + instName, "FpCondCompRegOp", + {"code": fccmpCode, + "op_class": "SimdFloatCmpOp"}, []) + header_output += DataXCondCompRegDeclare.subst(fccmpIop); + decoder_output += DataXCondCompRegConstructor.subst(fccmpIop); + exec_output += BasicExecute.subst(fccmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + buildFCCmpOp(isQuiet, isDouble) + +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Generates the variants of the floating to fixed point instructions + def buildFpCvtFixedOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + fcvtFpFixedCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + fcvtFpFixedCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + 
fcvtFpFixedCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s, + FPRounding_ZERO, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true") + + instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + "X" if isXReg else "W") + mnem = "fcvtz%s" %("s" if isSigned else "u") + fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFpFixedCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop); + decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop); + exec_output += BasicExecute.subst(fcvtFpFixedIop); + + # Generates the variants of the fixed to floating point instructions + def buildFixedCvtFpOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + srcRegType = "X" if isXReg else "W" + fcvtFixedFpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm, + %s, FPCRRounding(fpscr), fpscr); + ''' %("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32", + "int" if isSigned else "uint", "64" if isXReg else "32", + srcRegType, + "false" if isSigned else "true") + if isDouble: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = result >> 32; + ''' + else: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = 0; + ''' + fcvtFixedFpCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + srcRegType) + mnem = "%scvtf" %("s" if isSigned else "u") + fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFixedFpCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); + decoder_output += 
FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); + exec_output += BasicExecute.subst(fcvtFixedFpIop); + + # loop over the variants building the instructions for each + for isXReg in True, False: + for isDouble in True, False: + for isSigned in True, False: + buildFpCvtFixedOp(isSigned, isDouble, isXReg) + buildFixedCvtFpOp(isSigned, isDouble, isXReg) +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + for isDouble in True, False: + code = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + AA64FpDestP0_uw = AA64FpOp1P0_uw; + ''' + if isDouble: + code += ''' + AA64FpDestP1_uw = AA64FpOp1P1_uw; + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + AA64FpDestP1_uw = AA64FpOp2P1_uw; + } + ''' + else: + code += ''' + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + } + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + + iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), + "FpCondSelOp", code) + header_output += DataXCondSelDeclare.subst(iop) + decoder_output += DataXCondSelConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index c01e87df8..9d90f7779 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -37,6 +37,9 @@ // // Authors: Gabe Black +//AArch64 instructions +##include "aarch64.isa" + //Basic forms of various templates ##include "basic.isa" @@ -46,8 +49,15 @@ //Loads of a single item ##include "ldr.isa" +//Loads of a single item, AArch64 +##include "ldr64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" + +//Stores of a single item, AArch64 +##include "str64.isa" 
//Stores of a single item ##include "str.isa" @@ -61,8 +71,12 @@ //Data processing instructions ##include "data.isa" +//AArch64 data processing instructions +##include "data64.isa" + //Branches ##include "branch.isa" +##include "branch64.isa" //Multiply ##include "mult.isa" @@ -72,9 +86,14 @@ //VFP ##include "fp.isa" +##include "fp64.isa" //Neon ##include "neon.isa" +//AArch64 Neon +##include "neon64.isa" +##include "neon64_mem.isa" + //m5 Psuedo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index f599fa4b9..6bfe40118 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -78,7 +79,8 @@ let {{ newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, self.memFlags, instFlags, base, - wbDecl, pcDecl, self.rasPop) + wbDecl, pcDecl, self.rasPop, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -160,7 +162,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa new file mode 100644 index 000000000..78460f661 --- /dev/null +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -0,0 +1,446 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to 
intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class LoadInst64(LoadStoreInst): + execBase = 'Load64' + micro = False + + def __init__(self, mnem, Name, size=4, sign=False, user=False, + literal=False, flavor="normal", top=False): + super(LoadInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.sign = sign + self.user = user + self.literal = literal + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = {"postacc_code" : ""} + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor == "dprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsDataPrefetch'] + elif self.flavor == "iprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsInstPrefetch'] + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("acexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. 
+ self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("acquire", "acex", "exclusive", + "acexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.flavor in ("acquire", "acex", "acexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("acex", "exclusive", "exp", "acexp"): + self.memFlags.append("Request::LLSC") + + def buildEACode(self): + # Address computation code + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + if self.literal: + eaCode += "EA = RawPC" + else: + eaCode += SPAlignmentCheckCode + "EA = XBase" + + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + def emitHelper(self, base='Memory64', wbDecl=None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. 
+ if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "widen", "acquire"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.sign else "false", + "true" if (self.size == 8 or + self.flavor == "widen") else "false", + "true" if self.flavor == "acquire" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + class LoadImmInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadImmInst64, self).__init__(*args, **kargs) + self.offset = " + imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class LoadRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRegInst64, self).__init__(*args, **kargs) + self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class LoadRawRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class LoadSingle64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor in ("dprefetch", "iprefetch"): + accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' + elif self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + AA64FpDestP0_uw = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + 
accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = (data >> 32); + ''' + # Only zero out the other half if this isn't part of a + # pair of 8 byte loads implementing a 16 byte load. + if self.size == 8: + accCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP2_uw = (uint32_t)data; + AA64FpDestP3_uw = (data >> 32); + ''' + elif self.flavor == "widen" or self.size == 8: + accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + else: + accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + if self.size == 16: + accCode = accCode % buildMemSuffix(self.sign, 8) + else: + accCode = accCode % buildMemSuffix(self.sign, self.size) + + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadDouble64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + else: + if self.sign: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = sext<32>((uint32_t)data); + XDest2 = sext<32>(data >> 32); + ''' + elif self.size == 8: + accCode = ''' + XDest = sext<64>(Mem_tud.a); + XDest2 = sext<64>(Mem_tud.b); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = (uint32_t)data; + XDest2 = data >> 32; + ''' + elif 
self.size == 8: + accCode = ''' + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadImm64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class LoadPre64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class LoadPost64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class LoadReg64(LoadRegInst64, LoadSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class LoadRaw64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class LoadEx64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + + class LoadLit64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreLit64' + base = 'ArmISA::MemoryLiteral64' + writeback = False + post = False + + def buildLoads64(mnem, NameBase, size, sign, flavor="normal"): + LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit() + LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit() + LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit() + LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit() + + buildLoads64("ldrb", "LDRB64", 1, False) + buildLoads64("ldrsb", "LDRSBW64", 1, True) + buildLoads64("ldrsb", "LDRSBX64", 1, True, flavor="widen") + buildLoads64("ldrh", "LDRH64", 2, False) + buildLoads64("ldrsh", "LDRSHW64", 2, True) + buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen") + 
# Remaining plain loads: buildLoads64 emits the immediate, pre-index,
# post-index and register form of each one.
buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen")
buildLoads64("ldr", "LDRW64", 4, False)
buildLoads64("ldr", "LDRX64", 8, False)
buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp")
buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp")
buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp")
buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp")

# Prefetches are generated as 8 byte loads with the "dprefetch" flavor.
LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit()
LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit()
LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit()
LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit()

# LDUR*: immediate-offset loads.
LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit()
LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit()
LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit()
LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit()
LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit()
LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit()
LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit()
LoadImm64("ldur", "LDURW64_IMM", 4, False).emit()
LoadImm64("ldur", "LDURX64_IMM", 8, False).emit()
LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit()
LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit()
LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit()
LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit()

# LDTR*: unprivileged loads.  The fifth positional argument is `user`,
# which adds the ArmISA::TLB::UserMode request flag.
LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit()
LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit()
LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit()
LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit()
LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit()
LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit()
# LDTRSW is unprivileged like every other LDTR* form, so it must pass
# user=True as well; it was previously emitted without the user flag.
LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, True, flavor="widen").emit()
LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit()
LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit()
# PC-relative (literal) loads.
LoadLit64("ldrsw", "LDRSWL64_LIT", size=4, sign=True,
          literal=True, flavor="widen").emit()
LoadLit64("ldr", "LDRWL64_LIT", size=4, sign=False, literal=True).emit()
LoadLit64("ldr", "LDRXL64_LIT", size=8, sign=False, literal=True).emit()
LoadLit64("ldr", "LDRSFP64_LIT", size=4, literal=True, flavor="fp").emit()
LoadLit64("ldr", "LDRDFP64_LIT", size=8, literal=True, flavor="fp").emit()

# Load-acquire forms, widest access first.
for _mnem, _clsName, _bytes in (("ldar", "LDARX64", 8),
                                ("ldar", "LDARW64", 4),
                                ("ldarh", "LDARH64", 2),
                                ("ldarb", "LDARB64", 1)):
    LoadRaw64(_mnem, _clsName, _bytes, flavor="acquire").emit()

# Exclusive loads: the acquire-exclusive set first, then the plain
# exclusive set, each from the widest access to the narrowest.
for _mnem, _clsName, _bytes, _flavor in (
        ("ldaxr", "LDAXRX64", 8, "acex"),
        ("ldaxr", "LDAXRW64", 4, "acex"),
        ("ldaxrh", "LDAXRH64", 2, "acex"),
        ("ldaxrb", "LDAXRB64", 1, "acex"),
        ("ldxr", "LDXRX64", 8, "exclusive"),
        ("ldxr", "LDXRW64", 4, "exclusive"),
        ("ldxrh", "LDXRH64", 2, "exclusive"),
        ("ldxrb", "LDXRB64", 1, "exclusive")):
    LoadEx64(_mnem, _clsName, _bytes, flavor=_flavor).emit()

# Micro-op variants of the single-register loads: same generators, with a
# different decoder constructor base and the micro flag set.
class LoadImmU64(LoadImm64):
    micro = True
    decConstBase = 'LoadStoreImmU64'

# Register-pair loads with an immediate offset.  The micro-op and the
# stand-alone variant differ only in the micro flag.
class LoadImmDU64(LoadImmInst64, LoadDouble64):
    micro = True
    writeback = False
    post = False
    base = 'ArmISA::MemoryDImm64'
    decConstBase = 'LoadStoreImmDU64'

class LoadImmDouble64(LoadImmInst64, LoadDouble64):
    micro = False
    writeback = False
    post = False
    base = 'ArmISA::MemoryDImm64'
    decConstBase = 'LoadStoreImmDU64'

class LoadRegU64(LoadReg64):
    micro = True
    decConstBase = 'LoadStoreRegU64'

class LoadLitU64(LoadLit64):
    micro = True
    decConstBase = 'LoadStoreLitU64'

# Exclusive pair loads (acquire variants first).
for _mnem, _clsName, _bytes, _flavor in (("ldaxp", "LDAXPW64", 4, "acexp"),
                                         ("ldaxp", "LDAXPX64", 8, "acexp"),
                                         ("ldxp", "LDXPW64", 4, "exp"),
                                         ("ldxp", "LDXPX64", 8, "exp")):
    LoadImmDouble64(_mnem, _clsName, _bytes, flavor=_flavor).emit()

# Drop the loop temporaries so no extra names are left behind.
del _mnem, _clsName, _bytes, _flavor

LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", size=8).emit()
LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit() + LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit() + LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit() + LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() + LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, + flavor="fp").emit() + LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", + 16, flavor="fp", top = False).emit() + LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", + 16, flavor="fp", top = False).emit() + LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", + 16, literal=True, flavor="fp", top = False).emit() + LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", + 16, flavor="fp", top = True).emit() + LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", + 16, flavor="fp", top = True).emit() + LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", + 16, literal=True, flavor="fp", top = True).emit() + LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() + LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() + LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() +}}; diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index 06ed34af8..928d1be0d 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -1,5 +1,5 @@ // -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ let {{ armCode = ''' PseudoInst::arm(xc->tcBase()); ''' + armIop = InstObjParams("arm", "Arm", "PredOp", { "code": armCode, "predicate_test": predicateTest }, @@ -69,6 +70,7 @@ let {{ quiesceCode = ''' PseudoInst::quiesce(xc->tcBase()); ''' + quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp", { "code": quiesceCode, "predicate_test": predicateTest }, @@ -81,6 +83,10 @@ let {{ PseudoInst::quiesceNs(xc->tcBase(), 
join32to64(R1, R0)); ''' + quiesceNsCode64 = ''' + PseudoInst::quiesceNs(xc->tcBase(), X0); + ''' + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp", { "code": quiesceNsCode, "predicate_test": predicateTest }, @@ -89,10 +95,22 @@ let {{ decoder_output += BasicConstructor.subst(quiesceNsIop) exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp", + { "code": quiesceNsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce"]) + header_output += BasicDeclare.subst(quiesceNsIop) + decoder_output += BasicConstructor.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceCyclesCode = ''' PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceCyclesCode64 = ''' + PseudoInst::quiesceCycles(xc->tcBase(), X0); + ''' + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp", { "code": quiesceCyclesCode, "predicate_test": predicateTest }, @@ -101,12 +119,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceCyclesIop) exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp", + { "code": quiesceCyclesCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceCyclesIop) + decoder_output += BasicConstructor.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceTimeCode = ''' uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase()); R0 = bits(qt_val, 31, 0); R1 = bits(qt_val, 63, 32); ''' + quiesceTimeCode64 = ''' + X0 = PseudoInst::quiesceTime(xc->tcBase()); + ''' quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp", { "code": quiesceTimeCode, "predicate_test": predicateTest }, @@ -115,12 +144,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceTimeIop) exec_output += 
PredOpExecute.subst(quiesceTimeIop) + quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp", + { "code": quiesceTimeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceTimeIop) + decoder_output += BasicConstructor.subst(quiesceTimeIop) + exec_output += PredOpExecute.subst(quiesceTimeIop) + rpnsCode = ''' uint64_t rpns_val = PseudoInst::rpns(xc->tcBase()); R0 = bits(rpns_val, 31, 0); R1 = bits(rpns_val, 63, 32); ''' + rpnsCode64 = ''' + X0 = PseudoInst::rpns(xc->tcBase()); + ''' rpnsIop = InstObjParams("rpns", "Rpns", "PredOp", { "code": rpnsCode, "predicate_test": predicateTest }, @@ -129,10 +169,22 @@ let {{ decoder_output += BasicConstructor.subst(rpnsIop) exec_output += PredOpExecute.subst(rpnsIop) + rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp", + { "code": rpnsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(rpnsIop) + decoder_output += BasicConstructor.subst(rpnsIop) + exec_output += PredOpExecute.subst(rpnsIop) + wakeCpuCode = ''' PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0)); ''' + wakeCpuCode64 = ''' + PseudoInst::wakeCPU(xc->tcBase(), X0); + ''' + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp", { "code": wakeCpuCode, "predicate_test": predicateTest }, @@ -141,6 +193,14 @@ let {{ decoder_output += BasicConstructor.subst(wakeCPUIop) exec_output += PredOpExecute.subst(wakeCPUIop) + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp", + { "code": wakeCpuCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(wakeCPUIop) + decoder_output += BasicConstructor.subst(wakeCPUIop) + exec_output += PredOpExecute.subst(wakeCPUIop) + deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp", { "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''', 
"predicate_test": predicateTest }) @@ -171,6 +231,11 @@ let {{ m5exit_code = ''' PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0)); ''' + + m5exit_code64 = ''' + PseudoInst::m5exit(xc->tcBase(), X0); + ''' + m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp", { "code": m5exit_code, "predicate_test": predicateTest }, @@ -190,6 +255,14 @@ let {{ decoder_output += BasicConstructor.subst(m5failIop) exec_output += PredOpExecute.subst(m5failIop) + m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp", + { "code": m5exit_code64, + "predicate_test": predicateTest }, + ["No_OpClass", "IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5exitIop) + decoder_output += BasicConstructor.subst(m5exitIop) + exec_output += PredOpExecute.subst(m5exitIop) + loadsymbolCode = ''' PseudoInst::loadsymbol(xc->tcBase()); ''' @@ -208,6 +281,10 @@ let {{ R1 = bits(ip_val, 63, 32); ''' + initparamCode64 = ''' + X0 = PseudoInst::initParam(xc->tcBase()); + ''' + initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, "predicate_test": predicateTest }, @@ -216,10 +293,21 @@ let {{ decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) + initparamIop = InstObjParams("initparam", "Initparam64", "PredOp", + { "code": initparamCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(initparamIop) + decoder_output += BasicConstructor.subst(initparamIop) + exec_output += PredOpExecute.subst(initparamIop) + resetstats_code = ''' PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + resetstats_code64 = ''' + PseudoInst::resetstats(xc->tcBase(), X0, X1); + ''' resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp", { "code": resetstats_code, "predicate_test": predicateTest }, @@ -228,9 +316,22 @@ let {{ decoder_output += BasicConstructor.subst(resetstatsIop) exec_output += PredOpExecute.subst(resetstatsIop) + 
resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp", + { "code": resetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(resetstatsIop) + decoder_output += BasicConstructor.subst(resetstatsIop) + exec_output += PredOpExecute.subst(resetstatsIop) + dumpstats_code = ''' PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpstats_code64 = ''' + PseudoInst::dumpstats(xc->tcBase(), X0, X1); + ''' + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp", { "code": dumpstats_code, "predicate_test": predicateTest }, @@ -239,9 +340,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpstatsIop) exec_output += PredOpExecute.subst(dumpstatsIop) + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp", + { "code": dumpstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpstatsIop) + decoder_output += BasicConstructor.subst(dumpstatsIop) + exec_output += PredOpExecute.subst(dumpstatsIop) + dumpresetstats_code = ''' PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpresetstats_code64 = ''' + PseudoInst::dumpresetstats(xc->tcBase(), X0, X1); + ''' + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp", { "code": dumpresetstats_code, "predicate_test": predicateTest }, @@ -250,9 +364,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpresetstatsIop) exec_output += PredOpExecute.subst(dumpresetstatsIop) + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp", + { "code": dumpresetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpresetstatsIop) + decoder_output += BasicConstructor.subst(dumpresetstatsIop) + exec_output += PredOpExecute.subst(dumpresetstatsIop) + m5checkpoint_code = ''' 
PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + m5checkpoint_code64 = ''' + PseudoInst::m5checkpoint(xc->tcBase(), X0, X1); + ''' + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp", { "code": m5checkpoint_code, "predicate_test": predicateTest }, @@ -261,11 +388,27 @@ let {{ decoder_output += BasicConstructor.subst(m5checkpointIop) exec_output += PredOpExecute.subst(m5checkpointIop) + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp", + { "code": m5checkpoint_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5checkpointIop) + decoder_output += BasicConstructor.subst(m5checkpointIop) + exec_output += PredOpExecute.subst(m5checkpointIop) + m5readfileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset); ''' + + m5readfileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset); + ''' + m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp", { "code": m5readfileCode, "predicate_test": predicateTest }, @@ -274,6 +417,14 @@ let {{ decoder_output += BasicConstructor.subst(m5readfileIop) exec_output += PredOpExecute.subst(m5readfileIop) + m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp", + { "code": m5readfileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5readfileIop) + decoder_output += BasicConstructor.subst(m5readfileIop) + exec_output += PredOpExecute.subst(m5readfileIop) + m5writefileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); @@ -282,6 +433,16 @@ let {{ R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), 
offset, filenameAddr); ''' + + m5writefileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false); + X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset, + filenameAddr); + ''' + m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp", { "code": m5writefileCode, "predicate_test": predicateTest }, @@ -290,6 +451,14 @@ let {{ decoder_output += BasicConstructor.subst(m5writefileIop) exec_output += PredOpExecute.subst(m5writefileIop) + m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp", + { "code": m5writefileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5writefileIop) + decoder_output += BasicConstructor.subst(m5writefileIop) + exec_output += PredOpExecute.subst(m5writefileIop) + m5breakIop = InstObjParams("m5break", "M5break", "PredOp", { "code": "PseudoInst::debugbreak(xc->tcBase());", "predicate_test": predicateTest }, @@ -309,6 +478,9 @@ let {{ m5addsymbolCode = ''' PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2); ''' + m5addsymbolCode64 = ''' + PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1); + ''' m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp", { "code": m5addsymbolCode, "predicate_test": predicateTest }, @@ -317,8 +489,17 @@ let {{ decoder_output += BasicConstructor.subst(m5addsymbolIop) exec_output += PredOpExecute.subst(m5addsymbolIop) + m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp", + { "code": m5addsymbolCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5addsymbolIop) + decoder_output += BasicConstructor.subst(m5addsymbolIop) + exec_output += PredOpExecute.subst(m5addsymbolIop) + m5panicCode = '''panic("M5 panic instruction called at pc=%#x.", xc->pcState().pc());''' + m5panicIop = 
InstObjParams("m5panic", "M5panic", "PredOp", { "code": m5panicCode, "predicate_test": predicateTest }, @@ -332,6 +513,13 @@ let {{ join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workbeginCode64 = '''PseudoInst::workbegin( + xc->tcBase(), + X0, + X1 + );''' + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp", { "code": m5workbeginCode, "predicate_test": predicateTest }, @@ -340,11 +528,26 @@ let {{ decoder_output += BasicConstructor.subst(m5workbeginIop) exec_output += PredOpExecute.subst(m5workbeginIop) + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp", + { "code": m5workbeginCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workbeginIop) + decoder_output += BasicConstructor.subst(m5workbeginIop) + exec_output += PredOpExecute.subst(m5workbeginIop) + m5workendCode = '''PseudoInst::workend( xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workendCode64 = '''PseudoInst::workend( + xc->tcBase(), + X0, + X1 + );''' + m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp", { "code": m5workendCode, "predicate_test": predicateTest }, @@ -353,4 +556,11 @@ let {{ decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp", + { "code": m5workendCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workendIop) + decoder_output += BasicConstructor.subst(m5workendIop) + exec_output += PredOpExecute.subst(m5workendIop) }}; diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index db36a3fff..f164595dd 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below 
extends only to copyright in the software and shall @@ -91,7 +91,8 @@ let {{ SCTLR sctlr = Sctlr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -158,8 +159,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, - microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, + microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -178,7 +179,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = URa + imm;' + eaCode = 'EA = XURa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. @@ -592,6 +593,26 @@ let {{ URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' + microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop', + 'MicroIntImmXOp', + 'XURa = XURb + imm;', + ['IsMicroop']) + + microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop', + 'MicroIntImmXOp', ''' + if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + XURa = XURb + imm; + ''', ['IsMicroop']) + + microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop', + 'MicroIntRegXOp', + 'XURa = XURb + ' + \ + 'extendReg64(XURc, type, shiftAmt, 64);', + ['IsMicroop']) + microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': microAddUopCode, @@ -604,6 +625,11 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop', + 'MicroIntImmXOp', + 'XURa = XURb - imm;', + ['IsMicroop']) + microSubUopCode = ''' URa = URb - 
shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' @@ -631,8 +657,8 @@ let {{ SCTLR sctlr = Sctlr; pNPC = URa; CPSR new_cpsr = - cpsrWriteByInstr(cpsrOrCondCodes, URb, - 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr, + 0xF, true, sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; NextThumb = new_cpsr.t; NextJazelle = new_cpsr.j; @@ -651,25 +677,37 @@ let {{ ['IsMicroop']) header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ + MicroIntImmDeclare.subst(microSubXiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntXERegDeclare.subst(microAddXERegUopIop) + \ MicroIntMovDeclare.subst(microUopRegMovIop) + \ MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ + MicroIntImmXConstructor.subst(microSubXiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntXERegConstructor.subst(microAddXERegUopIop) + \ MicroIntMovConstructor.subst(microUopRegMovIop) + \ MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ + BasicExecute.subst(microAddXiUopIop) + \ + BasicExecute.subst(microAddXiSpAlignUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ + BasicExecute.subst(microSubXiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ + BasicExecute.subst(microAddXERegUopIop) + \ PredOpExecute.subst(microUopRegMovIop) + \ 
PredOpExecute.subst(microUopRegMovRetIop) + \ PredOpExecute.subst(microUopSetPCCPSRIop) @@ -681,6 +719,25 @@ let {{ header_output = MacroMemDeclare.subst(iop) decoder_output = MacroMemConstructor.subst(iop) + iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", []) + header_output += PairMemDeclare.subst(iop) + decoder_output += PairMemConstructor.subst(iop) + + iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "") + iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "") + iopPost = InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "") + for iop in (iopImm, iopPre, iopPost): + header_output += BigFpMemImmDeclare.subst(iop) + decoder_output += BigFpMemImmConstructor.subst(iop) + + iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "") + header_output += BigFpMemRegDeclare.subst(iop) + decoder_output += BigFpMemRegConstructor.subst(iop) + + iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "") + header_output += BigFpMemLitDeclare.subst(iop) + decoder_output += BigFpMemLitConstructor.subst(iop) + iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", []) header_output += VMemMultDeclare.subst(iop) decoder_output += VMemMultConstructor.subst(iop) diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index c39f1b14f..aed6bab0d 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,8 +48,8 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None, pcDecl = None, - rasPop = False): + base='Memory', wbDecl=None, pcDecl=None, + rasPop=False, size=4, sign=False, faCode=None): # Make sure flags are in 
lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -63,6 +63,22 @@ let {{ codeBlobs["ea_code"] = eaCode + if faCode: + # For AArch64 the fa_code snippet comes already assembled here + codeBlobs["fa_code"] = faCode + elif wbDecl == None: + codeBlobs["fa_code"] = ''' + if (dest != INTREG_PC) { + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + } + ''' %("0" if size == 1 else + "1" if size == 2 else "2", + "true" if sign else "false") + else: + codeBlobs["fa_code"] = '' + macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) @@ -108,6 +124,7 @@ let {{ "use_uops" : use_uops, "use_pc" : use_pc, "use_wb" : use_wb, + "fa_code" : '', "is_ras_pop" : is_ras_pop }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) @@ -176,8 +193,13 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 4: - memSuffix = '' + if size == 8: + memSuffix = '_ud' + elif size == 4: + if sign: + memSuffix = '_sw' + else: + memSuffix = '_uw' elif size == 2: if sign: memSuffix = '_sh' diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index b8425a240..678a125fb 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,21 +40,102 @@ let {{ svcCode = ''' - if (FullSystem) { - fault = new SupervisorCall; - } else { - fault = new SupervisorCall(machInst); - } + fault = new SupervisorCall(machInst, imm); ''' - svcIop = InstObjParams("svc", "Svc", "PredOp", + svcIop = InstObjParams("svc", "Svc", "ImmOp", { "code": svcCode, "predicate_test": predicateTest }, ["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"]) - header_output = BasicDeclare.subst(svcIop) - decoder_output = 
BasicConstructor.subst(svcIop) + header_output = ImmOpDeclare.subst(svcIop) + decoder_output = ImmOpConstructor.subst(svcIop) exec_output = PredOpExecute.subst(svcIop) + smcCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if ((cpsr.mode != MODE_USER) && FullSystem) { + if (ArmSystem::haveVirtualization(xc->tcBase()) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) { + fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP); + } else { + if (scr.scd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + } + } else { + fault = disabledFault(); + } + ''' + + smcIop = InstObjParams("smc", "Smc", "PredOp", + { "code": smcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor.subst(smcIop) + exec_output += PredOpExecute.subst(smcIop) + + hvcCode = ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + + // Filter out the various cases where this instruction isn't defined + if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) || + (cpsr.mode == MODE_USER) || + (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) { + fault = disabledFault(); + } else { + fault = new HypervisorCall(machInst, imm); + } + ''' + + hvcIop = InstObjParams("hvc", "Hvc", "ImmOp", + { "code": hvcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += ImmOpDeclare.subst(hvcIop) + decoder_output += ImmOpConstructor.subst(hvcIop) + exec_output += PredOpExecute.subst(hvcIop) + + eretCode = ''' + SCTLR sctlr = Sctlr; + CPSR old_cpsr = Cpsr; + old_cpsr.nz = CondCodesNZ; + old_cpsr.c = CondCodesC; + old_cpsr.v = CondCodesV; + old_cpsr.ge = CondCodesGE; + + CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, + true, sctlr.nmfi, xc->tcBase()); + Cpsr = ~CondCodesMask & new_cpsr; + CondCodesNZ = new_cpsr.nz; + CondCodesC = new_cpsr.c; + CondCodesV 
= new_cpsr.v; + CondCodesGE = new_cpsr.ge; + + NextThumb = (new_cpsr).t; + NextJazelle = (new_cpsr).j; + NextItState = (((new_cpsr).it2 << 2) & 0xFC) + | ((new_cpsr).it1 & 0x3); + + NPC = (old_cpsr.mode == MODE_HYP) ? ElrHyp : LR; + ''' + + eretIop = InstObjParams("eret", "Eret", "PredOp", + { "code": eretCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(eretIop) + decoder_output += BasicConstructor.subst(eretIop) + exec_output += PredOpExecute.subst(eretIop) + + + }}; let {{ @@ -87,6 +168,59 @@ let {{ decoder_output += MrsConstructor.subst(mrsSpsrIop) exec_output += PredOpExecute.subst(mrsSpsrIop) + mrsBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + Dest = DecodedBankedIntReg; + } else { + Dest = xc->readMiscReg(regIdx); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp", + { "code": mrsBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeBefore"]) + header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop) + decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop) + exec_output += PredOpExecute.subst(mrsBankedRegIop) + + msrBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + // This is a bit nasty, you would have thought that + // DecodedBankedIntReg wouldn't be written to unless the + // conditions on the IF statements above are met, however if + // you look at the generated C code you'll find that they are. 
+ // However this is safe as DecodedBankedIntReg (which is used + // in operands.isa to get the index of DecodedBankedIntReg) + // will return INTREG_DUMMY if it's not a valid integer + // register, so redirecting the write to somewhere we don't + // care about. + DecodedBankedIntReg = Op1; + } else { + xc->setMiscReg(regIdx, Op1); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp", + { "code": msrBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeAfter"]) + header_output += MsrBankedRegDeclare.subst(msrBankedRegIop) + decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop) + exec_output += PredOpExecute.subst(msrBankedRegIop) + msrCpsrRegCode = ''' SCTLR sctlr = Sctlr; CPSR old_cpsr = Cpsr; @@ -96,7 +230,8 @@ let {{ old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -128,7 +263,8 @@ let {{ old_cpsr.v = CondCodesV; old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -488,12 +624,10 @@ let {{ decoder_output += BasicConstructor.subst(bkptIop) exec_output += BasicExecute.subst(bkptIop) - nopIop = InstObjParams("nop", "NopInst", "PredOp", \ - { "code" : "", "predicate_test" : predicateTest }, - ['IsNop']) + nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop']) header_output += BasicDeclare.subst(nopIop) - decoder_output += BasicConstructor.subst(nopIop) - exec_output += PredOpExecute.subst(nopIop) + decoder_output += BasicConstructor64.subst(nopIop) +
exec_output += BasicExecute.subst(nopIop) yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \ { "code" : "", "predicate_test" : predicateTest }) @@ -502,14 +636,31 @@ let {{ exec_output += PredOpExecute.subst(yieldIop) wfeCode = ''' - // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + + // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending, + ThreadContext *tc = xc->tcBase(); if (SevMailbox == 1) { SevMailbox = 0; - PseudoInst::quiesceSkip(xc->tcBase()); - } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) { - PseudoInst::quiesceSkip(xc->tcBase()); + PseudoInst::quiesceSkip(tc); + } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwe) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && + hcr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } ''' wfePredFixUpCode = ''' @@ -528,12 +679,30 @@ let {{ exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop) wfiCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + // WFI doesn't sleep if interrupts are pending (masked or not) - if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) { - PseudoInst::quiesceSkip(xc->tcBase()); + ThreadContext *tc = xc->tcBase(); + if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr, + scr)) { + 
PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwi) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && hcr.twi && + (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, @@ -564,6 +733,16 @@ let {{ decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) + sevlCode = ''' + SevMailbox = 1; + ''' + sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \ + { "code" : sevlCode, "predicate_test" : predicateTest }, + ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"]) + header_output += BasicDeclare.subst(sevlIop) + decoder_output += BasicConstructor.subst(sevlIop) + exec_output += BasicExecute.subst(sevlIop) + itIop = InstObjParams("it", "ItInst", "PredOp", \ { "code" : ";", "predicate_test" : predicateTest }, []) @@ -571,10 +750,7 @@ let {{ decoder_output += BasicConstructor.subst(itIop) exec_output += PredOpExecute.subst(itIop) unknownCode = ''' - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, true); + return new UndefinedInstruction(machInst, true); ''' unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \ { "code": unknownCode, @@ -626,108 +802,152 @@ let {{ exec_output += PredOpExecute.subst(bfiIop) mrc14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return 
new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1); + if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } Dest = MiscOp1; ''' - mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp", + mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp", { "code": mrc14code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14Iop) - decoder_output += RegRegOpConstructor.subst(mrc14Iop) + header_output += RegRegImmOpDeclare.subst(mrc14Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc14Iop) exec_output += PredOpExecute.subst(mrc14Iop) mcr14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } MiscDest = Op1; ''' - mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp", + mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp", { "code": mcr14code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14Iop) - decoder_output += RegRegOpConstructor.subst(mcr14Iop) + header_output += RegRegImmOpDeclare.subst(mcr14Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr14Iop) exec_output += PredOpExecute.subst(mcr14Iop) - mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp", - { "code": "Dest = MiscOp1;", - 
"predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14UserIop) - decoder_output += RegRegOpConstructor.subst(mrc14UserIop) - exec_output += PredOpExecute.subst(mrc14UserIop) - - mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14UserIop) - decoder_output += RegRegOpConstructor.subst(mcr14UserIop) - exec_output += PredOpExecute.subst(mcr14UserIop) - mrc15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. 
+ if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - Dest = MiscOp1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + Dest = MiscNsBankedOp1; ''' - mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp", + mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp", { "code": mrc15code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15Iop) - decoder_output += RegRegOpConstructor.subst(mrc15Iop) + header_output += RegRegImmOpDeclare.subst(mrc15Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc15Iop) exec_output += PredOpExecute.subst(mrc15Iop) mcr15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. 
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - MiscDest = Op1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + MiscNsBankedDest = Op1; ''' - mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp", + mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp", { "code": mcr15code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15Iop) - decoder_output += RegRegOpConstructor.subst(mcr15Iop) + header_output += RegRegImmOpDeclare.subst(mcr15Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr15Iop) exec_output += PredOpExecute.subst(mcr15Iop) - mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15UserIop) - decoder_output += RegRegOpConstructor.subst(mrc15UserIop) - exec_output += PredOpExecute.subst(mrc15UserIop) - - mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15UserIop) - decoder_output += RegRegOpConstructor.subst(mcr15UserIop) - exec_output += PredOpExecute.subst(mcr15UserIop) + + mrrc15code = ''' + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. 
+ if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + Dest = bits(MiscNsBankedOp164, 63, 32); + Dest2 = bits(MiscNsBankedOp164, 31, 0); + ''' + mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp", + { "code": mrrc15code, + "predicate_test": predicateTest }, []) + header_output += MrrcOpDeclare.subst(mrrc15Iop) + decoder_output += MrrcOpConstructor.subst(mrrc15Iop) + exec_output += PredOpExecute.subst(mrrc15Iop) + + + mcrr15code = ''' + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. 
+ if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2; + ''' + mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp", + { "code": mcrr15code, + "predicate_test": predicateTest }, []) + header_output += McrrOpDeclare.subst(mcrr15Iop) + decoder_output += McrrOpConstructor.subst(mcrr15Iop) + exec_output += PredOpExecute.subst(mcrr15Iop) + enterxCode = ''' NextThumb = true; @@ -775,35 +995,53 @@ let {{ exec_output += PredOpExecute.subst(clrexIop) isbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - isbIop = InstObjParams("isb", "Isb", "PredOp", + isbIop = InstObjParams("isb", "Isb", "ImmOp", {"code": isbCode, "predicate_test": predicateTest}, ['IsSerializeAfter']) - header_output += BasicDeclare.subst(isbIop) - decoder_output += BasicConstructor.subst(isbIop) + header_output += ImmOpDeclare.subst(isbIop) + decoder_output += ImmOpConstructor.subst(isbIop) exec_output += PredOpExecute.subst(isbIop) dsbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - dsbIop = InstObjParams("dsb", "Dsb", "PredOp", + dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, ['IsMemBarrier', 'IsSerializeAfter']) - header_output += BasicDeclare.subst(dsbIop) - decoder_output += BasicConstructor.subst(dsbIop) + header_output += ImmOpDeclare.subst(dsbIop) + decoder_output += 
ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) dmbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } ''' - dmbIop = InstObjParams("dmb", "Dmb", "PredOp", + dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, ['IsMemBarrier']) - header_output += BasicDeclare.subst(dmbIop) - decoder_output += BasicConstructor.subst(dmbIop) + header_output += ImmOpDeclare.subst(dmbIop) + decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) dbgCode = ''' diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa new file mode 100644 index 000000000..6ebbcc2ba --- /dev/null +++ b/src/arch/arm/isa/insts/misc64.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + svcCode = ''' + fault = new SupervisorCall(machInst, bits(machInst, 20, 5)); + ''' + + svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst", + svcCode, ["IsSyscall", "IsNonSpeculative", + "IsSerializeAfter"]) + header_output = BasicDeclare.subst(svcIop) + decoder_output = BasicConstructor64.subst(svcIop) + exec_output = BasicExecute.subst(svcIop) + + # @todo: extend to take into account Virtualization. 
+ smcCode = ''' + SCR scr = Scr64; + CPSR cpsr = Cpsr; + + if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + ''' + + smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst", + smcCode, ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor64.subst(smcIop) + exec_output += BasicExecute.subst(smcIop) + + def subst(templateBase, iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + bfmMaskCode = ''' + uint64_t bitMask; + int diff = imm2 - imm1; + if (imm1 <= imm2) { + bitMask = mask(diff + 1); + } else { + bitMask = mask(imm2 + 1); + bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1)); + diff += intWidth; + } + uint64_t topBits M5_VAR_USED = ~mask(diff+1); + uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1)); + result &= bitMask; + ''' + + bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);' + bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode); + subst("RegRegImmImmOp64", bfmIop) + + ubfmCode = bfmMaskCode + 'Dest64 = result;' + ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode); + subst("RegRegImmImmOp64", ubfmIop) + + sbfmCode = bfmMaskCode + \ + 'Dest64 = result | (bits(Op164, imm2) ? 
topBits : 0);' + sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode); + subst("RegRegImmImmOp64", sbfmIop) + + extrCode = ''' + if (imm == 0) { + Dest64 = Op264; + } else { + Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm); + } + ''' + extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode); + subst("RegRegRegImmOp64", extrIop); + + unknownCode = ''' + return new UndefinedInstruction(machInst, true); + ''' + unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64", + unknownCode) + header_output += BasicDeclare.subst(unknown64Iop) + decoder_output += BasicConstructor64.subst(unknown64Iop) + exec_output += BasicExecute.subst(unknown64Iop) + + isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst", + "fault = new FlushPipe;", ['IsSerializeAfter']) + header_output += BasicDeclare.subst(isbIop) + decoder_output += BasicConstructor64.subst(isbIop) + exec_output += BasicExecute.subst(isbIop) + + dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", + "fault = new FlushPipe;", + ['IsMemBarrier', 'IsSerializeAfter']) + header_output += BasicDeclare.subst(dsbIop) + decoder_output += BasicConstructor64.subst(dsbIop) + exec_output += BasicExecute.subst(dsbIop) + + dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", + ['IsMemBarrier']) + header_output += BasicDeclare.subst(dmbIop) + decoder_output += BasicConstructor64.subst(dmbIop) + exec_output += BasicExecute.subst(dmbIop) + + clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst", + "LLSCLock = 0;") + header_output += BasicDeclare.subst(clrexIop) + decoder_output += BasicConstructor64.subst(clrexIop) + exec_output += BasicExecute.subst(clrexIop) +}}; diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 876bb3bb7..ca5c3038c 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All 
rights reserved // // The license below extends only to copyright in the software and shall @@ -94,8 +94,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonUThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -112,8 +112,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -129,6 +129,38 @@ output header {{ template <template <typename T> class Base> StaticInstPtr + decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr decodeNeonUSThreeUSReg(bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -177,6 +209,38 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonSThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { 
+ if (q) { + return decodeNeonSThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonSThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + if (q) { + return decodeNeonUThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonUThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -241,6 +305,124 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, op2); + else + return new Base<uint32_t>(machInst, dest, op1, op2); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, op2, imm); + else + return new Base<uint32_t>(machInst, dest, op1, op2, 
imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> 
class BaseQ> + StaticInstPtr decodeNeonUTwoShiftReg(bool q, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, uint64_t imm) @@ -345,6 +527,46 @@ output header {{ } } + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, imm); + case 1: + return new Base<uint16_t>(machInst, dest, op1, imm); + case 2: + return new Base<uint32_t>(machInst, dest, op1, imm); + case 3: + return new Base<uint64_t>(machInst, dest, op1, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, imm); + case 1: + return new Base<int16_t>(machInst, dest, op1, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, imm); + case 3: + return new Base<int64_t>(machInst, dest, op1, imm); + default: + return new Unknown(machInst); + } + } + template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr @@ -411,6 +633,66 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + return decodeNeonUTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonUTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + 
return decodeNeonSTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonSTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, imm); + else + return new Base<uint32_t>(machInst, dest, op1, imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, imm); + } + } + template <template <typename T> class Base> StaticInstPtr decodeNeonUTwoMiscUSReg(unsigned size, @@ -451,8 +733,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonUTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -465,8 +747,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonSTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -498,8 +780,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSTwoMiscUReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) 
{ switch (size) { case 0: @@ -559,6 +841,221 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseQ<uint32_t>(machInst, dest, op1); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1); + else + return new Base<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class BaseD, + template <typename T> class 
BaseQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new 
Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } }}; output exec {{ @@ -872,10 +1369,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -926,10 +1420,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' 
eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -978,10 +1469,7 @@ let {{ readDestCode = 'destReg = destRegs[i];' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < rCount; i++) { FloatReg srcReg1 = srcRegs1[i]; @@ -2156,7 +2644,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2171,7 +2659,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2234,6 +2722,24 @@ let {{ threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) + vfmafpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True) + threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True) + + vfmsfpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = 
ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True) + threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True) + vmlsfpCode = ''' FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, @@ -2765,7 +3271,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, false, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2781,7 +3287,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, true, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2795,7 +3301,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2809,7 +3315,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -3296,10 +3802,7 @@ let {{ } else { index -= eCount; if (index >= eCount) { - 
if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { destReg.elements[i] = srcReg2.elements[index]; } diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa new file mode 100644 index 000000000..e065761f4 --- /dev/null +++ b/src/arch/arm/isa/insts/neon64.isa @@ -0,0 +1,3355 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +let {{ + + header_output = "" + exec_output = "" + + # FP types (FP operations always work with unsigned representations) + floatTypes = ("uint32_t", "uint64_t") + smallFloatTypes = ("uint32_t",) + + def threeEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, pairwise=False, scalar=False, + byElem=False): + assert (not pairwise) or ((not byElem) and (not scalar)) + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + if byElem: + # 2nd register operand has to be read fully + eWalkCode += ''' + FullRegVect srcReg2; + ''' + else: + eWalkCode += ''' + RegVect srcReg2; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + if byElem: + # 2nd operand has to be read fully + for reg in range(rCount, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode 
= '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + if pairwise: + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(2 * i < eCount ? + srcReg1.elements[2 * i] : + srcReg2.elements[2 * i - eCount]); + Element srcElem2 = gtoh(2 * i < eCount ? + srcReg1.elements[2 * i + 1] : + srcReg2.elements[2 * i + 1 - eCount]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + else: + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + Element srcElem1 = gtoh(srcReg1.elements[i]); + Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src2Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeUnequalRegInstX(name, Name, opClass, types, op, + bigSrc1, bigSrc2, bigDest, readDest, scalar=False, + byElem=False, hi=False): + assert not (scalar and hi) + global header_output, exec_output + src1Cnt = src2Cnt = destCnt = 2 + src1Prefix = 
src2Prefix = destPrefix = '' + if bigSrc1: + src1Cnt = 4 + src1Prefix = 'Big' + if bigSrc2: + src2Cnt = 4 + src2Prefix = 'Big' + if bigDest: + destCnt = 4 + destPrefix = 'Big' + if byElem: + src2Prefix = 'Full' + eWalkCode = simd64EnabledCheckCode + ''' + %sRegVect srcReg1; + %sRegVect srcReg2; + %sRegVect destReg; + ''' % (src1Prefix, src2Prefix, destPrefix) + srcReg1 = 0 + if hi and not bigSrc1: # long/widening operations + srcReg1 = 2 + for reg in range(src1Cnt): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); + ''' % { "reg" : reg, "srcReg1" : srcReg1 } + srcReg1 += 1 + srcReg2 = 0 + if (not byElem) and (hi and not bigSrc2): # long/widening operations + srcReg2 = 2 + for reg in range(src2Cnt): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); + ''' % { "reg" : reg, "srcReg2" : srcReg2 } + srcReg2 += 1 + if byElem: + # 2nd operand has to be read fully + for reg in range(src2Cnt, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(destCnt): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); + %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + %(destPrefix)sElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, + "destPrefix" : destPrefix, + "scalarCheck" : scalarCheck if scalar else "", + "src2Index" : "imm" if byElem else "i" } + destReg = 0 + if hi and not bigDest: + # narrowing operations + destReg = 2 + for 
reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if destCnt < 4 and not hi: # zero upper half + for reg in range(destCnt, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, True, False, readDest, scalar, byElem, hi) + + def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + threeUnequalRegInstX(name, Name, opClass, types, op, + False, False, True, readDest, scalar, byElem, hi) + + def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, False, True, readDest, scalar, byElem, hi) + + def twoEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, scalar=False, byElem=False, + hasImm=False, isDup=False): + global header_output, exec_output + assert (not isDup) or byElem + if byElem: + hasImm = True + if isDup: + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + RegVect destReg; + ''' + else: + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(4 if isDup else rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = 
htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + unsigned j = i; + Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[j] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src1Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, + hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + BigRegVect destReg; + ''' + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + destReg = 0 if not hi else 2 + if readDest: + for reg in range(4): + eWalkCode += ''' + destReg.regs[%(reg)d] = 
htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + destReg += 1 + readDestCode = '' + if readDest: + readDestCode = 'destReg = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + BigRegVect srcReg1; + RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(2): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + else: + eWalkCode += ''' + destReg.elements[0] = 0; + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + BigElement srcElem1 = gtoh(srcReg1.elements[i]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : 
readDestCode, + "scalarCheck" : scalarCheck if scalar else "" } + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if not hi: + for reg in range(2, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromVecElemInstX(name, Name, opClass, types, rCount): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + 
RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[imm2]); + Element destElem = srcElem1; + destReg.elements[imm1] = htog(destElem); + ''' + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1Reg2ImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1Reg2ImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[0]); + Element srcElem2 = gtoh(srcReg1.elements[1]); + Element destElem; + %(op)s + destReg.elements[0] = htog(destElem); + ''' % { "op" : op } + destCnt = rCount / 2 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + 
exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, + doubleDest=False, long=False): + global header_output, exec_output + destPrefix = "Big" if long else "" + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + %sRegVect destReg; + ''' % destPrefix + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.regs[0] = 0; + %(destPrefix)sElement destElem = 0; + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + if (i == 0) { + destElem = srcElem1; + } else { + %(op)s + } + } + destReg.elements[0] = htog(destElem); + ''' % { "op" : op, "destPrefix" : destPrefix } + destCnt = 2 if doubleDest else 1 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + if long: + exec_output += NeonXUnequalRegOpExecute.subst(iop) + else: + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, + readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcRegs; + BigRegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = 
gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount / 2; i++) { + Element srcElem1 = gtoh(srcRegs.elements[2 * i]); + Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + if readDest: + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataXImmOnlyOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += 
NeonX1RegImmOnlyOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + for (unsigned i = 0; i < eCount; i++) { + destReg.elements[i] = htog((Element) %sOp1); + } + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def extInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + 
substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.elements[imm] = htog((Element) %sOp1); + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, + signExt=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if signExt: + eWalkCode += ''' + %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); + ''' % gprSpec + else: + eWalkCode += ''' + %sDest = srcReg.elements[imm]; + ''' % gprSpec + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): + global header_output, decoder_output, exec_output + code = simd64EnabledCheckCode + ''' + union + 
{ + uint8_t bytes[64]; + FloatRegBits regs[16]; + } table; + + union + { + uint8_t bytes[%(rCount)d * 4]; + FloatRegBits regs[%(rCount)d]; + } destReg, srcReg2; + + const unsigned length = %(length)d; + const bool isTbl = %(isTbl)s; + ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } + for reg in range(rCount): + code += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(16): + if reg < length * 4: + code += ''' + table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); + ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } + else: + code += ''' + table.regs[%(reg)d] = 0; + ''' % { "reg" : reg } + code += ''' + for (unsigned i = 0; i < sizeof(destReg); i++) { + uint8_t index = srcReg2.bytes[i]; + if (index < 16 * length) { + destReg.bytes[i] = table.bytes[index]; + } else { + if (isTbl) + destReg.bytes[i] = 0; + // else destReg.bytes[i] unchanged + } + } + ''' + for reg in range(rCount): + code += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + code += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": code, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + # ABS + absCode = ''' + if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) + twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) + # ADD + addCode = "destElem = srcElem1 + srcElem2;" + threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) + 
threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) + # ADDHN, ADDHN2 + addhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, + addhnCode) + threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, + addhnCode, hi=True) + # ADDP (scalar) + twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, + addCode) + # ADDP (vector) + threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, + addCode, pairwise=True) + threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, + addCode, pairwise=True) + # ADDV + # Note: SimdAddOp can be a bit optimistic here + addAcrossCode = "destElem += srcElem1;" + twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), + 2, addAcrossCode) + twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, + addAcrossCode) + # AND + andCode = "destElem = srcElem1 & srcElem2;" + threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode) + threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode) + # BIC (immediate) + bicImmCode = "destElem &= ~imm;" + oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, + bicImmCode, True) + oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, + bicImmCode, True) + # BIC (register) + bicCode = "destElem = srcElem1 & ~srcElem2;" + threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode) + threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode) + # BIF + bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);" + threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, + True) + threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, + True) + # BIT + bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);" + 
threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, + True) + threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, + True) + # BSL + bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);" + threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, + True) + threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, + True) + # CLS + clsCode = ''' + unsigned count = 0; + if (srcElem1 < 0) { + srcElem1 <<= 1; + while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } else { + srcElem1 <<= 1; + while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } + destElem = count; + ''' + twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode) + twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode) + # CLZ + clzCode = ''' + unsigned count = 0; + while (srcElem1 >= 0 && count < sizeof(Element) * 8) { + count++; + srcElem1 <<= 1; + } + destElem = count; + ''' + twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode) + twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode) + # CMEQ (register) + cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, + cmeqCode) + threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, + cmeqCode) + # CMEQ (zero) + cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, + cmeqZeroCode) + twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, + cmeqZeroCode) + # CMGE (register) + cmgeCode = "destElem = (srcElem1 >= srcElem2) ? 
(Element)(-1) : 0;" + threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode) + threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode) + # CMGE (zero) + cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, + cmgeZeroCode) + twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, + cmgeZeroCode) + # CMGT (register) + cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode) + threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode) + # CMGT (zero) + cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, + cmgtZeroCode) + twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, + cmgtZeroCode) + # CMHI (register) + threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, + cmgtCode) + threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, + cmgtCode) + # CMHS (register) + threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, + cmgeCode) + threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, + cmgeCode) + # CMLE (zero) + cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, + cmleZeroCode) + twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, + cmleZeroCode) + # CMLT (zero) + cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, + cmltZeroCode) + twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, + cmltZeroCode) + # CMTST (register) + tstCode = "destElem = (srcElem1 & srcElem2) ? 
(Element)(-1) : 0;" + threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, + tstCode) + threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, + tstCode) + # CNT + cntCode = ''' + unsigned count = 0; + while (srcElem1 && count < sizeof(Element) * 8) { + count += srcElem1 & 0x1; + srcElem1 >>= 1; + } + destElem = count; + ''' + twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode) + twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode) + # DUP (element) + dupCode = "destElem = srcElem1;" + twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True, scalar=True) + # DUP (general register) + dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W') + dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # EOR + eorCode = "destElem = srcElem1 ^ srcElem2;" + threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode) + threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode) + # EXT + extCode = ''' + for (unsigned i = 0; i < eCount; i++) { + unsigned index = i + imm; + if (index < eCount) { + destReg.elements[i] = srcReg1.elements[index]; + } else { + index -= eCount; + if (index >= eCount) { + fault = new UndefinedInstruction(machInst, false, mnemonic); + } else { + destReg.elements[i] = srcReg2.elements[index]; + } + } + } + ''' + extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode) + extInstX("Ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode) + # FABD + fpOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = %s; + FpscrExc = fpscr; + ''' + fabdCode = fpOp % 
"fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))" + threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2, + fabdCode) + threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4, + fabdCode) + threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4, + fabdCode, scalar=True) + # FABS + fabsCode = fpOp % "fplibAbs<Element>(srcElem1)" + twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2, + fabsCode) + twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4, + fabsCode) + # FACGE + fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1)," + " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0") + facgeCode = fpCmpAbsOp % "GE" + threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgeCode) + threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode) + threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode, scalar=True) + # FACGT + facgtCode = fpCmpAbsOp % "GT" + threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgtCode) + threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode) + threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode, scalar=True) + # FADD + fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)" + faddCode = fpBinOp % "Add" + threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2, + faddCode) + threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, + faddCode) + # FADDP (scalar) + twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp", + ("uint32_t",), 2, faddCode) + twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp", + ("uint64_t",), 4, faddCode) + # FADDP (vector) + threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, + 2, faddCode, pairwise=True) + threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", 
floatTypes, 4, + faddCode, pairwise=True) + # FCMEQ (register) + fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?" + " -1 : 0") + fcmeqCode = fpCmpOp % "EQ" + threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode, scalar=True) + # FCMEQ (zero) + fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0" + fcmeqZeroCode = fpCmpZeroOp % "EQ" + twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode, scalar=True) + # FCMGE (register) + fcmgeCode = fpCmpOp % "GE" + threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode, scalar=True) + # FCMGE (zero) + fcmgeZeroCode = fpCmpZeroOp % "GE" + twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode, scalar=True) + # FCMGT (register) + fcmgtCode = fpCmpOp % "GT" + threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode, scalar=True) + # FCMGT (zero) + fcmgtZeroCode = fpCmpZeroOp % "GT" + twoEqualRegInstX("fcmgt", "FcmgtZeroDX", 
"SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode, scalar=True) + # FCMLE (zero) + fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?" + " -1 : 0") + fcmleZeroCode = fpCmpRevZeroOp % "GE" + twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode, scalar=True) + # FCMLT (zero) + fcmltZeroCode = fpCmpRevZeroOp % "GT" + twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode, scalar=True) + # FCVTAS + fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>(" + "srcElem1, %s, %s, %s, fpscr)") + fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4, + fcvtasCode, scalar=True) + # FCVTAU + fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4, + fcvtauCode, scalar=True) + # FCVTL, FCVTL2 + fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", 
("uint16_t", "uint32_t"), + fcvtlCode) + twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"), + fcvtlCode, hi=True) + # FCVTMS + fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode, scalar=True) + # FCVTMU + fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode, scalar=True) + # FCVTN, FCVTN2 + fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode) + twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode, hi=True) + # FCVTNS + fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsQX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode, scalar=True) + # FCVTNU + fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode, scalar=True) + # FCVTPS + fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF") + twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpsCode) + 
twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode) + twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode, scalar=True) + # FCVTPU + fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF") + twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode, scalar=True) + # FCVTXN, FCVTXN2 + fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPRounding_ODD, fpscr)") + twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode) + twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, hi=True) + twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, scalar=True) + # FCVTZS (fixed-point) + fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True, scalar=True) + # FCVTZS (integer) + fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode, scalar=True) + # FCVTZU (fixed-point) + fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzuCode, hasImm=True) + twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True) + 
twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True, scalar=True) + # FCVTZU (integer) + fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode, scalar=True) + # FDIV + fdivCode = fpBinOp % "Div" + threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2, + fdivCode) + threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4, + fdivCode) + # FMAX + fmaxCode = fpBinOp % "Max" + threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fmaxCode) + threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode) + # FMAXNM + fmaxnmCode = fpBinOp % "MaxNum" + threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxnmCode) + threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode) + # FMAXNMP (scalar) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxnmCode) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxnmCode) + # FMAXNMP (vector) + threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fmaxnmCode, pairwise=True) + threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode, pairwise=True) + # FMAXNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)" + fmaxnmAcrossCode = fpAcrossOp % "MaxNum" + twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fmaxnmAcrossCode) + # FMAXP (scalar) + twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxCode) + 
twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxCode) + # FMAXP (vector) + threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxCode, pairwise=True) + threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode, pairwise=True) + # FMAXV + # Note: SimdFloatCmpOp can be a bit optimistic here + fmaxAcrossCode = fpAcrossOp % "Max" + twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fmaxAcrossCode) + # FMIN + fminCode = fpBinOp % "Min" + threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fminCode) + threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4, + fminCode) + # FMINNM + fminnmCode = fpBinOp % "MinNum" + threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminnmCode) + threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode) + # FMINNMP (scalar) + twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminnmCode) + twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminnmCode) + # FMINNMP (vector) + threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fminnmCode, pairwise=True) + threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode, pairwise=True) + # FMINNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminnmAcrossCode = fpAcrossOp % "MinNum" + twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fminnmAcrossCode) + # FMINP (scalar) + twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminCode) + twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminCode) + # FMINP (vector) + threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminCode, pairwise=True) + threeEqualRegInstX("fminp", "FminpQX", 
"SimdFloatCmpOp", floatTypes, 4, + fminCode, pairwise=True) + # FMINV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminAcrossCode = fpAcrossOp % "Min" + twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fminAcrossCode) + # FMLA (by element) + fmlaCode = fpOp % ("fplibMulAdd<Element>(" + "destElem, srcElem1, srcElem2, fpscr)") + threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True, scalar=True) + # FMLA (vector) + threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlaCode, True) + threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlaCode, True) + # FMLS (by element) + fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem," + " fplibNeg<Element>(srcElem1), srcElem2, fpscr)") + threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True, scalar=True) + # FMLS (vector) + threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlsCode, True) + threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlsCode, True) + # FMOV + fmovCode = 'destElem = imm;' + oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2, + fmovCode) + oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode) + # FMUL (by element) + fmulCode = fpBinOp % "Mul" + threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulCode, byElem=True) + 
threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True) + threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True, scalar=True) + # FMUL (vector) + threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2, + fmulCode) + threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode) + # FMULX + fmulxCode = fpBinOp % "MulX" + threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes, + 2, fmulxCode) + threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode) + threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode, scalar=True) + # FMULX (by element) + threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True, scalar=True) + # FNEG + fnegCode = fpOp % "fplibNeg<Element>(srcElem1)" + twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2, + fnegCode) + twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4, + fnegCode) + # FRECPE + frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4, + frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes, + 4, frecpeCode, scalar=True) + # FRECPS + frecpsCode = fpBinOp % "RecipStepFused" + threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes, + 4, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsScX", 
"SimdFloatMultAccOp", floatTypes, + 4, frecpsCode, scalar=True) + # FRECPX + frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4, + frecpxCode, scalar=True) + # FRINTA + frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)" + frintaCode = frintCode % ("FPRounding_TIEAWAY", "false") + twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2, + frintaCode) + twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4, + frintaCode) + # FRINTI + frintiCode = frintCode % ("FPCRRounding(fpscr)", "false") + twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2, + frintiCode) + twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4, + frintiCode) + # FRINTM + frintmCode = frintCode % ("FPRounding_NEGINF", "false") + twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2, + frintmCode) + twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4, + frintmCode) + # FRINTN + frintnCode = frintCode % ("FPRounding_TIEEVEN", "false") + twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2, + frintnCode) + twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4, + frintnCode) + # FRINTP + frintpCode = frintCode % ("FPRounding_POSINF", "false") + twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2, + frintpCode) + twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4, + frintpCode) + # FRINTX + frintxCode = frintCode % ("FPCRRounding(fpscr)", "true") + twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2, + frintxCode) + twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4, + frintxCode) + # FRINTZ + frintzCode = frintCode % ("FPRounding_ZERO", "false") + twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2, + frintzCode) + twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4, + frintzCode) + # 
FRSQRTE + frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp", + smallFloatTypes, 2, frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode, scalar=True) + # FRSQRTS + frsqrtsCode = fpBinOp % "RSqrtStepFused" + threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp", + smallFloatTypes, 2, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode, scalar=True) + # FSQRT + fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)" + twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2, + fsqrtCode) + twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4, + fsqrtCode) + # FSUB + fsubCode = fpBinOp % "Sub" + threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2, + fsubCode) + threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4, + fsubCode) + # INS (element) + insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4) + # INS (general register) + insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4, + 'W') + insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X') + # MLA (by element) + mlaCode = "destElem += srcElem1 * srcElem2;" + threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True) + threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True) + # MLA (vector) + threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlaCode, True) + threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlaCode, True) + # MLS (by element) + mlsCode = 
"destElem -= srcElem1 * srcElem2;" + threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True) + threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True) + # MLS (vector) + threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlsCode, True) + threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlsCode, True) + # MOV (element) -> alias to INS (element) + # MOV (from general) -> alias to INS (general register) + # MOV (scalar) -> alias to DUP (element) + # MOV (to general) -> alias to UMOV + # MOV (vector) -> alias to ORR (register) + # MOVI + movImmCode = "destElem = imm;" + oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2, + movImmCode) + oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4, + movImmCode) + # MUL (by element) + mulCode = "destElem = srcElem1 * srcElem2;" + threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp", + ("uint16_t", "uint32_t"), 2, mulCode, byElem=True) + threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp", + ("uint16_t", "uint32_t"), 4, mulCode, byElem=True) + # MUL (vector) + threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2, + mulCode) + threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4, + mulCode) + # MVN + mvnCode = "destElem = ~srcElem1;" + twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode) + twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode) + # MVNI + mvniCode = "destElem = ~imm;" + oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode) + oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode) + # NEG + negCode = "destElem = -srcElem1;" + twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode) + twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode) + # NOT -> alias to MVN + # ORN + ornCode = 
"destElem = srcElem1 | ~srcElem2;" + threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode) + threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode) + # ORR (immediate) + orrImmCode = "destElem |= imm;" + oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2, + orrImmCode, True) + oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4, + orrImmCode, True) + # ORR (register) + orrCode = "destElem = srcElem1 | srcElem2;" + threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode) + threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode) + # PMUL + pmulCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= srcElem1 << j; + } + ''' + threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2, + pmulCode) + threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4, + pmulCode) + # PMULL, PMULL2 + # Note: 64-bit PMULL is not available (Crypto. 
Extension) + pmullCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= (BigElement)srcElem1 << j; + } + ''' + threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode) + threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",), + pmullCode, hi=True) + # RADDHN, RADDHN2 + raddhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes, + raddhnCode) + threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes, + raddhnCode, hi=True) + # RBIT + rbitCode = ''' + destElem = 0; + Element temp = srcElem1; + for (int i = 0; i < 8 * sizeof(Element); i++) { + destElem = destElem | ((temp & 0x1) << + (8 * sizeof(Element) - 1 - i)); + temp >>= 1; + } + ''' + twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode) + twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode) + # REV16 + rev16Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 1) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2, + rev16Code) + twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4, + rev16Code) + # REV32 + rev32Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 2) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"), + 2, rev32Code) + twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"), + 4, rev32Code) + # REV64 + rev64Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 3) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev64", 
"Rev64DX", "SimdAluOp", smallUnsignedTypes, 2, + rev64Code) + twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4, + rev64Code) + # RSHRN, RSHRN2 + rshrnCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True) + twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True, hi=True) + # RSUBHN, RSUBHN2 + rsubhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes, + rsubhnCode) + threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes, + rsubhnCode, hi=True) + # SABA + abaCode = ''' + destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2, + abaCode, True) + threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4, + abaCode, True) + # SABAL, SABAL2 + abalCode = ''' + destElem += (srcElem1 > srcElem2) ? + ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes, + abalCode, True) + threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes, + abalCode, True, hi=True) + # SABD + abdCode = ''' + destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2, + abdCode) + threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4, + abdCode) + # SABDL, SABDL2 + abdlCode = ''' + destElem = (srcElem1 > srcElem2) ? 
+ ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes, + abdlCode, True) + threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes, + abdlCode, True, hi=True) + # SADALP + adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;" + twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2, + adalpCode, True) + twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4, + adalpCode, True) + # SADDL, SADDL2 + addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;" + threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SADDLP + twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2, + addlwCode) + twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4, + addlwCode) + # SADDLV + # Note: SimdAddOp can be a bit optimistic here + addAcrossLongCode = "destElem += (BigElement)srcElem1;" + twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"), + 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"), + 4, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # SADDW, SADDW2 + threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SCVTF (fixed-point) + scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", 
"ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes, + 4, scvtfFixedCode % 32, hasImm=True, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True, scalar=True) + # SCVTF (integer) + scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64) + twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64, scalar=True) + # SHADD + haddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1)) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. 
+ destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2, + haddCode) + threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4, + haddCode) + # SHL + shlCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; + else + destElem = srcElem1 << imm; + ''' + twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode, + hasImm=True) + twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode, + hasImm=True) + # SHLL, SHLL2 + shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);" + twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode) + twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode, + hi=True) + # SHRN, SHRN2 + shrnCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True) + twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True, hi=True) + # SHSUB + hsubCode = ''' + Element borrowBit = + (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. 
+ destElem = (((srcElem1 & ~(Element)1) / 2) - + ((srcElem2 & ~(Element)1) / 2)) - borrowBit; + ''' + threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2, + hsubCode) + threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4, + hsubCode) + # SLI + sliCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 << imm) | (destElem & mask(imm)); + ''' + twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode, + True, hasImm=True) + twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode, + True, hasImm=True) + # SMAX + maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;" + threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode) + threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode) + # SMAXP + threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode, pairwise=True) + # SMAXV + maxAcrossCode = ''' + if (i == 0 || srcElem1 > destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4, + maxAcrossCode) + # SMIN + minCode = "destElem = (srcElem1 < srcElem2) ? 
srcElem1 : srcElem2;" + threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2, + minCode) + threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4, + minCode) + # SMINP + threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4, + minCode, pairwise=True) + # SMINV + minAcrossCode = ''' + if (i == 0 || srcElem1 < destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4, + minAcrossCode) + # SMLAL, SMLAL2 (by element) + mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True) + threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True, + hi=True) + # SMLAL, SMLAL2 (vector) + threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes, + mlalCode, True) + threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes, + mlalCode, True, hi=True) + # SMLSL, SMLSL2 (by element) + mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, byElem=True) + threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp", + smallSignedTypes, mlslCode, True, byElem=True, hi=True) + # SMLSL, SMLSL2 (vector) + threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True) + threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, hi=True) + # SMOV + insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4, + 'W', True) + insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X', + True) + # SMULL, 
SMULL2 (by element) + mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True) + threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True, hi=True) + # SMULL, SMULL2 (vector) + threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes, + mullCode) + threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes, + mullCode, hi=True) + # SQABS + sqabsCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4, + sqabsCode, scalar=True) + # SQADD + sqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool negSrc2 = (srcElem2 < 0); + if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2, + sqaddCode) + threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4, + sqaddCode) + threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4, + sqaddCode, scalar=True) + # SQDMLAL, SQDMLAL2 (by element) + qdmlalCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) 
|| + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem += midElem; + bool negDest = ltz(destElem); + bool negMid = ltz(midElem); + if (negPreDest == negMid && negMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True) + threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + scalar=True) + # SQDMLAL, SQDMLAL2 (vector) + threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True) + threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, scalar=True) + # SQDMLSL, SQDMLSL2 (by element) + qdmlslCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem -= midElem; + bool negDest = ltz(destElem); + bool posMid = ltz((BigElement)-midElem); + if (negPreDest == posMid && posMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + 
fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + scalar=True) + # SQDMLSL, SQDMLSL2 (vector) + threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True) + threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, scalar=True) + # SQDMULH (by element) + sqdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> + (sizeof(Element) * 8); + if (srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (sizeof(Element) * 8 - 1))) { + destElem = ~srcElem1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, + scalar=True) + # SQDMULH (vector) + threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) + # SQDMULL, SQDMULL2 (by element) + qdmullCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + if 
(srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (Element)(sizeof(Element) * 8 - 1))) { + destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True) + threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + scalar=True) + # SQDMULL, SQDMULL2 (vector) + threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True) + threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, hi=True) + threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, scalar=True) + # SQNEG + sqnegCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else { + destElem = -srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, + sqnegCode, scalar=True) + # SQRDMULH (by element) + sqrdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + + ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + if (destElem < 0) { + destElem = mask(sizeof(Element) * 8 - 1); + } else { + destElem = 
(Element)1 << (sizeof(Element) * 8 - 1); + } + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, + scalar=True) + # SQRDMULH (vector) + threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) + # SQRSHL + sqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? 
mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, + sqrshlCode, scalar=True) + # SQRSHRN, SQRSHRN2 + sqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True) + twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, scalar=True) + # SQRSHRUN, SQRSHRUN2 + sqrshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - 
imm))); + mid += rBit; + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, + sqrshrunCode, hasImm=True) + twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) + # SQSHL (immediate) + sqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - imm); + if (topBits != 0 && topBits != mask(imm + 1)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True, scalar=True) + # SQSHL (register) + sqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> 
shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, + sqshlCode, scalar=True) + # SQSHLU + sqshluCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (srcElem1 > 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, + sqshluCode, 
hasImm=True, scalar=True) + # SQSHRN, SQSHRN2 + sqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True) + twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, scalar=True) + # SQSHRUN, SQSHRUN2 + sqshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True) + twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, scalar=True) + # SQSUB + sqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool posSrc2 = (srcElem2 >= 0); + if ((negDest != negSrc1) && (negSrc1 == 
posSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, + sqsubCode, scalar=True) + # SQXTN, SQXTN2 + sqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, + sqxtnCode) + twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, + sqxtnCode, hi=True) + twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, + sqxtnCode, scalar=True) + # SQXTUN, SQXTUN2 + sqxtunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if (srcElem1 < 0 || + ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, + sqxtunCode) + twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, + sqxtunCode, hi=True) + twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, + sqxtunCode, scalar=True) + # SRHADD + rhaddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1) + 1) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. 
+ destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, + rhaddCode) + threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, + rhaddCode) + # SRI + sriCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 >> imm) | + (destElem & ~mask(sizeof(Element) * 8 - imm)); + ''' + twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, + True, hasImm=True) + twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, + True, hasImm=True) + # SRSHL + rshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. 
+ if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } else { + destElem = srcElem1; + } + ''' + threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, + rshlCode) + threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, + rshlCode) + # SRSHR + rshrCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, + rshrCode, hasImm=True) + # SRSRA + rsraCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem += 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem += srcElem1; + } + ''' + twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, + rsraCode, True, hasImm=True) + # SSHL + shlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. 
+ if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } + ''' + threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, + shlCode) + threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, + shlCode) + # SSHLL, SSHLL2 + shllCode = ''' + if (imm >= sizeof(destElem) * 8) { + destElem = 0; + } else { + destElem = (BigElement)srcElem1 << imm; + } + ''' + twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True) + twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True, hi=True) + # SSHR + shrCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + if (ltz(srcElem1)) + destElem = -1; + else + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, + hasImm=True) + twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, + hasImm=True) + # SSRA + sraCode = ''' + Element mid;; + if (imm >= sizeof(srcElem1) * 8) { + mid = ltz(srcElem1) ? 
-1 : 0; + } else { + mid = srcElem1 >> imm; + if (ltz(srcElem1) && !ltz(mid)) { + mid |= -(mid & ((Element)1 << + (sizeof(Element) * 8 - 1 - imm))); + } + } + destElem += mid; + ''' + twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, + True, hasImm=True) + twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, + True, hasImm=True) + # SSUBL + sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" + threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SSUBW + threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SUB + subCode = "destElem = srcElem1 - srcElem2;" + threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) + threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) + # SUBHN, SUBHN2 + subhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, + subhnCode) + threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, + subhnCode, hi=True) + # SUQADD + suqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { + if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || + tmp < srcElem1 || tmp < destElem) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absDestElem = (~destElem) + 1; + if (absDestElem < srcElem1) { + // Still check for positive sat., no need to check for negative sat. 
+ if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True, scalar=True) + # SXTL -> alias to SSHLL + # TBL + tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) + tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) + tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) + tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) + tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) + tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) + tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) + tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) + # TBX + tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) + tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) + tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) + tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) + tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) + tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) + tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) + tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) + # TRN1 + trnCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[2 * i + part]; + destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("trn1", 
"Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "0") + threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "0") + # TRN2 + threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "1") + threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "1") + # UABA + threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, + abaCode, True) + threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, + abaCode, True) + # UABAL, UABAL2 + threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True) + threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True, hi=True) + # UABD + threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, + abdCode) + threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, + abdCode) + # UABDL, UABDL2 + threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True) + threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True, hi=True) + # UADALP + twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, + 2, adalpCode, True) + twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, + 4, adalpCode, True) + # UADDL, UADDL2 + threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, + addlwCode, hi=True) + # UADDLP + twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, + 2, addlwCode) + twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes, + 4, addlwCode) + # UADDLV + twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", + ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", + ("uint8_t", "uint16_t"), 4, addAcrossLongCode, 
long=True) + twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # UADDW + threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, + addlwCode, hi=True) + # UCVTF (fixed-point) + ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True, scalar=True) + # UCVTF (integer) + ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode, scalar=True) + # UHADD + threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, + haddCode) + threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, + haddCode) + # UHSUB + threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, + hsubCode) + threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, + hsubCode) + # UMAX + threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode) + threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode) + # UMAXP + threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode, pairwise=True) + # UMAXV + 
twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxAcrossCode) + # UMIN + threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode) + threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode) + # UMINP + threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode, pairwise=True) + # UMINV + twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, + minAcrossCode) + # UMLAL (by element) + threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True) + threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) + # UMLAL (vector) + threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, + mlalCode, True) + threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, + mlalCode, True, hi=True) + # UMLSL (by element) + threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True) + threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) + # UMLSL (vector) + threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True) + threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True, hi=True) + # UMOV + insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # UMULL, UMULL2 (by element) + threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, + 
mullCode, byElem=True) + threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, + mullCode, byElem=True, hi=True) + # UMULL, UMULL2 (vector) + threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, + mullCode) + threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, + mullCode, hi=True) + # UQADD + uqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem < srcElem1 || destElem < srcElem2) { + destElem = (Element)(-1); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, + uqaddCode, scalar=True) + # UQRSHL + uqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + destElem += rBit; + } else { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, + 2, uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode, scalar=True) + # UQRSHRN + uqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if 
(imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, + uqrshrnCode, hasImm=True) + twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", + smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", + smallUnsignedTypes, uqrshrnCode, hasImm=True, + scalar=True) + # UQSHL (immediate) + uqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True, scalar=True) + # UQSHL (register) + uqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + 
} else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, + uqshlCode, scalar=True) + # UQSHRN, UQSHRN2 + uqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True) + twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, scalar=True) + # UQSUB + uqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem > srcElem1) { + destElem = 0; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, + uqsubCode, scalar=True) + # UQXTN + uqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = 
srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode) + twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, hi=True) + twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, scalar=True) + # URECPE + urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" + twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, + urecpeCode) + twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, + urecpeCode) + # URHADD + threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes, + 2, rhaddCode) + threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, + 4, rhaddCode) + # URSHL + threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, + rshlCode) + threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, + rshlCode) + # URSHR + twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, + rshrCode, hasImm=True) + # URSQRTE + ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" + twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, + ursqrteCode) + twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, + ursqrteCode) + # URSRA + twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, + rsraCode, True, hasImm=True) + # USHL + threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, + shlCode) + threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, + shlCode) + # USHLL, USHLL2 + twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, + shllCode, 
hasImm=True) + twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, + shllCode, hi=True, hasImm=True) + # USHR + twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, + shrCode, hasImm=True) + twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, + shrCode, hasImm=True) + # USQADD + usqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { + if (tmp < srcElem1 || tmp < destElem) { + destElem = (Element)(-1); + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absSrcElem1 = (~srcElem1) + 1; + if (absSrcElem1 > destElem) { + destElem = 0; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True, scalar=True) + # USRA + twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, + sraCode, True, hasImm=True) + twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, + sraCode, True, hasImm=True) + # USUBL + threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # USUBW + threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # UXTL -> alias to USHLL + # UZP1 + uzpCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[i] = srcReg1.elements[2 * i + part]; + destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % 
"0") + threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "0") + # UZP2 + threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % "1") + threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "1") + # XTN, XTN2 + xtnCode = "destElem = srcElem1;" + twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode) + twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, + xtnCode, hi=True) + # ZIP1 + zipCode = ''' + unsigned base = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[base + i]; + destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; + } + ''' + threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "0") + threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "0") + # ZIP2 + threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "eCount / 2") + threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "eCount / 2") + +}}; diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa new file mode 100644 index 000000000..32a37f87e --- /dev/null +++ b/src/arch/arm/isa/insts/neon64_mem.isa @@ -0,0 +1,471 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + + header_output = '' + decoder_output = '' + exec_output = '' + + def mkMemAccMicroOp(name): + global header_output, decoder_output, exec_output + SPAlignmentCheckCodeNeon = ''' + if (baseIsSP && bits(XURa, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' + eaCode = SPAlignmentCheckCodeNeon + ''' + EA = XURa + imm; + ''' + memDecl = ''' + const int MaxNumBytes = 16; + union MemUnion { + uint8_t bytes[MaxNumBytes]; + uint32_t floatRegBits[MaxNumBytes / 4]; + }; + ''' + + # Do endian conversion for all the elements + convCode = ''' + VReg x = {0, 0}; + + x.lo = (((XReg) memUnion.floatRegBits[1]) << 32) | + (XReg) memUnion.floatRegBits[0]; + x.hi = (((XReg) memUnion.floatRegBits[3]) << 32) | + (XReg) memUnion.floatRegBits[2]; + + const unsigned eCount = 16 / (1 << eSize); + + if (isBigEndian64(xc->tcBase())) { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtobe( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtobe( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtobe( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } else { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtole( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtole( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtole( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } + + memUnion.floatRegBits[0] = (uint32_t) x.lo; + memUnion.floatRegBits[1] = (uint32_t) (x.lo >> 32); + 
memUnion.floatRegBits[2] = (uint32_t) x.hi; + memUnion.floatRegBits[3] = (uint32_t) (x.hi >> 32); + ''' + + # Offload everything into registers + regSetCode = '' + for reg in range(4): + regSetCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d]); + ''' % { 'reg' : reg } + + # Pull everything in from registers + regGetCode = '' + for reg in range(4): + regGetCode += ''' + memUnion.floatRegBits[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { 'reg' : reg } + + loadMemAccCode = convCode + regSetCode + storeMemAccCode = regGetCode + convCode + + loadIop = InstObjParams(name + 'ld', + 'MicroNeonLoad64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : loadMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsLoad' ]) + storeIop = InstObjParams(name + 'st', + 'MicroNeonStore64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : storeMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsStore' ]) + + exec_output += NeonLoadExecute64.subst(loadIop) + \ + NeonLoadInitiateAcc64.subst(loadIop) + \ + NeonLoadCompleteAcc64.subst(loadIop) + \ + NeonStoreExecute64.subst(storeIop) + \ + NeonStoreInitiateAcc64.subst(storeIop) + \ + NeonStoreCompleteAcc64.subst(storeIop) + header_output += MicroNeonMemDeclare64.subst(loadIop) + \ + MicroNeonMemDeclare64.subst(storeIop) + + def mkMarshalMicroOp(name, Name): + global header_output, decoder_output, exec_output + + getInputCodeOp1L = '' + for v in range(4): + for p in range(4): + getInputCodeOp1L += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)d_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + getInputCodeOp1S = '' + for v in range(4): + for p in range(4): + getInputCodeOp1S += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)dS_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + if name == 'deint_neon_uop': + + eCode = ''' + VReg input[4]; // input data from scratch area 
+ VReg output[2]; // output data to arch. SIMD regs + VReg temp; + temp.lo = 0; + temp.hi = 0; + ''' + for p in range(4): + eCode += ''' + writeVecElem(&temp, (XReg) AA64FpDestP%(p)dV1L_uw, %(p)d, 0x2); + ''' % { 'p' : p } + eCode += getInputCodeOp1L + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + output[0].lo = 0; + output[0].hi = 0; + output[1].lo = 0; + output[1].hi = 0; + + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; // element size in bytes + int numBytes = step * dataSize / 4; + int totNumBytes = numRegs * dataSize / 8; + + int structElemNo, pos, a, b; + XReg data; + + for (int r = 0; r < 2; ++r) { + for (int i = 0; i < eCount; ++i) { + if (numBytes < totNumBytes) { + structElemNo = r + (step * 2); + if (numStructElems == 1) { + pos = (eSizeBytes * i) + + (eCount * structElemNo * eSizeBytes); + } else { + pos = (numStructElems * eSizeBytes * i) + + (structElemNo * eSizeBytes); + } + a = pos / 16; + b = (pos % 16) / eSizeBytes; + data = (XReg) readVecElem(input[a], (XReg) b, + eSize); + writeVecElem(&output[r], data, i, eSize); + numBytes += eSizeBytes; + } + } + } + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV0L_uw = (uint32_t) readVecElem(output[0], + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + if ((numRegs % 2 == 0) || (numRegs == 3 && step == 0)) { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem( + output[1], %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } else { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem(temp, + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } + ''' + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'int_neon_uop': 
+ + eCode = ''' + VReg input[4]; // input data from arch. SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numRegs * dataSize / 8; + int numOutputElems = 128 / (8 << eSize); + int stepOffset = step * 32; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + int r = 0, k = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < 32 + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + if (numStructElems == 1) { + i = (pos / eSizeBytes) % eCount; + j = pos / (eCount * eSizeBytes); + } else { + i = pos / (numStructElems * eSizeBytes); + j = (pos % (numStructElems * eSizeBytes)) / + eSizeBytes; + } + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + writeVecElem(&output[r], data, k, eSize); + k++; + if (k == numOutputElems){ + k = 0; + ++r; + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'unpack_neon_uop': + + eCode = ''' + VReg input[4]; //input data from scratch area + VReg output[2]; //output data to arch. SIMD regs + ''' + + eCode += getInputCodeOp1L + + # Fill output regs with register data initially. 
Note that + # elements in output register outside indexed lanes are left + # untouched + for v in range(2): + for p in range(4): + eCode += ''' + writeVecElem(&output[%(v)d], (XReg) AA64FpDestP%(p)dV%(v)dL_uw, + %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eCount128 = 128 / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + int numInputElems = eCount128; + int stepOffset = step * 2 * eSizeBytes; + int stepLimit = 2 * eSizeBytes; + + int r = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / eSizeBytes; + j = r / numInputElems; + i = r % numInputElems; + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + + if (replicate) { + for (int i = 0; i < eCount128; ++i) { + if (i < eCount) { + writeVecElem(&output[r % 2], data, i, + eSize); + } else { // zero extend if necessary + writeVecElem(&output[r % 2], (XReg) 0, i, + eSize); + } + } + } else { + writeVecElem(&output[r % 2], data, lane, eSize); + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)dL_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'pack_neon_uop': + + eCode = ''' + VReg input[4]; // input data from arch. 
SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + eCode += ''' + int eSizeBytes = 1 << eSize; + int numOutputElems = 128 / (8 << eSize); + int totNumBytes = numStructElems * eSizeBytes; + int stepOffset = step * 32; + int stepLimit = 32; + + int r = 0, i, j; + XReg data; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / 16; + j = pos / eSizeBytes; + i = (pos / eSizeBytes) % numOutputElems; + data = (XReg) readVecElem(input[j], lane, eSize); + writeVecElem(&output[r % 2], data, i, eSize); + } + } + ''' + + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + # Generate instructions + mkMemAccMicroOp('mem_neon_uop') + mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64') + mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64') + mkMarshalMicroOp('unpack_neon_uop', 'MicroUnpackNeon64') + mkMarshalMicroOp('pack_neon_uop', 'MicroPackNeon64') + +}}; + +let {{ + + iop = InstObjParams('vldmult64', 'VldMult64', 'VldMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vstmult64', 'VstMult64', 'VstMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vldsingle64', 'VldSingle64', 'VldSingleOp64', '', []) + header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + + iop = InstObjParams('vstsingle64', 'VstSingle64', 'VstSingleOp64', '', []) + 
header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + +}}; diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index 80846053b..3f595692a 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -77,7 +78,9 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, self.instFlags, base, wbDecl) + self.memFlags, self.instFlags, + base, wbDecl, None, False, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -171,7 +174,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa new file mode 100644 index 000000000..c15dca16e --- /dev/null +++ b/src/arch/arm/isa/insts/str64.isa @@ -0,0 +1,372 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+// Authors: Gabe Black
+
+let {{
+    # math.log() is used below to encode access sizes. Import it locally so
+    # that this block is self-contained.
+    import math
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    # Common base of the AArch64 store generators.
+    #
+    # The constructor derives the memory request flags (memFlags) and the
+    # instruction flags (instFlags) from the access size and from the
+    # user / flavor / top options. Subclasses provide the addressing mode
+    # (offset, wbDecl, base, post, writeback) and an emit() method.
+    class StoreInst64(LoadStoreInst):
+        execBase = 'Store64'
+        micro = False
+
+        # mnem:   mnemonic passed to the templates
+        # Name:   name of the generated class
+        # size:   access size in bytes
+        # user:   if True, request a user-mode access (TLB::UserMode)
+        # flavor: one of "normal", "fp", "release", "relex", "exclusive",
+        #         "relexp", "exp"
+        # top:    for 16-byte accesses, selects the upper 8-byte half
+        def __init__(self, mnem, Name, size=4, user=False, flavor="normal",
+                     top = False):
+            super(StoreInst64, self).__init__()
+
+            self.name = mnem
+            self.Name = Name
+            self.size = size
+            self.user = user
+            self.flavor = flavor
+            self.top = top
+
+            self.memFlags = ["ArmISA::TLB::MustBeOne"]
+            self.instFlags = []
+            self.codeBlobs = { "postacc_code" : "" }
+
+            # Add memory request flags where necessary
+            if self.user:
+                self.memFlags.append("ArmISA::TLB::UserMode")
+
+            if self.flavor in ("relexp", "exp"):
+                # For exclusive pair ops alignment check is based on total size
+                self.memFlags.append("%d" % int(math.log(self.size, 2) + 1))
+            elif not (self.size == 16 and self.top):
+                # Only the first microop should perform alignment checking.
+                self.memFlags.append("%d" % int(math.log(self.size, 2)))
+
+            # Only the release and exclusive flavors are denied unaligned
+            # accesses.
+            if self.flavor not in ("release", "relex", "exclusive",
+                                   "relexp", "exp"):
+                self.memFlags.append("ArmISA::TLB::AllowUnaligned")
+
+            if self.micro:
+                self.instFlags.append("IsMicroop")
+
+            # Release flavors are flagged as memory, write and read barriers.
+            if self.flavor in ("release", "relex", "relexp"):
+                self.instFlags.extend(["IsMemBarrier",
+                                       "IsWriteBarrier",
+                                       "IsReadBarrier"])
+            # Exclusive flavors are store-conditionals issuing LLSC requests.
+            # This is the single place where these two flags are added.
+            if self.flavor in ("relex", "exclusive", "exp", "relexp"):
+                self.instFlags.append("IsStoreConditional")
+                self.memFlags.append("Request::LLSC")
+
+        # Fill the templates for this instruction and append the results to
+        # the global header / decoder / exec outputs.
+        def emitHelper(self, base = 'Memory64', wbDecl = None):
+            global header_output, decoder_output, exec_output
+
+            # If this is a microop itself, don't allow anything that would
+            # require further microcoding.
+            if self.micro:
+                assert not wbDecl
+
+            # Fault annotation code is only generated for non-microop
+            # "normal" and "release" stores: SAS is 0/1/2/3 for sizes
+            # 1/2/4/other, SF is true for 8-byte accesses and AR is true for
+            # the release flavor.
+            fa_code = None
+            if not self.micro and self.flavor in ("normal", "release"):
+                fa_code = '''
+                    fault->annotate(ArmFault::SAS, %s);
+                    fault->annotate(ArmFault::SSE, false);
+                    fault->annotate(ArmFault::SRT, dest);
+                    fault->annotate(ArmFault::SF, %s);
+                    fault->annotate(ArmFault::AR, %s);
+                ''' % ("0" if self.size == 1 else
+                       "1" if self.size == 2 else
+                       "2" if self.size == 4 else "3",
+                       "true" if self.size == 8 else "false",
+                       "true" if self.flavor == "release" else "false")
+
+            (newHeader, newDecoder, newExec) = \
+                self.fillTemplates(self.name, self.Name, self.codeBlobs,
+                                   self.memFlags, self.instFlags,
+                                   base, wbDecl, faCode=fa_code)
+
+            header_output += newHeader
+            decoder_output += newDecoder
+            exec_output += newExec
+
+        # Build the effective address computation and store it in
+        # codeBlobs["ea_code"].
+        def buildEACode(self):
+            # Address computation
+            eaCode = ""
+            if self.flavor == "fp":
+                eaCode += vfp64EnabledCheckCode
+
+            eaCode += SPAlignmentCheckCode + "EA = XBase"
+            # A 16-byte access is split in two 8-byte halves; which half
+            # lives at offset 0 and which at offset 8 depends on endianness.
+            if self.size == 16:
+                if self.top:
+                    eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)"
+                else:
+                    eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)"
+            # Post-indexed forms access the unmodified base address.
+            if not self.post:
+                eaCode += self.offset
+            eaCode += ";"
+
+            self.codeBlobs["ea_code"] = eaCode
+
+
+    # Immediate offset addressing; writeback adds imm to the base register.
+    class StoreImmInst64(StoreInst64):
+        def __init__(self, *args, **kargs):
+            super(StoreImmInst64, self).__init__(*args, **kargs)
+            self.offset = "+ imm"
+
+            self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);"
+
+    # (Extended) register offset addressing.
+    class StoreRegInst64(StoreInst64):
+        def __init__(self, *args, **kargs):
+            super(StoreRegInst64, self).__init__(*args, **kargs)
+            self.offset = "+ extendReg64(XOffset, type, shiftAmt, 64)"
+
+            self.wbDecl = \
+                "MicroAddXERegUop(machInst, base, base, " + \
+                " offset, type, shiftAmt);"
+
+    # Base register only, no offset.
+    class StoreRawRegInst64(StoreInst64):
+        def __init__(self, *args, **kargs):
+            super(StoreRawRegInst64, self).__init__(*args, **kargs)
+            self.offset = ""
+
+    # Stores of a single register (or of one 8-byte half of a 16-byte FP
+    # register).
+    class StoreSingle64(StoreInst64):
+        def emit(self):
+            self.buildEACode()
+
+            # Code that actually handles the access
+            if self.flavor == "fp":
+                if self.size in (1, 2, 4):
+                    accCode = '''
+                        Mem%(suffix)s =
+                            cSwap(AA64FpDestP0%(suffix)s, isBigEndian64(xc->tcBase()));
+                    '''
+                elif self.size == 8 or (self.size == 16 and not self.top):
+                    accCode = '''
+                        uint64_t data = AA64FpDestP1_uw;
+                        data = (data << 32) | AA64FpDestP0_uw;
+                        Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase()));
+                    '''
+                elif self.size == 16 and self.top:
+                    accCode = '''
+                        uint64_t data = AA64FpDestP3_uw;
+                        data = (data << 32) | AA64FpDestP2_uw;
+                        Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase()));
+                    '''
+            else:
+                accCode = \
+        'Mem%(suffix)s = cSwap(XDest%(suffix)s, isBigEndian64(xc->tcBase()));'
+            # Each half of a 16-byte access is performed as an 8-byte store.
+            if self.size == 16:
+                accCode = accCode % \
+                    { "suffix" : buildMemSuffix(False, 8) }
+            else:
+                accCode = accCode % \
+                    { "suffix" : buildMemSuffix(False, self.size) }
+
+            self.codeBlobs["memacc_code"] = accCode
+
+            # IsStoreConditional / Request::LLSC for the exclusive flavors
+            # are already set by StoreInst64.__init__, so they are not
+            # appended a second time here.
+
+            # Push it out to the output files
+            wbDecl = None
+            if self.writeback and not self.micro:
+                wbDecl = self.wbDecl
+            self.emitHelper(self.base, wbDecl)
+
+    # Stores of a register pair.
+    class StoreDouble64(StoreInst64):
+        def emit(self):
+            self.buildEACode()
+
+            # Code that actually handles the access
+            if self.flavor == "fp":
+                # Second register in the upper 32 bits, first in the lower.
+                accCode = '''
+                    uint64_t data = AA64FpDest2P0_uw;
+                    data = (data << 32) | AA64FpDestP0_uw;
+                    Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
+                '''
+            else:
+                if self.size == 4:
+                    accCode = '''
+                        uint64_t data = XDest2_uw;
+                        data = (data << 32) | XDest_uw;
+                        Mem_ud = cSwap(data, isBigEndian64(xc->tcBase()));
+                    '''
+                elif self.size == 8:
+                    accCode = '''
+                        // This temporary needs to be here so that the parser
+                        // will correctly identify this instruction as a store.
+                        Twin64_t temp;
+                        temp.a = XDest_ud;
+                        temp.b = XDest2_ud;
+                        Mem_tud = temp;
+                    '''
+            self.codeBlobs["memacc_code"] = accCode
+
+            # Push it out to the output files
+            wbDecl = None
+            if self.writeback and not self.micro:
+                wbDecl = self.wbDecl
+            self.emitHelper(self.base, wbDecl)
+
+    # Concrete addressing-mode / access-kind combinations.
+    class StoreImm64(StoreImmInst64, StoreSingle64):
+        decConstBase = 'LoadStoreImm64'
+        base = 'ArmISA::MemoryImm64'
+        writeback = False
+        post = False
+
+    class StorePre64(StoreImmInst64, StoreSingle64):
+        decConstBase = 'LoadStoreImm64'
+        base = 'ArmISA::MemoryPreIndex64'
+        writeback = True
+        post = False
+
+    class StorePost64(StoreImmInst64, StoreSingle64):
+        decConstBase = 'LoadStoreImm64'
+        base = 'ArmISA::MemoryPostIndex64'
+        writeback = True
+        post = True
+
+    class StoreReg64(StoreRegInst64, StoreSingle64):
+        decConstBase = 'LoadStoreReg64'
+        base = 'ArmISA::MemoryReg64'
+        writeback = False
+        post = False
+
+    class StoreRaw64(StoreRawRegInst64, StoreSingle64):
+        decConstBase = 'LoadStoreRaw64'
+        base = 'ArmISA::MemoryRaw64'
+        writeback = False
+        post = False
+
+    # Exclusive store of a single register; XResult receives the negated
+    # writeResult after the access.
+    class StoreEx64(StoreRawRegInst64, StoreSingle64):
+        decConstBase = 'LoadStoreEx64'
+        base = 'ArmISA::MemoryEx64'
+        writeback = False
+        post = False
+        execBase = 'StoreEx64'
+        def __init__(self, *args, **kargs):
+            super(StoreEx64, self).__init__(*args, **kargs)
+            self.codeBlobs["postacc_code"] = "XResult = !writeResult;"
+
+    # Emit the immediate, pre-indexed, post-indexed and register-offset
+    # variants of one store.
+    def buildStores64(mnem, NameBase, size, flavor="normal"):
+        StoreImm64(mnem, NameBase + "_IMM", size, flavor=flavor).emit()
+        StorePre64(mnem, NameBase + "_PRE", size, flavor=flavor).emit()
+        StorePost64(mnem, NameBase + "_POST", size, flavor=flavor).emit()
+        StoreReg64(mnem, NameBase + "_REG", size, flavor=flavor).emit()
+
+    buildStores64("strb", "STRB64", 1)
+    buildStores64("strh", "STRH64", 2)
+    buildStores64("str", "STRW64", 4)
+    buildStores64("str", "STRX64", 8)
+    buildStores64("str", "STRBFP64", 1, flavor="fp")
+    buildStores64("str", "STRHFP64", 2, flavor="fp")
+    buildStores64("str", "STRSFP64", 4, flavor="fp")
+    buildStores64("str", "STRDFP64", 8, flavor="fp")
+
+    StoreImm64("sturb", "STURB64_IMM", 1).emit()
+    StoreImm64("sturh", "STURH64_IMM", 2).emit()
+    StoreImm64("stur", "STURW64_IMM", 4).emit()
+    StoreImm64("stur", "STURX64_IMM", 8).emit()
+    StoreImm64("stur", "STURBFP64_IMM", 1, flavor="fp").emit()
+    StoreImm64("stur", "STURHFP64_IMM", 2, flavor="fp").emit()
+    StoreImm64("stur", "STURSFP64_IMM", 4, flavor="fp").emit()
+    StoreImm64("stur", "STURDFP64_IMM", 8, flavor="fp").emit()
+
+    StoreImm64("sttrb", "STTRB64_IMM", 1, user=True).emit()
+    StoreImm64("sttrh", "STTRH64_IMM", 2, user=True).emit()
+    StoreImm64("sttr", "STTRW64_IMM", 4, user=True).emit()
+    StoreImm64("sttr", "STTRX64_IMM", 8, user=True).emit()
+
+    StoreRaw64("stlr", "STLRX64", 8, flavor="release").emit()
+    StoreRaw64("stlr", "STLRW64", 4, flavor="release").emit()
+    StoreRaw64("stlrh", "STLRH64", 2, flavor="release").emit()
+    StoreRaw64("stlrb", "STLRB64", 1, flavor="release").emit()
+
+    StoreEx64("stlxr", "STLXRX64", 8, flavor="relex").emit()
+    StoreEx64("stlxr", "STLXRW64", 4, flavor="relex").emit()
+    StoreEx64("stlxrh", "STLXRH64", 2, flavor="relex").emit()
+    StoreEx64("stlxrb", "STLXRB64", 1, flavor="relex").emit()
+
+    StoreEx64("stxr", "STXRX64", 8, flavor="exclusive").emit()
+    StoreEx64("stxr", "STXRW64", 4, flavor="exclusive").emit()
+    StoreEx64("stxrh", "STXRH64", 2, flavor="exclusive").emit()
+    StoreEx64("stxrb", "STXRB64", 1, flavor="exclusive").emit()
+
+    # Microop variants and register-pair stores.
+    class StoreImmU64(StoreImm64):
+        decConstBase = 'LoadStoreImmU64'
+        micro = True
+
+    class StoreImmDU64(StoreImmInst64, StoreDouble64):
+        decConstBase = 'LoadStoreImmDU64'
+        base = 'ArmISA::MemoryDImm64'
+        micro = True
+        post = False
+        writeback = False
+
+    # Exclusive store of a register pair; XResult receives the negated
+    # writeResult after the access.
+    class StoreImmDEx64(StoreImmInst64, StoreDouble64):
+        execBase = 'StoreEx64'
+        decConstBase = 'StoreImmDEx64'
+        base = 'ArmISA::MemoryDImmEx64'
+        micro = False
+        post = False
+        writeback = False
+        def __init__(self, *args, **kargs):
+            super(StoreImmDEx64, self).__init__(*args, **kargs)
+            self.codeBlobs["postacc_code"] = "XResult = !writeResult;"
+
+    class StoreRegU64(StoreReg64):
+        decConstBase = 'LoadStoreRegU64'
+        micro = True
+
+    StoreImmDEx64("stlxp", "STLXPW64", 4, flavor="relexp").emit()
+    StoreImmDEx64("stlxp", "STLXPX64", 8, flavor="relexp").emit()
+    StoreImmDEx64("stxp", "STXPW64", 4, flavor="exp").emit()
+    StoreImmDEx64("stxp", "STXPX64", 8, flavor="exp").emit()
+
+    StoreImmU64("strxi_uop", "MicroStrXImmUop", 8).emit()
+    StoreRegU64("strxr_uop", "MicroStrXRegUop", 8).emit()
+    StoreImmU64("strfpxi_uop", "MicroStrFpXImmUop", 8, flavor="fp").emit()
+    StoreRegU64("strfpxr_uop", "MicroStrFpXRegUop", 8, flavor="fp").emit()
+    StoreImmU64("strqbfpxi_uop", "MicroStrQBFpXImmUop",
+                16, flavor="fp", top=False).emit()
+    StoreRegU64("strqbfpxr_uop", "MicroStrQBFpXRegUop",
+                16, flavor="fp", top=False).emit()
+    StoreImmU64("strqtfpxi_uop", "MicroStrQTFpXImmUop",
+                16, flavor="fp", top=True).emit()
+    StoreRegU64("strqtfpxr_uop", "MicroStrQTFpXRegUop",
+                16, flavor="fp", top=True).emit()
+    StoreImmDU64("strdxi_uop", "MicroStrDXImmUop", 4).emit()
+    StoreImmDU64("strdfpxi_uop", "MicroStrDFpXImmUop", 4, flavor="fp").emit()
+
+}};
diff --git a/src/arch/arm/isa/insts/swap.isa b/src/arch/arm/isa/insts/swap.isa
index b42a1c4b2..f2ceed28e
100644 --- a/src/arch/arm/isa/insts/swap.isa +++ b/src/arch/arm/isa/insts/swap.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,10 +73,7 @@ let {{ swpPreAccCode = ''' if (!((SCTLR)Sctlr).sw) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } ''' diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 64deef044..7a1213377 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -1,5 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -80,6 +80,31 @@ let {{ xc->%(func)s(this, %(op_idx)s, %(final_val)s); } ''' + aarch64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(intWidth)) + ''' + aarch64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(intWidth)) + ''' + aarchX64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(aarch64 ? 64 : 32)) + ''' + aarchX64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(aarch64 ? 64 : 32)) + ''' + aarchW64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(32)) + ''' + aarchW64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(32)) + ''' + cntrlNsBankedWrite = ''' + xc->setMiscReg(flattenMiscRegNsBanked(dest, xc->tcBase()), %(final_val)s) + ''' + + cntrlNsBankedRead = ''' + xc->readMiscReg(flattenMiscRegNsBanked(op1, xc->tcBase())) + ''' #PCState operands need to have a sorting index (the number at the end) #less than all the integer registers which might update the PC. 
That way @@ -99,6 +124,18 @@ let {{ return ('IntReg', 'uw', idx, 'IsInteger', srtNormal, maybePCRead, maybePCWrite) + def intReg64(idx): + return ('IntReg', 'ud', idx, 'IsInteger', srtNormal, + aarch64Read, aarch64Write) + + def intRegX64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchX64Read, aarchX64Write) + + def intRegW64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchW64Read, aarchW64Write) + def intRegNPC(idx): return ('IntReg', 'uw', idx, 'IsInteger', srtNormal) @@ -120,26 +157,49 @@ let {{ def cntrlReg(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) + def cntrlNsBankedReg(idx, id = srtNormal, type = 'uw'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + + def cntrlNsBankedReg64(idx, id = srtNormal, type = 'ud'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + def cntrlRegNC(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) def pcStateReg(idx, id): - return ('PCState', 'uw', idx, (None, None, 'IsControl'), id) + return ('PCState', 'ud', idx, (None, None, 'IsControl'), id) }}; def operands {{ #Abstracted integer reg operands 'Dest': intReg('dest'), + 'Dest64': intReg64('dest'), + 'XDest': intRegX64('dest'), + 'WDest': intRegW64('dest'), 'IWDest': intRegIWPC('dest'), 'AIWDest': intRegAIWPC('dest'), 'Dest2': intReg('dest2'), + 'XDest2': intRegX64('dest2'), + 'FDest2': floatReg('dest2'), 'Result': intReg('result'), + 'XResult': intRegX64('result'), + 'XBase': intRegX64('base', id = srtBase), 'Base': intRegAPC('base', id = srtBase), + 'XOffset': intRegX64('offset'), 'Index': intReg('index'), 'Shift': intReg('shift'), 'Op1': intReg('op1'), 'Op2': intReg('op2'), 'Op3': intReg('op3'), + 'Op164': intReg64('op1'), + 'Op264': intReg64('op2'), + 'Op364': intReg64('op3'), + 'XOp1': intRegX64('op1'), + 'XOp2': intRegX64('op2'), + 'XOp3': 
intRegX64('op3'), + 'WOp1': intRegW64('op1'), + 'WOp2': intRegW64('op2'), + 'WOp3': intRegW64('op3'), 'Reg0': intReg('reg0'), 'Reg1': intReg('reg1'), 'Reg2': intReg('reg2'), @@ -147,13 +207,19 @@ def operands {{ #Fixed index integer reg operands 'SpMode': intRegNPC('intRegInMode((OperatingMode)regMode, INTREG_SP)'), + 'DecodedBankedIntReg': intRegNPC('decodeMrsMsrBankedIntRegIndex(byteMask, r)'), 'LR': intRegNPC('INTREG_LR'), + 'XLR': intRegX64('INTREG_X30'), 'R7': intRegNPC('7'), # First four arguments are passed in registers 'R0': intRegNPC('0'), 'R1': intRegNPC('1'), 'R2': intRegNPC('2'), 'R3': intRegNPC('3'), + 'X0': intRegX64('0'), + 'X1': intRegX64('1'), + 'X2': intRegX64('2'), + 'X3': intRegX64('3'), #Pseudo integer condition code registers 'CondCodesNZ': intRegCC('INTREG_CONDCODES_NZ'), @@ -230,9 +296,95 @@ def operands {{ 'FpOp2P2': floatReg('(op2 + 2)'), 'FpOp2P3': floatReg('(op2 + 3)'), + # Create AArch64 unpacked view of the FP registers + 'AA64FpOp1P0': floatReg('((op1 * 4) + 0)'), + 'AA64FpOp1P1': floatReg('((op1 * 4) + 1)'), + 'AA64FpOp1P2': floatReg('((op1 * 4) + 2)'), + 'AA64FpOp1P3': floatReg('((op1 * 4) + 3)'), + 'AA64FpOp2P0': floatReg('((op2 * 4) + 0)'), + 'AA64FpOp2P1': floatReg('((op2 * 4) + 1)'), + 'AA64FpOp2P2': floatReg('((op2 * 4) + 2)'), + 'AA64FpOp2P3': floatReg('((op2 * 4) + 3)'), + 'AA64FpOp3P0': floatReg('((op3 * 4) + 0)'), + 'AA64FpOp3P1': floatReg('((op3 * 4) + 1)'), + 'AA64FpOp3P2': floatReg('((op3 * 4) + 2)'), + 'AA64FpOp3P3': floatReg('((op3 * 4) + 3)'), + 'AA64FpDestP0': floatReg('((dest * 4) + 0)'), + 'AA64FpDestP1': floatReg('((dest * 4) + 1)'), + 'AA64FpDestP2': floatReg('((dest * 4) + 2)'), + 'AA64FpDestP3': floatReg('((dest * 4) + 3)'), + 'AA64FpDest2P0': floatReg('((dest2 * 4) + 0)'), + 'AA64FpDest2P1': floatReg('((dest2 * 4) + 1)'), + 'AA64FpDest2P2': floatReg('((dest2 * 4) + 2)'), + 'AA64FpDest2P3': floatReg('((dest2 * 4) + 3)'), + + 'AA64FpOp1P0V0': floatReg('((((op1+0)) * 4) + 0)'), + 'AA64FpOp1P1V0': 
floatReg('((((op1+0)) * 4) + 1)'), + 'AA64FpOp1P2V0': floatReg('((((op1+0)) * 4) + 2)'), + 'AA64FpOp1P3V0': floatReg('((((op1+0)) * 4) + 3)'), + + 'AA64FpOp1P0V1': floatReg('((((op1+1)) * 4) + 0)'), + 'AA64FpOp1P1V1': floatReg('((((op1+1)) * 4) + 1)'), + 'AA64FpOp1P2V1': floatReg('((((op1+1)) * 4) + 2)'), + 'AA64FpOp1P3V1': floatReg('((((op1+1)) * 4) + 3)'), + + 'AA64FpOp1P0V2': floatReg('((((op1+2)) * 4) + 0)'), + 'AA64FpOp1P1V2': floatReg('((((op1+2)) * 4) + 1)'), + 'AA64FpOp1P2V2': floatReg('((((op1+2)) * 4) + 2)'), + 'AA64FpOp1P3V2': floatReg('((((op1+2)) * 4) + 3)'), + + 'AA64FpOp1P0V3': floatReg('((((op1+3)) * 4) + 0)'), + 'AA64FpOp1P1V3': floatReg('((((op1+3)) * 4) + 1)'), + 'AA64FpOp1P2V3': floatReg('((((op1+3)) * 4) + 2)'), + 'AA64FpOp1P3V3': floatReg('((((op1+3)) * 4) + 3)'), + + 'AA64FpOp1P0V0S': floatReg('((((op1+0)%32) * 4) + 0)'), + 'AA64FpOp1P1V0S': floatReg('((((op1+0)%32) * 4) + 1)'), + 'AA64FpOp1P2V0S': floatReg('((((op1+0)%32) * 4) + 2)'), + 'AA64FpOp1P3V0S': floatReg('((((op1+0)%32) * 4) + 3)'), + + 'AA64FpOp1P0V1S': floatReg('((((op1+1)%32) * 4) + 0)'), + 'AA64FpOp1P1V1S': floatReg('((((op1+1)%32) * 4) + 1)'), + 'AA64FpOp1P2V1S': floatReg('((((op1+1)%32) * 4) + 2)'), + 'AA64FpOp1P3V1S': floatReg('((((op1+1)%32) * 4) + 3)'), + + 'AA64FpOp1P0V2S': floatReg('((((op1+2)%32) * 4) + 0)'), + 'AA64FpOp1P1V2S': floatReg('((((op1+2)%32) * 4) + 1)'), + 'AA64FpOp1P2V2S': floatReg('((((op1+2)%32) * 4) + 2)'), + 'AA64FpOp1P3V2S': floatReg('((((op1+2)%32) * 4) + 3)'), + + 'AA64FpOp1P0V3S': floatReg('((((op1+3)%32) * 4) + 0)'), + 'AA64FpOp1P1V3S': floatReg('((((op1+3)%32) * 4) + 1)'), + 'AA64FpOp1P2V3S': floatReg('((((op1+3)%32) * 4) + 2)'), + 'AA64FpOp1P3V3S': floatReg('((((op1+3)%32) * 4) + 3)'), + + 'AA64FpDestP0V0': floatReg('((((dest+0)) * 4) + 0)'), + 'AA64FpDestP1V0': floatReg('((((dest+0)) * 4) + 1)'), + 'AA64FpDestP2V0': floatReg('((((dest+0)) * 4) + 2)'), + 'AA64FpDestP3V0': floatReg('((((dest+0)) * 4) + 3)'), + + 'AA64FpDestP0V1': 
floatReg('((((dest+1)) * 4) + 0)'), + 'AA64FpDestP1V1': floatReg('((((dest+1)) * 4) + 1)'), + 'AA64FpDestP2V1': floatReg('((((dest+1)) * 4) + 2)'), + 'AA64FpDestP3V1': floatReg('((((dest+1)) * 4) + 3)'), + + 'AA64FpDestP0V0L': floatReg('((((dest+0)%32) * 4) + 0)'), + 'AA64FpDestP1V0L': floatReg('((((dest+0)%32) * 4) + 1)'), + 'AA64FpDestP2V0L': floatReg('((((dest+0)%32) * 4) + 2)'), + 'AA64FpDestP3V0L': floatReg('((((dest+0)%32) * 4) + 3)'), + + 'AA64FpDestP0V1L': floatReg('((((dest+1)%32) * 4) + 0)'), + 'AA64FpDestP1V1L': floatReg('((((dest+1)%32) * 4) + 1)'), + 'AA64FpDestP2V1L': floatReg('((((dest+1)%32) * 4) + 2)'), + 'AA64FpDestP3V1L': floatReg('((((dest+1)%32) * 4) + 3)'), + #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), 'MiscOp1': cntrlReg('op1'), + 'MiscNsBankedDest': cntrlNsBankedReg('dest'), + 'MiscNsBankedOp1': cntrlNsBankedReg('op1'), + 'MiscNsBankedDest64': cntrlNsBankedReg64('dest'), + 'MiscNsBankedOp164': cntrlNsBankedReg64('op1'), #Fixed index control regs 'Cpsr': cntrlReg('MISCREG_CPSR', srtCpsr), @@ -244,22 +396,41 @@ def operands {{ 'FpscrQc': cntrlRegNC('MISCREG_FPSCR_QC'), 'FpscrExc': cntrlRegNC('MISCREG_FPSCR_EXC'), 'Cpacr': cntrlReg('MISCREG_CPACR'), + 'Cpacr64': cntrlReg('MISCREG_CPACR_EL1'), 'Fpexc': cntrlRegNC('MISCREG_FPEXC'), + 'Nsacr': cntrlReg('MISCREG_NSACR'), + 'ElrHyp': cntrlRegNC('MISCREG_ELR_HYP'), + 'Hcr': cntrlReg('MISCREG_HCR'), + 'Hcr64': cntrlReg('MISCREG_HCR_EL2'), + 'Hdcr': cntrlReg('MISCREG_HDCR'), + 'Hcptr': cntrlReg('MISCREG_HCPTR'), + 'CptrEl264': cntrlReg('MISCREG_CPTR_EL2'), + 'CptrEl364': cntrlReg('MISCREG_CPTR_EL3'), + 'Hstr': cntrlReg('MISCREG_HSTR'), + 'Scr': cntrlReg('MISCREG_SCR'), + 'Scr64': cntrlReg('MISCREG_SCR_EL3'), 'Sctlr': cntrlRegNC('MISCREG_SCTLR'), 'SevMailbox': cntrlRegNC('MISCREG_SEV_MAILBOX'), 'LLSCLock': cntrlRegNC('MISCREG_LOCKFLAG'), + 'Dczid' : cntrlRegNC('MISCREG_DCZID_EL0'), #Register fields for microops 'URa' : intReg('ura'), + 'XURa' : intRegX64('ura'), + 'WURa' : 
intRegW64('ura'), 'IWRa' : intRegIWPC('ura'), 'Fa' : floatReg('ura'), + 'FaP1' : floatReg('ura + 1'), 'URb' : intReg('urb'), + 'XURb' : intRegX64('urb'), 'URc' : intReg('urc'), + 'XURc' : intRegX64('urc'), #Memory Operand 'Mem': ('Mem', 'uw', None, ('IsMemRef', 'IsLoad', 'IsStore'), srtNormal), #PCState fields + 'RawPC': pcStateReg('pc', srtPC), 'PC': pcStateReg('instPC', srtPC), 'NPC': pcStateReg('instNPC', srtPC), 'pNPC': pcStateReg('instNPC', srtEPC), diff --git a/src/arch/arm/isa/templates/basic.isa b/src/arch/arm/isa/templates/basic.isa index b3878b89a..de4506e05 100644 --- a/src/arch/arm/isa/templates/basic.isa +++ b/src/arch/arm/isa/templates/basic.isa @@ -1,5 +1,17 @@ // -*- mode:c++ -*- +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// // Copyright (c) 2007-2008 The Florida State University // All rights reserved. // @@ -60,6 +72,13 @@ def template BasicConstructor {{ } }}; +def template BasicConstructor64 {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(constructor)s; + } +}}; + // Basic instruction class execute method template. 
def template BasicExecute {{ diff --git a/src/arch/arm/isa/templates/branch64.isa b/src/arch/arm/isa/templates/branch64.isa new file mode 100644 index 000000000..84b3e6ae7 --- /dev/null +++ b/src/arch/arm/isa/templates/branch64.isa @@ -0,0 +1,141 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template BranchImm64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImm64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm) + { + %(constructor)s; + } +}}; + +def template BranchImmCond64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmCond64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm, _condCode) + { + %(constructor)s; + } +}}; + +def template BranchReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + 
public: + // Constructor + %(class_name)s(ExtMachInst machInst, + int64_t imm, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm1, _imm2, _op1) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/data64.isa b/src/arch/arm/isa/templates/data64.isa new file mode 100644 index 000000000..b6f7ce8d0 --- /dev/null +++ b/src/arch/arm/isa/templates/data64.isa @@ -0,0 +1,279 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +def template DataXImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataXImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } +}}; + +def template DataXSRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType); + %(BasicExecDeclare)s +}; +}}; + +def template DataXSRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + int32_t _shiftAmt, + ArmShiftType _shiftType) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _shiftAmt, _shiftType) + { + %(constructor)s; + } +}}; + +def template DataXERegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt); + %(BasicExecDeclare)s +}; +}}; + +def template DataXERegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ArmExtendType _extendType, + int32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _extendType, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template DataX1RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1); + 
%(BasicExecDeclare)s +}; +}}; + +def template DataX1RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _op1) + { + %(constructor)s; + } +}}; + +def template DataX2RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } +}}; + +def template DataX2RegImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + +def template DataX3RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _op3); + %(BasicExecDeclare)s +}; +}}; + +def template DataX3RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3) + { + %(constructor)s; + } +}}; + +def template DataXCondCompImmDeclare {{ +class %(class_name)s : public 
%(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint64_t _imm, ConditionCode _condCode, uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint64_t _imm, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _imm, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondCompRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _op2, ConditionCode _condCode, + uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _op2, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondSelDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondSelConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _condCode) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index 195204a95..465090660 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 
2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -338,6 +338,18 @@ def template MicroIntImmConstructor {{ } }}; +def template MicroIntImmXConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, + RegIndex _urb, + int32_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _imm) + { + %(constructor)s; + } +}}; + def template MicroIntRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -349,6 +361,28 @@ def template MicroIntRegDeclare {{ }; }}; +def template MicroIntXERegConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _urc, _type, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template MicroIntXERegDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt); + %(BasicExecDeclare)s + }; +}}; + def template MicroIntRegConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst machInst, RegIndex _ura, RegIndex _urb, RegIndex _urc, @@ -402,6 +436,96 @@ def template MacroMemConstructor {{ }}; +def template BigFpMemImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemImmConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemRegDeclare {{ +class %(class_name)s : public 
%(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemRegConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, + offset, type, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemLitDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemLitConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, dest, imm) +{ + %(constructor)s; +} +}}; + +def template PairMemDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, + AddrMode mode, IntRegIndex rn, IntRegIndex rt, + IntRegIndex rt2); + %(BasicExecPanic)s +}; +}}; + +def template PairMemConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) + : %(base_class)s(mnemonic, machInst, %(op_class)s, size, + fp, load, noAlloc, signExt, exclusive, acrel, + imm, mode, rn, rt, rt2) +{ + %(constructor)s; +} +}}; + def template VMemMultDeclare {{ class %(class_name)s : public %(base_class)s { diff --git 
a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 871378f3f..7682c277d 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -697,6 +697,11 @@ def template LoadStoreImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -763,6 +768,11 @@ def template StoreRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -808,6 +818,11 @@ def template LoadRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -828,6 +843,11 @@ def template LoadImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; diff --git a/src/arch/arm/isa/templates/mem64.isa b/src/arch/arm/isa/templates/mem64.isa new file mode 100644 index 000000000..87dcba988 --- /dev/null +++ b/src/arch/arm/isa/templates/mem64.isa @@ -0,0 +1,686 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +let {{ + SPAlignmentCheckCode = ''' + if (baseIsSP && bits(XBase, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' +}}; + +def template Load64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, + memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template StoreEx64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + uint64_t writeResult = 0; + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags, + &writeResult); + } + + if (fault == NoFault) { + %(postacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template 
StoreEx64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template Load64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemTiming(xc, traceData, EA, Mem, memAccessFlags); + } + + return fault; + } +}}; + +def template Load64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + // ARM instructions will not have a pkt if the predicate is false + getMem(pkt, Mem, traceData); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template StoreEx64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + uint64_t writeResult = pkt->req->getExtraData(); + %(postacc_code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template DCStore64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_base, _dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + +def template DCStore64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + return fault; + } +}}; + + +def template LoadStoreImm64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmDU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm = 0, bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template StoreImmDEx64Declare {{ + /** + * Static instruction class for "%(mnemonic)s". + */ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm = 0); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; +}}; + + +def template LoadStoreReg64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRegU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRaw64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreEx64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base, IntRegIndex _result); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLit64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLitU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImm64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, (IntRegIndex)_base, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreImmU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreImmDU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm, bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template StoreImmDEx64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _result, _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + + +def template 
LoadStoreReg64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _offset, + _type, _shiftAmt); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreRegU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreRaw64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _base) + { + %(constructor)s; + } +}}; + +def template LoadStoreEx64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _result) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _result) + { + %(constructor)s; + } +}}; + +def template LoadStoreLit64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + 
uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreLitU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; diff --git a/src/arch/arm/isa/templates/misc.isa b/src/arch/arm/isa/templates/misc.isa index 212897aa0..36db5b6c2 100644 --- a/src/arch/arm/isa/templates/misc.isa +++ b/src/arch/arm/isa/templates/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -62,6 +62,69 @@ def template MrsConstructor {{ } }}; +def template MrsBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + uint8_t byteMask; + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MrsBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint8_t _sysM, + bool _r) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest), + byteMask(_sysM), r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template MsrBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MsrBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint8_t _sysM, + bool _r) + : 
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1, _sysM), + r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template MsrRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -114,6 +177,66 @@ def template MsrImmConstructor {{ } }}; +def template MrrcOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _dest, IntRegIndex _dest2, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template MrrcOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex dest, + IntRegIndex dest2, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, dest, + dest2, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template McrrOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _dest, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template McrrOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex op2, + IntRegIndex dest, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, op2, + dest, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template ImmOpDeclare {{ class %(class_name)s : public %(base_class)s { @@ -310,6 +433,35 @@ def template RegRegImmOpConstructor {{ } }}; +def template RegImmImmOpDeclare {{ +class 
%(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegImmImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm1, _imm2) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template RegRegImmImmOpDeclare {{ class %(class_name)s : public %(base_class)s { diff --git a/src/arch/arm/isa/templates/misc64.isa b/src/arch/arm/isa/templates/misc64.isa new file mode 100644 index 000000000..09d3d4470 --- /dev/null +++ b/src/arch/arm/isa/templates/misc64.isa @@ -0,0 +1,91 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Gabe Black + +def template RegRegImmImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegImmImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } +}}; + +def template RegRegRegImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegRegImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa index 573d245b8..ffa6b53d4 100644 --- a/src/arch/arm/isa/templates/neon.isa +++ b/src/arch/arm/isa/templates/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,8 +39,26 @@ let {{ simdEnabledCheckCode = ''' - if (!neonEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + { + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq, Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc, true)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if 
(cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/neon64.isa b/src/arch/arm/isa/templates/neon64.isa new file mode 100644 index 000000000..d20e4e653 --- /dev/null +++ b/src/arch/arm/isa/templates/neon64.isa @@ -0,0 +1,527 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + simd64EnabledCheckCode = vfp64EnabledCheckCode +}}; + +def template NeonX2RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX2RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst 
machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1Reg2ImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOnlyOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonXExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + %(CPU_exec_context)s *, Trace::InstRecord *) const; +}}; + +def template NeonXEqualRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * 
sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonXUnequalRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + typedef typename bigger_type_t<Element>::type BigElement; + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + BigElement bigElements[eCount / 2]; + }; + + union BigRegVect { + FloatRegBits regs[2 * rCount]; + BigElement elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template MicroNeonMemDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + protected: + // True if the base register is SP (used for SP alignment checking) + bool baseIsSP; + // Access size in bytes + uint8_t accSize; + // Vector element size (0 -> 8-bit, 1 -> 16-bit, 2 -> 32-bit, + // 3 -> 64-bit) + uint8_t eSize; + + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _ura, + uint32_t _imm, unsigned extraMemFlags, bool _baseIsSP, + uint8_t _accSize, uint8_t _eSize) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _ura, _imm), + baseIsSP(_baseIsSP), accSize(_accSize), eSize(_eSize) + { + memAccessFlags |= extraMemFlags; + %(constructor)s; + } + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; +}}; + +def template NeonLoadExecute64 {{ + Fault 
%(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonLoadInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + } + + return fault; + } +}}; + +def template NeonLoadCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(mem_decl)s; + %(op_decl)s; + %(op_rd)s; + + MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>(); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreExecute64 {{ + Fault %(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(dataPtr, accSize, EA, memAccessFlags, + NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + 
%(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(memUnion.bytes, accSize, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template NeonStoreCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template VMemMultDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb); + %(BasicExecPanic)s + }; +}}; + +def template VMemSingleDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate = false); + %(BasicExecPanic)s + }; +}}; + +def template VMemMultConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, bool _wb) : + %(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _numRegs, _wb) + { + %(constructor)s; + } +}}; + +def template VMemSingleConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _index, bool _wb, bool _replicate) : + %(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _index, _wb, + _replicate) + { + %(constructor)s; + } +}}; + +def template MicroNeonMixDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: 
+ %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _numRegs, + uint8_t _step) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _numRegs, _step) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixLaneDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _lane, _step, _replicate) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixExecute64 {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t resTemp = 0; + resTemp = resTemp; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa index 148139225..2263cdff4 100644 --- a/src/arch/arm/isa/templates/templates.isa +++ b/src/arch/arm/isa/templates/templates.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,26 +40,37 @@ //Basic instruction templates ##include "basic.isa" +//Templates for AArch64 bit data instructions. 
+##include "data64.isa" + //Templates for predicated instructions ##include "pred.isa" //Templates for memory instructions ##include "mem.isa" +//Templates for AArch64 memory instructions +##include "mem64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" //Templates for microcoded memory instructions ##include "macromem.isa" //Templates for branches ##include "branch.isa" +##include "branch64.isa" //Templates for multiplies ##include "mult.isa" //Templates for VFP instructions ##include "vfp.isa" +##include "vfp64.isa" //Templates for Neon instructions ##include "neon.isa" + +##include "neon64.isa" diff --git a/src/arch/arm/isa/templates/vfp.isa b/src/arch/arm/isa/templates/vfp.isa index 90dd751ff..176b6604c 100644 --- a/src/arch/arm/isa/templates/vfp.isa +++ b/src/arch/arm/isa/templates/vfp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,32 +39,117 @@ let {{ vfpEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + ''' + + vfp64EnabledCheckCode = ''' + CPSR cpsrEnCheck = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsrEnCheck.el; + if (!vfpNeon64Enabled(Cpacr64, el)) + return new SupervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + + if (ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) { + HCPTR 
cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL2); + if (cptrEnCheck.tfp) + return new HypervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } + + if (ArmSystem::haveSecurity(xc->tcBase())) { + HCPTR cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL3); + if (cptrEnCheck.tfp) + return new SecureMonitorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } ''' vmsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (dest != (int)MISCREG_FPEXC && dest != (int)MISCREG_FPSID) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (dest != (int)MISCREG_FPSCR) return disabledFault(); - + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (op1 != (int)MISCREG_FPEXC && op1 != (int)MISCREG_FPSID && op1 != (int)MISCREG_MVFR0 && op1 != (int)MISCREG_MVFR1) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (op1 != (int)MISCREG_FPSCR) return disabledFault(); + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsApsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t 
seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/vfp64.isa b/src/arch/arm/isa/templates/vfp64.isa new file mode 100644 index 000000000..518cedaae --- /dev/null +++ b/src/arch/arm/isa/templates/vfp64.isa @@ -0,0 +1,140 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Thomas Grocutt + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm, VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegImmOpConstructor {{ + 
inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegRegOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop); + %(BasicExecDeclare)s +}; +}}; + +def template AA64FpRegRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; diff --git a/src/arch/arm/isa_traits.hh b/src/arch/arm/isa_traits.hh index 742ca2037..506c5009c 100644 --- a/src/arch/arm/isa_traits.hh +++ b/src/arch/arm/isa_traits.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -95,6 +95,9 @@ namespace ArmISA const Addr PAddrImplMask = (ULL(1) << PABits) - 1; + // Max. 
physical address range in bits supported by the architecture + const unsigned MaxPhysAddrRange = 48; + // return a no-op instruction... used for instruction fetch faults const ExtMachInst NoopMachInst = 0x01E320F000ULL; @@ -124,6 +127,8 @@ namespace ArmISA INT_IRQ, INT_FIQ, INT_SEV, // Special interrupt for recieving SEV's + INT_VIRT_IRQ, + INT_VIRT_FIQ, NumInterruptTypes }; } // namespace ArmISA diff --git a/src/arch/arm/linux/linux.cc b/src/arch/arm/linux/linux.cc index 1e3a1e725..62519d38b 100644 --- a/src/arch/arm/linux/linux.cc +++ b/src/arch/arm/linux/linux.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * Copyright (c) 2007-2008 The Florida State University * All rights reserved. 
@@ -34,55 +46,108 @@ #include "arch/arm/linux/linux.hh" // open(2) flags translation table -OpenFlagTransTable ArmLinux::openFlagTable[] = { +OpenFlagTransTable ArmLinux32::openFlagTable[] = { +#ifdef _MSC_VER + { ArmLinux32::TGT_O_RDONLY, _O_RDONLY }, + { ArmLinux32::TGT_O_WRONLY, _O_WRONLY }, + { ArmLinux32::TGT_O_RDWR, _O_RDWR }, + { ArmLinux32::TGT_O_APPEND, _O_APPEND }, + { ArmLinux32::TGT_O_CREAT, _O_CREAT }, + { ArmLinux32::TGT_O_TRUNC, _O_TRUNC }, + { ArmLinux32::TGT_O_EXCL, _O_EXCL }, +#ifdef _O_NONBLOCK + { ArmLinux32::TGT_O_NONBLOCK, _O_NONBLOCK }, +#endif +#ifdef _O_NOCTTY + { ArmLinux32::TGT_O_NOCTTY, _O_NOCTTY }, +#endif +#ifdef _O_SYNC + { ArmLinux32::TGT_O_SYNC, _O_SYNC }, +#endif +#else /* !_MSC_VER */ + { ArmLinux32::TGT_O_RDONLY, O_RDONLY }, + { ArmLinux32::TGT_O_WRONLY, O_WRONLY }, + { ArmLinux32::TGT_O_RDWR, O_RDWR }, + { ArmLinux32::TGT_O_CREAT, O_CREAT }, + { ArmLinux32::TGT_O_EXCL, O_EXCL }, + { ArmLinux32::TGT_O_NOCTTY, O_NOCTTY }, + { ArmLinux32::TGT_O_TRUNC, O_TRUNC }, + { ArmLinux32::TGT_O_APPEND, O_APPEND }, + { ArmLinux32::TGT_O_NONBLOCK, O_NONBLOCK }, +#ifdef O_SYNC + { ArmLinux32::TGT_O_SYNC, O_SYNC }, +#endif +#ifdef FASYNC + { ArmLinux32::TGT_FASYNC, FASYNC }, +#endif +#ifdef O_DIRECT + { ArmLinux32::TGT_O_DIRECT, O_DIRECT }, +#endif +#ifdef O_LARGEFILE + { ArmLinux32::TGT_O_LARGEFILE, O_LARGEFILE }, +#endif +#ifdef O_DIRECTORY + { ArmLinux32::TGT_O_DIRECTORY, O_DIRECTORY }, +#endif +#ifdef O_NOFOLLOW + { ArmLinux32::TGT_O_NOFOLLOW, O_NOFOLLOW }, +#endif +#endif /* _MSC_VER */ +}; + +const int ArmLinux32::NUM_OPEN_FLAGS = sizeof(ArmLinux32::openFlagTable) / + sizeof(ArmLinux32::openFlagTable[0]); + +// open(2) flags translation table +OpenFlagTransTable ArmLinux64::openFlagTable[] = { #ifdef _MSC_VER - { ArmLinux::TGT_O_RDONLY, _O_RDONLY }, - { ArmLinux::TGT_O_WRONLY, _O_WRONLY }, - { ArmLinux::TGT_O_RDWR, _O_RDWR }, - { ArmLinux::TGT_O_APPEND, _O_APPEND }, - { ArmLinux::TGT_O_CREAT, _O_CREAT }, - { ArmLinux::TGT_O_TRUNC, _O_TRUNC 
}, - { ArmLinux::TGT_O_EXCL, _O_EXCL }, + { ArmLinux64::TGT_O_RDONLY, _O_RDONLY }, + { ArmLinux64::TGT_O_WRONLY, _O_WRONLY }, + { ArmLinux64::TGT_O_RDWR, _O_RDWR }, + { ArmLinux64::TGT_O_APPEND, _O_APPEND }, + { ArmLinux64::TGT_O_CREAT, _O_CREAT }, + { ArmLinux64::TGT_O_TRUNC, _O_TRUNC }, + { ArmLinux64::TGT_O_EXCL, _O_EXCL }, #ifdef _O_NONBLOCK - { ArmLinux::TGT_O_NONBLOCK, _O_NONBLOCK }, + { ArmLinux64::TGT_O_NONBLOCK, _O_NONBLOCK }, #endif #ifdef _O_NOCTTY - { ArmLinux::TGT_O_NOCTTY, _O_NOCTTY }, + { ArmLinux64::TGT_O_NOCTTY, _O_NOCTTY }, #endif #ifdef _O_SYNC - { ArmLinux::TGT_O_SYNC, _O_SYNC }, + { ArmLinux64::TGT_O_SYNC, _O_SYNC }, #endif #else /* !_MSC_VER */ - { ArmLinux::TGT_O_RDONLY, O_RDONLY }, - { ArmLinux::TGT_O_WRONLY, O_WRONLY }, - { ArmLinux::TGT_O_RDWR, O_RDWR }, - { ArmLinux::TGT_O_CREAT, O_CREAT }, - { ArmLinux::TGT_O_EXCL, O_EXCL }, - { ArmLinux::TGT_O_NOCTTY, O_NOCTTY }, - { ArmLinux::TGT_O_TRUNC, O_TRUNC }, - { ArmLinux::TGT_O_APPEND, O_APPEND }, - { ArmLinux::TGT_O_NONBLOCK, O_NONBLOCK }, + { ArmLinux64::TGT_O_RDONLY, O_RDONLY }, + { ArmLinux64::TGT_O_WRONLY, O_WRONLY }, + { ArmLinux64::TGT_O_RDWR, O_RDWR }, + { ArmLinux64::TGT_O_CREAT, O_CREAT }, + { ArmLinux64::TGT_O_EXCL, O_EXCL }, + { ArmLinux64::TGT_O_NOCTTY, O_NOCTTY }, + { ArmLinux64::TGT_O_TRUNC, O_TRUNC }, + { ArmLinux64::TGT_O_APPEND, O_APPEND }, + { ArmLinux64::TGT_O_NONBLOCK, O_NONBLOCK }, #ifdef O_SYNC - { ArmLinux::TGT_O_SYNC, O_SYNC }, + { ArmLinux64::TGT_O_SYNC, O_SYNC }, #endif #ifdef FASYNC - { ArmLinux::TGT_FASYNC, FASYNC }, + { ArmLinux64::TGT_FASYNC, FASYNC }, #endif #ifdef O_DIRECT - { ArmLinux::TGT_O_DIRECT, O_DIRECT }, + { ArmLinux64::TGT_O_DIRECT, O_DIRECT }, #endif #ifdef O_LARGEFILE - { ArmLinux::TGT_O_LARGEFILE, O_LARGEFILE }, + { ArmLinux64::TGT_O_LARGEFILE, O_LARGEFILE }, #endif #ifdef O_DIRECTORY - { ArmLinux::TGT_O_DIRECTORY, O_DIRECTORY }, + { ArmLinux64::TGT_O_DIRECTORY, O_DIRECTORY }, #endif #ifdef O_NOFOLLOW - { ArmLinux::TGT_O_NOFOLLOW, O_NOFOLLOW }, + { 
ArmLinux64::TGT_O_NOFOLLOW, O_NOFOLLOW }, #endif #endif /* _MSC_VER */ }; -const int ArmLinux::NUM_OPEN_FLAGS = - (sizeof(ArmLinux::openFlagTable)/sizeof(ArmLinux::openFlagTable[0])); +const int ArmLinux64::NUM_OPEN_FLAGS = sizeof(ArmLinux64::openFlagTable) / + sizeof(ArmLinux64::openFlagTable[0]); diff --git a/src/arch/arm/linux/linux.hh b/src/arch/arm/linux/linux.hh index 5a3e68a78..fbf5d2185 100644 --- a/src/arch/arm/linux/linux.hh +++ b/src/arch/arm/linux/linux.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,7 +47,7 @@ #include "kern/linux/linux.hh" -class ArmLinux : public Linux +class ArmLinux32 : public Linux { public: @@ -123,8 +123,10 @@ class ArmLinux : public Linux uint16_t st_uid; uint16_t st_gid; uint32_t st_rdev; + uint32_t __pad1; uint32_t st_size; uint32_t st_blksize; + uint32_t __pad2; uint32_t st_blocks; uint32_t st_atimeX; uint32_t st_atime_nsec; @@ -198,8 +200,192 @@ class ArmLinux : public Linux int32_t tms_cutime; //!< user time of children int32_t tms_cstime; //!< system time of children }; +}; + +class ArmLinux64 : public Linux +{ + public: + + /// This table maps the target open() flags to the corresponding + /// host open() flags. + static OpenFlagTransTable openFlagTable[]; + + /// Number of entries in openFlagTable[]. + static const int NUM_OPEN_FLAGS; + + //@{ + /// Basic ARM Linux types + typedef uint64_t size_t; + typedef uint64_t off_t; + typedef int64_t time_t; + typedef int64_t clock_t; + //@} + + //@{ + /// open(2) flag values. 
+ static const int TGT_O_RDONLY = 00000000; //!< O_RDONLY + static const int TGT_O_WRONLY = 00000001; //!< O_WRONLY + static const int TGT_O_RDWR = 00000002; //!< O_RDWR + static const int TGT_O_CREAT = 00000100; //!< O_CREAT + static const int TGT_O_EXCL = 00000200; //!< O_EXCL + static const int TGT_O_NOCTTY = 00000400; //!< O_NOCTTY + static const int TGT_O_TRUNC = 00001000; //!< O_TRUNC + static const int TGT_O_APPEND = 00002000; //!< O_APPEND + static const int TGT_O_NONBLOCK = 00004000; //!< O_NONBLOCK + static const int TGT_O_SYNC = 00010000; //!< O_SYNC + static const int TGT_FASYNC = 00020000; //!< FASYNC + static const int TGT_O_DIRECT = 00040000; //!< O_DIRECT + static const int TGT_O_LARGEFILE = 00100000; //!< O_LARGEFILE + static const int TGT_O_DIRECTORY = 00200000; //!< O_DIRECTORY + static const int TGT_O_NOFOLLOW = 00400000; //!< O_NOFOLLOW + static const int TGT_O_NOATIME = 01000000; //!< O_NOATIME + static const int TGT_O_CLOEXEC = 02000000; //!< O_CLOEXEC + //@} + /// For mmap(). + static const unsigned TGT_MAP_ANONYMOUS = 0x20; + static const unsigned TGT_MAP_FIXED = 0x10; + //@{ + /// For getrusage(). + static const int TGT_RUSAGE_SELF = 0; + static const int TGT_RUSAGE_CHILDREN = -1; + static const int TGT_RUSAGE_BOTH = -2; + //@} + + //@{ + /// ioctl() command codes. + static const unsigned TIOCGETP_ = 0x5401; + static const unsigned TIOCSETP_ = 0x80067409; + static const unsigned TIOCSETN_ = 0x8006740a; + static const unsigned TIOCSETC_ = 0x80067411; + static const unsigned TIOCGETC_ = 0x40067412; + static const unsigned FIONREAD_ = 0x4004667f; + static const unsigned TIOCISATTY_ = 0x2000745e; + static const unsigned TIOCGETS_ = 0x402c7413; + static const unsigned TIOCGETA_ = 0x5405; + static const unsigned TCSETAW_ = 0x5407; // 2.6.15 kernel + //@} + + /// For table(). + static const int TBL_SYSINFO = 12; + + /// Resource enumeration for getrlimit().
+ enum rlimit_resources { + TGT_RLIMIT_CPU = 0, + TGT_RLIMIT_FSIZE = 1, + TGT_RLIMIT_DATA = 2, + TGT_RLIMIT_STACK = 3, + TGT_RLIMIT_CORE = 4, + TGT_RLIMIT_RSS = 5, + TGT_RLIMIT_NPROC = 6, + TGT_RLIMIT_NOFILE = 7, + TGT_RLIMIT_MEMLOCK = 8, + TGT_RLIMIT_AS = 9, + TGT_RLIMIT_LOCKS = 10 + }; + + /// Limit struct for getrlimit/setrlimit. + struct rlimit { + uint64_t rlim_cur; //!< soft limit + uint64_t rlim_max; //!< hard limit + }; + + /// For gettimeofday(). + struct timeval { + int64_t tv_sec; //!< seconds + int64_t tv_usec; //!< microseconds + }; + + // For writev/readv + struct tgt_iovec { + uint64_t iov_base; // void * + uint64_t iov_len; + }; + + typedef struct { + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; + uint32_t st_mode; + uint32_t st_uid; + uint32_t st_gid; + uint32_t __pad0; + uint64_t st_rdev; + uint64_t st_size; + uint64_t st_blksize; + uint64_t st_blocks; + uint64_t st_atimeX; + uint64_t st_atime_nsec; + uint64_t st_mtimeX; + uint64_t st_mtime_nsec; + uint64_t st_ctimeX; + uint64_t st_ctime_nsec; + } tgt_stat; + + typedef struct { + uint64_t st_dev; + uint64_t st_ino; + uint32_t st_mode; + uint32_t st_nlink; + uint32_t st_uid; + uint32_t st_gid; + uint32_t __pad0; + uint64_t st_rdev; + uint64_t st_size; + uint64_t st_blksize; + uint64_t st_blocks; + uint64_t st_atimeX; + uint64_t st_atime_nsec; + uint64_t st_mtimeX; + uint64_t st_mtime_nsec; + uint64_t st_ctimeX; + uint64_t st_ctime_nsec; + } tgt_stat64; + + typedef struct { + int64_t uptime; /* Seconds since boot */ + uint64_t loads[3]; /* 1, 5, and 15 minute load averages */ + uint64_t totalram; /* Total usable main memory size */ + uint64_t freeram; /* Available memory size */ + uint64_t sharedram; /* Amount of shared memory */ + uint64_t bufferram; /* Memory used by buffers */ + uint64_t totalswap; /* Total swap space size */ + uint64_t freeswap; /* swap space still available */ + uint16_t procs; /* Number of current processes */ + uint16_t pad; + uint64_t totalhigh; /* Total high 
memory size */ + uint64_t freehigh; /* Available high memory size */ + uint32_t mem_unit; /* Memory unit size in bytes */ + } tgt_sysinfo; + + /// For getrusage(). + struct rusage { + struct timeval ru_utime; //!< user time used + struct timeval ru_stime; //!< system time used + int64_t ru_maxrss; //!< max rss + int64_t ru_ixrss; //!< integral shared memory size + int64_t ru_idrss; //!< integral unshared data " + int64_t ru_isrss; //!< integral unshared stack " + int64_t ru_minflt; //!< page reclaims - total vmfaults + int64_t ru_majflt; //!< page faults + int64_t ru_nswap; //!< swaps + int64_t ru_inblock; //!< block input operations + int64_t ru_oublock; //!< block output operations + int64_t ru_msgsnd; //!< messages sent + int64_t ru_msgrcv; //!< messages received + int64_t ru_nsignals; //!< signals received + int64_t ru_nvcsw; //!< voluntary context switches + int64_t ru_nivcsw; //!< involuntary " + }; + + /// For times(). + struct tms { + int64_t tms_utime; //!< user time + int64_t tms_stime; //!< system time + int64_t tms_cutime; //!< user time of children + int64_t tms_cstime; //!< system time of children + }; }; #endif diff --git a/src/arch/arm/linux/process.cc b/src/arch/arm/linux/process.cc index 169565a04..e34a813ea 100644 --- a/src/arch/arm/linux/process.cc +++ b/src/arch/arm/linux/process.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -41,6 +41,7 @@ * Authors: Korey Sewell * Stephen Hines * Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/linux/linux.hh" @@ -58,8 +59,8 @@ using namespace ArmISA; /// Target uname() handler. 
static SyscallReturn -unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process, - ThreadContext *tc) +unameFunc32(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) { int index = 0; TypedBufferArg<Linux::utsname> name(process->getSyscallArg(tc, index)); @@ -74,13 +75,56 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process, return 0; } -SyscallDesc ArmLinuxProcess::syscallDescs[] = { +/// Target uname() handler. +static SyscallReturn +unameFunc64(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + TypedBufferArg<Linux::utsname> name(process->getSyscallArg(tc, index)); + + strcpy(name->sysname, "Linux"); + strcpy(name->nodename, "gem5"); + strcpy(name->release, "3.7.0+"); + strcpy(name->version, "#1 SMP Sat Dec 1 00:00:00 GMT 2012"); + strcpy(name->machine, "armv8l"); + + name.copyOut(tc->getMemProxy()); + return 0; +} + +/// Target set_tls() handler. +static SyscallReturn +setTLSFunc32(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + uint32_t tlsPtr = process->getSyscallArg(tc, index); + + tc->getMemProxy().writeBlob(ArmLinuxProcess32::commPage + 0x0ff0, + (uint8_t *)&tlsPtr, sizeof(tlsPtr)); + tc->setMiscReg(MISCREG_TPIDRURO,tlsPtr); + return 0; +} + +static SyscallReturn +setTLSFunc64(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + uint32_t tlsPtr = process->getSyscallArg(tc, index); + + tc->setMiscReg(MISCREG_TPIDRRO_EL0, tlsPtr); + return 0; +} + +static SyscallDesc syscallDescs32[] = { /* 0 */ SyscallDesc("syscall", unimplementedFunc), /* 1 */ SyscallDesc("exit", exitFunc), /* 2 */ SyscallDesc("fork", unimplementedFunc), /* 3 */ SyscallDesc("read", readFunc), /* 4 */ SyscallDesc("write", writeFunc), - /* 5 */ SyscallDesc("open", openFunc<ArmLinux>), + /* 5 */ SyscallDesc("open", openFunc<ArmLinux32>), /* 6 */ SyscallDesc("close", closeFunc), /* 7 */ SyscallDesc("unused#7", 
unimplementedFunc), /* 8 */ SyscallDesc("creat", unimplementedFunc), @@ -88,9 +132,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 10 */ SyscallDesc("unlink", unlinkFunc), /* 11 */ SyscallDesc("execve", unimplementedFunc), /* 12 */ SyscallDesc("chdir", unimplementedFunc), - /* 13 */ SyscallDesc("time", timeFunc<ArmLinux>), + /* 13 */ SyscallDesc("time", timeFunc<ArmLinux32>), /* 14 */ SyscallDesc("mknod", unimplementedFunc), - /* 15 */ SyscallDesc("chmod", chmodFunc<ArmLinux>), + /* 15 */ SyscallDesc("chmod", chmodFunc<ArmLinux32>), /* 16 */ SyscallDesc("lchown", chownFunc), /* 17 */ SyscallDesc("unused#17", unimplementedFunc), /* 18 */ SyscallDesc("unused#18", unimplementedFunc), @@ -118,7 +162,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 40 */ SyscallDesc("rmdir", unimplementedFunc), /* 41 */ SyscallDesc("dup", dupFunc), /* 42 */ SyscallDesc("pipe", pipePseudoFunc), - /* 43 */ SyscallDesc("times", timesFunc<ArmLinux>), + /* 43 */ SyscallDesc("times", timesFunc<ArmLinux32>), /* 44 */ SyscallDesc("unused#44", unimplementedFunc), /* 45 */ SyscallDesc("brk", brkFunc), /* 46 */ SyscallDesc("setgid", unimplementedFunc), @@ -129,7 +173,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 51 */ SyscallDesc("acct", unimplementedFunc), /* 52 */ SyscallDesc("umount2", unimplementedFunc), /* 53 */ SyscallDesc("unused#53", unimplementedFunc), - /* 54 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux>), + /* 54 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux32>), /* 55 */ SyscallDesc("fcntl", fcntlFunc), /* 56 */ SyscallDesc("unused#56", unimplementedFunc), /* 57 */ SyscallDesc("setpgid", unimplementedFunc), @@ -151,9 +195,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 73 */ SyscallDesc("sigpending", unimplementedFunc), /* 74 */ SyscallDesc("sethostname", ignoreFunc), /* 75 */ SyscallDesc("setrlimit", ignoreFunc), - /* 76 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux>), - /* 77 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux>), - /* 78 */ 
SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux>), + /* 76 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux32>), + /* 77 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux32>), + /* 78 */ SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux32>), /* 79 */ SyscallDesc("settimeofday", unimplementedFunc), /* 80 */ SyscallDesc("getgroups", unimplementedFunc), /* 81 */ SyscallDesc("setgroups", unimplementedFunc), @@ -165,7 +209,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 87 */ SyscallDesc("swapon", unimplementedFunc), /* 88 */ SyscallDesc("reboot", unimplementedFunc), /* 89 */ SyscallDesc("readdir", unimplementedFunc), - /* 90 */ SyscallDesc("mmap", mmapFunc<ArmLinux>), + /* 90 */ SyscallDesc("mmap", mmapFunc<ArmLinux32>), /* 91 */ SyscallDesc("munmap", munmapFunc), /* 92 */ SyscallDesc("truncate", truncateFunc), /* 93 */ SyscallDesc("ftruncate", ftruncateFunc), @@ -181,9 +225,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 103 */ SyscallDesc("syslog", unimplementedFunc), /* 104 */ SyscallDesc("setitimer", unimplementedFunc), /* 105 */ SyscallDesc("getitimer", unimplementedFunc), - /* 106 */ SyscallDesc("stat", statFunc<ArmLinux>), + /* 106 */ SyscallDesc("stat", statFunc<ArmLinux32>), /* 107 */ SyscallDesc("lstat", unimplementedFunc), - /* 108 */ SyscallDesc("fstat", fstatFunc<ArmLinux>), + /* 108 */ SyscallDesc("fstat", fstatFunc<ArmLinux32>), /* 109 */ SyscallDesc("unused#109", unimplementedFunc), /* 110 */ SyscallDesc("unused#101", unimplementedFunc), /* 111 */ SyscallDesc("vhangup", unimplementedFunc), @@ -191,17 +235,17 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 113 */ SyscallDesc("syscall", unimplementedFunc), /* 114 */ SyscallDesc("wait4", unimplementedFunc), /* 115 */ SyscallDesc("swapoff", unimplementedFunc), - /* 116 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux>), + /* 116 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux32>), /* 117 */ SyscallDesc("ipc", unimplementedFunc), /* 118 */ SyscallDesc("fsync", 
unimplementedFunc), /* 119 */ SyscallDesc("sigreturn", unimplementedFunc), /* 120 */ SyscallDesc("clone", cloneFunc), /* 121 */ SyscallDesc("setdomainname", unimplementedFunc), - /* 122 */ SyscallDesc("uname", unameFunc), + /* 122 */ SyscallDesc("uname", unameFunc32), /* 123 */ SyscallDesc("unused#123", unimplementedFunc), /* 124 */ SyscallDesc("adjtimex", unimplementedFunc), /* 125 */ SyscallDesc("mprotect", ignoreFunc), - /* 126 */ SyscallDesc("sigprocmask", unimplementedFunc), + /* 126 */ SyscallDesc("sigprocmask", ignoreWarnOnceFunc), /* 127 */ SyscallDesc("unused#127", unimplementedFunc), /* 128 */ SyscallDesc("init_module", unimplementedFunc), /* 129 */ SyscallDesc("delete_module", unimplementedFunc), @@ -221,7 +265,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 143 */ SyscallDesc("flock", unimplementedFunc), /* 144 */ SyscallDesc("msync", unimplementedFunc), /* 145 */ SyscallDesc("readv", unimplementedFunc), - /* 146 */ SyscallDesc("writev", writevFunc<ArmLinux>), + /* 146 */ SyscallDesc("writev", writevFunc<ArmLinux32>), /* 147 */ SyscallDesc("getsid", unimplementedFunc), /* 148 */ SyscallDesc("fdatasync", unimplementedFunc), /* 149 */ SyscallDesc("sysctl", unimplementedFunc), @@ -238,7 +282,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 160 */ SyscallDesc("sched_get_priority_min", unimplementedFunc), /* 161 */ SyscallDesc("sched_rr_get_interval", unimplementedFunc), /* 162 */ SyscallDesc("nanosleep", ignoreWarnOnceFunc), - /* 163 */ SyscallDesc("mremap", mremapFunc<ArmLinux>), // ARM-specific + /* 163 */ SyscallDesc("mremap", mremapFunc<ArmLinux32>), // ARM-specific /* 164 */ SyscallDesc("setresuid", unimplementedFunc), /* 165 */ SyscallDesc("getresuid", unimplementedFunc), /* 166 */ SyscallDesc("unused#166", unimplementedFunc), @@ -266,13 +310,13 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 188 */ SyscallDesc("unused#188", unimplementedFunc), /* 189 */ SyscallDesc("unused#189", unimplementedFunc), /* 190 */ SyscallDesc("vfork", 
unimplementedFunc), - /* 191 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux>), - /* 192 */ SyscallDesc("mmap2", mmapFunc<ArmLinux>), + /* 191 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux32>), + /* 192 */ SyscallDesc("mmap2", mmapFunc<ArmLinux32>), /* 193 */ SyscallDesc("truncate64", unimplementedFunc), /* 194 */ SyscallDesc("ftruncate64", ftruncate64Func), - /* 195 */ SyscallDesc("stat64", stat64Func<ArmLinux>), - /* 196 */ SyscallDesc("lstat64", lstat64Func<ArmLinux>), - /* 197 */ SyscallDesc("fstat64", fstat64Func<ArmLinux>), + /* 195 */ SyscallDesc("stat64", stat64Func<ArmLinux32>), + /* 196 */ SyscallDesc("lstat64", lstat64Func<ArmLinux32>), + /* 197 */ SyscallDesc("fstat64", fstat64Func<ArmLinux32>), /* 198 */ SyscallDesc("lchown", unimplementedFunc), /* 199 */ SyscallDesc("getuid", getuidFunc), /* 200 */ SyscallDesc("getgid", getgidFunc), @@ -319,7 +363,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 241 */ SyscallDesc("sched_setaffinity", unimplementedFunc), /* 242 */ SyscallDesc("sched_getaffinity", unimplementedFunc), /* 243 */ SyscallDesc("io_setup", unimplementedFunc), - /* 244 */ SyscallDesc("io_destory", unimplementedFunc), + /* 244 */ SyscallDesc("io_destroy", unimplementedFunc), /* 245 */ SyscallDesc("io_getevents", unimplementedFunc), /* 246 */ SyscallDesc("io_submit", unimplementedFunc), /* 247 */ SyscallDesc("io_cancel", unimplementedFunc), @@ -441,68 +485,1187 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 363 */ SyscallDesc("sys_rt_tgsigqueueinfo", unimplementedFunc), /* 364 */ SyscallDesc("sys_perf_event_open", unimplementedFunc), /* 365 */ SyscallDesc("sys_recvmmsg", unimplementedFunc), - }; -/// Target set_tls() handler. 
-static SyscallReturn -setTLSFunc(SyscallDesc *desc, int callnum, LiveProcess *process, - ThreadContext *tc) -{ - int index = 0; - uint32_t tlsPtr = process->getSyscallArg(tc, index); - - tc->getMemProxy().writeBlob(ArmLinuxProcess::commPage + 0x0ff0, - (uint8_t *)&tlsPtr, sizeof(tlsPtr)); - tc->setMiscReg(MISCREG_TPIDRURO,tlsPtr); - return 0; -} +static SyscallDesc syscallDescs64[] = { + /* 0 */ SyscallDesc("io_setup", unimplementedFunc), + /* 1 */ SyscallDesc("io_destroy", unimplementedFunc), + /* 2 */ SyscallDesc("io_submit", unimplementedFunc), + /* 3 */ SyscallDesc("io_cancel", unimplementedFunc), + /* 4 */ SyscallDesc("io_getevents", unimplementedFunc), + /* 5 */ SyscallDesc("setxattr", unimplementedFunc), + /* 6 */ SyscallDesc("lsetxattr", unimplementedFunc), + /* 7 */ SyscallDesc("fsetxattr", unimplementedFunc), + /* 8 */ SyscallDesc("getxattr", unimplementedFunc), + /* 9 */ SyscallDesc("lgetxattr", unimplementedFunc), + /* 10 */ SyscallDesc("fgetxattr", unimplementedFunc), + /* 11 */ SyscallDesc("listxattr", unimplementedFunc), + /* 12 */ SyscallDesc("llistxattr", unimplementedFunc), + /* 13 */ SyscallDesc("flistxattr", unimplementedFunc), + /* 14 */ SyscallDesc("removexattr", unimplementedFunc), + /* 15 */ SyscallDesc("lremovexattr", unimplementedFunc), + /* 16 */ SyscallDesc("fremovexattr", unimplementedFunc), + /* 17 */ SyscallDesc("getcwd", getcwdFunc), + /* 18 */ SyscallDesc("lookup_dcookie", unimplementedFunc), + /* 19 */ SyscallDesc("eventfd2", unimplementedFunc), + /* 20 */ SyscallDesc("epoll_create1", unimplementedFunc), + /* 21 */ SyscallDesc("epoll_ctl", unimplementedFunc), + /* 22 */ SyscallDesc("epoll_pwait", unimplementedFunc), + /* 23 */ SyscallDesc("dup", dupFunc), + /* 24 */ SyscallDesc("dup3", unimplementedFunc), + /* 25 */ SyscallDesc("fcntl64", fcntl64Func), + /* 26 */ SyscallDesc("inotify_init1", unimplementedFunc), + /* 27 */ SyscallDesc("inotify_add_watch", unimplementedFunc), + /* 28 */ SyscallDesc("inotify_rm_watch", 
unimplementedFunc), + /* 29 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux64>), + /* 30 */ SyscallDesc("ioprio_set", unimplementedFunc), + /* 31 */ SyscallDesc("ioprio_get", unimplementedFunc), + /* 32 */ SyscallDesc("flock", unimplementedFunc), + /* 33 */ SyscallDesc("mknodat", unimplementedFunc), + /* 34 */ SyscallDesc("mkdirat", unimplementedFunc), + /* 35 */ SyscallDesc("unlinkat", unimplementedFunc), + /* 36 */ SyscallDesc("symlinkat", unimplementedFunc), + /* 37 */ SyscallDesc("linkat", unimplementedFunc), + /* 38 */ SyscallDesc("renameat", unimplementedFunc), + /* 39 */ SyscallDesc("umount2", unimplementedFunc), + /* 40 */ SyscallDesc("mount", unimplementedFunc), + /* 41 */ SyscallDesc("pivot_root", unimplementedFunc), + /* 42 */ SyscallDesc("nfsservctl", unimplementedFunc), + /* 43 */ SyscallDesc("statfs64", unimplementedFunc), + /* 44 */ SyscallDesc("fstatfs64", unimplementedFunc), + /* 45 */ SyscallDesc("truncate64", unimplementedFunc), + /* 46 */ SyscallDesc("ftruncate64", ftruncate64Func), + /* 47 */ SyscallDesc("fallocate", unimplementedFunc), + /* 48 */ SyscallDesc("faccessat", unimplementedFunc), + /* 49 */ SyscallDesc("chdir", unimplementedFunc), + /* 50 */ SyscallDesc("fchdir", unimplementedFunc), + /* 51 */ SyscallDesc("chroot", unimplementedFunc), + /* 52 */ SyscallDesc("fchmod", unimplementedFunc), + /* 53 */ SyscallDesc("fchmodat", unimplementedFunc), + /* 54 */ SyscallDesc("fchownat", unimplementedFunc), + /* 55 */ SyscallDesc("fchown", unimplementedFunc), + /* 56 */ SyscallDesc("openat", openatFunc<ArmLinux64>), + /* 57 */ SyscallDesc("close", closeFunc), + /* 58 */ SyscallDesc("vhangup", unimplementedFunc), + /* 59 */ SyscallDesc("pipe2", unimplementedFunc), + /* 60 */ SyscallDesc("quotactl", unimplementedFunc), + /* 61 */ SyscallDesc("getdents64", unimplementedFunc), + /* 62 */ SyscallDesc("llseek", lseekFunc), + /* 63 */ SyscallDesc("read", readFunc), + /* 64 */ SyscallDesc("write", writeFunc), + /* 65 */ SyscallDesc("readv", 
unimplementedFunc), + /* 66 */ SyscallDesc("writev", writevFunc<ArmLinux64>), + /* 67 */ SyscallDesc("pread64", unimplementedFunc), + /* 68 */ SyscallDesc("pwrite64", unimplementedFunc), + /* 69 */ SyscallDesc("preadv", unimplementedFunc), + /* 70 */ SyscallDesc("pwritev", unimplementedFunc), + /* 71 */ SyscallDesc("sendfile64", unimplementedFunc), + /* 72 */ SyscallDesc("pselect6", unimplementedFunc), + /* 73 */ SyscallDesc("ppoll", unimplementedFunc), + /* 74 */ SyscallDesc("signalfd4", unimplementedFunc), + /* 75 */ SyscallDesc("vmsplice", unimplementedFunc), + /* 76 */ SyscallDesc("splice", unimplementedFunc), + /* 77 */ SyscallDesc("tee", unimplementedFunc), + /* 78 */ SyscallDesc("readlinkat", unimplementedFunc), + /* 79 */ SyscallDesc("fstatat64", fstatat64Func<ArmLinux64>), + /* 80 */ SyscallDesc("fstat64", fstat64Func<ArmLinux64>), + /* 81 */ SyscallDesc("sync", unimplementedFunc), + /* 82 */ SyscallDesc("fsync", unimplementedFunc), + /* 83 */ SyscallDesc("fdatasync", unimplementedFunc), + /* 84 */ SyscallDesc("sync_file_range", unimplementedFunc), + /* 85 */ SyscallDesc("timerfd_create", unimplementedFunc), + /* 86 */ SyscallDesc("timerfd_settime", unimplementedFunc), + /* 87 */ SyscallDesc("timerfd_gettime", unimplementedFunc), + /* 88 */ SyscallDesc("utimensat", unimplementedFunc), + /* 89 */ SyscallDesc("acct", unimplementedFunc), + /* 90 */ SyscallDesc("capget", unimplementedFunc), + /* 91 */ SyscallDesc("capset", unimplementedFunc), + /* 92 */ SyscallDesc("personality", unimplementedFunc), + /* 93 */ SyscallDesc("exit", exitFunc), + /* 94 */ SyscallDesc("exit_group", exitGroupFunc), + /* 95 */ SyscallDesc("waitid", unimplementedFunc), + /* 96 */ SyscallDesc("set_tid_address", unimplementedFunc), + /* 97 */ SyscallDesc("unshare", unimplementedFunc), + /* 98 */ SyscallDesc("futex", unimplementedFunc), + /* 99 */ SyscallDesc("set_robust_list", unimplementedFunc), + /* 100 */ SyscallDesc("get_robust_list", unimplementedFunc), + /* 101 */ 
SyscallDesc("nanosleep", ignoreWarnOnceFunc), + /* 102 */ SyscallDesc("getitimer", unimplementedFunc), + /* 103 */ SyscallDesc("setitimer", unimplementedFunc), + /* 104 */ SyscallDesc("kexec_load", unimplementedFunc), + /* 105 */ SyscallDesc("init_module", unimplementedFunc), + /* 106 */ SyscallDesc("delete_module", unimplementedFunc), + /* 107 */ SyscallDesc("timer_create", unimplementedFunc), + /* 108 */ SyscallDesc("timer_gettime", unimplementedFunc), + /* 109 */ SyscallDesc("timer_getoverrun", unimplementedFunc), + /* 110 */ SyscallDesc("timer_settime", unimplementedFunc), + /* 111 */ SyscallDesc("timer_delete", unimplementedFunc), + /* 112 */ SyscallDesc("clock_settime", unimplementedFunc), + /* 113 */ SyscallDesc("clock_gettime", unimplementedFunc), + /* 114 */ SyscallDesc("clock_getres", unimplementedFunc), + /* 115 */ SyscallDesc("clock_nanosleep", unimplementedFunc), + /* 116 */ SyscallDesc("syslog", unimplementedFunc), + /* 117 */ SyscallDesc("ptrace", unimplementedFunc), + /* 118 */ SyscallDesc("sched_setparam", unimplementedFunc), + /* 119 */ SyscallDesc("sched_setscheduler", unimplementedFunc), + /* 120 */ SyscallDesc("sched_getscheduler", unimplementedFunc), + /* 121 */ SyscallDesc("sched_getparam", unimplementedFunc), + /* 122 */ SyscallDesc("sched_setaffinity", unimplementedFunc), + /* 123 */ SyscallDesc("sched_getaffinity", unimplementedFunc), + /* 124 */ SyscallDesc("sched_yield", unimplementedFunc), + /* 125 */ SyscallDesc("sched_get_priority_max", unimplementedFunc), + /* 126 */ SyscallDesc("sched_get_priority_min", unimplementedFunc), + /* 127 */ SyscallDesc("sched_rr_get_interval", unimplementedFunc), + /* 128 */ SyscallDesc("restart_syscall", unimplementedFunc), + /* 129 */ SyscallDesc("kill", ignoreFunc), + /* 130 */ SyscallDesc("tkill", unimplementedFunc), + /* 131 */ SyscallDesc("tgkill", unimplementedFunc), + /* 132 */ SyscallDesc("sigaltstack", unimplementedFunc), + /* 133 */ SyscallDesc("rt_sigsuspend", unimplementedFunc), + /* 134 */ 
SyscallDesc("rt_sigaction", ignoreFunc), + /* 135 */ SyscallDesc("rt_sigprocmask", ignoreWarnOnceFunc), + /* 136 */ SyscallDesc("rt_sigpending", unimplementedFunc), + /* 137 */ SyscallDesc("rt_sigtimedwait", unimplementedFunc), + /* 138 */ SyscallDesc("rt_sigqueueinfo", ignoreFunc), + /* 139 */ SyscallDesc("rt_sigreturn", unimplementedFunc), + /* 140 */ SyscallDesc("setpriority", unimplementedFunc), + /* 141 */ SyscallDesc("getpriority", unimplementedFunc), + /* 142 */ SyscallDesc("reboot", unimplementedFunc), + /* 143 */ SyscallDesc("setregid", unimplementedFunc), + /* 144 */ SyscallDesc("setgid", unimplementedFunc), + /* 145 */ SyscallDesc("setreuid", unimplementedFunc), + /* 146 */ SyscallDesc("setuid", unimplementedFunc), + /* 147 */ SyscallDesc("setresuid", unimplementedFunc), + /* 148 */ SyscallDesc("getresuid", unimplementedFunc), + /* 149 */ SyscallDesc("setresgid", unimplementedFunc), + /* 150 */ SyscallDesc("getresgid", unimplementedFunc), + /* 151 */ SyscallDesc("setfsuid", unimplementedFunc), + /* 152 */ SyscallDesc("setfsgid", unimplementedFunc), + /* 153 */ SyscallDesc("times", timesFunc<ArmLinux64>), + /* 154 */ SyscallDesc("setpgid", unimplementedFunc), + /* 155 */ SyscallDesc("getpgid", unimplementedFunc), + /* 156 */ SyscallDesc("getsid", unimplementedFunc), + /* 157 */ SyscallDesc("setsid", unimplementedFunc), + /* 158 */ SyscallDesc("getgroups", unimplementedFunc), + /* 159 */ SyscallDesc("setgroups", unimplementedFunc), + /* 160 */ SyscallDesc("uname", unameFunc64), + /* 161 */ SyscallDesc("sethostname", ignoreFunc), + /* 162 */ SyscallDesc("setdomainname", unimplementedFunc), + /* 163 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux64>), + /* 164 */ SyscallDesc("setrlimit", ignoreFunc), + /* 165 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux64>), + /* 166 */ SyscallDesc("umask", unimplementedFunc), + /* 167 */ SyscallDesc("prctl", unimplementedFunc), + /* 168 */ SyscallDesc("getcpu", unimplementedFunc), + /* 169 */ 
SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux64>), + /* 170 */ SyscallDesc("settimeofday", unimplementedFunc), + /* 171 */ SyscallDesc("adjtimex", unimplementedFunc), + /* 172 */ SyscallDesc("getpid", getpidFunc), + /* 173 */ SyscallDesc("getppid", getppidFunc), + /* 174 */ SyscallDesc("getuid", getuidFunc), + /* 175 */ SyscallDesc("geteuid", geteuidFunc), + /* 176 */ SyscallDesc("getgid", getgidFunc), + /* 177 */ SyscallDesc("getegid", getegidFunc), + /* 178 */ SyscallDesc("gettid", unimplementedFunc), + /* 179 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux64>), + /* 180 */ SyscallDesc("mq_open", unimplementedFunc), + /* 181 */ SyscallDesc("mq_unlink", unimplementedFunc), + /* 182 */ SyscallDesc("mq_timedsend", unimplementedFunc), + /* 183 */ SyscallDesc("mq_timedreceive", unimplementedFunc), + /* 184 */ SyscallDesc("mq_notify", unimplementedFunc), + /* 185 */ SyscallDesc("mq_getsetattr", unimplementedFunc), + /* 186 */ SyscallDesc("msgget", unimplementedFunc), + /* 187 */ SyscallDesc("msgctl", unimplementedFunc), + /* 188 */ SyscallDesc("msgrcv", unimplementedFunc), + /* 189 */ SyscallDesc("msgsnd", unimplementedFunc), + /* 190 */ SyscallDesc("semget", unimplementedFunc), + /* 191 */ SyscallDesc("semctl", unimplementedFunc), + /* 192 */ SyscallDesc("semtimedop", unimplementedFunc), + /* 193 */ SyscallDesc("semop", unimplementedFunc), + /* 194 */ SyscallDesc("shmget", unimplementedFunc), + /* 195 */ SyscallDesc("shmctl", unimplementedFunc), + /* 196 */ SyscallDesc("shmat", unimplementedFunc), + /* 197 */ SyscallDesc("shmdt", unimplementedFunc), + /* 198 */ SyscallDesc("socket", unimplementedFunc), + /* 199 */ SyscallDesc("socketpair", unimplementedFunc), + /* 200 */ SyscallDesc("bind", unimplementedFunc), + /* 201 */ SyscallDesc("listen", unimplementedFunc), + /* 202 */ SyscallDesc("accept", unimplementedFunc), + /* 203 */ SyscallDesc("connect", unimplementedFunc), + /* 204 */ SyscallDesc("getsockname", unimplementedFunc), + /* 205 */ 
SyscallDesc("getpeername", unimplementedFunc), + /* 206 */ SyscallDesc("sendto", unimplementedFunc), + /* 207 */ SyscallDesc("recvfrom", unimplementedFunc), + /* 208 */ SyscallDesc("setsockopt", unimplementedFunc), + /* 209 */ SyscallDesc("getsockopt", unimplementedFunc), + /* 210 */ SyscallDesc("shutdown", unimplementedFunc), + /* 211 */ SyscallDesc("sendmsg", unimplementedFunc), + /* 212 */ SyscallDesc("recvmsg", unimplementedFunc), + /* 213 */ SyscallDesc("readahead", unimplementedFunc), + /* 214 */ SyscallDesc("brk", brkFunc), + /* 215 */ SyscallDesc("munmap", munmapFunc), + /* 216 */ SyscallDesc("mremap", mremapFunc<ArmLinux64>), + /* 217 */ SyscallDesc("add_key", unimplementedFunc), + /* 218 */ SyscallDesc("request_key", unimplementedFunc), + /* 219 */ SyscallDesc("keyctl", unimplementedFunc), + /* 220 */ SyscallDesc("clone", unimplementedFunc), + /* 221 */ SyscallDesc("execve", unimplementedFunc), + /* 222 */ SyscallDesc("mmap2", mmapFunc<ArmLinux64>), + /* 223 */ SyscallDesc("fadvise64_64", unimplementedFunc), + /* 224 */ SyscallDesc("swapon", unimplementedFunc), + /* 225 */ SyscallDesc("swapoff", unimplementedFunc), + /* 226 */ SyscallDesc("mprotect", ignoreFunc), + /* 227 */ SyscallDesc("msync", unimplementedFunc), + /* 228 */ SyscallDesc("mlock", unimplementedFunc), + /* 229 */ SyscallDesc("munlock", unimplementedFunc), + /* 230 */ SyscallDesc("mlockall", unimplementedFunc), + /* 231 */ SyscallDesc("munlockall", unimplementedFunc), + /* 232 */ SyscallDesc("mincore", unimplementedFunc), + /* 233 */ SyscallDesc("madvise", unimplementedFunc), + /* 234 */ SyscallDesc("remap_file_pages", unimplementedFunc), + /* 235 */ SyscallDesc("mbind", unimplementedFunc), + /* 236 */ SyscallDesc("get_mempolicy", unimplementedFunc), + /* 237 */ SyscallDesc("set_mempolicy", unimplementedFunc), + /* 238 */ SyscallDesc("migrate_pages", unimplementedFunc), + /* 239 */ SyscallDesc("move_pages", unimplementedFunc), + /* 240 */ SyscallDesc("rt_tgsigqueueinfo", unimplementedFunc), 
+ /* 241 */ SyscallDesc("perf_event_open", unimplementedFunc), + /* 242 */ SyscallDesc("accept4", unimplementedFunc), + /* 243 */ SyscallDesc("recvmmsg", unimplementedFunc), + /* 244 */ SyscallDesc("unused#244", unimplementedFunc), + /* 245 */ SyscallDesc("unused#245", unimplementedFunc), + /* 246 */ SyscallDesc("unused#246", unimplementedFunc), + /* 247 */ SyscallDesc("unused#247", unimplementedFunc), + /* 248 */ SyscallDesc("unused#248", unimplementedFunc), + /* 249 */ SyscallDesc("unused#249", unimplementedFunc), + /* 250 */ SyscallDesc("unused#250", unimplementedFunc), + /* 251 */ SyscallDesc("unused#251", unimplementedFunc), + /* 252 */ SyscallDesc("unused#252", unimplementedFunc), + /* 253 */ SyscallDesc("unused#253", unimplementedFunc), + /* 254 */ SyscallDesc("unused#254", unimplementedFunc), + /* 255 */ SyscallDesc("unused#255", unimplementedFunc), + /* 256 */ SyscallDesc("unused#256", unimplementedFunc), + /* 257 */ SyscallDesc("unused#257", unimplementedFunc), + /* 258 */ SyscallDesc("unused#258", unimplementedFunc), + /* 259 */ SyscallDesc("unused#259", unimplementedFunc), + /* 260 */ SyscallDesc("wait4", unimplementedFunc), + /* 261 */ SyscallDesc("prlimit64", unimplementedFunc), + /* 262 */ SyscallDesc("fanotify_init", unimplementedFunc), + /* 263 */ SyscallDesc("fanotify_mark", unimplementedFunc), + /* 264 */ SyscallDesc("name_to_handle_at", unimplementedFunc), + /* 265 */ SyscallDesc("open_by_handle_at", unimplementedFunc), + /* 266 */ SyscallDesc("clock_adjtime", unimplementedFunc), + /* 267 */ SyscallDesc("syncfs", unimplementedFunc), + /* 268 */ SyscallDesc("setns", unimplementedFunc), + /* 269 */ SyscallDesc("sendmmsg", unimplementedFunc), + /* 270 */ SyscallDesc("process_vm_readv", unimplementedFunc), + /* 271 */ SyscallDesc("process_vm_writev", unimplementedFunc), + /* 272 */ SyscallDesc("unused#272", unimplementedFunc), + /* 273 */ SyscallDesc("unused#273", unimplementedFunc), + /* 274 */ SyscallDesc("unused#274", unimplementedFunc), + /* 275 
*/ SyscallDesc("unused#275", unimplementedFunc), + /* 276 */ SyscallDesc("unused#276", unimplementedFunc), + /* 277 */ SyscallDesc("unused#277", unimplementedFunc), + /* 278 */ SyscallDesc("unused#278", unimplementedFunc), + /* 279 */ SyscallDesc("unused#279", unimplementedFunc), + /* 280 */ SyscallDesc("unused#280", unimplementedFunc), + /* 281 */ SyscallDesc("unused#281", unimplementedFunc), + /* 282 */ SyscallDesc("unused#282", unimplementedFunc), + /* 283 */ SyscallDesc("unused#283", unimplementedFunc), + /* 284 */ SyscallDesc("unused#284", unimplementedFunc), + /* 285 */ SyscallDesc("unused#285", unimplementedFunc), + /* 286 */ SyscallDesc("unused#286", unimplementedFunc), + /* 287 */ SyscallDesc("unused#287", unimplementedFunc), + /* 288 */ SyscallDesc("unused#288", unimplementedFunc), + /* 289 */ SyscallDesc("unused#289", unimplementedFunc), + /* 290 */ SyscallDesc("unused#290", unimplementedFunc), + /* 291 */ SyscallDesc("unused#291", unimplementedFunc), + /* 292 */ SyscallDesc("unused#292", unimplementedFunc), + /* 293 */ SyscallDesc("unused#293", unimplementedFunc), + /* 294 */ SyscallDesc("unused#294", unimplementedFunc), + /* 295 */ SyscallDesc("unused#295", unimplementedFunc), + /* 296 */ SyscallDesc("unused#296", unimplementedFunc), + /* 297 */ SyscallDesc("unused#297", unimplementedFunc), + /* 298 */ SyscallDesc("unused#298", unimplementedFunc), + /* 299 */ SyscallDesc("unused#299", unimplementedFunc), + /* 300 */ SyscallDesc("unused#300", unimplementedFunc), + /* 301 */ SyscallDesc("unused#301", unimplementedFunc), + /* 302 */ SyscallDesc("unused#302", unimplementedFunc), + /* 303 */ SyscallDesc("unused#303", unimplementedFunc), + /* 304 */ SyscallDesc("unused#304", unimplementedFunc), + /* 305 */ SyscallDesc("unused#305", unimplementedFunc), + /* 306 */ SyscallDesc("unused#306", unimplementedFunc), + /* 307 */ SyscallDesc("unused#307", unimplementedFunc), + /* 308 */ SyscallDesc("unused#308", unimplementedFunc), + /* 309 */ 
SyscallDesc("unused#309", unimplementedFunc), + /* 310 */ SyscallDesc("unused#310", unimplementedFunc), + /* 311 */ SyscallDesc("unused#311", unimplementedFunc), + /* 312 */ SyscallDesc("unused#312", unimplementedFunc), + /* 313 */ SyscallDesc("unused#313", unimplementedFunc), + /* 314 */ SyscallDesc("unused#314", unimplementedFunc), + /* 315 */ SyscallDesc("unused#315", unimplementedFunc), + /* 316 */ SyscallDesc("unused#316", unimplementedFunc), + /* 317 */ SyscallDesc("unused#317", unimplementedFunc), + /* 318 */ SyscallDesc("unused#318", unimplementedFunc), + /* 319 */ SyscallDesc("unused#319", unimplementedFunc), + /* 320 */ SyscallDesc("unused#320", unimplementedFunc), + /* 321 */ SyscallDesc("unused#321", unimplementedFunc), + /* 322 */ SyscallDesc("unused#322", unimplementedFunc), + /* 323 */ SyscallDesc("unused#323", unimplementedFunc), + /* 324 */ SyscallDesc("unused#324", unimplementedFunc), + /* 325 */ SyscallDesc("unused#325", unimplementedFunc), + /* 326 */ SyscallDesc("unused#326", unimplementedFunc), + /* 327 */ SyscallDesc("unused#327", unimplementedFunc), + /* 328 */ SyscallDesc("unused#328", unimplementedFunc), + /* 329 */ SyscallDesc("unused#329", unimplementedFunc), + /* 330 */ SyscallDesc("unused#330", unimplementedFunc), + /* 331 */ SyscallDesc("unused#331", unimplementedFunc), + /* 332 */ SyscallDesc("unused#332", unimplementedFunc), + /* 333 */ SyscallDesc("unused#333", unimplementedFunc), + /* 334 */ SyscallDesc("unused#334", unimplementedFunc), + /* 335 */ SyscallDesc("unused#335", unimplementedFunc), + /* 336 */ SyscallDesc("unused#336", unimplementedFunc), + /* 337 */ SyscallDesc("unused#337", unimplementedFunc), + /* 338 */ SyscallDesc("unused#338", unimplementedFunc), + /* 339 */ SyscallDesc("unused#339", unimplementedFunc), + /* 340 */ SyscallDesc("unused#340", unimplementedFunc), + /* 341 */ SyscallDesc("unused#341", unimplementedFunc), + /* 342 */ SyscallDesc("unused#342", unimplementedFunc), + /* 343 */ SyscallDesc("unused#343", 
unimplementedFunc), + /* 344 */ SyscallDesc("unused#344", unimplementedFunc), + /* 345 */ SyscallDesc("unused#345", unimplementedFunc), + /* 346 */ SyscallDesc("unused#346", unimplementedFunc), + /* 347 */ SyscallDesc("unused#347", unimplementedFunc), + /* 348 */ SyscallDesc("unused#348", unimplementedFunc), + /* 349 */ SyscallDesc("unused#349", unimplementedFunc), + /* 350 */ SyscallDesc("unused#350", unimplementedFunc), + /* 351 */ SyscallDesc("unused#351", unimplementedFunc), + /* 352 */ SyscallDesc("unused#352", unimplementedFunc), + /* 353 */ SyscallDesc("unused#353", unimplementedFunc), + /* 354 */ SyscallDesc("unused#354", unimplementedFunc), + /* 355 */ SyscallDesc("unused#355", unimplementedFunc), + /* 356 */ SyscallDesc("unused#356", unimplementedFunc), + /* 357 */ SyscallDesc("unused#357", unimplementedFunc), + /* 358 */ SyscallDesc("unused#358", unimplementedFunc), + /* 359 */ SyscallDesc("unused#359", unimplementedFunc), + /* 360 */ SyscallDesc("unused#360", unimplementedFunc), + /* 361 */ SyscallDesc("unused#361", unimplementedFunc), + /* 362 */ SyscallDesc("unused#362", unimplementedFunc), + /* 363 */ SyscallDesc("unused#363", unimplementedFunc), + /* 364 */ SyscallDesc("unused#364", unimplementedFunc), + /* 365 */ SyscallDesc("unused#365", unimplementedFunc), + /* 366 */ SyscallDesc("unused#366", unimplementedFunc), + /* 367 */ SyscallDesc("unused#367", unimplementedFunc), + /* 368 */ SyscallDesc("unused#368", unimplementedFunc), + /* 369 */ SyscallDesc("unused#369", unimplementedFunc), + /* 370 */ SyscallDesc("unused#370", unimplementedFunc), + /* 371 */ SyscallDesc("unused#371", unimplementedFunc), + /* 372 */ SyscallDesc("unused#372", unimplementedFunc), + /* 373 */ SyscallDesc("unused#373", unimplementedFunc), + /* 374 */ SyscallDesc("unused#374", unimplementedFunc), + /* 375 */ SyscallDesc("unused#375", unimplementedFunc), + /* 376 */ SyscallDesc("unused#376", unimplementedFunc), + /* 377 */ SyscallDesc("unused#377", unimplementedFunc), + /* 
378 */ SyscallDesc("unused#378", unimplementedFunc), + /* 379 */ SyscallDesc("unused#379", unimplementedFunc), + /* 380 */ SyscallDesc("unused#380", unimplementedFunc), + /* 381 */ SyscallDesc("unused#381", unimplementedFunc), + /* 382 */ SyscallDesc("unused#382", unimplementedFunc), + /* 383 */ SyscallDesc("unused#383", unimplementedFunc), + /* 384 */ SyscallDesc("unused#384", unimplementedFunc), + /* 385 */ SyscallDesc("unused#385", unimplementedFunc), + /* 386 */ SyscallDesc("unused#386", unimplementedFunc), + /* 387 */ SyscallDesc("unused#387", unimplementedFunc), + /* 388 */ SyscallDesc("unused#388", unimplementedFunc), + /* 389 */ SyscallDesc("unused#389", unimplementedFunc), + /* 390 */ SyscallDesc("unused#390", unimplementedFunc), + /* 391 */ SyscallDesc("unused#391", unimplementedFunc), + /* 392 */ SyscallDesc("unused#392", unimplementedFunc), + /* 393 */ SyscallDesc("unused#393", unimplementedFunc), + /* 394 */ SyscallDesc("unused#394", unimplementedFunc), + /* 395 */ SyscallDesc("unused#395", unimplementedFunc), + /* 396 */ SyscallDesc("unused#396", unimplementedFunc), + /* 397 */ SyscallDesc("unused#397", unimplementedFunc), + /* 398 */ SyscallDesc("unused#398", unimplementedFunc), + /* 399 */ SyscallDesc("unused#399", unimplementedFunc), + /* 400 */ SyscallDesc("unused#400", unimplementedFunc), + /* 401 */ SyscallDesc("unused#401", unimplementedFunc), + /* 402 */ SyscallDesc("unused#402", unimplementedFunc), + /* 403 */ SyscallDesc("unused#403", unimplementedFunc), + /* 404 */ SyscallDesc("unused#404", unimplementedFunc), + /* 405 */ SyscallDesc("unused#405", unimplementedFunc), + /* 406 */ SyscallDesc("unused#406", unimplementedFunc), + /* 407 */ SyscallDesc("unused#407", unimplementedFunc), + /* 408 */ SyscallDesc("unused#408", unimplementedFunc), + /* 409 */ SyscallDesc("unused#409", unimplementedFunc), + /* 410 */ SyscallDesc("unused#410", unimplementedFunc), + /* 411 */ SyscallDesc("unused#411", unimplementedFunc), + /* 412 */ 
SyscallDesc("unused#412", unimplementedFunc), + /* 413 */ SyscallDesc("unused#413", unimplementedFunc), + /* 414 */ SyscallDesc("unused#414", unimplementedFunc), + /* 415 */ SyscallDesc("unused#415", unimplementedFunc), + /* 416 */ SyscallDesc("unused#416", unimplementedFunc), + /* 417 */ SyscallDesc("unused#417", unimplementedFunc), + /* 418 */ SyscallDesc("unused#418", unimplementedFunc), + /* 419 */ SyscallDesc("unused#419", unimplementedFunc), + /* 420 */ SyscallDesc("unused#420", unimplementedFunc), + /* 421 */ SyscallDesc("unused#421", unimplementedFunc), + /* 422 */ SyscallDesc("unused#422", unimplementedFunc), + /* 423 */ SyscallDesc("unused#423", unimplementedFunc), + /* 424 */ SyscallDesc("unused#424", unimplementedFunc), + /* 425 */ SyscallDesc("unused#425", unimplementedFunc), + /* 426 */ SyscallDesc("unused#426", unimplementedFunc), + /* 427 */ SyscallDesc("unused#427", unimplementedFunc), + /* 428 */ SyscallDesc("unused#428", unimplementedFunc), + /* 429 */ SyscallDesc("unused#429", unimplementedFunc), + /* 430 */ SyscallDesc("unused#430", unimplementedFunc), + /* 431 */ SyscallDesc("unused#431", unimplementedFunc), + /* 432 */ SyscallDesc("unused#432", unimplementedFunc), + /* 433 */ SyscallDesc("unused#433", unimplementedFunc), + /* 434 */ SyscallDesc("unused#434", unimplementedFunc), + /* 435 */ SyscallDesc("unused#435", unimplementedFunc), + /* 436 */ SyscallDesc("unused#436", unimplementedFunc), + /* 437 */ SyscallDesc("unused#437", unimplementedFunc), + /* 438 */ SyscallDesc("unused#438", unimplementedFunc), + /* 439 */ SyscallDesc("unused#439", unimplementedFunc), + /* 440 */ SyscallDesc("unused#440", unimplementedFunc), + /* 441 */ SyscallDesc("unused#441", unimplementedFunc), + /* 442 */ SyscallDesc("unused#442", unimplementedFunc), + /* 443 */ SyscallDesc("unused#443", unimplementedFunc), + /* 444 */ SyscallDesc("unused#444", unimplementedFunc), + /* 445 */ SyscallDesc("unused#445", unimplementedFunc), + /* 446 */ SyscallDesc("unused#446", 
unimplementedFunc), + /* 447 */ SyscallDesc("unused#447", unimplementedFunc), + /* 448 */ SyscallDesc("unused#448", unimplementedFunc), + /* 449 */ SyscallDesc("unused#449", unimplementedFunc), + /* 450 */ SyscallDesc("unused#450", unimplementedFunc), + /* 451 */ SyscallDesc("unused#451", unimplementedFunc), + /* 452 */ SyscallDesc("unused#452", unimplementedFunc), + /* 453 */ SyscallDesc("unused#453", unimplementedFunc), + /* 454 */ SyscallDesc("unused#454", unimplementedFunc), + /* 455 */ SyscallDesc("unused#455", unimplementedFunc), + /* 456 */ SyscallDesc("unused#456", unimplementedFunc), + /* 457 */ SyscallDesc("unused#457", unimplementedFunc), + /* 458 */ SyscallDesc("unused#458", unimplementedFunc), + /* 459 */ SyscallDesc("unused#459", unimplementedFunc), + /* 460 */ SyscallDesc("unused#460", unimplementedFunc), + /* 461 */ SyscallDesc("unused#461", unimplementedFunc), + /* 462 */ SyscallDesc("unused#462", unimplementedFunc), + /* 463 */ SyscallDesc("unused#463", unimplementedFunc), + /* 464 */ SyscallDesc("unused#464", unimplementedFunc), + /* 465 */ SyscallDesc("unused#465", unimplementedFunc), + /* 466 */ SyscallDesc("unused#466", unimplementedFunc), + /* 467 */ SyscallDesc("unused#467", unimplementedFunc), + /* 468 */ SyscallDesc("unused#468", unimplementedFunc), + /* 469 */ SyscallDesc("unused#469", unimplementedFunc), + /* 470 */ SyscallDesc("unused#470", unimplementedFunc), + /* 471 */ SyscallDesc("unused#471", unimplementedFunc), + /* 472 */ SyscallDesc("unused#472", unimplementedFunc), + /* 473 */ SyscallDesc("unused#473", unimplementedFunc), + /* 474 */ SyscallDesc("unused#474", unimplementedFunc), + /* 475 */ SyscallDesc("unused#475", unimplementedFunc), + /* 476 */ SyscallDesc("unused#476", unimplementedFunc), + /* 477 */ SyscallDesc("unused#477", unimplementedFunc), + /* 478 */ SyscallDesc("unused#478", unimplementedFunc), + /* 479 */ SyscallDesc("unused#479", unimplementedFunc), + /* 480 */ SyscallDesc("unused#480", unimplementedFunc), + /* 
481 */ SyscallDesc("unused#481", unimplementedFunc), + /* 482 */ SyscallDesc("unused#482", unimplementedFunc), + /* 483 */ SyscallDesc("unused#483", unimplementedFunc), + /* 484 */ SyscallDesc("unused#484", unimplementedFunc), + /* 485 */ SyscallDesc("unused#485", unimplementedFunc), + /* 486 */ SyscallDesc("unused#486", unimplementedFunc), + /* 487 */ SyscallDesc("unused#487", unimplementedFunc), + /* 488 */ SyscallDesc("unused#488", unimplementedFunc), + /* 489 */ SyscallDesc("unused#489", unimplementedFunc), + /* 490 */ SyscallDesc("unused#490", unimplementedFunc), + /* 491 */ SyscallDesc("unused#491", unimplementedFunc), + /* 492 */ SyscallDesc("unused#492", unimplementedFunc), + /* 493 */ SyscallDesc("unused#493", unimplementedFunc), + /* 494 */ SyscallDesc("unused#494", unimplementedFunc), + /* 495 */ SyscallDesc("unused#495", unimplementedFunc), + /* 496 */ SyscallDesc("unused#496", unimplementedFunc), + /* 497 */ SyscallDesc("unused#497", unimplementedFunc), + /* 498 */ SyscallDesc("unused#498", unimplementedFunc), + /* 499 */ SyscallDesc("unused#499", unimplementedFunc), + /* 500 */ SyscallDesc("unused#500", unimplementedFunc), + /* 501 */ SyscallDesc("unused#501", unimplementedFunc), + /* 502 */ SyscallDesc("unused#502", unimplementedFunc), + /* 503 */ SyscallDesc("unused#503", unimplementedFunc), + /* 504 */ SyscallDesc("unused#504", unimplementedFunc), + /* 505 */ SyscallDesc("unused#505", unimplementedFunc), + /* 506 */ SyscallDesc("unused#506", unimplementedFunc), + /* 507 */ SyscallDesc("unused#507", unimplementedFunc), + /* 508 */ SyscallDesc("unused#508", unimplementedFunc), + /* 509 */ SyscallDesc("unused#509", unimplementedFunc), + /* 510 */ SyscallDesc("unused#510", unimplementedFunc), + /* 511 */ SyscallDesc("unused#511", unimplementedFunc), + /* 512 */ SyscallDesc("unused#512", unimplementedFunc), + /* 513 */ SyscallDesc("unused#513", unimplementedFunc), + /* 514 */ SyscallDesc("unused#514", unimplementedFunc), + /* 515 */ 
SyscallDesc("unused#515", unimplementedFunc), + /* 516 */ SyscallDesc("unused#516", unimplementedFunc), + /* 517 */ SyscallDesc("unused#517", unimplementedFunc), + /* 518 */ SyscallDesc("unused#518", unimplementedFunc), + /* 519 */ SyscallDesc("unused#519", unimplementedFunc), + /* 520 */ SyscallDesc("unused#520", unimplementedFunc), + /* 521 */ SyscallDesc("unused#521", unimplementedFunc), + /* 522 */ SyscallDesc("unused#522", unimplementedFunc), + /* 523 */ SyscallDesc("unused#523", unimplementedFunc), + /* 524 */ SyscallDesc("unused#524", unimplementedFunc), + /* 525 */ SyscallDesc("unused#525", unimplementedFunc), + /* 526 */ SyscallDesc("unused#526", unimplementedFunc), + /* 527 */ SyscallDesc("unused#527", unimplementedFunc), + /* 528 */ SyscallDesc("unused#528", unimplementedFunc), + /* 529 */ SyscallDesc("unused#529", unimplementedFunc), + /* 530 */ SyscallDesc("unused#530", unimplementedFunc), + /* 531 */ SyscallDesc("unused#531", unimplementedFunc), + /* 532 */ SyscallDesc("unused#532", unimplementedFunc), + /* 533 */ SyscallDesc("unused#533", unimplementedFunc), + /* 534 */ SyscallDesc("unused#534", unimplementedFunc), + /* 535 */ SyscallDesc("unused#535", unimplementedFunc), + /* 536 */ SyscallDesc("unused#536", unimplementedFunc), + /* 537 */ SyscallDesc("unused#537", unimplementedFunc), + /* 538 */ SyscallDesc("unused#538", unimplementedFunc), + /* 539 */ SyscallDesc("unused#539", unimplementedFunc), + /* 540 */ SyscallDesc("unused#540", unimplementedFunc), + /* 541 */ SyscallDesc("unused#541", unimplementedFunc), + /* 542 */ SyscallDesc("unused#542", unimplementedFunc), + /* 543 */ SyscallDesc("unused#543", unimplementedFunc), + /* 544 */ SyscallDesc("unused#544", unimplementedFunc), + /* 545 */ SyscallDesc("unused#545", unimplementedFunc), + /* 546 */ SyscallDesc("unused#546", unimplementedFunc), + /* 547 */ SyscallDesc("unused#547", unimplementedFunc), + /* 548 */ SyscallDesc("unused#548", unimplementedFunc), + /* 549 */ SyscallDesc("unused#549", 
unimplementedFunc), + /* 550 */ SyscallDesc("unused#550", unimplementedFunc), + /* 551 */ SyscallDesc("unused#551", unimplementedFunc), + /* 552 */ SyscallDesc("unused#552", unimplementedFunc), + /* 553 */ SyscallDesc("unused#553", unimplementedFunc), + /* 554 */ SyscallDesc("unused#554", unimplementedFunc), + /* 555 */ SyscallDesc("unused#555", unimplementedFunc), + /* 556 */ SyscallDesc("unused#556", unimplementedFunc), + /* 557 */ SyscallDesc("unused#557", unimplementedFunc), + /* 558 */ SyscallDesc("unused#558", unimplementedFunc), + /* 559 */ SyscallDesc("unused#559", unimplementedFunc), + /* 560 */ SyscallDesc("unused#560", unimplementedFunc), + /* 561 */ SyscallDesc("unused#561", unimplementedFunc), + /* 562 */ SyscallDesc("unused#562", unimplementedFunc), + /* 563 */ SyscallDesc("unused#563", unimplementedFunc), + /* 564 */ SyscallDesc("unused#564", unimplementedFunc), + /* 565 */ SyscallDesc("unused#565", unimplementedFunc), + /* 566 */ SyscallDesc("unused#566", unimplementedFunc), + /* 567 */ SyscallDesc("unused#567", unimplementedFunc), + /* 568 */ SyscallDesc("unused#568", unimplementedFunc), + /* 569 */ SyscallDesc("unused#569", unimplementedFunc), + /* 570 */ SyscallDesc("unused#570", unimplementedFunc), + /* 571 */ SyscallDesc("unused#571", unimplementedFunc), + /* 572 */ SyscallDesc("unused#572", unimplementedFunc), + /* 573 */ SyscallDesc("unused#573", unimplementedFunc), + /* 574 */ SyscallDesc("unused#574", unimplementedFunc), + /* 575 */ SyscallDesc("unused#575", unimplementedFunc), + /* 576 */ SyscallDesc("unused#576", unimplementedFunc), + /* 577 */ SyscallDesc("unused#577", unimplementedFunc), + /* 578 */ SyscallDesc("unused#578", unimplementedFunc), + /* 579 */ SyscallDesc("unused#579", unimplementedFunc), + /* 580 */ SyscallDesc("unused#580", unimplementedFunc), + /* 581 */ SyscallDesc("unused#581", unimplementedFunc), + /* 582 */ SyscallDesc("unused#582", unimplementedFunc), + /* 583 */ SyscallDesc("unused#583", unimplementedFunc), + /* 
584 */ SyscallDesc("unused#584", unimplementedFunc), + /* 585 */ SyscallDesc("unused#585", unimplementedFunc), + /* 586 */ SyscallDesc("unused#586", unimplementedFunc), + /* 587 */ SyscallDesc("unused#587", unimplementedFunc), + /* 588 */ SyscallDesc("unused#588", unimplementedFunc), + /* 589 */ SyscallDesc("unused#589", unimplementedFunc), + /* 590 */ SyscallDesc("unused#590", unimplementedFunc), + /* 591 */ SyscallDesc("unused#591", unimplementedFunc), + /* 592 */ SyscallDesc("unused#592", unimplementedFunc), + /* 593 */ SyscallDesc("unused#593", unimplementedFunc), + /* 594 */ SyscallDesc("unused#594", unimplementedFunc), + /* 595 */ SyscallDesc("unused#595", unimplementedFunc), + /* 596 */ SyscallDesc("unused#596", unimplementedFunc), + /* 597 */ SyscallDesc("unused#597", unimplementedFunc), + /* 598 */ SyscallDesc("unused#598", unimplementedFunc), + /* 599 */ SyscallDesc("unused#599", unimplementedFunc), + /* 600 */ SyscallDesc("unused#600", unimplementedFunc), + /* 601 */ SyscallDesc("unused#601", unimplementedFunc), + /* 602 */ SyscallDesc("unused#602", unimplementedFunc), + /* 603 */ SyscallDesc("unused#603", unimplementedFunc), + /* 604 */ SyscallDesc("unused#604", unimplementedFunc), + /* 605 */ SyscallDesc("unused#605", unimplementedFunc), + /* 606 */ SyscallDesc("unused#606", unimplementedFunc), + /* 607 */ SyscallDesc("unused#607", unimplementedFunc), + /* 608 */ SyscallDesc("unused#608", unimplementedFunc), + /* 609 */ SyscallDesc("unused#609", unimplementedFunc), + /* 610 */ SyscallDesc("unused#610", unimplementedFunc), + /* 611 */ SyscallDesc("unused#611", unimplementedFunc), + /* 612 */ SyscallDesc("unused#612", unimplementedFunc), + /* 613 */ SyscallDesc("unused#613", unimplementedFunc), + /* 614 */ SyscallDesc("unused#614", unimplementedFunc), + /* 615 */ SyscallDesc("unused#615", unimplementedFunc), + /* 616 */ SyscallDesc("unused#616", unimplementedFunc), + /* 617 */ SyscallDesc("unused#617", unimplementedFunc), + /* 618 */ 
SyscallDesc("unused#618", unimplementedFunc), + /* 619 */ SyscallDesc("unused#619", unimplementedFunc), + /* 620 */ SyscallDesc("unused#620", unimplementedFunc), + /* 621 */ SyscallDesc("unused#621", unimplementedFunc), + /* 622 */ SyscallDesc("unused#622", unimplementedFunc), + /* 623 */ SyscallDesc("unused#623", unimplementedFunc), + /* 624 */ SyscallDesc("unused#624", unimplementedFunc), + /* 625 */ SyscallDesc("unused#625", unimplementedFunc), + /* 626 */ SyscallDesc("unused#626", unimplementedFunc), + /* 627 */ SyscallDesc("unused#627", unimplementedFunc), + /* 628 */ SyscallDesc("unused#628", unimplementedFunc), + /* 629 */ SyscallDesc("unused#629", unimplementedFunc), + /* 630 */ SyscallDesc("unused#630", unimplementedFunc), + /* 631 */ SyscallDesc("unused#631", unimplementedFunc), + /* 632 */ SyscallDesc("unused#632", unimplementedFunc), + /* 633 */ SyscallDesc("unused#633", unimplementedFunc), + /* 634 */ SyscallDesc("unused#634", unimplementedFunc), + /* 635 */ SyscallDesc("unused#635", unimplementedFunc), + /* 636 */ SyscallDesc("unused#636", unimplementedFunc), + /* 637 */ SyscallDesc("unused#637", unimplementedFunc), + /* 638 */ SyscallDesc("unused#638", unimplementedFunc), + /* 639 */ SyscallDesc("unused#639", unimplementedFunc), + /* 640 */ SyscallDesc("unused#640", unimplementedFunc), + /* 641 */ SyscallDesc("unused#641", unimplementedFunc), + /* 642 */ SyscallDesc("unused#642", unimplementedFunc), + /* 643 */ SyscallDesc("unused#643", unimplementedFunc), + /* 644 */ SyscallDesc("unused#644", unimplementedFunc), + /* 645 */ SyscallDesc("unused#645", unimplementedFunc), + /* 646 */ SyscallDesc("unused#646", unimplementedFunc), + /* 647 */ SyscallDesc("unused#647", unimplementedFunc), + /* 648 */ SyscallDesc("unused#648", unimplementedFunc), + /* 649 */ SyscallDesc("unused#649", unimplementedFunc), + /* 650 */ SyscallDesc("unused#650", unimplementedFunc), + /* 651 */ SyscallDesc("unused#651", unimplementedFunc), + /* 652 */ SyscallDesc("unused#652", 
unimplementedFunc), + /* 653 */ SyscallDesc("unused#653", unimplementedFunc), + /* 654 */ SyscallDesc("unused#654", unimplementedFunc), + /* 655 */ SyscallDesc("unused#655", unimplementedFunc), + /* 656 */ SyscallDesc("unused#656", unimplementedFunc), + /* 657 */ SyscallDesc("unused#657", unimplementedFunc), + /* 658 */ SyscallDesc("unused#658", unimplementedFunc), + /* 659 */ SyscallDesc("unused#659", unimplementedFunc), + /* 660 */ SyscallDesc("unused#660", unimplementedFunc), + /* 661 */ SyscallDesc("unused#661", unimplementedFunc), + /* 662 */ SyscallDesc("unused#662", unimplementedFunc), + /* 663 */ SyscallDesc("unused#663", unimplementedFunc), + /* 664 */ SyscallDesc("unused#664", unimplementedFunc), + /* 665 */ SyscallDesc("unused#665", unimplementedFunc), + /* 666 */ SyscallDesc("unused#666", unimplementedFunc), + /* 667 */ SyscallDesc("unused#667", unimplementedFunc), + /* 668 */ SyscallDesc("unused#668", unimplementedFunc), + /* 669 */ SyscallDesc("unused#669", unimplementedFunc), + /* 670 */ SyscallDesc("unused#670", unimplementedFunc), + /* 671 */ SyscallDesc("unused#671", unimplementedFunc), + /* 672 */ SyscallDesc("unused#672", unimplementedFunc), + /* 673 */ SyscallDesc("unused#673", unimplementedFunc), + /* 674 */ SyscallDesc("unused#674", unimplementedFunc), + /* 675 */ SyscallDesc("unused#675", unimplementedFunc), + /* 676 */ SyscallDesc("unused#676", unimplementedFunc), + /* 677 */ SyscallDesc("unused#677", unimplementedFunc), + /* 678 */ SyscallDesc("unused#678", unimplementedFunc), + /* 679 */ SyscallDesc("unused#679", unimplementedFunc), + /* 680 */ SyscallDesc("unused#680", unimplementedFunc), + /* 681 */ SyscallDesc("unused#681", unimplementedFunc), + /* 682 */ SyscallDesc("unused#682", unimplementedFunc), + /* 683 */ SyscallDesc("unused#683", unimplementedFunc), + /* 684 */ SyscallDesc("unused#684", unimplementedFunc), + /* 685 */ SyscallDesc("unused#685", unimplementedFunc), + /* 686 */ SyscallDesc("unused#686", unimplementedFunc), + /* 
687 */ SyscallDesc("unused#687", unimplementedFunc), + /* 688 */ SyscallDesc("unused#688", unimplementedFunc), + /* 689 */ SyscallDesc("unused#689", unimplementedFunc), + /* 690 */ SyscallDesc("unused#690", unimplementedFunc), + /* 691 */ SyscallDesc("unused#691", unimplementedFunc), + /* 692 */ SyscallDesc("unused#692", unimplementedFunc), + /* 693 */ SyscallDesc("unused#693", unimplementedFunc), + /* 694 */ SyscallDesc("unused#694", unimplementedFunc), + /* 695 */ SyscallDesc("unused#695", unimplementedFunc), + /* 696 */ SyscallDesc("unused#696", unimplementedFunc), + /* 697 */ SyscallDesc("unused#697", unimplementedFunc), + /* 698 */ SyscallDesc("unused#698", unimplementedFunc), + /* 699 */ SyscallDesc("unused#699", unimplementedFunc), + /* 700 */ SyscallDesc("unused#700", unimplementedFunc), + /* 701 */ SyscallDesc("unused#701", unimplementedFunc), + /* 702 */ SyscallDesc("unused#702", unimplementedFunc), + /* 703 */ SyscallDesc("unused#703", unimplementedFunc), + /* 704 */ SyscallDesc("unused#704", unimplementedFunc), + /* 705 */ SyscallDesc("unused#705", unimplementedFunc), + /* 706 */ SyscallDesc("unused#706", unimplementedFunc), + /* 707 */ SyscallDesc("unused#707", unimplementedFunc), + /* 708 */ SyscallDesc("unused#708", unimplementedFunc), + /* 709 */ SyscallDesc("unused#709", unimplementedFunc), + /* 710 */ SyscallDesc("unused#710", unimplementedFunc), + /* 711 */ SyscallDesc("unused#711", unimplementedFunc), + /* 712 */ SyscallDesc("unused#712", unimplementedFunc), + /* 713 */ SyscallDesc("unused#713", unimplementedFunc), + /* 714 */ SyscallDesc("unused#714", unimplementedFunc), + /* 715 */ SyscallDesc("unused#715", unimplementedFunc), + /* 716 */ SyscallDesc("unused#716", unimplementedFunc), + /* 717 */ SyscallDesc("unused#717", unimplementedFunc), + /* 718 */ SyscallDesc("unused#718", unimplementedFunc), + /* 719 */ SyscallDesc("unused#719", unimplementedFunc), + /* 720 */ SyscallDesc("unused#720", unimplementedFunc), + /* 721 */ 
SyscallDesc("unused#721", unimplementedFunc), + /* 722 */ SyscallDesc("unused#722", unimplementedFunc), + /* 723 */ SyscallDesc("unused#723", unimplementedFunc), + /* 724 */ SyscallDesc("unused#724", unimplementedFunc), + /* 725 */ SyscallDesc("unused#725", unimplementedFunc), + /* 726 */ SyscallDesc("unused#726", unimplementedFunc), + /* 727 */ SyscallDesc("unused#727", unimplementedFunc), + /* 728 */ SyscallDesc("unused#728", unimplementedFunc), + /* 729 */ SyscallDesc("unused#729", unimplementedFunc), + /* 730 */ SyscallDesc("unused#730", unimplementedFunc), + /* 731 */ SyscallDesc("unused#731", unimplementedFunc), + /* 732 */ SyscallDesc("unused#732", unimplementedFunc), + /* 733 */ SyscallDesc("unused#733", unimplementedFunc), + /* 734 */ SyscallDesc("unused#734", unimplementedFunc), + /* 735 */ SyscallDesc("unused#735", unimplementedFunc), + /* 736 */ SyscallDesc("unused#736", unimplementedFunc), + /* 737 */ SyscallDesc("unused#737", unimplementedFunc), + /* 738 */ SyscallDesc("unused#738", unimplementedFunc), + /* 739 */ SyscallDesc("unused#739", unimplementedFunc), + /* 740 */ SyscallDesc("unused#740", unimplementedFunc), + /* 741 */ SyscallDesc("unused#741", unimplementedFunc), + /* 742 */ SyscallDesc("unused#742", unimplementedFunc), + /* 743 */ SyscallDesc("unused#743", unimplementedFunc), + /* 744 */ SyscallDesc("unused#744", unimplementedFunc), + /* 745 */ SyscallDesc("unused#745", unimplementedFunc), + /* 746 */ SyscallDesc("unused#746", unimplementedFunc), + /* 747 */ SyscallDesc("unused#747", unimplementedFunc), + /* 748 */ SyscallDesc("unused#748", unimplementedFunc), + /* 749 */ SyscallDesc("unused#749", unimplementedFunc), + /* 750 */ SyscallDesc("unused#750", unimplementedFunc), + /* 751 */ SyscallDesc("unused#751", unimplementedFunc), + /* 752 */ SyscallDesc("unused#752", unimplementedFunc), + /* 753 */ SyscallDesc("unused#753", unimplementedFunc), + /* 754 */ SyscallDesc("unused#754", unimplementedFunc), + /* 755 */ SyscallDesc("unused#755", 
unimplementedFunc), + /* 756 */ SyscallDesc("unused#756", unimplementedFunc), + /* 757 */ SyscallDesc("unused#757", unimplementedFunc), + /* 758 */ SyscallDesc("unused#758", unimplementedFunc), + /* 759 */ SyscallDesc("unused#759", unimplementedFunc), + /* 760 */ SyscallDesc("unused#760", unimplementedFunc), + /* 761 */ SyscallDesc("unused#761", unimplementedFunc), + /* 762 */ SyscallDesc("unused#762", unimplementedFunc), + /* 763 */ SyscallDesc("unused#763", unimplementedFunc), + /* 764 */ SyscallDesc("unused#764", unimplementedFunc), + /* 765 */ SyscallDesc("unused#765", unimplementedFunc), + /* 766 */ SyscallDesc("unused#766", unimplementedFunc), + /* 767 */ SyscallDesc("unused#767", unimplementedFunc), + /* 768 */ SyscallDesc("unused#768", unimplementedFunc), + /* 769 */ SyscallDesc("unused#769", unimplementedFunc), + /* 770 */ SyscallDesc("unused#770", unimplementedFunc), + /* 771 */ SyscallDesc("unused#771", unimplementedFunc), + /* 772 */ SyscallDesc("unused#772", unimplementedFunc), + /* 773 */ SyscallDesc("unused#773", unimplementedFunc), + /* 774 */ SyscallDesc("unused#774", unimplementedFunc), + /* 775 */ SyscallDesc("unused#775", unimplementedFunc), + /* 776 */ SyscallDesc("unused#776", unimplementedFunc), + /* 777 */ SyscallDesc("unused#777", unimplementedFunc), + /* 778 */ SyscallDesc("unused#778", unimplementedFunc), + /* 779 */ SyscallDesc("unused#779", unimplementedFunc), + /* 780 */ SyscallDesc("unused#780", unimplementedFunc), + /* 781 */ SyscallDesc("unused#781", unimplementedFunc), + /* 782 */ SyscallDesc("unused#782", unimplementedFunc), + /* 783 */ SyscallDesc("unused#783", unimplementedFunc), + /* 784 */ SyscallDesc("unused#784", unimplementedFunc), + /* 785 */ SyscallDesc("unused#785", unimplementedFunc), + /* 786 */ SyscallDesc("unused#786", unimplementedFunc), + /* 787 */ SyscallDesc("unused#787", unimplementedFunc), + /* 788 */ SyscallDesc("unused#788", unimplementedFunc), + /* 789 */ SyscallDesc("unused#789", unimplementedFunc), + /* 
790 */ SyscallDesc("unused#790", unimplementedFunc), + /* 791 */ SyscallDesc("unused#791", unimplementedFunc), + /* 792 */ SyscallDesc("unused#792", unimplementedFunc), + /* 793 */ SyscallDesc("unused#793", unimplementedFunc), + /* 794 */ SyscallDesc("unused#794", unimplementedFunc), + /* 795 */ SyscallDesc("unused#795", unimplementedFunc), + /* 796 */ SyscallDesc("unused#796", unimplementedFunc), + /* 797 */ SyscallDesc("unused#797", unimplementedFunc), + /* 798 */ SyscallDesc("unused#798", unimplementedFunc), + /* 799 */ SyscallDesc("unused#799", unimplementedFunc), + /* 800 */ SyscallDesc("unused#800", unimplementedFunc), + /* 801 */ SyscallDesc("unused#801", unimplementedFunc), + /* 802 */ SyscallDesc("unused#802", unimplementedFunc), + /* 803 */ SyscallDesc("unused#803", unimplementedFunc), + /* 804 */ SyscallDesc("unused#804", unimplementedFunc), + /* 805 */ SyscallDesc("unused#805", unimplementedFunc), + /* 806 */ SyscallDesc("unused#806", unimplementedFunc), + /* 807 */ SyscallDesc("unused#807", unimplementedFunc), + /* 808 */ SyscallDesc("unused#808", unimplementedFunc), + /* 809 */ SyscallDesc("unused#809", unimplementedFunc), + /* 810 */ SyscallDesc("unused#810", unimplementedFunc), + /* 811 */ SyscallDesc("unused#811", unimplementedFunc), + /* 812 */ SyscallDesc("unused#812", unimplementedFunc), + /* 813 */ SyscallDesc("unused#813", unimplementedFunc), + /* 814 */ SyscallDesc("unused#814", unimplementedFunc), + /* 815 */ SyscallDesc("unused#815", unimplementedFunc), + /* 816 */ SyscallDesc("unused#816", unimplementedFunc), + /* 817 */ SyscallDesc("unused#817", unimplementedFunc), + /* 818 */ SyscallDesc("unused#818", unimplementedFunc), + /* 819 */ SyscallDesc("unused#819", unimplementedFunc), + /* 820 */ SyscallDesc("unused#820", unimplementedFunc), + /* 821 */ SyscallDesc("unused#821", unimplementedFunc), + /* 822 */ SyscallDesc("unused#822", unimplementedFunc), + /* 823 */ SyscallDesc("unused#823", unimplementedFunc), + /* 824 */ 
SyscallDesc("unused#824", unimplementedFunc), + /* 825 */ SyscallDesc("unused#825", unimplementedFunc), + /* 826 */ SyscallDesc("unused#826", unimplementedFunc), + /* 827 */ SyscallDesc("unused#827", unimplementedFunc), + /* 828 */ SyscallDesc("unused#828", unimplementedFunc), + /* 829 */ SyscallDesc("unused#829", unimplementedFunc), + /* 830 */ SyscallDesc("unused#830", unimplementedFunc), + /* 831 */ SyscallDesc("unused#831", unimplementedFunc), + /* 832 */ SyscallDesc("unused#832", unimplementedFunc), + /* 833 */ SyscallDesc("unused#833", unimplementedFunc), + /* 834 */ SyscallDesc("unused#834", unimplementedFunc), + /* 835 */ SyscallDesc("unused#835", unimplementedFunc), + /* 836 */ SyscallDesc("unused#836", unimplementedFunc), + /* 837 */ SyscallDesc("unused#837", unimplementedFunc), + /* 838 */ SyscallDesc("unused#838", unimplementedFunc), + /* 839 */ SyscallDesc("unused#839", unimplementedFunc), + /* 840 */ SyscallDesc("unused#840", unimplementedFunc), + /* 841 */ SyscallDesc("unused#841", unimplementedFunc), + /* 842 */ SyscallDesc("unused#842", unimplementedFunc), + /* 843 */ SyscallDesc("unused#843", unimplementedFunc), + /* 844 */ SyscallDesc("unused#844", unimplementedFunc), + /* 845 */ SyscallDesc("unused#845", unimplementedFunc), + /* 846 */ SyscallDesc("unused#846", unimplementedFunc), + /* 847 */ SyscallDesc("unused#847", unimplementedFunc), + /* 848 */ SyscallDesc("unused#848", unimplementedFunc), + /* 849 */ SyscallDesc("unused#849", unimplementedFunc), + /* 850 */ SyscallDesc("unused#850", unimplementedFunc), + /* 851 */ SyscallDesc("unused#851", unimplementedFunc), + /* 852 */ SyscallDesc("unused#852", unimplementedFunc), + /* 853 */ SyscallDesc("unused#853", unimplementedFunc), + /* 854 */ SyscallDesc("unused#854", unimplementedFunc), + /* 855 */ SyscallDesc("unused#855", unimplementedFunc), + /* 856 */ SyscallDesc("unused#856", unimplementedFunc), + /* 857 */ SyscallDesc("unused#857", unimplementedFunc), + /* 858 */ SyscallDesc("unused#858", 
unimplementedFunc), + /* 859 */ SyscallDesc("unused#859", unimplementedFunc), + /* 860 */ SyscallDesc("unused#860", unimplementedFunc), + /* 861 */ SyscallDesc("unused#861", unimplementedFunc), + /* 862 */ SyscallDesc("unused#862", unimplementedFunc), + /* 863 */ SyscallDesc("unused#863", unimplementedFunc), + /* 864 */ SyscallDesc("unused#864", unimplementedFunc), + /* 865 */ SyscallDesc("unused#865", unimplementedFunc), + /* 866 */ SyscallDesc("unused#866", unimplementedFunc), + /* 867 */ SyscallDesc("unused#867", unimplementedFunc), + /* 868 */ SyscallDesc("unused#868", unimplementedFunc), + /* 869 */ SyscallDesc("unused#869", unimplementedFunc), + /* 870 */ SyscallDesc("unused#870", unimplementedFunc), + /* 871 */ SyscallDesc("unused#871", unimplementedFunc), + /* 872 */ SyscallDesc("unused#872", unimplementedFunc), + /* 873 */ SyscallDesc("unused#873", unimplementedFunc), + /* 874 */ SyscallDesc("unused#874", unimplementedFunc), + /* 875 */ SyscallDesc("unused#875", unimplementedFunc), + /* 876 */ SyscallDesc("unused#876", unimplementedFunc), + /* 877 */ SyscallDesc("unused#877", unimplementedFunc), + /* 878 */ SyscallDesc("unused#878", unimplementedFunc), + /* 879 */ SyscallDesc("unused#879", unimplementedFunc), + /* 880 */ SyscallDesc("unused#880", unimplementedFunc), + /* 881 */ SyscallDesc("unused#881", unimplementedFunc), + /* 882 */ SyscallDesc("unused#882", unimplementedFunc), + /* 883 */ SyscallDesc("unused#883", unimplementedFunc), + /* 884 */ SyscallDesc("unused#884", unimplementedFunc), + /* 885 */ SyscallDesc("unused#885", unimplementedFunc), + /* 886 */ SyscallDesc("unused#886", unimplementedFunc), + /* 887 */ SyscallDesc("unused#887", unimplementedFunc), + /* 888 */ SyscallDesc("unused#888", unimplementedFunc), + /* 889 */ SyscallDesc("unused#889", unimplementedFunc), + /* 890 */ SyscallDesc("unused#890", unimplementedFunc), + /* 891 */ SyscallDesc("unused#891", unimplementedFunc), + /* 892 */ SyscallDesc("unused#892", unimplementedFunc), + /* 
893 */ SyscallDesc("unused#893", unimplementedFunc), + /* 894 */ SyscallDesc("unused#894", unimplementedFunc), + /* 895 */ SyscallDesc("unused#895", unimplementedFunc), + /* 896 */ SyscallDesc("unused#896", unimplementedFunc), + /* 897 */ SyscallDesc("unused#897", unimplementedFunc), + /* 898 */ SyscallDesc("unused#898", unimplementedFunc), + /* 899 */ SyscallDesc("unused#899", unimplementedFunc), + /* 900 */ SyscallDesc("unused#900", unimplementedFunc), + /* 901 */ SyscallDesc("unused#901", unimplementedFunc), + /* 902 */ SyscallDesc("unused#902", unimplementedFunc), + /* 903 */ SyscallDesc("unused#903", unimplementedFunc), + /* 904 */ SyscallDesc("unused#904", unimplementedFunc), + /* 905 */ SyscallDesc("unused#905", unimplementedFunc), + /* 906 */ SyscallDesc("unused#906", unimplementedFunc), + /* 907 */ SyscallDesc("unused#907", unimplementedFunc), + /* 908 */ SyscallDesc("unused#908", unimplementedFunc), + /* 909 */ SyscallDesc("unused#909", unimplementedFunc), + /* 910 */ SyscallDesc("unused#910", unimplementedFunc), + /* 911 */ SyscallDesc("unused#911", unimplementedFunc), + /* 912 */ SyscallDesc("unused#912", unimplementedFunc), + /* 913 */ SyscallDesc("unused#913", unimplementedFunc), + /* 914 */ SyscallDesc("unused#914", unimplementedFunc), + /* 915 */ SyscallDesc("unused#915", unimplementedFunc), + /* 916 */ SyscallDesc("unused#916", unimplementedFunc), + /* 917 */ SyscallDesc("unused#917", unimplementedFunc), + /* 918 */ SyscallDesc("unused#918", unimplementedFunc), + /* 919 */ SyscallDesc("unused#919", unimplementedFunc), + /* 920 */ SyscallDesc("unused#920", unimplementedFunc), + /* 921 */ SyscallDesc("unused#921", unimplementedFunc), + /* 922 */ SyscallDesc("unused#922", unimplementedFunc), + /* 923 */ SyscallDesc("unused#923", unimplementedFunc), + /* 924 */ SyscallDesc("unused#924", unimplementedFunc), + /* 925 */ SyscallDesc("unused#925", unimplementedFunc), + /* 926 */ SyscallDesc("unused#926", unimplementedFunc), + /* 927 */ 
SyscallDesc("unused#927", unimplementedFunc), + /* 928 */ SyscallDesc("unused#928", unimplementedFunc), + /* 929 */ SyscallDesc("unused#929", unimplementedFunc), + /* 930 */ SyscallDesc("unused#930", unimplementedFunc), + /* 931 */ SyscallDesc("unused#931", unimplementedFunc), + /* 932 */ SyscallDesc("unused#932", unimplementedFunc), + /* 933 */ SyscallDesc("unused#933", unimplementedFunc), + /* 934 */ SyscallDesc("unused#934", unimplementedFunc), + /* 935 */ SyscallDesc("unused#935", unimplementedFunc), + /* 936 */ SyscallDesc("unused#936", unimplementedFunc), + /* 937 */ SyscallDesc("unused#937", unimplementedFunc), + /* 938 */ SyscallDesc("unused#938", unimplementedFunc), + /* 939 */ SyscallDesc("unused#939", unimplementedFunc), + /* 940 */ SyscallDesc("unused#940", unimplementedFunc), + /* 941 */ SyscallDesc("unused#941", unimplementedFunc), + /* 942 */ SyscallDesc("unused#942", unimplementedFunc), + /* 943 */ SyscallDesc("unused#943", unimplementedFunc), + /* 944 */ SyscallDesc("unused#944", unimplementedFunc), + /* 945 */ SyscallDesc("unused#945", unimplementedFunc), + /* 946 */ SyscallDesc("unused#946", unimplementedFunc), + /* 947 */ SyscallDesc("unused#947", unimplementedFunc), + /* 948 */ SyscallDesc("unused#948", unimplementedFunc), + /* 949 */ SyscallDesc("unused#949", unimplementedFunc), + /* 950 */ SyscallDesc("unused#950", unimplementedFunc), + /* 951 */ SyscallDesc("unused#951", unimplementedFunc), + /* 952 */ SyscallDesc("unused#952", unimplementedFunc), + /* 953 */ SyscallDesc("unused#953", unimplementedFunc), + /* 954 */ SyscallDesc("unused#954", unimplementedFunc), + /* 955 */ SyscallDesc("unused#955", unimplementedFunc), + /* 956 */ SyscallDesc("unused#956", unimplementedFunc), + /* 957 */ SyscallDesc("unused#957", unimplementedFunc), + /* 958 */ SyscallDesc("unused#958", unimplementedFunc), + /* 959 */ SyscallDesc("unused#959", unimplementedFunc), + /* 960 */ SyscallDesc("unused#960", unimplementedFunc), + /* 961 */ SyscallDesc("unused#961", 
unimplementedFunc), + /* 962 */ SyscallDesc("unused#962", unimplementedFunc), + /* 963 */ SyscallDesc("unused#963", unimplementedFunc), + /* 964 */ SyscallDesc("unused#964", unimplementedFunc), + /* 965 */ SyscallDesc("unused#965", unimplementedFunc), + /* 966 */ SyscallDesc("unused#966", unimplementedFunc), + /* 967 */ SyscallDesc("unused#967", unimplementedFunc), + /* 968 */ SyscallDesc("unused#968", unimplementedFunc), + /* 969 */ SyscallDesc("unused#969", unimplementedFunc), + /* 970 */ SyscallDesc("unused#970", unimplementedFunc), + /* 971 */ SyscallDesc("unused#971", unimplementedFunc), + /* 972 */ SyscallDesc("unused#972", unimplementedFunc), + /* 973 */ SyscallDesc("unused#973", unimplementedFunc), + /* 974 */ SyscallDesc("unused#974", unimplementedFunc), + /* 975 */ SyscallDesc("unused#975", unimplementedFunc), + /* 976 */ SyscallDesc("unused#976", unimplementedFunc), + /* 977 */ SyscallDesc("unused#977", unimplementedFunc), + /* 978 */ SyscallDesc("unused#978", unimplementedFunc), + /* 979 */ SyscallDesc("unused#979", unimplementedFunc), + /* 980 */ SyscallDesc("unused#980", unimplementedFunc), + /* 981 */ SyscallDesc("unused#981", unimplementedFunc), + /* 982 */ SyscallDesc("unused#982", unimplementedFunc), + /* 983 */ SyscallDesc("unused#983", unimplementedFunc), + /* 984 */ SyscallDesc("unused#984", unimplementedFunc), + /* 985 */ SyscallDesc("unused#985", unimplementedFunc), + /* 986 */ SyscallDesc("unused#986", unimplementedFunc), + /* 987 */ SyscallDesc("unused#987", unimplementedFunc), + /* 988 */ SyscallDesc("unused#988", unimplementedFunc), + /* 989 */ SyscallDesc("unused#989", unimplementedFunc), + /* 990 */ SyscallDesc("unused#990", unimplementedFunc), + /* 991 */ SyscallDesc("unused#991", unimplementedFunc), + /* 992 */ SyscallDesc("unused#992", unimplementedFunc), + /* 993 */ SyscallDesc("unused#993", unimplementedFunc), + /* 994 */ SyscallDesc("unused#994", unimplementedFunc), + /* 995 */ SyscallDesc("unused#995", unimplementedFunc), + /* 
996 */ SyscallDesc("unused#996", unimplementedFunc), + /* 997 */ SyscallDesc("unused#997", unimplementedFunc), + /* 998 */ SyscallDesc("unused#998", unimplementedFunc), + /* 999 */ SyscallDesc("unused#999", unimplementedFunc), + /* 1000 */ SyscallDesc("unused#1000", unimplementedFunc), + /* 1001 */ SyscallDesc("unused#1001", unimplementedFunc), + /* 1002 */ SyscallDesc("unused#1002", unimplementedFunc), + /* 1003 */ SyscallDesc("unused#1003", unimplementedFunc), + /* 1004 */ SyscallDesc("unused#1004", unimplementedFunc), + /* 1005 */ SyscallDesc("unused#1005", unimplementedFunc), + /* 1006 */ SyscallDesc("unused#1006", unimplementedFunc), + /* 1007 */ SyscallDesc("unused#1007", unimplementedFunc), + /* 1008 */ SyscallDesc("unused#1008", unimplementedFunc), + /* 1009 */ SyscallDesc("unused#1009", unimplementedFunc), + /* 1010 */ SyscallDesc("unused#1010", unimplementedFunc), + /* 1011 */ SyscallDesc("unused#1011", unimplementedFunc), + /* 1012 */ SyscallDesc("unused#1012", unimplementedFunc), + /* 1013 */ SyscallDesc("unused#1013", unimplementedFunc), + /* 1014 */ SyscallDesc("unused#1014", unimplementedFunc), + /* 1015 */ SyscallDesc("unused#1015", unimplementedFunc), + /* 1016 */ SyscallDesc("unused#1016", unimplementedFunc), + /* 1017 */ SyscallDesc("unused#1017", unimplementedFunc), + /* 1018 */ SyscallDesc("unused#1018", unimplementedFunc), + /* 1019 */ SyscallDesc("unused#1019", unimplementedFunc), + /* 1020 */ SyscallDesc("unused#1020", unimplementedFunc), + /* 1021 */ SyscallDesc("unused#1021", unimplementedFunc), + /* 1022 */ SyscallDesc("unused#1022", unimplementedFunc), + /* 1023 */ SyscallDesc("unused#1023", unimplementedFunc), + /* 1024 */ SyscallDesc("open", openFunc<ArmLinux64>), + /* 1025 */ SyscallDesc("link", unimplementedFunc), + /* 1026 */ SyscallDesc("unlink", unlinkFunc), + /* 1027 */ SyscallDesc("mknod", unimplementedFunc), + /* 1028 */ SyscallDesc("chmod", chmodFunc<ArmLinux64>), + /* 1029 */ SyscallDesc("chown", unimplementedFunc), + /* 1030 
*/ SyscallDesc("mkdir", mkdirFunc), + /* 1031 */ SyscallDesc("rmdir", unimplementedFunc), + /* 1032 */ SyscallDesc("lchown", unimplementedFunc), + /* 1033 */ SyscallDesc("access", unimplementedFunc), + /* 1034 */ SyscallDesc("rename", renameFunc), + /* 1035 */ SyscallDesc("readlink", readlinkFunc), + /* 1036 */ SyscallDesc("symlink", unimplementedFunc), + /* 1037 */ SyscallDesc("utimes", unimplementedFunc), + /* 1038 */ SyscallDesc("stat64", stat64Func<ArmLinux64>), + /* 1039 */ SyscallDesc("lstat64", lstat64Func<ArmLinux64>), + /* 1040 */ SyscallDesc("pipe", pipePseudoFunc), + /* 1041 */ SyscallDesc("dup2", unimplementedFunc), + /* 1042 */ SyscallDesc("epoll_create", unimplementedFunc), + /* 1043 */ SyscallDesc("inotify_init", unimplementedFunc), + /* 1044 */ SyscallDesc("eventfd", unimplementedFunc), + /* 1045 */ SyscallDesc("signalfd", unimplementedFunc), + /* 1046 */ SyscallDesc("sendfile", unimplementedFunc), + /* 1047 */ SyscallDesc("ftruncate", ftruncateFunc), + /* 1048 */ SyscallDesc("truncate", truncateFunc), + /* 1049 */ SyscallDesc("stat", statFunc<ArmLinux64>), + /* 1050 */ SyscallDesc("lstat", unimplementedFunc), + /* 1051 */ SyscallDesc("fstat", fstatFunc<ArmLinux64>), + /* 1052 */ SyscallDesc("fcntl", fcntlFunc), + /* 1053 */ SyscallDesc("fadvise64", unimplementedFunc), + /* 1054 */ SyscallDesc("newfstatat", unimplementedFunc), + /* 1055 */ SyscallDesc("fstatfs", unimplementedFunc), + /* 1056 */ SyscallDesc("statfs", unimplementedFunc), + /* 1057 */ SyscallDesc("lseek", lseekFunc), + /* 1058 */ SyscallDesc("mmap", mmapFunc<ArmLinux64>), + /* 1059 */ SyscallDesc("alarm", unimplementedFunc), + /* 1060 */ SyscallDesc("getpgrp", unimplementedFunc), + /* 1061 */ SyscallDesc("pause", unimplementedFunc), + /* 1062 */ SyscallDesc("time", timeFunc<ArmLinux64>), + /* 1063 */ SyscallDesc("utime", unimplementedFunc), + /* 1064 */ SyscallDesc("creat", unimplementedFunc), + /* 1065 */ SyscallDesc("getdents", unimplementedFunc), + /* 1066 */ 
SyscallDesc("futimesat", unimplementedFunc), + /* 1067 */ SyscallDesc("select", unimplementedFunc), + /* 1068 */ SyscallDesc("poll", unimplementedFunc), + /* 1069 */ SyscallDesc("epoll_wait", unimplementedFunc), + /* 1070 */ SyscallDesc("ustat", unimplementedFunc), + /* 1071 */ SyscallDesc("vfork", unimplementedFunc), + /* 1072 */ SyscallDesc("oldwait4", unimplementedFunc), + /* 1073 */ SyscallDesc("recv", unimplementedFunc), + /* 1074 */ SyscallDesc("send", unimplementedFunc), + /* 1075 */ SyscallDesc("bdflush", unimplementedFunc), + /* 1076 */ SyscallDesc("umount", unimplementedFunc), + /* 1077 */ SyscallDesc("uselib", unimplementedFunc), + /* 1078 */ SyscallDesc("_sysctl", unimplementedFunc), + /* 1079 */ SyscallDesc("fork", unimplementedFunc) +}; -SyscallDesc ArmLinuxProcess::privSyscallDescs[] = { +static SyscallDesc privSyscallDescs32[] = { /* 1 */ SyscallDesc("breakpoint", unimplementedFunc), /* 2 */ SyscallDesc("cacheflush", unimplementedFunc), /* 3 */ SyscallDesc("usr26", unimplementedFunc), /* 4 */ SyscallDesc("usr32", unimplementedFunc), - /* 5 */ SyscallDesc("set_tls", setTLSFunc) + /* 5 */ SyscallDesc("set_tls", setTLSFunc32) }; -ArmLinuxProcess::ArmLinuxProcess(LiveProcessParams * params, +// Indices 1, 3 and 4 are unallocated. 
+static SyscallDesc privSyscallDescs64[] = { + /* 1 */ SyscallDesc("unallocated", unimplementedFunc), + /* 2 */ SyscallDesc("cacheflush", unimplementedFunc), + /* 3 */ SyscallDesc("unallocated", unimplementedFunc), + /* 4 */ SyscallDesc("unallocated", unimplementedFunc), + /* 5 */ SyscallDesc("set_tls", setTLSFunc64) +}; + +ArmLinuxProcess32::ArmLinuxProcess32(LiveProcessParams * params, ObjectFile *objFile, ObjectFile::Arch _arch) - : ArmLiveProcess(params, objFile, _arch), - Num_Syscall_Descs(sizeof(syscallDescs) / sizeof(SyscallDesc)), - Num_Priv_Syscall_Descs(sizeof(privSyscallDescs) / sizeof(SyscallDesc)) -{ } + : ArmLiveProcess32(params, objFile, _arch) +{ + SyscallTable table; + + table.descs = syscallDescs32; + table.size = sizeof(syscallDescs32) / sizeof(SyscallDesc); + table.base = 0; + syscallTables.push_back(table); + table.base = 0x900000; + syscallTables.push_back(table); -const Addr ArmLinuxProcess::commPage = 0xffff0000; + table.descs = privSyscallDescs32; + table.size = sizeof(privSyscallDescs32) / sizeof(SyscallDesc); + table.base = 0xf0001; + syscallTables.push_back(table); +} + +ArmLinuxProcess64::ArmLinuxProcess64(LiveProcessParams * params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess64(params, objFile, _arch) +{ + SyscallTable table; + + table.descs = syscallDescs64; + table.size = sizeof(syscallDescs64) / sizeof(SyscallDesc); + table.base = 0; + syscallTables.push_back(table); + table.base = 0x900000; + syscallTables.push_back(table); + + table.descs = privSyscallDescs64; + table.size = sizeof(privSyscallDescs64) / sizeof(SyscallDesc); + table.base = 0x1001; + syscallTables.push_back(table); +} + +const Addr ArmLinuxProcess32::commPage = 0xffff0000; SyscallDesc* -ArmLinuxProcess::getDesc(int callnum) +ArmLinuxProcessBits::getLinuxDesc(int callnum) { // Angel SWI syscalls are unsupported in this release - if (callnum == 0x123456) { + if (callnum == 0x123456) panic("Attempt to execute an ANGEL_SWI system call 
(newlib-related)"); - } else if ((callnum & 0x00f00000) == 0x00900000 || - (callnum & 0xf0000) == 0xf0000) { - callnum &= 0x000fffff; - if ((callnum & 0x0f0000) == 0xf0000) { - callnum -= 0x0f0001; - if (callnum < 0 || callnum > Num_Priv_Syscall_Descs) - return NULL; - return &privSyscallDescs[callnum]; - } + for (unsigned i = 0; i < syscallTables.size(); i++) { + SyscallDesc *desc = syscallTables[i].getDesc(callnum); + if (desc) + return desc; } - // Linux syscalls have to strip off the 0x00900000 + return NULL; +} - if (callnum < 0 || callnum > Num_Syscall_Descs) +SyscallDesc * +ArmLinuxProcessBits::SyscallTable::getDesc(int callnum) const +{ + int offset = callnum - base; + if (offset < 0 || offset >= size) return NULL; + return &descs[offset]; +} + +SyscallDesc* +ArmLinuxProcess32::getDesc(int callnum) +{ + return getLinuxDesc(callnum); +} - return &syscallDescs[callnum]; +SyscallDesc* +ArmLinuxProcess64::getDesc(int callnum) +{ + return getLinuxDesc(callnum); } void -ArmLinuxProcess::initState() +ArmLinuxProcess32::initState() { - ArmLiveProcess::initState(); + ArmLiveProcess32::initState(); allocateMem(commPage, PageBytes); ThreadContext *tc = system->getThreadContext(contextIds[0]); @@ -546,20 +1709,9 @@ ArmLinuxProcess::initState() tc->getMemProxy().writeBlob(commPage + 0x0fe0, get_tls, sizeof(get_tls)); } -ArmISA::IntReg -ArmLinuxProcess::getSyscallArg(ThreadContext *tc, int &i) -{ - // Linux apparently allows more parameter than the ABI says it should. - // This limit may need to be increased even further. - assert(i < 6); - return tc->readIntReg(ArgumentReg0 + i++); -} - void -ArmLinuxProcess::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) +ArmLinuxProcess64::initState() { - // Linux apparently allows more parameter than the ABI says it should. - // This limit may need to be increased even further. 
- assert(i < 6); - tc->setIntReg(ArgumentReg0 + i, val); + ArmLiveProcess64::initState(); + // The 64 bit equivalent of the comm page would be set up here. } diff --git a/src/arch/arm/linux/process.hh b/src/arch/arm/linux/process.hh index 7d3a943ed..670739438 100644 --- a/src/arch/arm/linux/process.hh +++ b/src/arch/arm/linux/process.hh @@ -1,4 +1,16 @@ /* +* Copyright (c) 2011-2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2007-2008 The Florida State University * All rights reserved. * @@ -31,39 +43,54 @@ #ifndef __ARM_LINUX_PROCESS_HH__ #define __ARM_LINUX_PROCESS_HH__ +#include <vector> + #include "arch/arm/process.hh" +class ArmLinuxProcessBits +{ + protected: + SyscallDesc* getLinuxDesc(int callnum); + + struct SyscallTable + { + int base; + SyscallDesc *descs; + int size; + + SyscallDesc *getDesc(int offset) const; + }; + + std::vector<SyscallTable> syscallTables; +}; + /// A process with emulated Arm/Linux syscalls. 
-class ArmLinuxProcess : public ArmLiveProcess +class ArmLinuxProcess32 : public ArmLiveProcess32, public ArmLinuxProcessBits { public: - ArmLinuxProcess(LiveProcessParams * params, ObjectFile *objFile, - ObjectFile::Arch _arch); - - virtual SyscallDesc* getDesc(int callnum); + ArmLinuxProcess32(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); void initState(); - ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); /// Explicitly import the otherwise hidden getSyscallArg using ArmLiveProcess::getSyscallArg; - void setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val); - - /// The target system's hostname. - static const char *hostname; /// A page to hold "kernel" provided functions. The name might be wrong. static const Addr commPage; - /// Array of syscall descriptors, indexed by call number. - static SyscallDesc syscallDescs[]; - - /// Array of "arm private" syscall descriptors. - static SyscallDesc privSyscallDescs[]; + SyscallDesc* getDesc(int callnum); +}; - const int Num_Syscall_Descs; +/// A process with emulated Arm/Linux syscalls. 
+class ArmLinuxProcess64 : public ArmLiveProcess64, public ArmLinuxProcessBits +{ + public: + ArmLinuxProcess64(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); - const int Num_Priv_Syscall_Descs; + void initState(); + SyscallDesc* getDesc(int callnum); }; #endif // __ARM_LINUX_PROCESS_HH__ diff --git a/src/arch/arm/linux/system.cc b/src/arch/arm/linux/system.cc index bc7fd2cb6..216a65899 100644 --- a/src/arch/arm/linux/system.cc +++ b/src/arch/arm/linux/system.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -63,7 +63,8 @@ using namespace Linux; LinuxArmSystem::LinuxArmSystem(Params *p) : ArmSystem(p), enableContextSwitchStatsDump(p->enable_context_switch_stats_dump), - kernelPanicEvent(NULL), kernelOopsEvent(NULL) + kernelPanicEvent(NULL), kernelOopsEvent(NULL), + bootReleaseAddr(p->boot_release_addr) { if (p->panic_on_panic) { kernelPanicEvent = addKernelFuncEventOrPanic<PanicPCEvent>( @@ -98,22 +99,30 @@ LinuxArmSystem::LinuxArmSystem(Params *p) secDataPtrAddr = 0; secDataAddr = 0; penReleaseAddr = 0; + kernelSymtab->findAddress("__secondary_data", secDataPtrAddr); kernelSymtab->findAddress("secondary_data", secDataAddr); kernelSymtab->findAddress("pen_release", penReleaseAddr); + kernelSymtab->findAddress("secondary_holding_pen_release", pen64ReleaseAddr); secDataPtrAddr &= ~ULL(0x7F); secDataAddr &= ~ULL(0x7F); penReleaseAddr &= ~ULL(0x7F); + pen64ReleaseAddr &= ~ULL(0x7F); + bootReleaseAddr = (bootReleaseAddr & ~ULL(0x7F)) + loadAddrOffset; + } bool LinuxArmSystem::adderBootUncacheable(Addr a) { Addr block = a & ~ULL(0x7F); + if (block == secDataPtrAddr || block == secDataAddr || - block == penReleaseAddr) + block == penReleaseAddr || pen64ReleaseAddr == block || + block == bootReleaseAddr) return true; + return false; } @@ -145,7 +154,8 @@ LinuxArmSystem::initState() if 
(kernel_has_fdt_support && dtb_file_specified) { // Kernel supports flattened device tree and dtb file specified. // Using Device Tree Blob to describe system configuration. - inform("Loading DTB file: %s\n", params()->dtb_filename); + inform("Loading DTB file: %s at address %#x\n", params()->dtb_filename, + params()->atags_addr + loadAddrOffset); ObjectFile *dtb_file = createObjectFile(params()->dtb_filename, true); if (!dtb_file) { @@ -165,7 +175,7 @@ LinuxArmSystem::initState() "to DTB file: %s\n", params()->dtb_filename); } - dtb_file->setTextBase(params()->atags_addr); + dtb_file->setTextBase(params()->atags_addr + loadAddrOffset); dtb_file->loadSections(physProxy); delete dtb_file; } else { @@ -215,15 +225,17 @@ LinuxArmSystem::initState() DPRINTF(Loader, "Boot atags was %d bytes in total\n", size << 2); DDUMP(Loader, boot_data, size << 2); - physProxy.writeBlob(params()->atags_addr, boot_data, size << 2); + physProxy.writeBlob(params()->atags_addr + loadAddrOffset, boot_data, + size << 2); delete[] boot_data; } + // Kernel boot requirements to set up r0, r1 and r2 in ARMv7 for (int i = 0; i < threadContexts.size(); i++) { threadContexts[i]->setIntReg(0, 0); threadContexts[i]->setIntReg(1, params()->machine_type); - threadContexts[i]->setIntReg(2, params()->atags_addr); + threadContexts[i]->setIntReg(2, params()->atags_addr + loadAddrOffset); } } diff --git a/src/arch/arm/linux/system.hh b/src/arch/arm/linux/system.hh index 008c64429..4ce6ac49e 100644 --- a/src/arch/arm/linux/system.hh +++ b/src/arch/arm/linux/system.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -126,6 +126,8 @@ class LinuxArmSystem : public ArmSystem Addr secDataPtrAddr; Addr secDataAddr; Addr penReleaseAddr; + Addr pen64ReleaseAddr; + Addr bootReleaseAddr; }; class DumpStatsPCEvent : public PCEvent diff --git a/src/arch/arm/locked_mem.hh 
b/src/arch/arm/locked_mem.hh index f2601f00c..24c78e721 100644 --- a/src/arch/arm/locked_mem.hh +++ b/src/arch/arm/locked_mem.hh @@ -53,6 +53,8 @@ */ #include "arch/arm/miscregs.hh" +#include "arch/arm/isa_traits.hh" +#include "debug/LLSC.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -62,31 +64,48 @@ template <class XC> inline void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask) { + DPRINTF(LLSC,"%s: handleing snoop for address: %#x locked: %d\n", + xc->getCpuPtr()->name(),pkt->getAddr(), + xc->readMiscReg(MISCREG_LOCKFLAG)); if (!xc->readMiscReg(MISCREG_LOCKFLAG)) return; Addr locked_addr = xc->readMiscReg(MISCREG_LOCKADDR) & cacheBlockMask; + // If no caches are attached, the snoop address always needs to be masked Addr snoop_addr = pkt->getAddr() & cacheBlockMask; - if (locked_addr == snoop_addr) + DPRINTF(LLSC,"%s: handleing snoop for address: %#x locked addr: %#x\n", + xc->getCpuPtr()->name(),snoop_addr, locked_addr); + if (locked_addr == snoop_addr) { + DPRINTF(LLSC,"%s: address match, clearing lock and signaling sev\n", + xc->getCpuPtr()->name()); xc->setMiscReg(MISCREG_LOCKFLAG, false); + // Implement ARMv8 WFE/SEV semantics + xc->setMiscReg(MISCREG_SEV_MAILBOX, true); + xc->getCpuPtr()->wakeup(); + } } template <class XC> inline void -handleLockedSnoopHit(XC *xc) +handleLockedRead(XC *xc, Request *req) { + xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr()); + xc->setMiscReg(MISCREG_LOCKFLAG, true); + DPRINTF(LLSC,"%s: Placing address %#x in monitor\n", xc->getCpuPtr()->name(), + req->getPaddr()); } template <class XC> inline void -handleLockedRead(XC *xc, Request *req) +handleLockedSnoopHit(XC *xc) { - xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr()); - xc->setMiscReg(MISCREG_LOCKFLAG, true); + DPRINTF(LLSC,"%s: handling snoop lock hit address: %#x\n", + xc->getCpuPtr()->name(), xc->readMiscReg(MISCREG_LOCKADDR)); + xc->setMiscReg(MISCREG_LOCKFLAG, false); + xc->setMiscReg(MISCREG_SEV_MAILBOX, true); } - template <class XC> inline 
bool handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) @@ -94,6 +113,8 @@ handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) if (req->isSwap()) return true; + DPRINTF(LLSC,"%s: handling locked write for address %#x in monitor\n", + xc->getCpuPtr()->name(), req->getPaddr()); // Verify that the lock flag is still set and the address // is correct bool lock_flag = xc->readMiscReg(MISCREG_LOCKFLAG); @@ -103,6 +124,8 @@ handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) // don't even bother sending to memory system req->setExtraData(0); xc->setMiscReg(MISCREG_LOCKFLAG, false); + DPRINTF(LLSC,"%s: clearing lock flag in handle locked write\n", + xc->getCpuPtr()->name()); // the rest of this code is not architectural; // it's just a debugging aid to help detect // livelock by warning on long sequences of failed diff --git a/src/arch/arm/miscregs.cc b/src/arch/arm/miscregs.cc index 3a64b557a..6fa304938 100644 --- a/src/arch/arm/miscregs.cc +++ b/src/arch/arm/miscregs.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -36,11 +36,13 @@ * * Authors: Gabe Black * Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/isa.hh" #include "arch/arm/miscregs.hh" #include "base/misc.hh" +#include "cpu/thread_context.hh" namespace ArmISA { @@ -50,23 +52,31 @@ decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) { switch(crn) { case 0: - switch (opc2) { + switch (opc1) { case 0: - switch (crm) { + switch (opc2) { case 0: - return MISCREG_DBGDIDR; - case 1: - return MISCREG_DBGDSCR_INT; - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; - } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + switch (crm) { + case 0: + return MISCREG_DBGDIDR; + 
case 1: + return MISCREG_DBGDSCRint; + } + break; + } + break; + case 7: + switch (opc2) { + case 0: + switch (crm) { + case 0: + return MISCREG_JIDR; + } + break; + } + break; } + break; case 1: switch (opc1) { case 6: @@ -75,29 +85,1270 @@ decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) switch (opc2) { case 0: return MISCREG_TEEHBR; - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; - } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + break; + } + break; + case 7: + switch (crm) { + case 0: + switch (opc2) { + case 0: + return MISCREG_JOSCR; + } + break; + } + break; } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + break; + case 2: + switch (opc1) { + case 7: + switch (crm) { + case 0: + switch (opc2) { + case 0: + return MISCREG_JMCR; + } + break; + } + break; + } + break; } - + // If we get here then it must be a register that we haven't implemented + warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", + crn, opc1, crm, opc2); + return MISCREG_CP14_UNIMPL; } +using namespace std; + +bitset<NUM_MISCREG_INFOS> miscRegInfo[NUM_MISCREGS] = { + // MISCREG_CPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_FIQ + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_IRQ + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_SVC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_MON + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_ABT + 
bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_HYP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_UND + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_ELR_HYP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSID + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MVFR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MVFR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPEXC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // Helper registers + // MISCREG_CPSR_MODE + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CPSR_Q + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR_Q + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR_EXC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_LOCKADDR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_LOCKFLAG + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PRRR_MAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001101")), + // MISCREG_PRRR_MAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_PRRR_MAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_NMRR_MAIR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001101")), + // MISCREG_NMRR_MAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_NMRR_MAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_PMXEVTYPER_PMCCFILTR + bitset<NUM_MISCREG_INFOS>(string("0000000000000000101")), + // MISCREG_SCTLR_RST + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SEV_MAILBOX + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // 
AArch32 CP14 registers + // MISCREG_DBGDIDR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_DBGDSCRint + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_DBGDCCINT + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRTXint + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRRXint + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWFAR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGVCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRRXext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDSCRext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100010")), + // MISCREG_DBGDTRTXext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGOSECCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // 
MISCREG_DBGWVR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDRAR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGBXVR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBXVR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGOSLAR + bitset<NUM_MISCREG_INFOS>(string("1010111111111100000")), + // MISCREG_DBGOSLSR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGOSDLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGPRCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDSAR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGCLAIMSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGCLAIMCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGAUTHSTATUS + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID2 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID0 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_TEECR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_JIDR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_TEEHBR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_JOSCR + 
bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_JMCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // AArch32 CP15 registers + // MISCREG_MIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_TCMTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_TLBTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MPIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_REVIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000010")), + // MISCREG_ID_PFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_DFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR2 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR3 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR2 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR3 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR4 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR5 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CCSIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CLIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_AIDR + 
bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CSSELR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CSSELR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_CSSELR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_VPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VMPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_SCTLR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_SCTLR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_SCTLR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ACTLR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_ACTLR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_ACTLR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_CPACR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SCR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000001")), + // MISCREG_SDER + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_NSACR + bitset<NUM_MISCREG_INFOS>(string("1111011101000000001")), + // MISCREG_HSCTLR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HACTLR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HDCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HCPTR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HSTR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HACR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_TTBR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBR0_S + 
bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TTBR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TTBCR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBCR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBCR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HTCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VTCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_DACR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DACR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DACR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_DFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_IFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_IFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_IFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ADFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001010")), + // MISCREG_ADFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010010")), + // MISCREG_ADFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010010")), + // MISCREG_AIFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001010")), + // MISCREG_AIFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010010")), + // MISCREG_AIFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010010")), + // MISCREG_HADFSR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HAIFSR 
+ bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HSR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_DFAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DFAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DFAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_IFAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_IFAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_IFAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HDFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HIFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HPFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_ICIALLUIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_BPIALLIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_PAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_PAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_PAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ICIALLU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ICIMVAU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_CP15ISB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_BPIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_BPIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCIMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCISW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_ATS1CPR + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CPW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CUR 
+ bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CUW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS12NSOPR + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOPW + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOUR + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOUW + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_DCCMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DCCSW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_CP15DSB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_CP15DMB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_DCCMVAU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCCIMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCCISW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_ATS1HR + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_ATS1HW + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIASIDIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAAIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVALIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIMVAALIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_ITLBIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ITLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ITLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIALL + 
bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIMVAAL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIIPAS2IS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2LIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIALLHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVAHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLNSNHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVALHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2 + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2L + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIALLH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVAH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLNSNH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVALH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_PMCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMOVSR + 
bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSWINC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSELR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCNTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVTYPER + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCFILTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVCNTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMUSERENR + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // MISCREG_PMINTENSET + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMINTENCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMOVSSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_L2CTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ECTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_PRRR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_PRRR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_PRRR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_MAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_MAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_NMRR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_NMRR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_NMRR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MAIR1 + 
bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_MAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_MAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_AMAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_AMAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_AMAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_AMAIR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_AMAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_AMAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HMAIR0 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HMAIR1 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HAMAIR0 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_HAMAIR1 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_VBAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_VBAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_VBAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MVBAR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000001")), + // MISCREG_RMR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000000")), + // MISCREG_ISR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_HVBAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_FCSEIDR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000010")), + // MISCREG_CONTEXTIDR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CONTEXTIDR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_CONTEXTIDR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRURW + 
bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRURW_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_TPIDRURW_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRURO + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRURO_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011010110001")), + // MISCREG_TPIDRURO_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRPRW + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRPRW_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TPIDRPRW_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HTPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTFRQ + bitset<NUM_MISCREG_INFOS>(string("1111010101010100001")), + // MISCREG_CNTKCTL + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CNTP_TVAL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_TVAL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_CNTP_TVAL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTP_CTL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_CTL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_CNTP_CTL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTV_TVAL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CTL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTHCTL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_TVAL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_CTL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_IL1DATA0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA1 + 
bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA2 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA3 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA2 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA3 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA4 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_RAMINDEX + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_L2ACTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_CBAR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000000")), + // MISCREG_HTTBR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VTTBR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTPCT + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTVCT + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTP_CVAL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_CVAL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110000")), + // MISCREG_CNTP_CVAL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTV_CVAL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTVOFF + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_CVAL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CPUMERRSR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_L2MERRSR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + + // AArch64 registers (Op0=2) + // MISCREG_MDCCINT_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // 
MISCREG_OSDTRRX_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDSCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_OSDTRTX_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_OSECCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR3_EL1 + 
bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDCCSR_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_MDDTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDDTRTX_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDDTRRX_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGVCR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDRAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_OSLAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1010111111111100001")), + // MISCREG_OSLSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_OSDLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGPRCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGCLAIMSET_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGCLAIMCLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGAUTHSTATUS_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_TEECR32_EL1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")), + // MISCREG_TEEHBR32_EL1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")), + + // AArch64 registers (Op0=1,3) + // MISCREG_MIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MPIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_REVIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_DFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR0_EL1 + 
bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64PFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64PFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64DFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64DFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64AFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64AFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64ISAR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64ISAR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64MMFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64MMFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CCSIDR_EL1 
+ bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CLIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_AIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CSSELR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_DCZID_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_VPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VMPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SCTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPACR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SCTLR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ACTLR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_MDCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CPTR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HSTR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HACR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SCTLR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ACTLR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SDER32_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CPTR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_MDCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_TTBR0_EL1 + 
bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TTBR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TTBR0_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VTTBR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VTCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TTBR0_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_TCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_DACR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ELR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SP_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SPSEL + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CURRENTEL + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_NZCV + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DAIF + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DSPSR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ELR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SP_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_IRQ_AA64 + 
bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_ABT_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_UND_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_FIQ_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ELR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SP_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AFSR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_AFSR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ESR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IFSR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR0_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR1_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ESR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_FPEXC32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR0_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AFSR1_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ESR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_FAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_FAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HPFAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_FAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_IC_IALLUIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_PAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IC_IALLU + 
bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_IVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_ISW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_AT_S1E1R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E1W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E0R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E0W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DC_CSW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_CISW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_ZVA_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010001000011")), + // MISCREG_IC_IVAU_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_DC_CVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_DC_CVAU_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_DC_CIVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_AT_S1E2R_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_AT_S1E2W_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_AT_S12E1R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E1W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E0R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E0W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S1E3R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_AT_S1E3W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VMALLE1IS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAE1IS_Xt + 
bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_ASIDE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAAE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VALE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAALE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VMALLE1 + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_ASIDE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAAE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VALE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAALE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_IPAS2E1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2LE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE2IS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VAE2IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_ALLE1IS + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_VALE2IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VMALLS12E1IS + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2E1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2LE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE2 + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VAE2_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_ALLE1 + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // 
MISCREG_TLBI_VALE2_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VMALLS12E1 + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE3IS + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VAE3IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VALE3IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_ALLE3 + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VAE3_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VALE3_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_PMINTENSET_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMINTENCLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMCR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENSET_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENCLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMOVSCLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSWINC_EL0 + bitset<NUM_MISCREG_INFOS>(string("1010101010111100001")), + // MISCREG_PMSELR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID0_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101111100001")), + // MISCREG_PMCEID1_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101111100001")), + // MISCREG_PMCCNTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVTYPER_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCFILTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVCNTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMUSERENR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // 
MISCREG_PMOVSSET_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MAIR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_AMAIR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_MAIR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AMAIR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_MAIR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AMAIR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_L2CTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ECTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_VBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_RVBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ISR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_VBAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_RVBAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("0101010000000000001")), + // MISCREG_VBAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_RVBAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("0101000000000000001")), + // MISCREG_RMR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CONTEXTIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TPIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TPIDR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_TPIDRRO_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // MISCREG_TPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TPIDR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CNTKCTL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // 
MISCREG_CNTFRQ_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111010101010100001")), + // MISCREG_CNTPCT_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTVCT_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTP_TVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTP_CTL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTP_CVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_TVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CTL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR0_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR1_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR2_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR3_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR4_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR5_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER0_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER1_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER2_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER3_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER4_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER5_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTVOFF_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHCTL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // 
MISCREG_CNTHP_TVAL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHP_CTL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHP_CVAL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTPS_TVAL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTPS_CTL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTPS_CVAL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_IL1DATA0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUECTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUMERRSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2MERRSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + + // Dummy registers + // MISCREG_NOP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_RAZ + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // 
MISCREG_CP14_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_CP15_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_A64_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_UNKNOWN + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")) +}; + MiscRegIndex decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) { @@ -116,6 +1367,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_TLBTR; case 5: return MISCREG_MPIDR; + case 6: + return MISCREG_REVIDR; default: return MISCREG_MIDR; } @@ -180,6 +1433,14 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_CSSELR; } break; + case 4: + if (crm == 0) { + if (opc2 == 0) + return MISCREG_VPIDR; + else if (opc2 == 5) + return MISCREG_VMPIDR; + } + break; } break; case 1: @@ -203,6 +1464,26 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_NSACR; } } + } else if (opc1 == 4) { + if (crm == 0) { + if (opc2 == 0) + return MISCREG_HSCTLR; + else if (opc2 == 1) + return MISCREG_HACTLR; + } else if (crm == 1) { + switch (opc2) { + case 0: + return MISCREG_HCR; + case 1: + return MISCREG_HDCR; + case 2: + return MISCREG_HCPTR; + case 3: + return MISCREG_HSTR; + case 7: + return MISCREG_HACR; + } + } } break; case 2: @@ -215,6 +1496,11 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 2: return MISCREG_TTBCR; } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 2) + return MISCREG_HTCR; + else if (crm == 1 && opc2 == 2) + return MISCREG_VTCR; } break; case 3: @@ -237,6 +1523,15 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_AIFSR; } } + } else if (opc1 == 4) { + if (crm == 1) { + if (opc2 == 0) + return MISCREG_HADFSR; + else if (opc2 == 1) + return MISCREG_HAIFSR; + } else if (crm == 2 && opc2 == 0) { + return MISCREG_HSR; + } } break; 
case 6: @@ -247,6 +1542,15 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 2: return MISCREG_IFAR; } + } else if (opc1 == 4 && crm == 0) { + switch (opc2) { + case 0: + return MISCREG_HDFAR; + case 2: + return MISCREG_HIFAR; + case 4: + return MISCREG_HPFAR; + } } break; case 7: @@ -294,21 +1598,21 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 8: switch (opc2) { case 0: - return MISCREG_V2PCWPR; + return MISCREG_ATS1CPR; case 1: - return MISCREG_V2PCWPW; + return MISCREG_ATS1CPW; case 2: - return MISCREG_V2PCWUR; + return MISCREG_ATS1CUR; case 3: - return MISCREG_V2PCWUW; + return MISCREG_ATS1CUW; case 4: - return MISCREG_V2POWPR; + return MISCREG_ATS12NSOPR; case 5: - return MISCREG_V2POWPW; + return MISCREG_ATS12NSOPW; case 6: - return MISCREG_V2POWUR; + return MISCREG_ATS12NSOUR; case 7: - return MISCREG_V2POWUW; + return MISCREG_ATS12NSOUW; } break; case 10: @@ -316,7 +1620,7 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 1: return MISCREG_DCCMVAC; case 2: - return MISCREG_MCCSW; + return MISCREG_DCCSW; case 4: return MISCREG_CP15DSB; case 5: @@ -341,6 +1645,11 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) } break; } + } else if (opc1 == 4 && crm == 8) { + if (opc2 == 0) + return MISCREG_ATS1HR; + else if (opc2 == 1) + return MISCREG_ATS1HW; } break; case 8: @@ -391,6 +1700,26 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) } break; } + } else if (opc1 == 4) { + if (crm == 3) { + switch (opc2) { + case 0: + return MISCREG_TLBIALLHIS; + case 1: + return MISCREG_TLBIMVAHIS; + case 4: + return MISCREG_TLBIALLNSNHIS; + } + } else if (crm == 7) { + switch (opc2) { + case 0: + return MISCREG_TLBIALLH; + case 1: + return MISCREG_TLBIMVAH; + case 4: + return MISCREG_TLBIALLNSNH; + } + } } break; case 9: @@ -421,7 +1750,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 0: return 
MISCREG_PMCCNTR; case 1: - return MISCREG_PMC_OTHER; + // Selector is PMSELR.SEL + return MISCREG_PMXEVTYPER_PMCCFILTR; case 2: return MISCREG_PMXEVCNTR; } @@ -434,6 +1764,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_PMINTENSET; case 2: return MISCREG_PMINTENCLR; + case 3: + return MISCREG_PMOVSSET; } break; } @@ -443,28 +1775,45 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) switch (opc2) { case 2: // L2CTLR, L2 Control Register return MISCREG_L2CTLR; - default: - warn("Uknown miscregs: crn:%d crm:%d opc1:%d opc2:%d\n", - crn,crm, opc1,opc2); - break; + case 3: + return MISCREG_L2ECTLR; } break; - default: - return MISCREG_L2LATENCY; + break; } } - //Reserved for Branch Predictor, Cache and TCM operations break; case 10: if (opc1 == 0) { // crm 0, 1, 4, and 8, with op2 0 - 7, reserved for TLB lockdown if (crm == 2) { // TEX Remap Registers if (opc2 == 0) { - return MISCREG_PRRR; + // Selector is TTBCR.EAE + return MISCREG_PRRR_MAIR0; } else if (opc2 == 1) { - return MISCREG_NMRR; + // Selector is TTBCR.EAE + return MISCREG_NMRR_MAIR1; + } + } else if (crm == 3) { + if (opc2 == 0) { + return MISCREG_AMAIR0; + } else if (opc2 == 1) { + return MISCREG_AMAIR1; } } + } else if (opc1 == 4) { + // crm 0, 1, 4, and 8, with op2 0 - 7, reserved for TLB lockdown + if (crm == 2) { + if (opc2 == 0) + return MISCREG_HMAIR0; + else if (opc2 == 1) + return MISCREG_HMAIR1; + } else if (crm == 3) { + if (opc2 == 0) + return MISCREG_HAMAIR0; + else if (opc2 == 1) + return MISCREG_HAMAIR1; + } } break; case 11: @@ -498,6 +1847,9 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_ISR; } } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 0) + return MISCREG_HVBAR; } break; case 13: @@ -505,7 +1857,7 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) if (crm == 0) { switch (opc2) { case 0: - return MISCREG_FCEIDR; + return MISCREG_FCSEIDR; case 1: 
return MISCREG_CONTEXTIDR; case 2: @@ -516,14 +1868,1682 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_TPIDRPRW; } } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 2) + return MISCREG_HTPIDR; + } + break; + case 14: + if (opc1 == 0) { + switch (crm) { + case 0: + if (opc2 == 0) + return MISCREG_CNTFRQ; + break; + case 1: + if (opc2 == 0) + return MISCREG_CNTKCTL; + break; + case 2: + if (opc2 == 0) + return MISCREG_CNTP_TVAL; + else if (opc2 == 1) + return MISCREG_CNTP_CTL; + break; + case 3: + if (opc2 == 0) + return MISCREG_CNTV_TVAL; + else if (opc2 == 1) + return MISCREG_CNTV_CTL; + break; + } + } else if (opc1 == 4) { + if (crm == 1 && opc2 == 0) { + return MISCREG_CNTHCTL; + } else if (crm == 2) { + if (opc2 == 0) + return MISCREG_CNTHP_TVAL; + else if (opc2 == 1) + return MISCREG_CNTHP_CTL; + } } break; case 15: // Implementation defined - return MISCREG_CRN15; + return MISCREG_CP15_UNIMPL; } // Unrecognized register - return NUM_MISCREGS; + return MISCREG_CP15_UNIMPL; +} + +MiscRegIndex +decodeCP15Reg64(unsigned crm, unsigned opc1) +{ + switch (crm) { + case 2: + switch (opc1) { + case 0: + return MISCREG_TTBR0; + case 1: + return MISCREG_TTBR1; + case 4: + return MISCREG_HTTBR; + case 6: + return MISCREG_VTTBR; + } + break; + case 7: + if (opc1 == 0) + return MISCREG_PAR; + break; + case 14: + switch (opc1) { + case 0: + return MISCREG_CNTPCT; + case 1: + return MISCREG_CNTVCT; + case 2: + return MISCREG_CNTP_CVAL; + case 3: + return MISCREG_CNTV_CVAL; + case 4: + return MISCREG_CNTVOFF; + case 6: + return MISCREG_CNTHP_CVAL; + } + break; + case 15: + if (opc1 == 0) + return MISCREG_CPUMERRSR; + else if (opc1 == 1) + return MISCREG_L2MERRSR; + break; + } + // Unrecognized register + return MISCREG_CP15_UNIMPL; +} + +bool +canReadCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + bool secure = !scr.ns; + bool canRead; + + switch (cpsr.mode) { + case MODE_USER: + canRead = secure ? 
miscRegInfo[reg][MISCREG_USR_S_RD] : + miscRegInfo[reg][MISCREG_USR_NS_RD]; + break; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + canRead = secure ? miscRegInfo[reg][MISCREG_PRI_S_RD] : + miscRegInfo[reg][MISCREG_PRI_NS_RD]; + break; + case MODE_MON: + canRead = secure ? miscRegInfo[reg][MISCREG_MON_NS0_RD] : + miscRegInfo[reg][MISCREG_MON_NS1_RD]; + break; + case MODE_HYP: + canRead = miscRegInfo[reg][MISCREG_HYP_RD]; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } + // can't do permissions checkes on the root of a banked pair of regs + assert(!miscRegInfo[reg][MISCREG_BANKED]); + return canRead; +} + +bool +canWriteCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + bool secure = !scr.ns; + bool canWrite; + + switch (cpsr.mode) { + case MODE_USER: + canWrite = secure ? miscRegInfo[reg][MISCREG_USR_S_WR] : + miscRegInfo[reg][MISCREG_USR_NS_WR]; + break; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + canWrite = secure ? miscRegInfo[reg][MISCREG_PRI_S_WR] : + miscRegInfo[reg][MISCREG_PRI_NS_WR]; + break; + case MODE_MON: + canWrite = secure ? miscRegInfo[reg][MISCREG_MON_NS0_WR] : + miscRegInfo[reg][MISCREG_MON_NS1_WR]; + break; + case MODE_HYP: + canWrite = miscRegInfo[reg][MISCREG_HYP_WR]; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } + // can't do permissions checkes on the root of a banked pair of regs + assert(!miscRegInfo[reg][MISCREG_BANKED]); + return canWrite; +} + +int +flattenMiscRegNsBanked(int reg, ThreadContext *tc) +{ + if (miscRegInfo[reg][MISCREG_BANKED]) { + SCR scr = tc->readMiscReg(MISCREG_SCR); + reg += (ArmSystem::haveSecurity(tc) && !scr.ns) ? 2 : 1; + } + return reg; +} + +int +flattenMiscRegNsBanked(int reg, ThreadContext *tc, bool ns) +{ + if (miscRegInfo[reg][MISCREG_BANKED]) { + reg += (ArmSystem::haveSecurity(tc) && !ns) ? 
2 : 1; + } + return reg; } + +/** + * If the reg is a child reg of a banked set, then the parent is the last + * banked one in the list. This is messy, and the wish is to eventually have + * the bitmap replaced with a better data structure. the preUnflatten function + * initializes a lookup table to speed up the search for these banked + * registers. + */ + +int unflattenResultMiscReg[NUM_MISCREGS]; + +void +preUnflattenMiscReg() +{ + int reg = -1; + for (int i = 0 ; i < NUM_MISCREGS; i++){ + if (miscRegInfo[i][MISCREG_BANKED]) + reg = i; + if (miscRegInfo[i][MISCREG_BANKED_CHILD]) + unflattenResultMiscReg[i] = reg; + else + unflattenResultMiscReg[i] = i; + // if this assert fails, no parent was found, and something is broken + assert(unflattenResultMiscReg[i] > -1); + } } + +int +unflattenMiscReg(int reg) +{ + return unflattenResultMiscReg[reg]; +} + +bool +canReadAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + // Check for SP_EL0 access while SPSEL == 0 + if ((reg == MISCREG_SP_EL0) && (tc->readMiscReg(MISCREG_SPSEL) == 0)) + return false; + + // Check for RVBAR access + if (reg == MISCREG_RVBAR_EL1) { + ExceptionLevel highest_el = ArmSystem::highestEL(tc); + if (highest_el == EL2 || highest_el == EL3) + return false; + } + if (reg == MISCREG_RVBAR_EL2) { + ExceptionLevel highest_el = ArmSystem::highestEL(tc); + if (highest_el == EL3) + return false; + } + + bool secure = ArmSystem::haveSecurity(tc) && !scr.ns; + + switch (opModeToEL((OperatingMode) (uint8_t) cpsr.mode)) { + case EL0: + return secure ? miscRegInfo[reg][MISCREG_USR_S_RD] : + miscRegInfo[reg][MISCREG_USR_NS_RD]; + case EL1: + return secure ? miscRegInfo[reg][MISCREG_PRI_S_RD] : + miscRegInfo[reg][MISCREG_PRI_NS_RD]; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return miscRegInfo[reg][MISCREG_HYP_RD]; + case EL3: + return secure ? 
miscRegInfo[reg][MISCREG_MON_NS0_RD] : + miscRegInfo[reg][MISCREG_MON_NS1_RD]; + default: + panic("Invalid exception level"); + } +} + +bool +canWriteAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + // Check for SP_EL0 access while SPSEL == 0 + if ((reg == MISCREG_SP_EL0) && (tc->readMiscReg(MISCREG_SPSEL) == 0)) + return false; + ExceptionLevel el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + if (reg == MISCREG_DAIF) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.uma) + return false; + } + if (reg == MISCREG_DC_ZVA_Xt) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.dze) + return false; + } + if (reg == MISCREG_DC_CVAC_Xt || reg == MISCREG_DC_CIVAC_Xt) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.uci) + return false; + } + + bool secure = ArmSystem::haveSecurity(tc) && !scr.ns; + + switch (el) { + case EL0: + return secure ? miscRegInfo[reg][MISCREG_USR_S_WR] : + miscRegInfo[reg][MISCREG_USR_NS_WR]; + case EL1: + return secure ? miscRegInfo[reg][MISCREG_PRI_S_WR] : + miscRegInfo[reg][MISCREG_PRI_NS_WR]; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return miscRegInfo[reg][MISCREG_HYP_WR]; + case EL3: + return secure ? 
miscRegInfo[reg][MISCREG_MON_NS0_WR] : + miscRegInfo[reg][MISCREG_MON_NS1_WR]; + default: + panic("Invalid exception level"); + } +} +/** + * Map an AArch64 system register / system instruction encoding, given as + * its op0, op1, crn, crm and op2 fields, to a MiscRegIndex. The lookup is a + * nest of switch statements ordered op0, crn, op1, crm, op2. A few op2 + * values under op0 == 3, crn == 0, op1 == 0 return MISCREG_RAZ; any + * encoding that matches no case returns MISCREG_UNKNOWN. + */ +MiscRegIndex +decodeAArch64SysReg(unsigned op0, unsigned op1, + unsigned crn, unsigned crm, + unsigned op2) +{ + switch (op0) { + case 1: + switch (crn) { + case 7: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_IC_IALLUIS; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_IC_IALLU; + } + break; + case 6: + switch (op2) { + case 1: + return MISCREG_DC_IVAC_Xt; + case 2: + return MISCREG_DC_ISW_Xt; + } + break; + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E1R_Xt; + case 1: + return MISCREG_AT_S1E1W_Xt; + case 2: + return MISCREG_AT_S1E0R_Xt; + case 3: + return MISCREG_AT_S1E0W_Xt; + } + break; + case 10: + switch (op2) { + case 2: + return MISCREG_DC_CSW_Xt; + } + break; + case 14: + switch (op2) { + case 2: + return MISCREG_DC_CISW_Xt; + } + break; + } + break; + case 3: + switch (crm) { + case 4: + switch (op2) { + case 1: + return MISCREG_DC_ZVA_Xt; + } + break; + case 5: + switch (op2) { + case 1: + return MISCREG_IC_IVAU_Xt; + } + break; + case 10: + switch (op2) { + case 1: + return MISCREG_DC_CVAC_Xt; + } + break; + case 11: + switch (op2) { + case 1: + return MISCREG_DC_CVAU_Xt; + } + break; + case 14: + switch (op2) { + case 1: + return MISCREG_DC_CIVAC_Xt; + } + break; + } + break; + case 4: + switch (crm) { + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E2R_Xt; + case 1: + return MISCREG_AT_S1E2W_Xt; + case 4: + return MISCREG_AT_S12E1R_Xt; + case 5: + return MISCREG_AT_S12E1W_Xt; + case 6: + return MISCREG_AT_S12E0R_Xt; + case 7: + return MISCREG_AT_S12E0W_Xt; + } + break; + } + break; + case 6: + switch (crm) { + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E3R_Xt; + case 1: + return MISCREG_AT_S1E3W_Xt; + } + break; + } + break; + } + break; + case 8: + switch (op1) { + case 0: + switch (crm) { + case 3: + switch 
(op2) { + case 0: + return MISCREG_TLBI_VMALLE1IS; + case 1: + return MISCREG_TLBI_VAE1IS_Xt; + case 2: + return MISCREG_TLBI_ASIDE1IS_Xt; + case 3: + return MISCREG_TLBI_VAAE1IS_Xt; + case 5: + return MISCREG_TLBI_VALE1IS_Xt; + case 7: + return MISCREG_TLBI_VAALE1IS_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_VMALLE1; + case 1: + return MISCREG_TLBI_VAE1_Xt; + case 2: + return MISCREG_TLBI_ASIDE1_Xt; + case 3: + return MISCREG_TLBI_VAAE1_Xt; + case 5: + return MISCREG_TLBI_VALE1_Xt; + case 7: + return MISCREG_TLBI_VAALE1_Xt; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_TLBI_IPAS2E1IS_Xt; + case 5: + return MISCREG_TLBI_IPAS2LE1IS_Xt; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE2IS; + case 1: + return MISCREG_TLBI_VAE2IS_Xt; + case 4: + return MISCREG_TLBI_ALLE1IS; + case 5: + return MISCREG_TLBI_VALE2IS_Xt; + case 6: + return MISCREG_TLBI_VMALLS12E1IS; + } + break; + case 4: + switch (op2) { + case 1: + return MISCREG_TLBI_IPAS2E1_Xt; + case 5: + return MISCREG_TLBI_IPAS2LE1_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE2; + case 1: + return MISCREG_TLBI_VAE2_Xt; + case 4: + return MISCREG_TLBI_ALLE1; + case 5: + return MISCREG_TLBI_VALE2_Xt; + case 6: + return MISCREG_TLBI_VMALLS12E1; + } + break; + } + break; + case 6: + switch (crm) { + case 3: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE3IS; + case 1: + return MISCREG_TLBI_VAE3IS_Xt; + case 5: + return MISCREG_TLBI_VALE3IS_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE3; + case 1: + return MISCREG_TLBI_VAE3_Xt; + case 5: + return MISCREG_TLBI_VALE3_Xt; + } + break; + } + break; + } + break; + } + break; + case 2: + switch (crn) { + case 0: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_OSDTRRX_EL1; + case 4: + return MISCREG_DBGBVR0_EL1; + case 5: + return 
MISCREG_DBGBCR0_EL1; + case 6: + return MISCREG_DBGWVR0_EL1; + case 7: + return MISCREG_DBGWCR0_EL1; + } + break; + case 1: + switch (op2) { + case 4: + return MISCREG_DBGBVR1_EL1; + case 5: + return MISCREG_DBGBCR1_EL1; + case 6: + return MISCREG_DBGWVR1_EL1; + case 7: + return MISCREG_DBGWCR1_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_MDCCINT_EL1; + case 2: + return MISCREG_MDSCR_EL1; + case 4: + return MISCREG_DBGBVR2_EL1; + case 5: + return MISCREG_DBGBCR2_EL1; + case 6: + return MISCREG_DBGWVR2_EL1; + case 7: + return MISCREG_DBGWCR2_EL1; + } + break; + case 3: + switch (op2) { + case 2: + return MISCREG_OSDTRTX_EL1; + case 4: + return MISCREG_DBGBVR3_EL1; + case 5: + return MISCREG_DBGBCR3_EL1; + case 6: + return MISCREG_DBGWVR3_EL1; + case 7: + return MISCREG_DBGWCR3_EL1; + } + break; + case 4: + switch (op2) { + case 4: + return MISCREG_DBGBVR4_EL1; + case 5: + return MISCREG_DBGBCR4_EL1; + } + break; + case 5: + switch (op2) { + case 4: + return MISCREG_DBGBVR5_EL1; + case 5: + return MISCREG_DBGBCR5_EL1; + } + break; + case 6: + switch (op2) { + case 2: + return MISCREG_OSECCR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TEECR32_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_MDCCSR_EL0; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_MDDTR_EL0; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_MDDTRRX_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 7: + switch (op2) { + case 0: + return MISCREG_DBGVCR32_EL2; + } + break; + } + break; + } + break; + case 1: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_MDRAR_EL1; + case 4: + return MISCREG_OSLAR_EL1; + } + break; + case 1: + switch (op2) { + case 4: + return MISCREG_OSLSR_EL1; + } + break; + case 3: + switch (op2) { + case 4: + return 
MISCREG_OSDLR_EL1; + } + break; + case 4: + switch (op2) { + case 4: + return MISCREG_DBGPRCR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TEEHBR32_EL1; + } + break; + } + break; + } + break; + case 7: + switch (op1) { + case 0: + switch (crm) { + case 8: + switch (op2) { + case 6: + return MISCREG_DBGCLAIMSET_EL1; + } + break; + case 9: + switch (op2) { + case 6: + return MISCREG_DBGCLAIMCLR_EL1; + } + break; + case 14: + switch (op2) { + case 6: + return MISCREG_DBGAUTHSTATUS_EL1; + } + break; + } + break; + } + break; + } + break; + case 3: + switch (crn) { + case 0: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_MIDR_EL1; + case 5: + return MISCREG_MPIDR_EL1; + case 6: + return MISCREG_REVIDR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_ID_PFR0_EL1; + case 1: + return MISCREG_ID_PFR1_EL1; + case 2: + return MISCREG_ID_DFR0_EL1; + case 3: + return MISCREG_ID_AFR0_EL1; + case 4: + return MISCREG_ID_MMFR0_EL1; + case 5: + return MISCREG_ID_MMFR1_EL1; + case 6: + return MISCREG_ID_MMFR2_EL1; + case 7: + return MISCREG_ID_MMFR3_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ID_ISAR0_EL1; + case 1: + return MISCREG_ID_ISAR1_EL1; + case 2: + return MISCREG_ID_ISAR2_EL1; + case 3: + return MISCREG_ID_ISAR3_EL1; + case 4: + return MISCREG_ID_ISAR4_EL1; + case 5: + return MISCREG_ID_ISAR5_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_MVFR0_EL1; + case 1: + return MISCREG_MVFR1_EL1; + case 2: + return MISCREG_MVFR2_EL1; + case 3 ... 7: + return MISCREG_RAZ; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_ID_AA64PFR0_EL1; + case 1: + return MISCREG_ID_AA64PFR1_EL1; + case 2 ... 
7: + return MISCREG_RAZ; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_ID_AA64DFR0_EL1; + case 1: + return MISCREG_ID_AA64DFR1_EL1; + case 4: + return MISCREG_ID_AA64AFR0_EL1; + case 5: + return MISCREG_ID_AA64AFR1_EL1; + case 2: + case 3: + case 6: + case 7: + return MISCREG_RAZ; + } + break; + case 6: + switch (op2) { + case 0: + return MISCREG_ID_AA64ISAR0_EL1; + case 1: + return MISCREG_ID_AA64ISAR1_EL1; + case 2 ... 7: + return MISCREG_RAZ; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_ID_AA64MMFR0_EL1; + case 1: + return MISCREG_ID_AA64MMFR1_EL1; + case 2 ... 7: + return MISCREG_RAZ; + } + break; + } + break; + case 1: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CCSIDR_EL1; + case 1: + return MISCREG_CLIDR_EL1; + case 7: + return MISCREG_AIDR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CSSELR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_CTR_EL0; + case 7: + return MISCREG_DCZID_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VPIDR_EL2; + case 5: + return MISCREG_VMPIDR_EL2; + } + break; + } + break; + } + break; + case 1: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL1; + case 1: + return MISCREG_ACTLR_EL1; + case 2: + return MISCREG_CPACR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL2; + case 1: + return MISCREG_ACTLR_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_HCR_EL2; + case 1: + return MISCREG_MDCR_EL2; + case 2: + return MISCREG_CPTR_EL2; + case 3: + return MISCREG_HSTR_EL2; + case 7: + return MISCREG_HACR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL3; + 
case 1: + return MISCREG_ACTLR_EL3; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SCR_EL3; + case 1: + return MISCREG_SDER32_EL3; + case 2: + return MISCREG_CPTR_EL3; + } + break; + case 3: + switch (op2) { + case 1: + return MISCREG_MDCR_EL3; + } + break; + } + break; + } + break; + case 2: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL1; + case 1: + return MISCREG_TTBR1_EL1; + case 2: + return MISCREG_TCR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL2; + case 2: + return MISCREG_TCR_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_VTTBR_EL2; + case 2: + return MISCREG_VTCR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL3; + case 2: + return MISCREG_TCR_EL3; + } + break; + } + break; + } + break; + case 3: + switch (op1) { + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_DACR32_EL2; + } + break; + } + break; + } + break; + case 4: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL1; + case 1: + return MISCREG_ELR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SP_EL0; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_SPSEL; + case 2: + return MISCREG_CURRENTEL; + } + break; + } + break; + case 3: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_NZCV; + case 1: + return MISCREG_DAIF; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_FPCR; + case 1: + return MISCREG_FPSR; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_DSPSR_EL0; + case 1: + return MISCREG_DLR_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL2; + case 1: + return MISCREG_ELR_EL2; + } + break; + case 
1: + switch (op2) { + case 0: + return MISCREG_SP_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_SPSR_IRQ_AA64; + case 1: + return MISCREG_SPSR_ABT_AA64; + case 2: + return MISCREG_SPSR_UND_AA64; + case 3: + return MISCREG_SPSR_FIQ_AA64; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL3; + case 1: + return MISCREG_ELR_EL3; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SP_EL2; + } + break; + } + break; + } + break; + case 5: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL1; + case 1: + return MISCREG_AFSR1_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_IFSR32_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL2; + case 1: + return MISCREG_AFSR1_EL2; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL2; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_FPEXC32_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL3; + case 1: + return MISCREG_AFSR1_EL3; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL3; + } + break; + } + break; + } + break; + case 6: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL2; + case 4: + return MISCREG_HPFAR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL3; + } + break; + } + break; + } + break; + case 7: + switch (op1) { + case 0: + switch (crm) { + case 4: + switch (op2) { + case 0: + return MISCREG_PAR_EL1; + } + break; + } + break; + } + 
break; + case 9: + switch (op1) { + case 0: + switch (crm) { + case 14: + switch (op2) { + case 1: + return MISCREG_PMINTENSET_EL1; + case 2: + return MISCREG_PMINTENCLR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 12: + switch (op2) { + case 0: + return MISCREG_PMCR_EL0; + case 1: + return MISCREG_PMCNTENSET_EL0; + case 2: + return MISCREG_PMCNTENCLR_EL0; + case 3: + return MISCREG_PMOVSCLR_EL0; + case 4: + return MISCREG_PMSWINC_EL0; + case 5: + return MISCREG_PMSELR_EL0; + case 6: + return MISCREG_PMCEID0_EL0; + case 7: + return MISCREG_PMCEID1_EL0; + } + break; + case 13: + switch (op2) { + case 0: + return MISCREG_PMCCNTR_EL0; + case 1: + return MISCREG_PMCCFILTR_EL0; + case 2: + return MISCREG_PMXEVCNTR_EL0; + } + break; + case 14: + switch (op2) { + case 0: + return MISCREG_PMUSERENR_EL0; + case 3: + return MISCREG_PMOVSSET_EL0; + } + break; + } + break; + } + break; + case 10: + switch (op1) { + case 0: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL2; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL3; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL3; + } + break; + } + break; + } + break; + case 11: + switch (op1) { + case 1: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_L2CTLR_EL1; + case 3: + return MISCREG_L2ECTLR_EL1; + } + break; + } + break; + } + break; + case 12: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL1; + case 1: + return MISCREG_RVBAR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_ISR_EL1; + } + break; + } + break; + case 
4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL2; + case 1: + return MISCREG_RVBAR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL3; + case 1: + return MISCREG_RVBAR_EL3; + case 2: + return MISCREG_RMR_EL3; + } + break; + } + break; + } + break; + case 13: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_CONTEXTIDR_EL1; + case 4: + return MISCREG_TPIDR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL0; + case 3: + return MISCREG_TPIDRRO_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL3; + } + break; + } + break; + } + break; + case 14: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_CNTKCTL_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CNTFRQ_EL0; + case 1: + return MISCREG_CNTPCT_EL0; + case 2: + return MISCREG_CNTVCT_EL0; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CNTP_TVAL_EL0; + case 1: + return MISCREG_CNTP_CTL_EL0; + case 2: + return MISCREG_CNTP_CVAL_EL0; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_CNTV_TVAL_EL0; + case 1: + return MISCREG_CNTV_CTL_EL0; + case 2: + return MISCREG_CNTV_CVAL_EL0; + } + break; + case 8: + switch (op2) { + case 0: + return MISCREG_PMEVCNTR0_EL0; + case 1: + return MISCREG_PMEVCNTR1_EL0; + case 2: + return MISCREG_PMEVCNTR2_EL0; + case 3: + return MISCREG_PMEVCNTR3_EL0; + case 4: + return MISCREG_PMEVCNTR4_EL0; + case 5: + return MISCREG_PMEVCNTR5_EL0; + } + break; + case 12: + switch (op2) { + case 0: + return MISCREG_PMEVTYPER0_EL0; + case 1: + return 
MISCREG_PMEVTYPER1_EL0; + case 2: + return MISCREG_PMEVTYPER2_EL0; + case 3: + return MISCREG_PMEVTYPER3_EL0; + case 4: + return MISCREG_PMEVTYPER4_EL0; + case 5: + return MISCREG_PMEVTYPER5_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 3: + return MISCREG_CNTVOFF_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_CNTHCTL_EL2; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CNTHP_TVAL_EL2; + case 1: + return MISCREG_CNTHP_CTL_EL2; + case 2: + return MISCREG_CNTHP_CVAL_EL2; + } + break; + } + break; + case 7: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_CNTPS_TVAL_EL1; + case 1: + return MISCREG_CNTPS_CTL_EL1; + case 2: + return MISCREG_CNTPS_CVAL_EL1; + } + break; + } + break; + } + break; + case 15: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_IL1DATA0_EL1; + case 1: + return MISCREG_IL1DATA1_EL1; + case 2: + return MISCREG_IL1DATA2_EL1; + case 3: + return MISCREG_IL1DATA3_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_DL1DATA0_EL1; + case 1: + return MISCREG_DL1DATA1_EL1; + case 2: + return MISCREG_DL1DATA2_EL1; + case 3: + return MISCREG_DL1DATA3_EL1; + case 4: + return MISCREG_DL1DATA4_EL1; + } + break; + } + break; + case 1: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_L2ACTLR_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CPUACTLR_EL1; + case 1: + return MISCREG_CPUECTLR_EL1; + case 2: + return MISCREG_CPUMERRSR_EL1; + case 3: + return MISCREG_L2MERRSR_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_CBAR_EL1; + + } + break; + } + break; + } + break; + } + break; + } + + return MISCREG_UNKNOWN; +} + +} // namespace ArmISA diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 13234ddf5..c447dcd27 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -38,13 +38,19 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Gabe Black + * Giacomo Gabrielli */ #ifndef __ARCH_ARM_MISCREGS_HH__ #define __ARCH_ARM_MISCREGS_HH__ +#include <bitset> + #include "base/bitunion.hh" #include "base/compiler.hh" +class ThreadContext; + + namespace ArmISA { enum ConditionCode { @@ -67,284 +73,1330 @@ namespace ArmISA }; enum MiscRegIndex { - MISCREG_CPSR = 0, - MISCREG_CPSR_Q, - MISCREG_SPSR, - MISCREG_SPSR_FIQ, - MISCREG_SPSR_IRQ, - MISCREG_SPSR_SVC, - MISCREG_SPSR_MON, - MISCREG_SPSR_UND, - MISCREG_SPSR_ABT, - MISCREG_FPSR, - MISCREG_FPSID, - MISCREG_FPSCR, - MISCREG_FPSCR_QC, // Cumulative saturation flag - MISCREG_FPSCR_EXC, // Cumulative FP exception flags - MISCREG_FPEXC, - MISCREG_MVFR0, - MISCREG_MVFR1, - MISCREG_SCTLR_RST, - MISCREG_SEV_MAILBOX, - - // CP14 registers - MISCREG_CP14_START, - MISCREG_DBGDIDR = MISCREG_CP14_START, - MISCREG_DBGDSCR_INT, - MISCREG_DBGDTRRX_INT, - MISCREG_DBGTRTX_INT, - MISCREG_DBGWFAR, - MISCREG_DBGVCR, - MISCREG_DBGECR, - MISCREG_DBGDSCCR, - MISCREG_DBGSMCR, - MISCREG_DBGDTRRX_EXT, - MISCREG_DBGDSCR_EXT, - MISCREG_DBGDTRTX_EXT, - MISCREG_DBGDRCR, - MISCREG_DBGBVR, - MISCREG_DBGBCR, - MISCREG_DBGBVR_M, - MISCREG_DBGBCR_M, - MISCREG_DBGDRAR, - MISCREG_DBGBXVR_M, - MISCREG_DBGOSLAR, - MISCREG_DBGOSSRR, - MISCREG_DBGOSDLR, - MISCREG_DBGPRCR, - MISCREG_DBGPRSR, - MISCREG_DBGDSAR, - MISCREG_DBGITCTRL, - MISCREG_DBGCLAIMSET, - MISCREG_DBGCLAIMCLR, - MISCREG_DBGAUTHSTATUS, - MISCREG_DBGDEVID2, - MISCREG_DBGDEVID1, - MISCREG_DBGDEVID, - MISCREG_TEEHBR, - - // CP15 registers - MISCREG_CP15_START, - MISCREG_SCTLR = MISCREG_CP15_START, - MISCREG_DCCISW, - MISCREG_DCCIMVAC, - MISCREG_DCCMVAC, - MISCREG_CONTEXTIDR, - MISCREG_TPIDRURW, - MISCREG_TPIDRURO, - MISCREG_TPIDRPRW, - MISCREG_CP15ISB, - 
MISCREG_CP15DSB, - MISCREG_CP15DMB, - MISCREG_CPACR, - MISCREG_CLIDR, - MISCREG_CCSIDR, - MISCREG_CSSELR, - MISCREG_ICIALLUIS, - MISCREG_ICIALLU, - MISCREG_ICIMVAU, - MISCREG_BPIMVA, - MISCREG_BPIALLIS, - MISCREG_BPIALL, - MISCREG_MIDR, - MISCREG_TTBR0, - MISCREG_TTBR1, - MISCREG_TLBTR, - MISCREG_DACR, - MISCREG_TLBIALLIS, - MISCREG_TLBIMVAIS, - MISCREG_TLBIASIDIS, - MISCREG_TLBIMVAAIS, - MISCREG_ITLBIALL, - MISCREG_ITLBIMVA, - MISCREG_ITLBIASID, - MISCREG_DTLBIALL, - MISCREG_DTLBIMVA, - MISCREG_DTLBIASID, - MISCREG_TLBIALL, - MISCREG_TLBIMVA, - MISCREG_TLBIASID, - MISCREG_TLBIMVAA, - MISCREG_DFSR, - MISCREG_IFSR, - MISCREG_DFAR, - MISCREG_IFAR, - MISCREG_MPIDR, - MISCREG_PRRR, - MISCREG_NMRR, - MISCREG_TTBCR, - MISCREG_ID_PFR0, - MISCREG_CTR, - MISCREG_SCR, - MISCREG_SDER, - MISCREG_PAR, - MISCREG_V2PCWPR, - MISCREG_V2PCWPW, - MISCREG_V2PCWUR, - MISCREG_V2PCWUW, - MISCREG_V2POWPR, - MISCREG_V2POWPW, - MISCREG_V2POWUR, - MISCREG_V2POWUW, - MISCREG_ID_MMFR0, - MISCREG_ID_MMFR2, - MISCREG_ID_MMFR3, - MISCREG_ACTLR, - MISCREG_PMCR, - MISCREG_PMCCNTR, - MISCREG_PMCNTENSET, - MISCREG_PMCNTENCLR, - MISCREG_PMOVSR, - MISCREG_PMSWINC, - MISCREG_PMSELR, - MISCREG_PMCEID0, - MISCREG_PMCEID1, - MISCREG_PMC_OTHER, - MISCREG_PMXEVCNTR, - MISCREG_PMUSERENR, - MISCREG_PMINTENSET, - MISCREG_PMINTENCLR, - MISCREG_ID_ISAR0, - MISCREG_ID_ISAR1, - MISCREG_ID_ISAR2, - MISCREG_ID_ISAR3, - MISCREG_ID_ISAR4, - MISCREG_ID_ISAR5, - MISCREG_LOCKFLAG, - MISCREG_LOCKADDR, - MISCREG_ID_PFR1, - MISCREG_L2CTLR, - MISCREG_CP15_UNIMP_START, - MISCREG_TCMTR = MISCREG_CP15_UNIMP_START, - MISCREG_ID_DFR0, - MISCREG_ID_AFR0, - MISCREG_ID_MMFR1, - MISCREG_AIDR, - MISCREG_ADFSR, - MISCREG_AIFSR, - MISCREG_DCIMVAC, - MISCREG_DCISW, - MISCREG_MCCSW, - MISCREG_DCCMVAU, - MISCREG_NSACR, - MISCREG_VBAR, - MISCREG_MVBAR, - MISCREG_ISR, - MISCREG_FCEIDR, - MISCREG_L2LATENCY, - MISCREG_CRN15, - - - MISCREG_CP15_END, - - // Dummy indices - MISCREG_NOP = MISCREG_CP15_END, - MISCREG_RAZ, - - NUM_MISCREGS + 
MISCREG_CPSR = 0, // 0 + MISCREG_SPSR, // 1 + MISCREG_SPSR_FIQ, // 2 + MISCREG_SPSR_IRQ, // 3 + MISCREG_SPSR_SVC, // 4 + MISCREG_SPSR_MON, // 5 + MISCREG_SPSR_ABT, // 6 + MISCREG_SPSR_HYP, // 7 + MISCREG_SPSR_UND, // 8 + MISCREG_ELR_HYP, // 9 + MISCREG_FPSID, // 10 + MISCREG_FPSCR, // 11 + MISCREG_MVFR1, // 12 + MISCREG_MVFR0, // 13 + MISCREG_FPEXC, // 14 + + // Helper registers + MISCREG_CPSR_MODE, // 15 + MISCREG_CPSR_Q, // 16 + MISCREG_FPSCR_EXC, // 17 + MISCREG_FPSCR_QC, // 18 + MISCREG_LOCKADDR, // 19 + MISCREG_LOCKFLAG, // 20 + MISCREG_PRRR_MAIR0, // 21 + MISCREG_PRRR_MAIR0_NS, // 22 + MISCREG_PRRR_MAIR0_S, // 23 + MISCREG_NMRR_MAIR1, // 24 + MISCREG_NMRR_MAIR1_NS, // 25 + MISCREG_NMRR_MAIR1_S, // 26 + MISCREG_PMXEVTYPER_PMCCFILTR, // 27 + MISCREG_SCTLR_RST, // 28 + MISCREG_SEV_MAILBOX, // 29 + + // AArch32 CP14 registers (debug/trace/ThumbEE/Jazelle control) + MISCREG_DBGDIDR, // 30 + MISCREG_DBGDSCRint, // 31 + MISCREG_DBGDCCINT, // 32 + MISCREG_DBGDTRTXint, // 33 + MISCREG_DBGDTRRXint, // 34 + MISCREG_DBGWFAR, // 35 + MISCREG_DBGVCR, // 36 + MISCREG_DBGDTRRXext, // 37 + MISCREG_DBGDSCRext, // 38 + MISCREG_DBGDTRTXext, // 39 + MISCREG_DBGOSECCR, // 40 + MISCREG_DBGBVR0, // 41 + MISCREG_DBGBVR1, // 42 + MISCREG_DBGBVR2, // 43 + MISCREG_DBGBVR3, // 44 + MISCREG_DBGBVR4, // 45 + MISCREG_DBGBVR5, // 46 + MISCREG_DBGBCR0, // 47 + MISCREG_DBGBCR1, // 48 + MISCREG_DBGBCR2, // 49 + MISCREG_DBGBCR3, // 50 + MISCREG_DBGBCR4, // 51 + MISCREG_DBGBCR5, // 52 + MISCREG_DBGWVR0, // 53 + MISCREG_DBGWVR1, // 54 + MISCREG_DBGWVR2, // 55 + MISCREG_DBGWVR3, // 56 + MISCREG_DBGWCR0, // 57 + MISCREG_DBGWCR1, // 58 + MISCREG_DBGWCR2, // 59 + MISCREG_DBGWCR3, // 60 + MISCREG_DBGDRAR, // 61 + MISCREG_DBGBXVR4, // 62 + MISCREG_DBGBXVR5, // 63 + MISCREG_DBGOSLAR, // 64 + MISCREG_DBGOSLSR, // 65 + MISCREG_DBGOSDLR, // 66 + MISCREG_DBGPRCR, // 67 + MISCREG_DBGDSAR, // 68 + MISCREG_DBGCLAIMSET, // 69 + MISCREG_DBGCLAIMCLR, // 70 + MISCREG_DBGAUTHSTATUS, // 71 + MISCREG_DBGDEVID2, // 72 
+ MISCREG_DBGDEVID1, // 73 + MISCREG_DBGDEVID0, // 74 + MISCREG_TEECR, // 75 + MISCREG_JIDR, // 76 + MISCREG_TEEHBR, // 77 + MISCREG_JOSCR, // 78 + MISCREG_JMCR, // 79 + + // AArch32 CP15 registers (system control) + MISCREG_MIDR, // 80 + MISCREG_CTR, // 81 + MISCREG_TCMTR, // 82 + MISCREG_TLBTR, // 83 + MISCREG_MPIDR, // 84 + MISCREG_REVIDR, // 85 + MISCREG_ID_PFR0, // 86 + MISCREG_ID_PFR1, // 87 + MISCREG_ID_DFR0, // 88 + MISCREG_ID_AFR0, // 89 + MISCREG_ID_MMFR0, // 90 + MISCREG_ID_MMFR1, // 91 + MISCREG_ID_MMFR2, // 92 + MISCREG_ID_MMFR3, // 93 + MISCREG_ID_ISAR0, // 94 + MISCREG_ID_ISAR1, // 95 + MISCREG_ID_ISAR2, // 96 + MISCREG_ID_ISAR3, // 97 + MISCREG_ID_ISAR4, // 98 + MISCREG_ID_ISAR5, // 99 + MISCREG_CCSIDR, // 100 + MISCREG_CLIDR, // 101 + MISCREG_AIDR, // 102 + MISCREG_CSSELR, // 103 + MISCREG_CSSELR_NS, // 104 + MISCREG_CSSELR_S, // 105 + MISCREG_VPIDR, // 106 + MISCREG_VMPIDR, // 107 + MISCREG_SCTLR, // 108 + MISCREG_SCTLR_NS, // 109 + MISCREG_SCTLR_S, // 110 + MISCREG_ACTLR, // 111 + MISCREG_ACTLR_NS, // 112 + MISCREG_ACTLR_S, // 113 + MISCREG_CPACR, // 114 + MISCREG_SCR, // 115 + MISCREG_SDER, // 116 + MISCREG_NSACR, // 117 + MISCREG_HSCTLR, // 118 + MISCREG_HACTLR, // 119 + MISCREG_HCR, // 120 + MISCREG_HDCR, // 121 + MISCREG_HCPTR, // 122 + MISCREG_HSTR, // 123 + MISCREG_HACR, // 124 + MISCREG_TTBR0, // 125 + MISCREG_TTBR0_NS, // 126 + MISCREG_TTBR0_S, // 127 + MISCREG_TTBR1, // 128 + MISCREG_TTBR1_NS, // 129 + MISCREG_TTBR1_S, // 130 + MISCREG_TTBCR, // 131 + MISCREG_TTBCR_NS, // 132 + MISCREG_TTBCR_S, // 133 + MISCREG_HTCR, // 134 + MISCREG_VTCR, // 135 + MISCREG_DACR, // 136 + MISCREG_DACR_NS, // 137 + MISCREG_DACR_S, // 138 + MISCREG_DFSR, // 139 + MISCREG_DFSR_NS, // 140 + MISCREG_DFSR_S, // 141 + MISCREG_IFSR, // 142 + MISCREG_IFSR_NS, // 143 + MISCREG_IFSR_S, // 144 + MISCREG_ADFSR, // 145 + MISCREG_ADFSR_NS, // 146 + MISCREG_ADFSR_S, // 147 + MISCREG_AIFSR, // 148 + MISCREG_AIFSR_NS, // 149 + MISCREG_AIFSR_S, // 150 + MISCREG_HADFSR, // 
151 + MISCREG_HAIFSR, // 152 + MISCREG_HSR, // 153 + MISCREG_DFAR, // 154 + MISCREG_DFAR_NS, // 155 + MISCREG_DFAR_S, // 156 + MISCREG_IFAR, // 157 + MISCREG_IFAR_NS, // 158 + MISCREG_IFAR_S, // 159 + MISCREG_HDFAR, // 160 + MISCREG_HIFAR, // 161 + MISCREG_HPFAR, // 162 + MISCREG_ICIALLUIS, // 163 + MISCREG_BPIALLIS, // 164 + MISCREG_PAR, // 165 + MISCREG_PAR_NS, // 166 + MISCREG_PAR_S, // 167 + MISCREG_ICIALLU, // 168 + MISCREG_ICIMVAU, // 169 + MISCREG_CP15ISB, // 170 + MISCREG_BPIALL, // 171 + MISCREG_BPIMVA, // 172 + MISCREG_DCIMVAC, // 173 + MISCREG_DCISW, // 174 + MISCREG_ATS1CPR, // 175 + MISCREG_ATS1CPW, // 176 + MISCREG_ATS1CUR, // 177 + MISCREG_ATS1CUW, // 178 + MISCREG_ATS12NSOPR, // 179 + MISCREG_ATS12NSOPW, // 180 + MISCREG_ATS12NSOUR, // 181 + MISCREG_ATS12NSOUW, // 182 + MISCREG_DCCMVAC, // 183 + MISCREG_DCCSW, // 184 + MISCREG_CP15DSB, // 185 + MISCREG_CP15DMB, // 186 + MISCREG_DCCMVAU, // 187 + MISCREG_DCCIMVAC, // 188 + MISCREG_DCCISW, // 189 + MISCREG_ATS1HR, // 190 + MISCREG_ATS1HW, // 191 + MISCREG_TLBIALLIS, // 192 + MISCREG_TLBIMVAIS, // 193 + MISCREG_TLBIASIDIS, // 194 + MISCREG_TLBIMVAAIS, // 195 + MISCREG_TLBIMVALIS, // 196 + MISCREG_TLBIMVAALIS, // 197 + MISCREG_ITLBIALL, // 198 + MISCREG_ITLBIMVA, // 199 + MISCREG_ITLBIASID, // 200 + MISCREG_DTLBIALL, // 201 + MISCREG_DTLBIMVA, // 202 + MISCREG_DTLBIASID, // 203 + MISCREG_TLBIALL, // 204 + MISCREG_TLBIMVA, // 205 + MISCREG_TLBIASID, // 206 + MISCREG_TLBIMVAA, // 207 + MISCREG_TLBIMVAL, // 208 + MISCREG_TLBIMVAAL, // 209 + MISCREG_TLBIIPAS2IS, // 210 + MISCREG_TLBIIPAS2LIS, // 211 + MISCREG_TLBIALLHIS, // 212 + MISCREG_TLBIMVAHIS, // 213 + MISCREG_TLBIALLNSNHIS, // 214 + MISCREG_TLBIMVALHIS, // 215 + MISCREG_TLBIIPAS2, // 216 + MISCREG_TLBIIPAS2L, // 217 + MISCREG_TLBIALLH, // 218 + MISCREG_TLBIMVAH, // 219 + MISCREG_TLBIALLNSNH, // 220 + MISCREG_TLBIMVALH, // 221 + MISCREG_PMCR, // 222 + MISCREG_PMCNTENSET, // 223 + MISCREG_PMCNTENCLR, // 224 + MISCREG_PMOVSR, // 225 + MISCREG_PMSWINC, 
// 226 + MISCREG_PMSELR, // 227 + MISCREG_PMCEID0, // 228 + MISCREG_PMCEID1, // 229 + MISCREG_PMCCNTR, // 230 + MISCREG_PMXEVTYPER, // 231 + MISCREG_PMCCFILTR, // 232 + MISCREG_PMXEVCNTR, // 233 + MISCREG_PMUSERENR, // 234 + MISCREG_PMINTENSET, // 235 + MISCREG_PMINTENCLR, // 236 + MISCREG_PMOVSSET, // 237 + MISCREG_L2CTLR, // 238 + MISCREG_L2ECTLR, // 239 + MISCREG_PRRR, // 240 + MISCREG_PRRR_NS, // 241 + MISCREG_PRRR_S, // 242 + MISCREG_MAIR0, // 243 + MISCREG_MAIR0_NS, // 244 + MISCREG_MAIR0_S, // 245 + MISCREG_NMRR, // 246 + MISCREG_NMRR_NS, // 247 + MISCREG_NMRR_S, // 248 + MISCREG_MAIR1, // 249 + MISCREG_MAIR1_NS, // 250 + MISCREG_MAIR1_S, // 251 + MISCREG_AMAIR0, // 252 + MISCREG_AMAIR0_NS, // 253 + MISCREG_AMAIR0_S, // 254 + MISCREG_AMAIR1, // 255 + MISCREG_AMAIR1_NS, // 256 + MISCREG_AMAIR1_S, // 257 + MISCREG_HMAIR0, // 258 + MISCREG_HMAIR1, // 259 + MISCREG_HAMAIR0, // 260 + MISCREG_HAMAIR1, // 261 + MISCREG_VBAR, // 262 + MISCREG_VBAR_NS, // 263 + MISCREG_VBAR_S, // 264 + MISCREG_MVBAR, // 265 + MISCREG_RMR, // 266 + MISCREG_ISR, // 267 + MISCREG_HVBAR, // 268 + MISCREG_FCSEIDR, // 269 + MISCREG_CONTEXTIDR, // 270 + MISCREG_CONTEXTIDR_NS, // 271 + MISCREG_CONTEXTIDR_S, // 272 + MISCREG_TPIDRURW, // 273 + MISCREG_TPIDRURW_NS, // 274 + MISCREG_TPIDRURW_S, // 275 + MISCREG_TPIDRURO, // 276 + MISCREG_TPIDRURO_NS, // 277 + MISCREG_TPIDRURO_S, // 278 + MISCREG_TPIDRPRW, // 279 + MISCREG_TPIDRPRW_NS, // 280 + MISCREG_TPIDRPRW_S, // 281 + MISCREG_HTPIDR, // 282 + MISCREG_CNTFRQ, // 283 + MISCREG_CNTKCTL, // 284 + MISCREG_CNTP_TVAL, // 285 + MISCREG_CNTP_TVAL_NS, // 286 + MISCREG_CNTP_TVAL_S, // 287 + MISCREG_CNTP_CTL, // 288 + MISCREG_CNTP_CTL_NS, // 289 + MISCREG_CNTP_CTL_S, // 290 + MISCREG_CNTV_TVAL, // 291 + MISCREG_CNTV_CTL, // 292 + MISCREG_CNTHCTL, // 293 + MISCREG_CNTHP_TVAL, // 294 + MISCREG_CNTHP_CTL, // 295 + MISCREG_IL1DATA0, // 296 + MISCREG_IL1DATA1, // 297 + MISCREG_IL1DATA2, // 298 + MISCREG_IL1DATA3, // 299 + MISCREG_DL1DATA0, // 300 + 
MISCREG_DL1DATA1, // 301 + MISCREG_DL1DATA2, // 302 + MISCREG_DL1DATA3, // 303 + MISCREG_DL1DATA4, // 304 + MISCREG_RAMINDEX, // 305 + MISCREG_L2ACTLR, // 306 + MISCREG_CBAR, // 307 + MISCREG_HTTBR, // 308 + MISCREG_VTTBR, // 309 + MISCREG_CNTPCT, // 310 + MISCREG_CNTVCT, // 311 + MISCREG_CNTP_CVAL, // 312 + MISCREG_CNTP_CVAL_NS, // 313 + MISCREG_CNTP_CVAL_S, // 314 + MISCREG_CNTV_CVAL, // 315 + MISCREG_CNTVOFF, // 316 + MISCREG_CNTHP_CVAL, // 317 + MISCREG_CPUMERRSR, // 318 + MISCREG_L2MERRSR, // 319 + + // AArch64 registers (Op0=2) + MISCREG_MDCCINT_EL1, // 320 + MISCREG_OSDTRRX_EL1, // 321 + MISCREG_MDSCR_EL1, // 322 + MISCREG_OSDTRTX_EL1, // 323 + MISCREG_OSECCR_EL1, // 324 + MISCREG_DBGBVR0_EL1, // 325 + MISCREG_DBGBVR1_EL1, // 326 + MISCREG_DBGBVR2_EL1, // 327 + MISCREG_DBGBVR3_EL1, // 328 + MISCREG_DBGBVR4_EL1, // 329 + MISCREG_DBGBVR5_EL1, // 330 + MISCREG_DBGBCR0_EL1, // 331 + MISCREG_DBGBCR1_EL1, // 332 + MISCREG_DBGBCR2_EL1, // 333 + MISCREG_DBGBCR3_EL1, // 334 + MISCREG_DBGBCR4_EL1, // 335 + MISCREG_DBGBCR5_EL1, // 336 + MISCREG_DBGWVR0_EL1, // 337 + MISCREG_DBGWVR1_EL1, // 338 + MISCREG_DBGWVR2_EL1, // 339 + MISCREG_DBGWVR3_EL1, // 340 + MISCREG_DBGWCR0_EL1, // 341 + MISCREG_DBGWCR1_EL1, // 342 + MISCREG_DBGWCR2_EL1, // 343 + MISCREG_DBGWCR3_EL1, // 344 + MISCREG_MDCCSR_EL0, // 345 + MISCREG_MDDTR_EL0, // 346 + MISCREG_MDDTRTX_EL0, // 347 + MISCREG_MDDTRRX_EL0, // 348 + MISCREG_DBGVCR32_EL2, // 349 + MISCREG_MDRAR_EL1, // 350 + MISCREG_OSLAR_EL1, // 351 + MISCREG_OSLSR_EL1, // 352 + MISCREG_OSDLR_EL1, // 353 + MISCREG_DBGPRCR_EL1, // 354 + MISCREG_DBGCLAIMSET_EL1, // 355 + MISCREG_DBGCLAIMCLR_EL1, // 356 + MISCREG_DBGAUTHSTATUS_EL1, // 357 + MISCREG_TEECR32_EL1, // 358 + MISCREG_TEEHBR32_EL1, // 359 + + // AArch64 registers (Op0=1,3) + MISCREG_MIDR_EL1, // 360 + MISCREG_MPIDR_EL1, // 361 + MISCREG_REVIDR_EL1, // 362 + MISCREG_ID_PFR0_EL1, // 363 + MISCREG_ID_PFR1_EL1, // 364 + MISCREG_ID_DFR0_EL1, // 365 + MISCREG_ID_AFR0_EL1, // 366 + 
MISCREG_ID_MMFR0_EL1, // 367 + MISCREG_ID_MMFR1_EL1, // 368 + MISCREG_ID_MMFR2_EL1, // 369 + MISCREG_ID_MMFR3_EL1, // 370 + MISCREG_ID_ISAR0_EL1, // 371 + MISCREG_ID_ISAR1_EL1, // 372 + MISCREG_ID_ISAR2_EL1, // 373 + MISCREG_ID_ISAR3_EL1, // 374 + MISCREG_ID_ISAR4_EL1, // 375 + MISCREG_ID_ISAR5_EL1, // 376 + MISCREG_MVFR0_EL1, // 377 + MISCREG_MVFR1_EL1, // 378 + MISCREG_MVFR2_EL1, // 379 + MISCREG_ID_AA64PFR0_EL1, // 380 + MISCREG_ID_AA64PFR1_EL1, // 381 + MISCREG_ID_AA64DFR0_EL1, // 382 + MISCREG_ID_AA64DFR1_EL1, // 383 + MISCREG_ID_AA64AFR0_EL1, // 384 + MISCREG_ID_AA64AFR1_EL1, // 385 + MISCREG_ID_AA64ISAR0_EL1, // 386 + MISCREG_ID_AA64ISAR1_EL1, // 387 + MISCREG_ID_AA64MMFR0_EL1, // 388 + MISCREG_ID_AA64MMFR1_EL1, // 389 + MISCREG_CCSIDR_EL1, // 390 + MISCREG_CLIDR_EL1, // 391 + MISCREG_AIDR_EL1, // 392 + MISCREG_CSSELR_EL1, // 393 + MISCREG_CTR_EL0, // 394 + MISCREG_DCZID_EL0, // 395 + MISCREG_VPIDR_EL2, // 396 + MISCREG_VMPIDR_EL2, // 397 + MISCREG_SCTLR_EL1, // 398 + MISCREG_ACTLR_EL1, // 399 + MISCREG_CPACR_EL1, // 400 + MISCREG_SCTLR_EL2, // 401 + MISCREG_ACTLR_EL2, // 402 + MISCREG_HCR_EL2, // 403 + MISCREG_MDCR_EL2, // 404 + MISCREG_CPTR_EL2, // 405 + MISCREG_HSTR_EL2, // 406 + MISCREG_HACR_EL2, // 407 + MISCREG_SCTLR_EL3, // 408 + MISCREG_ACTLR_EL3, // 409 + MISCREG_SCR_EL3, // 410 + MISCREG_SDER32_EL3, // 411 + MISCREG_CPTR_EL3, // 412 + MISCREG_MDCR_EL3, // 413 + MISCREG_TTBR0_EL1, // 414 + MISCREG_TTBR1_EL1, // 415 + MISCREG_TCR_EL1, // 416 + MISCREG_TTBR0_EL2, // 417 + MISCREG_TCR_EL2, // 418 + MISCREG_VTTBR_EL2, // 419 + MISCREG_VTCR_EL2, // 420 + MISCREG_TTBR0_EL3, // 421 + MISCREG_TCR_EL3, // 422 + MISCREG_DACR32_EL2, // 423 + MISCREG_SPSR_EL1, // 424 + MISCREG_ELR_EL1, // 425 + MISCREG_SP_EL0, // 426 + MISCREG_SPSEL, // 427 + MISCREG_CURRENTEL, // 428 + MISCREG_NZCV, // 429 + MISCREG_DAIF, // 430 + MISCREG_FPCR, // 431 + MISCREG_FPSR, // 432 + MISCREG_DSPSR_EL0, // 433 + MISCREG_DLR_EL0, // 434 + MISCREG_SPSR_EL2, // 435 + MISCREG_ELR_EL2, // 
436 + MISCREG_SP_EL1, // 437 + MISCREG_SPSR_IRQ_AA64, // 438 + MISCREG_SPSR_ABT_AA64, // 439 + MISCREG_SPSR_UND_AA64, // 440 + MISCREG_SPSR_FIQ_AA64, // 441 + MISCREG_SPSR_EL3, // 442 + MISCREG_ELR_EL3, // 443 + MISCREG_SP_EL2, // 444 + MISCREG_AFSR0_EL1, // 445 + MISCREG_AFSR1_EL1, // 446 + MISCREG_ESR_EL1, // 447 + MISCREG_IFSR32_EL2, // 448 + MISCREG_AFSR0_EL2, // 449 + MISCREG_AFSR1_EL2, // 450 + MISCREG_ESR_EL2, // 451 + MISCREG_FPEXC32_EL2, // 452 + MISCREG_AFSR0_EL3, // 453 + MISCREG_AFSR1_EL3, // 454 + MISCREG_ESR_EL3, // 455 + MISCREG_FAR_EL1, // 456 + MISCREG_FAR_EL2, // 457 + MISCREG_HPFAR_EL2, // 458 + MISCREG_FAR_EL3, // 459 + MISCREG_IC_IALLUIS, // 460 + MISCREG_PAR_EL1, // 461 + MISCREG_IC_IALLU, // 462 + MISCREG_DC_IVAC_Xt, // 463 + MISCREG_DC_ISW_Xt, // 464 + MISCREG_AT_S1E1R_Xt, // 465 + MISCREG_AT_S1E1W_Xt, // 466 + MISCREG_AT_S1E0R_Xt, // 467 + MISCREG_AT_S1E0W_Xt, // 468 + MISCREG_DC_CSW_Xt, // 469 + MISCREG_DC_CISW_Xt, // 470 + MISCREG_DC_ZVA_Xt, // 471 + MISCREG_IC_IVAU_Xt, // 472 + MISCREG_DC_CVAC_Xt, // 473 + MISCREG_DC_CVAU_Xt, // 474 + MISCREG_DC_CIVAC_Xt, // 475 + MISCREG_AT_S1E2R_Xt, // 476 + MISCREG_AT_S1E2W_Xt, // 477 + MISCREG_AT_S12E1R_Xt, // 478 + MISCREG_AT_S12E1W_Xt, // 479 + MISCREG_AT_S12E0R_Xt, // 480 + MISCREG_AT_S12E0W_Xt, // 481 + MISCREG_AT_S1E3R_Xt, // 482 + MISCREG_AT_S1E3W_Xt, // 483 + MISCREG_TLBI_VMALLE1IS, // 484 + MISCREG_TLBI_VAE1IS_Xt, // 485 + MISCREG_TLBI_ASIDE1IS_Xt, // 486 + MISCREG_TLBI_VAAE1IS_Xt, // 487 + MISCREG_TLBI_VALE1IS_Xt, // 488 + MISCREG_TLBI_VAALE1IS_Xt, // 489 + MISCREG_TLBI_VMALLE1, // 490 + MISCREG_TLBI_VAE1_Xt, // 491 + MISCREG_TLBI_ASIDE1_Xt, // 492 + MISCREG_TLBI_VAAE1_Xt, // 493 + MISCREG_TLBI_VALE1_Xt, // 494 + MISCREG_TLBI_VAALE1_Xt, // 495 + MISCREG_TLBI_IPAS2E1IS_Xt, // 496 + MISCREG_TLBI_IPAS2LE1IS_Xt, // 497 + MISCREG_TLBI_ALLE2IS, // 498 + MISCREG_TLBI_VAE2IS_Xt, // 499 + MISCREG_TLBI_ALLE1IS, // 500 + MISCREG_TLBI_VALE2IS_Xt, // 501 + MISCREG_TLBI_VMALLS12E1IS, // 502 + 
MISCREG_TLBI_IPAS2E1_Xt, // 503 + MISCREG_TLBI_IPAS2LE1_Xt, // 504 + MISCREG_TLBI_ALLE2, // 505 + MISCREG_TLBI_VAE2_Xt, // 506 + MISCREG_TLBI_ALLE1, // 507 + MISCREG_TLBI_VALE2_Xt, // 508 + MISCREG_TLBI_VMALLS12E1, // 509 + MISCREG_TLBI_ALLE3IS, // 510 + MISCREG_TLBI_VAE3IS_Xt, // 511 + MISCREG_TLBI_VALE3IS_Xt, // 512 + MISCREG_TLBI_ALLE3, // 513 + MISCREG_TLBI_VAE3_Xt, // 514 + MISCREG_TLBI_VALE3_Xt, // 515 + MISCREG_PMINTENSET_EL1, // 516 + MISCREG_PMINTENCLR_EL1, // 517 + MISCREG_PMCR_EL0, // 518 + MISCREG_PMCNTENSET_EL0, // 519 + MISCREG_PMCNTENCLR_EL0, // 520 + MISCREG_PMOVSCLR_EL0, // 521 + MISCREG_PMSWINC_EL0, // 522 + MISCREG_PMSELR_EL0, // 523 + MISCREG_PMCEID0_EL0, // 524 + MISCREG_PMCEID1_EL0, // 525 + MISCREG_PMCCNTR_EL0, // 526 + MISCREG_PMXEVTYPER_EL0, // 527 + MISCREG_PMCCFILTR_EL0, // 528 + MISCREG_PMXEVCNTR_EL0, // 529 + MISCREG_PMUSERENR_EL0, // 530 + MISCREG_PMOVSSET_EL0, // 531 + MISCREG_MAIR_EL1, // 532 + MISCREG_AMAIR_EL1, // 533 + MISCREG_MAIR_EL2, // 534 + MISCREG_AMAIR_EL2, // 535 + MISCREG_MAIR_EL3, // 536 + MISCREG_AMAIR_EL3, // 537 + MISCREG_L2CTLR_EL1, // 538 + MISCREG_L2ECTLR_EL1, // 539 + MISCREG_VBAR_EL1, // 540 + MISCREG_RVBAR_EL1, // 541 + MISCREG_ISR_EL1, // 542 + MISCREG_VBAR_EL2, // 543 + MISCREG_RVBAR_EL2, // 544 + MISCREG_VBAR_EL3, // 545 + MISCREG_RVBAR_EL3, // 546 + MISCREG_RMR_EL3, // 547 + MISCREG_CONTEXTIDR_EL1, // 548 + MISCREG_TPIDR_EL1, // 549 + MISCREG_TPIDR_EL0, // 550 + MISCREG_TPIDRRO_EL0, // 551 + MISCREG_TPIDR_EL2, // 552 + MISCREG_TPIDR_EL3, // 553 + MISCREG_CNTKCTL_EL1, // 554 + MISCREG_CNTFRQ_EL0, // 555 + MISCREG_CNTPCT_EL0, // 556 + MISCREG_CNTVCT_EL0, // 557 + MISCREG_CNTP_TVAL_EL0, // 558 + MISCREG_CNTP_CTL_EL0, // 559 + MISCREG_CNTP_CVAL_EL0, // 560 + MISCREG_CNTV_TVAL_EL0, // 561 + MISCREG_CNTV_CTL_EL0, // 562 + MISCREG_CNTV_CVAL_EL0, // 563 + MISCREG_PMEVCNTR0_EL0, // 564 + MISCREG_PMEVCNTR1_EL0, // 565 + MISCREG_PMEVCNTR2_EL0, // 566 + MISCREG_PMEVCNTR3_EL0, // 567 + MISCREG_PMEVCNTR4_EL0, // 568 + 
MISCREG_PMEVCNTR5_EL0, // 569 + MISCREG_PMEVTYPER0_EL0, // 570 + MISCREG_PMEVTYPER1_EL0, // 571 + MISCREG_PMEVTYPER2_EL0, // 572 + MISCREG_PMEVTYPER3_EL0, // 573 + MISCREG_PMEVTYPER4_EL0, // 574 + MISCREG_PMEVTYPER5_EL0, // 575 + MISCREG_CNTVOFF_EL2, // 576 + MISCREG_CNTHCTL_EL2, // 577 + MISCREG_CNTHP_TVAL_EL2, // 578 + MISCREG_CNTHP_CTL_EL2, // 579 + MISCREG_CNTHP_CVAL_EL2, // 580 + MISCREG_CNTPS_TVAL_EL1, // 581 + MISCREG_CNTPS_CTL_EL1, // 582 + MISCREG_CNTPS_CVAL_EL1, // 583 + MISCREG_IL1DATA0_EL1, // 584 + MISCREG_IL1DATA1_EL1, // 585 + MISCREG_IL1DATA2_EL1, // 586 + MISCREG_IL1DATA3_EL1, // 587 + MISCREG_DL1DATA0_EL1, // 588 + MISCREG_DL1DATA1_EL1, // 589 + MISCREG_DL1DATA2_EL1, // 590 + MISCREG_DL1DATA3_EL1, // 591 + MISCREG_DL1DATA4_EL1, // 592 + MISCREG_L2ACTLR_EL1, // 593 + MISCREG_CPUACTLR_EL1, // 594 + MISCREG_CPUECTLR_EL1, // 595 + MISCREG_CPUMERRSR_EL1, // 596 + MISCREG_L2MERRSR_EL1, // 597 + MISCREG_CBAR_EL1, // 598 + + // Dummy registers + MISCREG_NOP, // 599 + MISCREG_RAZ, // 600 + MISCREG_CP14_UNIMPL, // 601 + MISCREG_CP15_UNIMPL, // 602 + MISCREG_A64_UNIMPL, // 603 + MISCREG_UNKNOWN, // 604 + + NUM_MISCREGS // 605 }; + enum MiscRegInfo { + MISCREG_IMPLEMENTED, + MISCREG_WARN_NOT_FAIL, // If MISCREG_IMPLEMENTED is deasserted, it + // tells whether the instruction should raise a + // warning or fail + MISCREG_MUTEX, // True if the register corresponds to a pair of + // mutually exclusive registers + MISCREG_BANKED, // True if the register is banked between the two + // security states, and this is the parent node of the + // two banked registers + MISCREG_BANKED_CHILD, // The entry is one of the child registers that + // forms a banked set of regs (along with the + // other child regs) + + // Access permissions + // User mode + MISCREG_USR_NS_RD, + MISCREG_USR_NS_WR, + MISCREG_USR_S_RD, + MISCREG_USR_S_WR, + // Privileged modes other than hypervisor or monitor + MISCREG_PRI_NS_RD, + MISCREG_PRI_NS_WR, + MISCREG_PRI_S_RD, + MISCREG_PRI_S_WR, + // 
Hypervisor mode + MISCREG_HYP_RD, + MISCREG_HYP_WR, + // Monitor mode, SCR.NS == 0 + MISCREG_MON_NS0_RD, + MISCREG_MON_NS0_WR, + // Monitor mode, SCR.NS == 1 + MISCREG_MON_NS1_RD, + MISCREG_MON_NS1_WR, + + NUM_MISCREG_INFOS + }; + + extern std::bitset<NUM_MISCREG_INFOS> miscRegInfo[NUM_MISCREGS]; + + // Decodes 32-bit CP14 registers accessible through MCR/MRC instructions MiscRegIndex decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2); + MiscRegIndex decodeAArch64SysReg(unsigned op0, unsigned op1, + unsigned crn, unsigned crm, + unsigned op2); + // Whether a particular AArch64 system register is -always- read only. + bool aarch64SysRegReadOnly(MiscRegIndex miscReg); + // Decodes 32-bit CP15 registers accessible through MCR/MRC instructions MiscRegIndex decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2); + // Decodes 64-bit CP15 registers accessible through MCRR/MRRC instructions + MiscRegIndex decodeCP15Reg64(unsigned crm, unsigned opc1); + const char * const miscRegName[] = { - "cpsr", "cpsr_q", "spsr", "spsr_fiq", "spsr_irq", "spsr_svc", - "spsr_mon", "spsr_und", "spsr_abt", - "fpsr", "fpsid", "fpscr", "fpscr_qc", "fpscr_exc", "fpexc", - "mvfr0", "mvfr1", - "sctlr_rst", "sev_mailbox", - "DBGDIDR", - "DBGDSCR_INT", - "DBGDTRRX_INT", - "DBGTRTX_INT", - "DBGWFAR", - "DBGVCR", - "DBGECR", - "DBGDSCCR", - "DBGSMCR", - "DBGDTRRX_EXT", - "DBGDSCR_EXT", - "DBGDTRTX_EXT", - "DBGDRCR", - "DBGBVR", - "DBGBCR", - "DBGBVR_M", - "DBGBCR_M", - "DBGDRAR", - "DBGBXVR_M", - "DBGOSLAR", - "DBGOSSRR", - "DBGOSDLR", - "DBGPRCR", - "DBGPRSR", - "DBGDSAR", - "DBGITCTRL", - "DBGCLAIMSET", - "DBGCLAIMCLR", - "DBGAUTHSTATUS", - "DBGDEVID2", - "DBGDEVID1", - "DBGDEVID", - "TEEHBR", - "sctlr", "dccisw", "dccimvac", "dccmvac", - "contextidr", "tpidrurw", "tpidruro", "tpidrprw", - "cp15isb", "cp15dsb", "cp15dmb", "cpacr", - "clidr", "ccsidr", "csselr", - "icialluis", "iciallu", "icimvau", - "bpimva", "bpiallis", "bpiall", - "midr", "ttbr0", "ttbr1", 
"tlbtr", "dacr", - "tlbiallis", "tlbimvais", "tlbiasidis", "tlbimvaais", - "itlbiall", "itlbimva", "itlbiasid", - "dtlbiall", "dtlbimva", "dtlbiasid", - "tlbiall", "tlbimva", "tlbiasid", "tlbimvaa", - "dfsr", "ifsr", "dfar", "ifar", "mpidr", - "prrr", "nmrr", "ttbcr", "id_pfr0", "ctr", - "scr", "sder", "par", - "v2pcwpr", "v2pcwpw", "v2pcwur", "v2pcwuw", - "v2powpr", "v2powpw", "v2powur", "v2powuw", - "id_mmfr0", "id_mmfr2", "id_mmfr3", "actlr", "pmcr", "pmccntr", - "pmcntenset", "pmcntenclr", "pmovsr", - "pmswinc", "pmselr", "pmceid0", - "pmceid1", "pmc_other", "pmxevcntr", - "pmuserenr", "pmintenset", "pmintenclr", - "id_isar0", "id_isar1", "id_isar2", "id_isar3", "id_isar4", "id_isar5", - "lockflag", "lockaddr", "id_pfr1", - "l2ctlr", - // Unimplemented below + "cpsr", + "spsr", + "spsr_fiq", + "spsr_irq", + "spsr_svc", + "spsr_mon", + "spsr_abt", + "spsr_hyp", + "spsr_und", + "elr_hyp", + "fpsid", + "fpscr", + "mvfr1", + "mvfr0", + "fpexc", + + // Helper registers + "cpsr_mode", + "cpsr_q", + "fpscr_exc", + "fpscr_qc", + "lockaddr", + "lockflag", + "prrr_mair0", + "prrr_mair0_ns", + "prrr_mair0_s", + "nmrr_mair1", + "nmrr_mair1_ns", + "nmrr_mair1_s", + "pmxevtyper_pmccfiltr", + "sctlr_rst", + "sev_mailbox", + + // AArch32 CP14 registers + "dbgdidr", + "dbgdscrint", + "dbgdccint", + "dbgdtrtxint", + "dbgdtrrxint", + "dbgwfar", + "dbgvcr", + "dbgdtrrxext", + "dbgdscrext", + "dbgdtrtxext", + "dbgoseccr", + "dbgbvr0", + "dbgbvr1", + "dbgbvr2", + "dbgbvr3", + "dbgbvr4", + "dbgbvr5", + "dbgbcr0", + "dbgbcr1", + "dbgbcr2", + "dbgbcr3", + "dbgbcr4", + "dbgbcr5", + "dbgwvr0", + "dbgwvr1", + "dbgwvr2", + "dbgwvr3", + "dbgwcr0", + "dbgwcr1", + "dbgwcr2", + "dbgwcr3", + "dbgdrar", + "dbgbxvr4", + "dbgbxvr5", + "dbgoslar", + "dbgoslsr", + "dbgosdlr", + "dbgprcr", + "dbgdsar", + "dbgclaimset", + "dbgclaimclr", + "dbgauthstatus", + "dbgdevid2", + "dbgdevid1", + "dbgdevid0", + "teecr", + "jidr", + "teehbr", + "joscr", + "jmcr", + + // AArch32 CP15 registers + "midr", + "ctr", 
"tcmtr", - "id_dfr0", "id_afr0", + "tlbtr", + "mpidr", + "revidr", + "id_pfr0", + "id_pfr1", + "id_dfr0", + "id_afr0", + "id_mmfr0", "id_mmfr1", - "aidr", "adfsr", "aifsr", - "dcimvac", "dcisw", "mccsw", - "dccmvau", + "id_mmfr2", + "id_mmfr3", + "id_isar0", + "id_isar1", + "id_isar2", + "id_isar3", + "id_isar4", + "id_isar5", + "ccsidr", + "clidr", + "aidr", + "csselr", + "csselr_ns", + "csselr_s", + "vpidr", + "vmpidr", + "sctlr", + "sctlr_ns", + "sctlr_s", + "actlr", + "actlr_ns", + "actlr_s", + "cpacr", + "scr", + "sder", "nsacr", - "vbar", "mvbar", "isr", "fceidr", "l2latency", - "crn15", - "nop", "raz" + "hsctlr", + "hactlr", + "hcr", + "hdcr", + "hcptr", + "hstr", + "hacr", + "ttbr0", + "ttbr0_ns", + "ttbr0_s", + "ttbr1", + "ttbr1_ns", + "ttbr1_s", + "ttbcr", + "ttbcr_ns", + "ttbcr_s", + "htcr", + "vtcr", + "dacr", + "dacr_ns", + "dacr_s", + "dfsr", + "dfsr_ns", + "dfsr_s", + "ifsr", + "ifsr_ns", + "ifsr_s", + "adfsr", + "adfsr_ns", + "adfsr_s", + "aifsr", + "aifsr_ns", + "aifsr_s", + "hadfsr", + "haifsr", + "hsr", + "dfar", + "dfar_ns", + "dfar_s", + "ifar", + "ifar_ns", + "ifar_s", + "hdfar", + "hifar", + "hpfar", + "icialluis", + "bpiallis", + "par", + "par_ns", + "par_s", + "iciallu", + "icimvau", + "cp15isb", + "bpiall", + "bpimva", + "dcimvac", + "dcisw", + "ats1cpr", + "ats1cpw", + "ats1cur", + "ats1cuw", + "ats12nsopr", + "ats12nsopw", + "ats12nsour", + "ats12nsouw", + "dccmvac", + "dccsw", + "cp15dsb", + "cp15dmb", + "dccmvau", + "dccimvac", + "dccisw", + "ats1hr", + "ats1hw", + "tlbiallis", + "tlbimvais", + "tlbiasidis", + "tlbimvaais", + "tlbimvalis", + "tlbimvaalis", + "itlbiall", + "itlbimva", + "itlbiasid", + "dtlbiall", + "dtlbimva", + "dtlbiasid", + "tlbiall", + "tlbimva", + "tlbiasid", + "tlbimvaa", + "tlbimval", + "tlbimvaal", + "tlbiipas2is", + "tlbiipas2lis", + "tlbiallhis", + "tlbimvahis", + "tlbiallnsnhis", + "tlbimvalhis", + "tlbiipas2", + "tlbiipas2l", + "tlbiallh", + "tlbimvah", + "tlbiallnsnh", + "tlbimvalh", + "pmcr", + 
"pmcntenset", + "pmcntenclr", + "pmovsr", + "pmswinc", + "pmselr", + "pmceid0", + "pmceid1", + "pmccntr", + "pmxevtyper", + "pmccfiltr", + "pmxevcntr", + "pmuserenr", + "pmintenset", + "pmintenclr", + "pmovsset", + "l2ctlr", + "l2ectlr", + "prrr", + "prrr_ns", + "prrr_s", + "mair0", + "mair0_ns", + "mair0_s", + "nmrr", + "nmrr_ns", + "nmrr_s", + "mair1", + "mair1_ns", + "mair1_s", + "amair0", + "amair0_ns", + "amair0_s", + "amair1", + "amair1_ns", + "amair1_s", + "hmair0", + "hmair1", + "hamair0", + "hamair1", + "vbar", + "vbar_ns", + "vbar_s", + "mvbar", + "rmr", + "isr", + "hvbar", + "fcseidr", + "contextidr", + "contextidr_ns", + "contextidr_s", + "tpidrurw", + "tpidrurw_ns", + "tpidrurw_s", + "tpidruro", + "tpidruro_ns", + "tpidruro_s", + "tpidrprw", + "tpidrprw_ns", + "tpidrprw_s", + "htpidr", + "cntfrq", + "cntkctl", + "cntp_tval", + "cntp_tval_ns", + "cntp_tval_s", + "cntp_ctl", + "cntp_ctl_ns", + "cntp_ctl_s", + "cntv_tval", + "cntv_ctl", + "cnthctl", + "cnthp_tval", + "cnthp_ctl", + "il1data0", + "il1data1", + "il1data2", + "il1data3", + "dl1data0", + "dl1data1", + "dl1data2", + "dl1data3", + "dl1data4", + "ramindex", + "l2actlr", + "cbar", + "httbr", + "vttbr", + "cntpct", + "cntvct", + "cntp_cval", + "cntp_cval_ns", + "cntp_cval_s", + "cntv_cval", + "cntvoff", + "cnthp_cval", + "cpumerrsr", + "l2merrsr", + + // AArch64 registers (Op0=2) + "mdccint_el1", + "osdtrrx_el1", + "mdscr_el1", + "osdtrtx_el1", + "oseccr_el1", + "dbgbvr0_el1", + "dbgbvr1_el1", + "dbgbvr2_el1", + "dbgbvr3_el1", + "dbgbvr4_el1", + "dbgbvr5_el1", + "dbgbcr0_el1", + "dbgbcr1_el1", + "dbgbcr2_el1", + "dbgbcr3_el1", + "dbgbcr4_el1", + "dbgbcr5_el1", + "dbgwvr0_el1", + "dbgwvr1_el1", + "dbgwvr2_el1", + "dbgwvr3_el1", + "dbgwcr0_el1", + "dbgwcr1_el1", + "dbgwcr2_el1", + "dbgwcr3_el1", + "mdccsr_el0", + "mddtr_el0", + "mddtrtx_el0", + "mddtrrx_el0", + "dbgvcr32_el2", + "mdrar_el1", + "oslar_el1", + "oslsr_el1", + "osdlr_el1", + "dbgprcr_el1", + "dbgclaimset_el1", + "dbgclaimclr_el1", + 
"dbgauthstatus_el1", + "teecr32_el1", + "teehbr32_el1", + + // AArch64 registers (Op0=1,3) + "midr_el1", + "mpidr_el1", + "revidr_el1", + "id_pfr0_el1", + "id_pfr1_el1", + "id_dfr0_el1", + "id_afr0_el1", + "id_mmfr0_el1", + "id_mmfr1_el1", + "id_mmfr2_el1", + "id_mmfr3_el1", + "id_isar0_el1", + "id_isar1_el1", + "id_isar2_el1", + "id_isar3_el1", + "id_isar4_el1", + "id_isar5_el1", + "mvfr0_el1", + "mvfr1_el1", + "mvfr2_el1", + "id_aa64pfr0_el1", + "id_aa64pfr1_el1", + "id_aa64dfr0_el1", + "id_aa64dfr1_el1", + "id_aa64afr0_el1", + "id_aa64afr1_el1", + "id_aa64isar0_el1", + "id_aa64isar1_el1", + "id_aa64mmfr0_el1", + "id_aa64mmfr1_el1", + "ccsidr_el1", + "clidr_el1", + "aidr_el1", + "csselr_el1", + "ctr_el0", + "dczid_el0", + "vpidr_el2", + "vmpidr_el2", + "sctlr_el1", + "actlr_el1", + "cpacr_el1", + "sctlr_el2", + "actlr_el2", + "hcr_el2", + "mdcr_el2", + "cptr_el2", + "hstr_el2", + "hacr_el2", + "sctlr_el3", + "actlr_el3", + "scr_el3", + "sder32_el3", + "cptr_el3", + "mdcr_el3", + "ttbr0_el1", + "ttbr1_el1", + "tcr_el1", + "ttbr0_el2", + "tcr_el2", + "vttbr_el2", + "vtcr_el2", + "ttbr0_el3", + "tcr_el3", + "dacr32_el2", + "spsr_el1", + "elr_el1", + "sp_el0", + "spsel", + "currentel", + "nzcv", + "daif", + "fpcr", + "fpsr", + "dspsr_el0", + "dlr_el0", + "spsr_el2", + "elr_el2", + "sp_el1", + "spsr_irq_aa64", + "spsr_abt_aa64", + "spsr_und_aa64", + "spsr_fiq_aa64", + "spsr_el3", + "elr_el3", + "sp_el2", + "afsr0_el1", + "afsr1_el1", + "esr_el1", + "ifsr32_el2", + "afsr0_el2", + "afsr1_el2", + "esr_el2", + "fpexc32_el2", + "afsr0_el3", + "afsr1_el3", + "esr_el3", + "far_el1", + "far_el2", + "hpfar_el2", + "far_el3", + "ic_ialluis", + "par_el1", + "ic_iallu", + "dc_ivac_xt", + "dc_isw_xt", + "at_s1e1r_xt", + "at_s1e1w_xt", + "at_s1e0r_xt", + "at_s1e0w_xt", + "dc_csw_xt", + "dc_cisw_xt", + "dc_zva_xt", + "ic_ivau_xt", + "dc_cvac_xt", + "dc_cvau_xt", + "dc_civac_xt", + "at_s1e2r_xt", + "at_s1e2w_xt", + "at_s12e1r_xt", + "at_s12e1w_xt", + "at_s12e0r_xt", + "at_s12e0w_xt", 
+ "at_s1e3r_xt", + "at_s1e3w_xt", + "tlbi_vmalle1is", + "tlbi_vae1is_xt", + "tlbi_aside1is_xt", + "tlbi_vaae1is_xt", + "tlbi_vale1is_xt", + "tlbi_vaale1is_xt", + "tlbi_vmalle1", + "tlbi_vae1_xt", + "tlbi_aside1_xt", + "tlbi_vaae1_xt", + "tlbi_vale1_xt", + "tlbi_vaale1_xt", + "tlbi_ipas2e1is_xt", + "tlbi_ipas2le1is_xt", + "tlbi_alle2is", + "tlbi_vae2is_xt", + "tlbi_alle1is", + "tlbi_vale2is_xt", + "tlbi_vmalls12e1is", + "tlbi_ipas2e1_xt", + "tlbi_ipas2le1_xt", + "tlbi_alle2", + "tlbi_vae2_xt", + "tlbi_alle1", + "tlbi_vale2_xt", + "tlbi_vmalls12e1", + "tlbi_alle3is", + "tlbi_vae3is_xt", + "tlbi_vale3is_xt", + "tlbi_alle3", + "tlbi_vae3_xt", + "tlbi_vale3_xt", + "pmintenset_el1", + "pmintenclr_el1", + "pmcr_el0", + "pmcntenset_el0", + "pmcntenclr_el0", + "pmovsclr_el0", + "pmswinc_el0", + "pmselr_el0", + "pmceid0_el0", + "pmceid1_el0", + "pmccntr_el0", + "pmxevtyper_el0", + "pmccfiltr_el0", + "pmxevcntr_el0", + "pmuserenr_el0", + "pmovsset_el0", + "mair_el1", + "amair_el1", + "mair_el2", + "amair_el2", + "mair_el3", + "amair_el3", + "l2ctlr_el1", + "l2ectlr_el1", + "vbar_el1", + "rvbar_el1", + "isr_el1", + "vbar_el2", + "rvbar_el2", + "vbar_el3", + "rvbar_el3", + "rmr_el3", + "contextidr_el1", + "tpidr_el1", + "tpidr_el0", + "tpidrro_el0", + "tpidr_el2", + "tpidr_el3", + "cntkctl_el1", + "cntfrq_el0", + "cntpct_el0", + "cntvct_el0", + "cntp_tval_el0", + "cntp_ctl_el0", + "cntp_cval_el0", + "cntv_tval_el0", + "cntv_ctl_el0", + "cntv_cval_el0", + "pmevcntr0_el0", + "pmevcntr1_el0", + "pmevcntr2_el0", + "pmevcntr3_el0", + "pmevcntr4_el0", + "pmevcntr5_el0", + "pmevtyper0_el0", + "pmevtyper1_el0", + "pmevtyper2_el0", + "pmevtyper3_el0", + "pmevtyper4_el0", + "pmevtyper5_el0", + "cntvoff_el2", + "cnthctl_el2", + "cnthp_tval_el2", + "cnthp_ctl_el2", + "cnthp_cval_el2", + "cntps_tval_el1", + "cntps_ctl_el1", + "cntps_cval_el1", + "il1data0_el1", + "il1data1_el1", + "il1data2_el1", + "il1data3_el1", + "dl1data0_el1", + "dl1data1_el1", + "dl1data2_el1", + "dl1data3_el1", + 
"dl1data4_el1", + "l2actlr_el1", + "cpuactlr_el1", + "cpuectlr_el1", + "cpumerrsr_el1", + "l2merrsr_el1", + "cbar_el1", + + // Dummy registers + "nop", + "raz", + "cp14_unimpl", + "cp15_unimpl", + "a64_unimpl", + "unknown" }; static_assert(sizeof(miscRegName) / sizeof(*miscRegName) == NUM_MISCREGS, "The miscRegName array and NUM_MISCREGS are inconsistent."); BitUnion32(CPSR) - Bitfield<31,30> nz; + Bitfield<31, 30> nz; Bitfield<29> c; Bitfield<28> v; Bitfield<27> q; - Bitfield<26,25> it1; + Bitfield<26, 25> it1; Bitfield<24> j; + Bitfield<23, 22> res0_23_22; + Bitfield<21> ss; // AArch64 + Bitfield<20> il; // AArch64 Bitfield<19, 16> ge; - Bitfield<15,10> it2; + Bitfield<15, 10> it2; + Bitfield<9> d; // AArch64 Bitfield<9> e; Bitfield<8> a; Bitfield<7> i; Bitfield<6> f; + Bitfield<9, 6> daif; // AArch64 Bitfield<5> t; + Bitfield<4> width; // AArch64 + Bitfield<3, 2> el; // AArch64 Bitfield<4, 0> mode; + Bitfield<0> sp; // AArch64 EndBitUnion(CPSR) // This mask selects bits of the CPSR that actually go in the CondCodes @@ -352,32 +1404,190 @@ namespace ArmISA static const uint32_t CondCodesMask = 0xF00F0000; static const uint32_t CpsrMaskQ = 0x08000000; + BitUnion32(HDCR) + Bitfield<11> tdra; + Bitfield<10> tdosa; + Bitfield<9> tda; + Bitfield<8> tde; + Bitfield<7> hpme; + Bitfield<6> tpm; + Bitfield<5> tpmcr; + Bitfield<4, 0> hpmn; + EndBitUnion(HDCR) + + BitUnion32(HCPTR) + Bitfield<31> tcpac; + Bitfield<20> tta; + Bitfield<15> tase; + Bitfield<13> tcp13; + Bitfield<12> tcp12; + Bitfield<11> tcp11; + Bitfield<10> tcp10; + Bitfield<10> tfp; // AArch64 + Bitfield<9> tcp9; + Bitfield<8> tcp8; + Bitfield<7> tcp7; + Bitfield<6> tcp6; + Bitfield<5> tcp5; + Bitfield<4> tcp4; + Bitfield<3> tcp3; + Bitfield<2> tcp2; + Bitfield<1> tcp1; + Bitfield<0> tcp0; + EndBitUnion(HCPTR) + + BitUnion32(HSTR) + Bitfield<17> tjdbx; + Bitfield<16> ttee; + Bitfield<15> t15; + Bitfield<13> t13; + Bitfield<12> t12; + Bitfield<11> t11; + Bitfield<10> t10; + Bitfield<9> t9; + Bitfield<8> t8; 
+ Bitfield<7> t7; + Bitfield<6> t6; + Bitfield<5> t5; + Bitfield<4> t4; + Bitfield<3> t3; + Bitfield<2> t2; + Bitfield<1> t1; + Bitfield<0> t0; + EndBitUnion(HSTR) + + BitUnion64(HCR) + Bitfield<33> id; // AArch64 + Bitfield<32> cd; // AArch64 + Bitfield<31> rw; // AArch64 + Bitfield<30> trvm; // AArch64 + Bitfield<29> hcd; // AArch64 + Bitfield<28> tdz; // AArch64 + + Bitfield<27> tge; + Bitfield<26> tvm; + Bitfield<25> ttlb; + Bitfield<24> tpu; + Bitfield<23> tpc; + Bitfield<22> tsw; + Bitfield<21> tac; + Bitfield<21> tacr; // AArch64 + Bitfield<20> tidcp; + Bitfield<19> tsc; + Bitfield<18> tid3; + Bitfield<17> tid2; + Bitfield<16> tid1; + Bitfield<15> tid0; + Bitfield<14> twe; + Bitfield<13> twi; + Bitfield<12> dc; + Bitfield<11, 10> bsu; + Bitfield<9> fb; + Bitfield<8> va; + Bitfield<8> vse; // AArch64 + Bitfield<7> vi; + Bitfield<6> vf; + Bitfield<5> amo; + Bitfield<4> imo; + Bitfield<3> fmo; + Bitfield<2> ptw; + Bitfield<1> swio; + Bitfield<0> vm; + EndBitUnion(HCR) + + BitUnion32(NSACR) + Bitfield<20> nstrcdis; + Bitfield<19> rfr; + Bitfield<15> nsasedis; + Bitfield<14> nsd32dis; + Bitfield<13> cp13; + Bitfield<12> cp12; + Bitfield<11> cp11; + Bitfield<10> cp10; + Bitfield<9> cp9; + Bitfield<8> cp8; + Bitfield<7> cp7; + Bitfield<6> cp6; + Bitfield<5> cp5; + Bitfield<4> cp4; + Bitfield<3> cp3; + Bitfield<2> cp2; + Bitfield<1> cp1; + Bitfield<0> cp0; + EndBitUnion(NSACR) + + BitUnion32(SCR) + Bitfield<13> twe; + Bitfield<12> twi; + Bitfield<11> st; // AArch64 + Bitfield<10> rw; // AArch64 + Bitfield<9> sif; + Bitfield<8> hce; + Bitfield<7> scd; + Bitfield<7> smd; // AArch64 + Bitfield<6> nEt; + Bitfield<5> aw; + Bitfield<4> fw; + Bitfield<3> ea; + Bitfield<2> fiq; + Bitfield<1> irq; + Bitfield<0> ns; + EndBitUnion(SCR) + BitUnion32(SCTLR) - Bitfield<31> ie; // Instruction endianness - Bitfield<30> te; // Thumb Exception Enable - Bitfield<29> afe; // Access flag enable - Bitfield<28> tre; // TEX Remap bit - Bitfield<27> nmfi;// Non-maskable fast interrupts 
enable - Bitfield<25> ee; // Exception Endianness bit - Bitfield<24> ve; // Interrupt vectors enable - Bitfield<23> xp; // Extended page table enable bit - Bitfield<22> u; // Alignment (now unused) - Bitfield<21> fi; // Fast interrupts configuration enable - Bitfield<19> dz; // Divide by Zero fault enable bit - Bitfield<18> rao2;// Read as one - Bitfield<17> br; // Background region bit - Bitfield<16> rao3;// Read as one - Bitfield<14> rr; // Round robin cache replacement - Bitfield<13> v; // Base address for exception vectors - Bitfield<12> i; // instruction cache enable - Bitfield<11> z; // branch prediction enable bit - Bitfield<10> sw; // Enable swp/swpb - Bitfield<9,8> rs; // deprecated protection bits - Bitfield<6,3> rao4;// Read as one - Bitfield<7> b; // Endianness support (unused) - Bitfield<2> c; // Cache enable bit - Bitfield<1> a; // Alignment fault checking - Bitfield<0> m; // MMU enable bit + Bitfield<30> te; // Thumb Exception Enable (AArch32 only) + Bitfield<29> afe; // Access flag enable (AArch32 only) + Bitfield<28> tre; // TEX remap enable (AArch32 only) + Bitfield<27> nmfi; // Non-maskable FIQ support (ARMv7 only) + Bitfield<26> uci; // Enable EL0 access to DC CVAU, DC CIVAC, + // DC CVAC and IC IVAU instructions + // (AArch64 SCTLR_EL1 only) + Bitfield<25> ee; // Exception Endianness + Bitfield<24> ve; // Interrupt Vectors Enable (ARMv7 only) + Bitfield<24> e0e; // Endianness of explicit data accesses at EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<23> xp; // Extended page table enable (dropped in ARMv7) + Bitfield<22> u; // Alignment (dropped in ARMv7) + Bitfield<21> fi; // Fast interrupts configuration enable + // (ARMv7 only) + Bitfield<20> uwxn; // Unprivileged write permission implies EL1 XN + // (AArch32 only) + Bitfield<19> dz; // Divide by Zero fault enable + // (dropped in ARMv7) + Bitfield<19> wxn; // Write permission implies XN + Bitfield<18> ntwe; // Not trap WFE + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<18> rao2; 
// Read as one + Bitfield<16> ntwi; // Not trap WFI + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<16> rao3; // Read as one + Bitfield<15> uct; // Enable EL0 access to CTR_EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<14> rr; // Round Robin select (ARMv7 only) + Bitfield<14> dze; // Enable EL0 access to DC ZVA + // (AArch64 SCTLR_EL1 only) + Bitfield<13> v; // Vectors bit (AArch32 only) + Bitfield<12> i; // Instruction cache enable + Bitfield<11> z; // Branch prediction enable (ARMv7 only) + Bitfield<10> sw; // SWP/SWPB enable (ARMv7 only) + Bitfield<9, 8> rs; // Deprecated protection bits (dropped in ARMv7) + Bitfield<9> uma; // User mask access (AArch64 SCTLR_EL1 only) + Bitfield<8> sed; // SETEND disable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<7> b; // Endianness support (dropped in ARMv7) + Bitfield<7> itd; // IT disable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<6, 3> rao4; // Read as one + Bitfield<6> thee; // ThumbEE enable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<5> cp15ben; // CP15 barrier enable + // (AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<4> sa0; // Stack Alignment Check Enable for EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<3> sa; // Stack Alignment Check Enable (AArch64 only) + Bitfield<2> c; // Cache enable + Bitfield<1> a; // Alignment check enable + Bitfield<0> m; // MMU enable EndBitUnion(SCTLR) BitUnion32(CPACR) @@ -392,20 +1602,25 @@ namespace ArmISA Bitfield<17, 16> cp8; Bitfield<19, 18> cp9; Bitfield<21, 20> cp10; + Bitfield<21, 20> fpen; // AArch64 Bitfield<23, 22> cp11; Bitfield<25, 24> cp12; Bitfield<27, 26> cp13; Bitfield<29, 28> rsvd; + Bitfield<28> tta; // AArch64 Bitfield<30> d32dis; Bitfield<31> asedis; EndBitUnion(CPACR) BitUnion32(FSR) Bitfield<3, 0> fsLow; + Bitfield<5, 0> status; // LPAE Bitfield<7, 4> domain; + Bitfield<9> lpae; Bitfield<10> fsHigh; Bitfield<11> wnr; Bitfield<12> ext; + Bitfield<13> cm; // LPAE EndBitUnion(FSR) BitUnion32(FPSCR) @@ -470,6 
+1685,52 @@ namespace ArmISA Bitfield<31, 28> raz; EndBitUnion(MVFR1) + BitUnion64(TTBCR) + // Short-descriptor translation table format + Bitfield<2, 0> n; + Bitfield<4> pd0; + Bitfield<5> pd1; + // Long-descriptor translation table format + Bitfield<5, 0> t0sz; + Bitfield<7> epd0; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + Bitfield<14> tg0; + Bitfield<21, 16> t1sz; + Bitfield<22> a1; + Bitfield<23> epd1; + Bitfield<25, 24> irgn1; + Bitfield<27, 26> orgn1; + Bitfield<29, 28> sh1; + Bitfield<30> tg1; + Bitfield<34, 32> ips; + Bitfield<36> as; + Bitfield<37> tbi0; + Bitfield<38> tbi1; + // Common + Bitfield<31> eae; + // TCR_EL2/3 (AArch64) + Bitfield<18, 16> ps; + Bitfield<20> tbi; + EndBitUnion(TTBCR) + + BitUnion32(HTCR) + Bitfield<2, 0> t0sz; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + EndBitUnion(HTCR) + + BitUnion32(VTCR_t) + Bitfield<3, 0> t0sz; + Bitfield<4> s; + Bitfield<7, 6> sl0; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + EndBitUnion(VTCR_t) + BitUnion32(PRRR) Bitfield<1,0> tr0; Bitfield<3,2> tr1; @@ -544,6 +1805,72 @@ namespace ArmISA Bitfield<28> raz_28; Bitfield<31,29> format; EndBitUnion(CTR) + + BitUnion32(PMSELR) + Bitfield<4, 0> sel; + EndBitUnion(PMSELR) + + BitUnion64(PAR) + // 64-bit format + Bitfield<63, 56> attr; + Bitfield<39, 12> pa; + Bitfield<11> lpae; + Bitfield<9> ns; + Bitfield<8, 7> sh; + Bitfield<0> f; + EndBitUnion(PAR) + + BitUnion32(ESR) + Bitfield<31, 26> ec; + Bitfield<25> il; + Bitfield<15, 0> imm16; + EndBitUnion(ESR) + + BitUnion32(CPTR) + Bitfield<31> tcpac; + Bitfield<20> tta; + Bitfield<13, 12> res1_13_12_el2; + Bitfield<10> tfp; + Bitfield<9, 0> res1_9_0_el2; + EndBitUnion(CPTR) + + + // Checks read access permissions to coproc. registers + bool canReadCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks write access permissions to coproc. 
registers + bool canWriteCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks read access permissions to AArch64 system registers + bool canReadAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks write access permissions to AArch64 system registers + bool canWriteAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Uses just the scr.ns bit to pre flatten the misc regs. This is useful + // for MCR/MRC instructions + int + flattenMiscRegNsBanked(int reg, ThreadContext *tc); + + // Flattens a misc reg index using the specified security state. This is + // used for operations (eg address translations) where the security + // state of the register access may differ from the current state of the + // processor + int + flattenMiscRegNsBanked(int reg, ThreadContext *tc, bool ns); + + // Takes a misc reg index and returns the root reg if it's one of a set of + // banked registers + void + preUnflattenMiscReg(); + + int + unflattenMiscReg(int reg); + } #endif // __ARCH_ARM_MISCREGS_HH__ diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc index 21dff8b7c..9ba3fa84a 100644 --- a/src/arch/arm/nativetrace.cc +++ b/src/arch/arm/nativetrace.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2011 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -124,7 +124,7 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc) newState[STATE_CPSR] = cpsr; changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]); - for (int i = 0; i < NumFloatArchRegs; i += 2) { + for (int i = 0; i < NumFloatV7ArchRegs; i += 2) { newState[STATE_F0 + (i >> 1)] = static_cast<uint64_t>(tc->readFloatRegBits(i + 1)) << 32 | tc->readFloatRegBits(i); diff --git a/src/arch/arm/pagetable.hh b/src/arch/arm/pagetable.hh index 898ab3191..591ec9807 100644 --- a/src/arch/arm/pagetable.hh +++ 
b/src/arch/arm/pagetable.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,6 +43,8 @@ #ifndef __ARCH_ARM_PAGETABLE_H__ #define __ARCH_ARM_PAGETABLE_H__ +#include <cstdint> + #include "arch/arm/isa_traits.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" @@ -71,69 +73,107 @@ struct PTE }; +// Lookup level +enum LookupLevel { + L0 = 0, // AArch64 only + L1, + L2, + L3, + MAX_LOOKUP_LEVELS +}; + // ITB/DTB table entry struct TlbEntry { public: - enum MemoryType { + enum class MemoryType : std::uint8_t { StronglyOrdered, Device, Normal }; - enum DomainType { - DomainNoAccess = 0, - DomainClient, - DomainReserved, - DomainManager + + enum class DomainType : std::uint8_t { + NoAccess = 0, + Client, + Reserved, + Manager }; // Matching variables Addr pfn; Addr size; // Size of this entry, == Type of TLB Rec Addr vpn; // Virtual Page Number - uint32_t asid; // Address Space Identifier + uint64_t attributes; // Memory attributes formatted for PAR + + LookupLevel lookupLevel; // Lookup level where the descriptor was fetched + // from. Used to set the FSR for faults + // occurring while the long desc. 
format is in + // use (AArch32 w/ LPAE and AArch64) + + uint16_t asid; // Address Space Identifier + uint8_t vmid; // Virtual machine Identifier uint8_t N; // Number of bits in pagesize + uint8_t innerAttrs; + uint8_t outerAttrs; + uint8_t ap; // Access permissions bits + uint8_t hap; // Hyp access permissions bits + DomainType domain; // Access Domain + + MemoryType mtype; + + // True if the long descriptor format is used for this entry (LPAE only) + bool longDescFormat; // @todo use this in the update attribute bethod + + bool isHyp; bool global; bool valid; + // True if the entry targets the non-secure physical address space + bool ns; + // True if the entry was brought in from a non-secure page table + bool nstid; + // Exception level on insert, AARCH64 EL0&1, AARCH32 -> el=1 + uint8_t el; + // Type of memory bool nonCacheable; // Can we wrap this in mtype? - bool sNp; // Section descriptor // Memory Attributes - MemoryType mtype; - uint8_t innerAttrs; - uint8_t outerAttrs; bool shareable; - uint32_t attributes; // Memory attributes formatted for PAR - + bool outerShareable; // Access permissions bool xn; // Execute Never - uint8_t ap; // Access permissions bits - uint8_t domain; // Access Domain + bool pxn; // Privileged Execute Never (LPAE only) //Construct an entry that maps to physical address addr for SE mode - TlbEntry(Addr _asn, Addr _vaddr, Addr _paddr) + TlbEntry(Addr _asn, Addr _vaddr, Addr _paddr) : + pfn(_paddr >> PageShift), size(PageBytes - 1), vpn(_vaddr >> PageShift), + attributes(0), lookupLevel(L1), asid(_asn), vmid(0), N(0), + innerAttrs(0), outerAttrs(0), ap(0), hap(0x3), + domain(DomainType::Client), mtype(MemoryType::StronglyOrdered), + longDescFormat(false), isHyp(false), global(false), valid(true), + ns(true), nstid(true), el(0), nonCacheable(false), shareable(false), + outerShareable(false), xn(0), pxn(0) { - pfn = _paddr >> PageShift; - size = PageBytes - 1; - asid = _asn; - global = false; - valid = true; + // no restrictions by 
default, hap = 0x3 - vpn = _vaddr >> PageShift; + // @todo Check the memory type + } - nonCacheable = sNp = false; + TlbEntry() : + pfn(0), size(0), vpn(0), attributes(0), lookupLevel(L1), asid(0), + vmid(0), N(0), innerAttrs(0), outerAttrs(0), ap(0), hap(0x3), + domain(DomainType::Client), mtype(MemoryType::StronglyOrdered), + longDescFormat(false), isHyp(false), global(false), valid(true), + ns(true), nstid(true), el(0), nonCacheable(false), + shareable(false), outerShareable(false), xn(0), pxn(0) + { + // no restrictions by default, hap = 0x3 - xn = 0; - ap = 0; // ??? - domain = DomainClient; //??? + // @todo Check the memory type } - TlbEntry() - {} - void updateVaddr(Addr new_vaddr) { @@ -141,67 +181,165 @@ struct TlbEntry } Addr - pageStart() + pageStart() const { return pfn << PageShift; } bool - match(Addr va, uint8_t cid) + match(Addr va, uint8_t _vmid, bool hypLookUp, bool secure_lookup, + uint8_t target_el) const + { + return match(va, 0, _vmid, hypLookUp, secure_lookup, true, target_el); + } + + bool + match(Addr va, uint16_t asn, uint8_t _vmid, bool hypLookUp, + bool secure_lookup, bool ignore_asn, uint8_t target_el) const { + bool match = false; Addr v = vpn << N; - if (valid && va >= v && va <= v + size && (global || cid == asid)) - return true; - return false; + + if (valid && va >= v && va <= v + size && (secure_lookup == !nstid) && + (hypLookUp == isHyp)) + { + if (target_el == 2 || target_el == 3) + match = (el == target_el); + else + match = (el == 0) || (el == 1); + if (match && !ignore_asn) { + match = global || (asn == asid); + } + if (match && nstid) { + match = isHyp || (_vmid == vmid); + } + } + return match; } Addr - pAddr(Addr va) + pAddr(Addr va) const { return (pfn << N) | (va & size); } void + updateAttributes() + { + uint64_t mask; + uint64_t newBits; + + // check bit 11 to determine if it's currently LPAE or VMSA format. + if ( attributes & (1 << 11) ) { + newBits = ((outerShareable ? 0x2 : + shareable ? 
0x3 : 0) << 7); + mask = 0x180; + } else { + /** Formatting for Physical Address Register (PAR) + * Only including lower bits (TLB info here) + * PAR (32-bit format): + * PA [31:12] + * LPAE [11] (Large Physical Address Extension) + * TLB info [10:1] + * NOS [10] (Not Outer Sharable) + * NS [9] (Non-Secure) + * -- [8] (Implementation Defined) + * SH [7] (Sharable) + * Inner[6:4](Inner memory attributes) + * Outer[3:2](Outer memory attributes) + * SS [1] (SuperSection) + * F [0] (Fault, Fault Status in [6:1] if faulted) + */ + newBits = ((outerShareable ? 0:1) << 10) | + ((shareable ? 1:0) << 7) | + (innerAttrs << 4) | + (outerAttrs << 2); + // TODO: Supersection bit + mask = 0x4FC; + } + // common bits + newBits |= ns << 9; // NS bit + mask |= 1 << 9; + // add in the new bits + attributes &= ~mask; + attributes |= newBits; + } + + void + setAttributes(bool lpae) + { + attributes = lpae ? (1 << 11) : 0; + updateAttributes(); + } + + std::string + print() const + { + return csprintf("%#x, asn %d vmn %d hyp %d ppn %#x size: %#x ap:%d " + "ns:%d nstid:%d g:%d el:%d", vpn << N, asid, vmid, + isHyp, pfn << N, size, ap, ns, nstid, global, el); + } + + void serialize(std::ostream &os) { + SERIALIZE_SCALAR(longDescFormat); SERIALIZE_SCALAR(pfn); SERIALIZE_SCALAR(size); SERIALIZE_SCALAR(vpn); SERIALIZE_SCALAR(asid); + SERIALIZE_SCALAR(vmid); + SERIALIZE_SCALAR(isHyp); SERIALIZE_SCALAR(N); SERIALIZE_SCALAR(global); SERIALIZE_SCALAR(valid); + SERIALIZE_SCALAR(ns); + SERIALIZE_SCALAR(nstid); SERIALIZE_SCALAR(nonCacheable); - SERIALIZE_SCALAR(sNp); + SERIALIZE_ENUM(lookupLevel); SERIALIZE_ENUM(mtype); SERIALIZE_SCALAR(innerAttrs); SERIALIZE_SCALAR(outerAttrs); SERIALIZE_SCALAR(shareable); + SERIALIZE_SCALAR(outerShareable); SERIALIZE_SCALAR(attributes); SERIALIZE_SCALAR(xn); + SERIALIZE_SCALAR(pxn); SERIALIZE_SCALAR(ap); - SERIALIZE_SCALAR(domain); + SERIALIZE_SCALAR(hap); + uint8_t domain_ = static_cast<uint8_t>(domain); + paramOut(os, "domain", domain_); } void 
unserialize(Checkpoint *cp, const std::string &section) { + UNSERIALIZE_SCALAR(longDescFormat); UNSERIALIZE_SCALAR(pfn); UNSERIALIZE_SCALAR(size); UNSERIALIZE_SCALAR(vpn); UNSERIALIZE_SCALAR(asid); + UNSERIALIZE_SCALAR(vmid); + UNSERIALIZE_SCALAR(isHyp); UNSERIALIZE_SCALAR(N); UNSERIALIZE_SCALAR(global); UNSERIALIZE_SCALAR(valid); + UNSERIALIZE_SCALAR(ns); + UNSERIALIZE_SCALAR(nstid); UNSERIALIZE_SCALAR(nonCacheable); - UNSERIALIZE_SCALAR(sNp); + UNSERIALIZE_ENUM(lookupLevel); UNSERIALIZE_ENUM(mtype); UNSERIALIZE_SCALAR(innerAttrs); UNSERIALIZE_SCALAR(outerAttrs); UNSERIALIZE_SCALAR(shareable); + UNSERIALIZE_SCALAR(outerShareable); UNSERIALIZE_SCALAR(attributes); UNSERIALIZE_SCALAR(xn); + UNSERIALIZE_SCALAR(pxn); UNSERIALIZE_SCALAR(ap); - UNSERIALIZE_SCALAR(domain); + UNSERIALIZE_SCALAR(hap); + uint8_t domain_; + paramIn(cp, section, "domain", domain_); + domain = static_cast<DomainType>(domain_); } }; diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc index 37999c905..dd23a5e21 100644 --- a/src/arch/arm/process.cc +++ b/src/arch/arm/process.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -61,6 +61,12 @@ ArmLiveProcess::ArmLiveProcess(LiveProcessParams *params, ObjectFile *objFile, ObjectFile::Arch _arch) : LiveProcess(params, objFile), arch(_arch) { +} + +ArmLiveProcess32::ArmLiveProcess32(LiveProcessParams *params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess(params, objFile, _arch) +{ stack_base = 0xbf000000L; // Set pointer for next thread stack. Reserve 8M for main stack. 
@@ -74,11 +80,28 @@ ArmLiveProcess::ArmLiveProcess(LiveProcessParams *params, ObjectFile *objFile, mmap_start = mmap_end = 0x40000000L; } +ArmLiveProcess64::ArmLiveProcess64(LiveProcessParams *params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess(params, objFile, _arch) +{ + stack_base = 0x7fffff0000L; + + // Set pointer for next thread stack. Reserve 8M for main stack. + next_thread_stack_base = stack_base - (8 * 1024 * 1024); + + // Set up break point (Top of Heap) + brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); + brk_point = roundUp(brk_point, VMPageSize); + + // Set up region for mmaps. For now, start at bottom of kuseg space. + mmap_start = mmap_end = 0x4000000000L; +} + void -ArmLiveProcess::initState() +ArmLiveProcess32::initState() { LiveProcess::initState(); - argsInit(MachineBytes, VMPageSize); + argsInit<uint32_t>(VMPageSize, INTREG_SP); for (int i = 0; i < contextIds.size(); i++) { ThreadContext * tc = system->getThreadContext(contextIds[i]); CPACR cpacr = tc->readMiscReg(MISCREG_CPACR); @@ -94,9 +117,34 @@ ArmLiveProcess::initState() } void -ArmLiveProcess::argsInit(int intSize, int pageSize) +ArmLiveProcess64::initState() { - typedef AuxVector<uint32_t> auxv_t; + LiveProcess::initState(); + argsInit<uint64_t>(VMPageSize, INTREG_SP0); + for (int i = 0; i < contextIds.size(); i++) { + ThreadContext * tc = system->getThreadContext(contextIds[i]); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_EL0T; + tc->setMiscReg(MISCREG_CPSR, cpsr); + CPACR cpacr = tc->readMiscReg(MISCREG_CPACR_EL1); + // Enable the floating point coprocessors. + cpacr.cp10 = 0x3; + cpacr.cp11 = 0x3; + tc->setMiscReg(MISCREG_CPACR_EL1, cpacr); + // Generically enable floating point support. 
+ FPEXC fpexc = tc->readMiscReg(MISCREG_FPEXC); + fpexc.en = 1; + tc->setMiscReg(MISCREG_FPEXC, fpexc); + } +} + +template <class IntType> +void +ArmLiveProcess::argsInit(int pageSize, IntRegIndex spIndex) +{ + int intSize = sizeof(IntType); + + typedef AuxVector<IntType> auxv_t; std::vector<auxv_t> auxv; string filename; @@ -133,7 +181,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) //Auxilliary vectors are loaded only for elf formatted executables. ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile); if (elfObject) { - uint32_t features = + IntType features = Arm_Swp | Arm_Half | Arm_Thumb | @@ -253,16 +301,16 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) allocateMem(roundDown(stack_min, pageSize), roundUp(stack_size, pageSize)); // map out initial stack contents - uint32_t sentry_base = stack_base - sentry_size; - uint32_t aux_data_base = sentry_base - aux_data_size; - uint32_t env_data_base = aux_data_base - env_data_size; - uint32_t arg_data_base = env_data_base - arg_data_size; - uint32_t platform_base = arg_data_base - platform_size; - uint32_t aux_random_base = platform_base - aux_random_size; - uint32_t auxv_array_base = aux_random_base - aux_array_size - aux_padding; - uint32_t envp_array_base = auxv_array_base - envp_array_size; - uint32_t argv_array_base = envp_array_base - argv_array_size; - uint32_t argc_base = argv_array_base - argc_size; + IntType sentry_base = stack_base - sentry_size; + IntType aux_data_base = sentry_base - aux_data_size; + IntType env_data_base = aux_data_base - env_data_size; + IntType arg_data_base = env_data_base - arg_data_size; + IntType platform_base = arg_data_base - platform_size; + IntType aux_random_base = platform_base - aux_random_size; + IntType auxv_array_base = aux_random_base - aux_array_size - aux_padding; + IntType envp_array_base = auxv_array_base - envp_array_size; + IntType argv_array_base = envp_array_base - argv_array_size; + IntType argc_base = argv_array_base - argc_size; 
DPRINTF(Stack, "The addresses of items on the initial stack:\n"); DPRINTF(Stack, "0x%x - aux data\n", aux_data_base); @@ -279,11 +327,11 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) // write contents to stack // figure out argc - uint32_t argc = argv.size(); - uint32_t guestArgc = ArmISA::htog(argc); + IntType argc = argv.size(); + IntType guestArgc = ArmISA::htog(argc); //Write out the sentry void * - uint32_t sentry_NULL = 0; + IntType sentry_NULL = 0; initVirtMem.writeBlob(sentry_base, (uint8_t*)&sentry_NULL, sentry_size); @@ -302,8 +350,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) } //Copy the aux stuff - for(int x = 0; x < auxv.size(); x++) - { + for (int x = 0; x < auxv.size(); x++) { initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize, (uint8_t*)&(auxv[x].a_type), intSize); initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize, @@ -321,7 +368,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) ThreadContext *tc = system->getThreadContext(contextIds[0]); //Set the stack pointer register - tc->setIntReg(StackPointerReg, stack_min); + tc->setIntReg(spIndex, stack_min); //A pointer to a function to run when the program exits. We'll set this //to zero explicitly to make sure this isn't used. 
tc->setIntReg(ArgumentReg0, 0); @@ -342,6 +389,8 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) PCState pc; pc.thumb(arch == ObjectFile::Thumb); pc.nextThumb(pc.thumb()); + pc.aarch64(arch == ObjectFile::Arm64); + pc.nextAArch64(pc.aarch64()); pc.set(objFile->entryPoint() & ~mask(1)); tc->pcState(pc); @@ -350,14 +399,21 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) } ArmISA::IntReg -ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i) +ArmLiveProcess32::getSyscallArg(ThreadContext *tc, int &i) { assert(i < 6); return tc->readIntReg(ArgumentReg0 + i++); } -uint64_t -ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width) +ArmISA::IntReg +ArmLiveProcess64::getSyscallArg(ThreadContext *tc, int &i) +{ + assert(i < 8); + return tc->readIntReg(ArgumentReg0 + i++); +} + +ArmISA::IntReg +ArmLiveProcess32::getSyscallArg(ThreadContext *tc, int &i, int width) { assert(width == 32 || width == 64); if (width == 32) @@ -375,17 +431,37 @@ ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width) return val; } +ArmISA::IntReg +ArmLiveProcess64::getSyscallArg(ThreadContext *tc, int &i, int width) +{ + return getSyscallArg(tc, i); +} + + +void +ArmLiveProcess32::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) +{ + assert(i < 6); + tc->setIntReg(ArgumentReg0 + i, val); +} void -ArmLiveProcess::setSyscallArg(ThreadContext *tc, +ArmLiveProcess64::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) { - assert(i < 4); + assert(i < 8); tc->setIntReg(ArgumentReg0 + i, val); } void -ArmLiveProcess::setSyscallReturn(ThreadContext *tc, +ArmLiveProcess32::setSyscallReturn(ThreadContext *tc, + SyscallReturn return_value) +{ + tc->setIntReg(ReturnValueReg, return_value.value()); +} + +void +ArmLiveProcess64::setSyscallReturn(ThreadContext *tc, SyscallReturn return_value) { tc->setIntReg(ReturnValueReg, return_value.value()); diff --git a/src/arch/arm/process.hh b/src/arch/arm/process.hh index f8d821037..34ce1dd02 100644 --- 
a/src/arch/arm/process.hh +++ b/src/arch/arm/process.hh @@ -1,4 +1,16 @@ /* +* Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2007-2008 The Florida State University * All rights reserved. * @@ -34,6 +46,7 @@ #include <string> #include <vector> +#include "arch/arm/intregs.hh" #include "base/loader/object_file.hh" #include "sim/process.hh" @@ -47,11 +60,37 @@ class ArmLiveProcess : public LiveProcess ObjectFile::Arch arch; ArmLiveProcess(LiveProcessParams * params, ObjectFile *objFile, ObjectFile::Arch _arch); + template<class IntType> + void argsInit(int pageSize, ArmISA::IntRegIndex spIndex); +}; + +class ArmLiveProcess32 : public ArmLiveProcess +{ + protected: + ObjectFile::Arch arch; + ArmLiveProcess32(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); + + void initState(); + + public: + + ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i, int width); + ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); + void setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val); + void setSyscallReturn(ThreadContext *tc, SyscallReturn return_value); +}; + +class ArmLiveProcess64 : public ArmLiveProcess +{ + protected: + ObjectFile::Arch arch; + ArmLiveProcess64(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); void initState(); public: - void argsInit(int intSize, int pageSize); ArmISA::IntReg getSyscallArg(ThreadContext *tc, 
int &i, int width); ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index b9033fd5b..09041f306 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2011 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -74,11 +74,12 @@ typedef uint8_t CCReg; // Constants Related to the number of registers const int NumIntArchRegs = NUM_ARCH_INTREGS; // The number of single precision floating point registers -const int NumFloatArchRegs = 64; -const int NumFloatSpecialRegs = 8; +const int NumFloatV7ArchRegs = 64; +const int NumFloatV8ArchRegs = 128; +const int NumFloatSpecialRegs = 32; const int NumIntRegs = NUM_INTREGS; -const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs; +const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumCCRegs = 0; const int NumMiscRegs = NUM_MISCREGS; @@ -89,6 +90,7 @@ const int ReturnValueReg = 0; const int ReturnValueReg1 = 1; const int ReturnValueReg2 = 2; const int NumArgumentRegs = 4; +const int NumArgumentRegs64 = 8; const int ArgumentReg0 = 0; const int ArgumentReg1 = 1; const int ArgumentReg2 = 2; diff --git a/src/arch/arm/remote_gdb.cc b/src/arch/arm/remote_gdb.cc index 4078630d6..74c3c7ff3 100644 --- a/src/arch/arm/remote_gdb.cc +++ b/src/arch/arm/remote_gdb.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -138,6 +138,7 @@ #include "arch/arm/pagetable.hh" #include "arch/arm/registers.hh" #include "arch/arm/remote_gdb.hh" +#include "arch/arm/system.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" #include "base/intmath.hh" @@ -159,7 +160,7 @@ using namespace std; using namespace ArmISA; RemoteGDB::RemoteGDB(System 
*_system, ThreadContext *tc) - : BaseRemoteGDB(_system, tc, NUMREGS) + : BaseRemoteGDB(_system, tc, MAX_NUMREGS) { } @@ -204,45 +205,65 @@ RemoteGDB::getregs() memset(gdbregs.regs, 0, gdbregs.bytes()); - // R0-R15 supervisor mode - // arm registers are 32 bits wide, gdb registers are 64 bits wide - // two arm registers are packed into one gdb register (little endian) - gdbregs.regs[REG_R0 + 0] = context->readIntReg(INTREG_R1) << 32 | - context->readIntReg(INTREG_R0); - gdbregs.regs[REG_R0 + 1] = context->readIntReg(INTREG_R3) << 32 | - context->readIntReg(INTREG_R2); - gdbregs.regs[REG_R0 + 2] = context->readIntReg(INTREG_R5) << 32 | - context->readIntReg(INTREG_R4); - gdbregs.regs[REG_R0 + 3] = context->readIntReg(INTREG_R7) << 32 | - context->readIntReg(INTREG_R6); - gdbregs.regs[REG_R0 + 4] = context->readIntReg(INTREG_R9) << 32 | - context->readIntReg(INTREG_R8); - gdbregs.regs[REG_R0 + 5] = context->readIntReg(INTREG_R11) << 32| - context->readIntReg(INTREG_R10); - gdbregs.regs[REG_R0 + 6] = context->readIntReg(INTREG_SP) << 32 | - context->readIntReg(INTREG_R12); - gdbregs.regs[REG_R0 + 7] = context->pcState().pc() << 32 | - context->readIntReg(INTREG_LR); - - // CPSR - gdbregs.regs[REG_CPSR] = context->readMiscRegNoEffect(MISCREG_CPSR); - - // vfpv3/neon floating point registers (32 double or 64 float) - - gdbregs.regs[REG_F0] = - static_cast<uint64_t>(context->readFloatRegBits(0)) << 32 | - gdbregs.regs[REG_CPSR]; - - for (int i = 1; i < (NumFloatArchRegs>>1); ++i) { - gdbregs.regs[i + REG_F0] = - static_cast<uint64_t>(context->readFloatRegBits(2*i)) << 32 | - context->readFloatRegBits(2*i-1); + if (inAArch64(context)) { // AArch64 + // x0-x31 + for (int i = 0; i < 32; ++i) { + gdbregs.regs[REG_X0 + i] = context->readIntReg(INTREG_X0 + i); + } + // pc + gdbregs.regs[REG_PC_64] = context->pcState().pc(); + // cpsr + gdbregs.regs[REG_CPSR_64] = context->readMiscRegNoEffect(MISCREG_CPSR); + // v0-v31 + for (int i = 0; i < 32; ++i) { + gdbregs.regs[REG_V0 + 2 * 
i] = static_cast<uint64_t>( + context->readFloatRegBits(i * 4 + 3)) << 32 | + context->readFloatRegBits(i * 4 + 2); + gdbregs.regs[REG_V0 + 2 * i + 1] = static_cast<uint64_t>( + context->readFloatRegBits(i * 4 + 1)) << 32 | + context->readFloatRegBits(i * 4 + 0); + } + } else { // AArch32 + // R0-R15 supervisor mode + // arm registers are 32 bits wide, gdb registers are 64 bits wide two + // arm registers are packed into one gdb register (little endian) + gdbregs.regs[REG_R0 + 0] = context->readIntReg(INTREG_R1) << 32 | + context->readIntReg(INTREG_R0); + gdbregs.regs[REG_R0 + 1] = context->readIntReg(INTREG_R3) << 32 | + context->readIntReg(INTREG_R2); + gdbregs.regs[REG_R0 + 2] = context->readIntReg(INTREG_R5) << 32 | + context->readIntReg(INTREG_R4); + gdbregs.regs[REG_R0 + 3] = context->readIntReg(INTREG_R7) << 32 | + context->readIntReg(INTREG_R6); + gdbregs.regs[REG_R0 + 4] = context->readIntReg(INTREG_R9) << 32 | + context->readIntReg(INTREG_R8); + gdbregs.regs[REG_R0 + 5] = context->readIntReg(INTREG_R11) << 32| + context->readIntReg(INTREG_R10); + gdbregs.regs[REG_R0 + 6] = context->readIntReg(INTREG_SP) << 32 | + context->readIntReg(INTREG_R12); + gdbregs.regs[REG_R0 + 7] = context->pcState().pc() << 32 | + context->readIntReg(INTREG_LR); + + // CPSR + gdbregs.regs[REG_CPSR] = context->readMiscRegNoEffect(MISCREG_CPSR); + + // vfpv3/neon floating point registers (32 double or 64 float) + + gdbregs.regs[REG_F0] = + static_cast<uint64_t>(context->readFloatRegBits(0)) << 32 | + gdbregs.regs[REG_CPSR]; + + for (int i = 1; i < (NumFloatV7ArchRegs>>1); ++i) { + gdbregs.regs[i + REG_F0] = + static_cast<uint64_t>(context->readFloatRegBits(2*i)) << 32 | + context->readFloatRegBits(2*i-1); + } + + // FPSCR + gdbregs.regs[REG_FPSCR] = static_cast<uint64_t>( + context->readMiscRegNoEffect(MISCREG_FPSCR)) << 32 | + context->readFloatRegBits(NumFloatV7ArchRegs - 1); } - - // FPSCR - gdbregs.regs[REG_FPSCR] = - 
static_cast<uint64_t>(context->readMiscRegNoEffect(MISCREG_FPSCR)) << 32 | - context->readFloatRegBits(NumFloatArchRegs - 1); } /* @@ -254,46 +275,66 @@ RemoteGDB::setregs() { DPRINTF(GDBAcc, "setregs in remotegdb \n"); + if (inAArch64(context)) { // AArch64 + // x0-x31 + for (int i = 0; i < 32; ++i) { + context->setIntReg(INTREG_X0 + i, gdbregs.regs[REG_X0 + i]); + } + // pc + context->pcState(gdbregs.regs[REG_PC_64]); + // cpsr + context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR_64]); + // v0-v31 + for (int i = 0; i < 32; ++i) { + context->setFloatRegBits(i * 4 + 3, + gdbregs.regs[REG_V0 + 2 * i] >> 32); + context->setFloatRegBits(i * 4 + 2, + gdbregs.regs[REG_V0 + 2 * i]); + context->setFloatRegBits(i * 4 + 1, + gdbregs.regs[REG_V0 + 2 * i + 1] >> 32); + context->setFloatRegBits(i * 4 + 0, + gdbregs.regs[REG_V0 + 2 * i + 1]); + } + } else { // AArch32 + // R0-R15 supervisor mode + // arm registers are 32 bits wide, gdb registers are 64 bits wide + // two arm registers are packed into one gdb register (little endian) + context->setIntReg(INTREG_R0 , bits(gdbregs.regs[REG_R0 + 0], 31, 0)); + context->setIntReg(INTREG_R1 , bits(gdbregs.regs[REG_R0 + 0], 63, 32)); + context->setIntReg(INTREG_R2 , bits(gdbregs.regs[REG_R0 + 1], 31, 0)); + context->setIntReg(INTREG_R3 , bits(gdbregs.regs[REG_R0 + 1], 63, 32)); + context->setIntReg(INTREG_R4 , bits(gdbregs.regs[REG_R0 + 2], 31, 0)); + context->setIntReg(INTREG_R5 , bits(gdbregs.regs[REG_R0 + 2], 63, 32)); + context->setIntReg(INTREG_R6 , bits(gdbregs.regs[REG_R0 + 3], 31, 0)); + context->setIntReg(INTREG_R7 , bits(gdbregs.regs[REG_R0 + 3], 63, 32)); + context->setIntReg(INTREG_R8 , bits(gdbregs.regs[REG_R0 + 4], 31, 0)); + context->setIntReg(INTREG_R9 , bits(gdbregs.regs[REG_R0 + 4], 63, 32)); + context->setIntReg(INTREG_R10, bits(gdbregs.regs[REG_R0 + 5], 31, 0)); + context->setIntReg(INTREG_R11, bits(gdbregs.regs[REG_R0 + 5], 63, 32)); + context->setIntReg(INTREG_R12, bits(gdbregs.regs[REG_R0 + 6], 31, 
0)); + context->setIntReg(INTREG_SP , bits(gdbregs.regs[REG_R0 + 6], 63, 32)); + context->setIntReg(INTREG_LR , bits(gdbregs.regs[REG_R0 + 7], 31, 0)); + context->pcState(bits(gdbregs.regs[REG_R0 + 7], 63, 32)); + + //CPSR + context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR]); + + //vfpv3/neon floating point registers (32 double or 64 float) + context->setFloatRegBits(0, gdbregs.regs[REG_F0]>>32); + + for (int i = 1; i < NumFloatV7ArchRegs; ++i) { + if (i%2) { + int j = (i+1)/2; + context->setFloatRegBits(i, bits(gdbregs.regs[j + REG_F0], 31, 0)); + } else { + int j = i/2; + context->setFloatRegBits(i, gdbregs.regs[j + REG_F0]>>32); + } + } - // R0-R15 supervisor mode - // arm registers are 32 bits wide, gdb registers are 64 bits wide - // two arm registers are packed into one gdb register (little endian) - context->setIntReg(INTREG_R0 , bits(gdbregs.regs[REG_R0 + 0], 31, 0)); - context->setIntReg(INTREG_R1 , bits(gdbregs.regs[REG_R0 + 0], 63, 32)); - context->setIntReg(INTREG_R2 , bits(gdbregs.regs[REG_R0 + 1], 31, 0)); - context->setIntReg(INTREG_R3 , bits(gdbregs.regs[REG_R0 + 1], 63, 32)); - context->setIntReg(INTREG_R4 , bits(gdbregs.regs[REG_R0 + 2], 31, 0)); - context->setIntReg(INTREG_R5 , bits(gdbregs.regs[REG_R0 + 2], 63, 32)); - context->setIntReg(INTREG_R6 , bits(gdbregs.regs[REG_R0 + 3], 31, 0)); - context->setIntReg(INTREG_R7 , bits(gdbregs.regs[REG_R0 + 3], 63, 32)); - context->setIntReg(INTREG_R8 , bits(gdbregs.regs[REG_R0 + 4], 31, 0)); - context->setIntReg(INTREG_R9 , bits(gdbregs.regs[REG_R0 + 4], 63, 32)); - context->setIntReg(INTREG_R10, bits(gdbregs.regs[REG_R0 + 5], 31, 0)); - context->setIntReg(INTREG_R11, bits(gdbregs.regs[REG_R0 + 5], 63, 32)); - context->setIntReg(INTREG_R12, bits(gdbregs.regs[REG_R0 + 6], 31, 0)); - context->setIntReg(INTREG_SP , bits(gdbregs.regs[REG_R0 + 6], 63, 32)); - context->setIntReg(INTREG_LR , bits(gdbregs.regs[REG_R0 + 7], 31, 0)); - context->pcState(bits(gdbregs.regs[REG_R0 + 7], 63, 32)); - - 
//CPSR - context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR]); - - //vfpv3/neon floating point registers (32 double or 64 float) - context->setFloatRegBits(0, gdbregs.regs[REG_F0]>>32); - - for (int i = 1; i < NumFloatArchRegs; ++i) { - if(i%2){ - int j = (i+1)/2; - context->setFloatRegBits(i, bits(gdbregs.regs[j + REG_F0], 31, 0)); - } - else{ - int j = i/2; - context->setFloatRegBits(i, gdbregs.regs[j + REG_F0]>>32); - } + //FPSCR + context->setMiscReg(MISCREG_FPSCR, gdbregs.regs[REG_FPSCR]>>32); } - - //FPSCR - context->setMiscReg(MISCREG_FPSCR, gdbregs.regs[REG_FPSCR]>>32); } void diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh index b75d921fb..a6b2b9d35 100644 --- a/src/arch/arm/remote_gdb.hh +++ b/src/arch/arm/remote_gdb.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2007-2008 The Florida State University * All rights reserved. 
@@ -40,13 +52,24 @@ class ThreadContext; namespace ArmISA { -// registers for arm with vfpv3/neon + +// AArch32 registers with vfpv3/neon const int NUMREGS = 41; /* r0-r15, cpsr, d0-d31, fpscr */ const int REG_R0 = 0; const int REG_F0 = 8; const int REG_CPSR = 8; /* bit 512 to bit 543 */ const int REG_FPSCR = 40; /* bit 2592 to bit 2623 */ +// AArch64 registers +const int NUMREGS_64 = 98; // x0-x31, pc, cpsr (64-bit GPRs) + // v0-v31 (128-bit FPRs) +const int REG_X0 = 0; +const int REG_PC_64 = 32; +const int REG_CPSR_64 = 33; +const int REG_V0 = 34; + +const int MAX_NUMREGS = NUMREGS_64; + class RemoteGDB : public BaseRemoteGDB { diff --git a/src/arch/arm/stage2_lookup.cc b/src/arch/arm/stage2_lookup.cc new file mode 100755 index 000000000..1299ade68 --- /dev/null +++ b/src/arch/arm/stage2_lookup.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2010-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Ali Saidi + * Giacomo Gabrielli + */ + +#include "arch/arm/faults.hh" +#include "arch/arm/stage2_lookup.hh" +#include "arch/arm/system.hh" +#include "arch/arm/table_walker.hh" +#include "arch/arm/tlb.hh" +#include "cpu/base.hh" +#include "cpu/thread_context.hh" +#include "debug/Checkpoint.hh" +#include "debug/TLB.hh" +#include "debug/TLBVerbose.hh" +#include "sim/system.hh" + +using namespace ArmISA; + +Fault +Stage2LookUp::getTe(ThreadContext *tc, TlbEntry *destTe) + +{ + fault = stage2Tlb->getTE(&stage2Te, &req, tc, mode, this, timing, + functional, false, tranType); + // Call finish if we're done already + if ((fault != NoFault) || (stage2Te != NULL)) { + mergeTe(&req, mode); + *destTe = stage1Te; + } + return fault; +} + +void +Stage2LookUp::mergeTe(RequestPtr req, BaseTLB::Mode mode) +{ + // Since we directly requested the table entry (which we need later on to + // merge the attributes) then we've skipped some stage 2 permissions + // checking. So call translate on stage 2 to do the checking. As the entry + // is now in the TLB this should always hit the cache. + if (fault == NoFault) { + fault = stage2Tlb->checkPermissions(stage2Te, req, mode); + } + + // Check again that we haven't got a fault + if (fault == NoFault) { + assert(stage2Te != NULL); + + // Now we have the table entries for both stages of translation + // merge them and insert the result into the stage 1 TLB. See + // CombineS1S2Desc() in pseudocode + stage1Te.N = stage2Te->N; + stage1Te.nonCacheable |= stage2Te->nonCacheable; + stage1Te.xn |= stage2Te->xn; + + if (stage1Te.size > stage2Te->size) { + // Size mismatch also implies vpn mismatch (this is shifted by + // sizebits!). + stage1Te.vpn = s1Req->getVaddr() / (stage2Te->size+1); + stage1Te.pfn = stage2Te->pfn; + stage1Te.size = stage2Te->size; + } else if (stage1Te.size < stage2Te->size) { + // Guest 4K could well be section-backed by host hugepage! In this + // case a 4K entry is added but pfn needs to be adjusted.
New PFN = + // offset into section PFN given by stage2 IPA treated as a stage1 + // page size. + stage1Te.pfn = (stage2Te->pfn * ((stage2Te->size+1) / (stage1Te.size+1))) + + (stage2Te->vpn / (stage1Te.size+1)); + // Size remains smaller of the two. + } else { + // Matching sizes + stage1Te.pfn = stage2Te->pfn; + } + + if (stage2Te->mtype == TlbEntry::MemoryType::StronglyOrdered || + stage1Te.mtype == TlbEntry::MemoryType::StronglyOrdered) { + stage1Te.mtype = TlbEntry::MemoryType::StronglyOrdered; + } else if (stage2Te->mtype == TlbEntry::MemoryType::Device || + stage1Te.mtype == TlbEntry::MemoryType::Device) { + stage1Te.mtype = TlbEntry::MemoryType::Device; + } else { + stage1Te.mtype = TlbEntry::MemoryType::Normal; + } + + if (stage1Te.mtype == TlbEntry::MemoryType::Normal) { + + if (stage2Te->innerAttrs == 0 || + stage1Te.innerAttrs == 0) { + // either encoding Non-cacheable + stage1Te.innerAttrs = 0; + } else if (stage2Te->innerAttrs == 2 || + stage1Te.innerAttrs == 2) { + // either encoding Write-Through cacheable + stage1Te.innerAttrs = 2; + } else { + // both encodings Write-Back + stage1Te.innerAttrs = 3; + } + + if (stage2Te->outerAttrs == 0 || + stage1Te.outerAttrs == 0) { + // either encoding Non-cacheable + stage1Te.outerAttrs = 0; + } else if (stage2Te->outerAttrs == 2 || + stage1Te.outerAttrs == 2) { + // either encoding Write-Through cacheable + stage1Te.outerAttrs = 2; + } else { + // both encodings Write-Back + stage1Te.outerAttrs = 3; + } + + stage1Te.shareable |= stage2Te->shareable; + stage1Te.outerShareable |= stage2Te->outerShareable; + if (stage1Te.innerAttrs == 0 && + stage1Te.outerAttrs == 0) { + // something Non-cacheable at each level is outer shareable + stage1Te.shareable = true; + stage1Te.outerShareable = true; + } + } else { + stage1Te.shareable = true; + stage1Te.outerShareable = true; + } + stage1Te.updateAttributes(); + } + + // if there's a fault annotate it, + if (fault != NoFault) { + // If the second stage of translation 
generated a fault add the + // details of the original stage 1 virtual address + reinterpret_cast<ArmFault *>(fault.get())->annotate(ArmFault::OVA, + s1Req->getVaddr()); + } + complete = true; +} + +void +Stage2LookUp::finish(Fault _fault, RequestPtr req, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = _fault; + // if we haven't got the table entry get it now + if ((fault == NoFault) && (stage2Te == NULL)) { + fault = stage2Tlb->getTE(&stage2Te, req, tc, mode, this, + timing, functional, false, tranType); + } + + // Now we have the stage 2 table entry we need to merge it with the stage + // 1 entry we were given at the start + mergeTe(req, mode); + + if (fault != NoFault) { + transState->finish(fault, req, tc, mode); + } else if (timing) { + // Now notify the original stage 1 translation that we finally have + // a result + stage1Tlb->translateComplete(s1Req, tc, transState, mode, tranType, true); + } + // if we have been asked to delete ourselves do it now + if (selfDelete) { + delete this; + } +} + diff --git a/src/arch/arm/stage2_lookup.hh b/src/arch/arm/stage2_lookup.hh new file mode 100755 index 000000000..3a1228f46 --- /dev/null +++ b/src/arch/arm/stage2_lookup.hh @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2010-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Ali Saidi + * Giacomo Gabrielli + */ + +#ifndef __ARCH_ARM_STAGE2_LOOKUP_HH__ +#define __ARCH_ARM_STAGE2_LOOKUP_HH__ + +#include <list> + +#include "arch/arm/system.hh" +#include "arch/arm/table_walker.hh" +#include "arch/arm/tlb.hh" +#include "mem/request.hh" +#include "sim/tlb.hh" + +class ThreadContext; + +namespace ArmISA { +class Translation; +class TLB; + + +class Stage2LookUp : public BaseTLB::Translation +{ + private: + TLB *stage1Tlb; + TLB *stage2Tlb; + TlbEntry stage1Te; + RequestPtr s1Req; + TLB::Translation *transState; + BaseTLB::Mode mode; + bool timing; + bool functional; + TLB::ArmTranslationType tranType; + TlbEntry *stage2Te; + Request req; + Fault fault; + bool complete; + bool selfDelete; + + public: + Stage2LookUp(TLB *s1Tlb, TLB *s2Tlb, TlbEntry s1Te, RequestPtr _req, + TLB::Translation *_transState, BaseTLB::Mode _mode, bool _timing, + bool _functional, TLB::ArmTranslationType _tranType) : + stage1Tlb(s1Tlb), stage2Tlb(s2Tlb), stage1Te(s1Te), s1Req(_req), + transState(_transState), mode(_mode), timing(_timing), + functional(_functional), tranType(_tranType), fault(NoFault), + complete(false), selfDelete(false) + { + req.setVirt(0, s1Te.pAddr(s1Req->getVaddr()), s1Req->getSize(), + s1Req->getFlags(), s1Req->masterId(), 0); + } + + Fault getTe(ThreadContext *tc, TlbEntry *destTe); + + void mergeTe(RequestPtr req, BaseTLB::Mode mode); + + void setSelfDelete() { selfDelete = true; } + + bool isComplete() const { return complete; } + + void markDelayed() {} + + void finish(Fault fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode); +}; + + +} // namespace ArmISA + +#endif //__ARCH_ARM_STAGE2_LOOKUP_HH__ + diff --git a/src/arch/arm/stage2_mmu.cc b/src/arch/arm/stage2_mmu.cc new file mode 100755 index 000000000..01451548c --- /dev/null +++ b/src/arch/arm/stage2_mmu.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software 
and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Thomas Grocutt + */ + +#include "arch/arm/faults.hh" +#include "arch/arm/stage2_mmu.hh" +#include "arch/arm/system.hh" +#include "arch/arm/tlb.hh" +#include "cpu/base.hh" +#include "cpu/thread_context.hh" +#include "debug/Checkpoint.hh" +#include "debug/TLB.hh" +#include "debug/TLBVerbose.hh" + +using namespace ArmISA; + +Stage2MMU::Stage2MMU(const Params *p) + : SimObject(p), _stage1Tlb(p->tlb), _stage2Tlb(p->stage2_tlb) +{ + stage1Tlb()->setMMU(this); + stage2Tlb()->setMMU(this); +} + +Fault +Stage2MMU::readDataUntimed(ThreadContext *tc, Addr oVAddr, Addr descAddr, + uint8_t *data, int numBytes, Request::Flags flags, int masterId, + bool isFunctional) +{ + Fault fault; + + // translate to physical address using the second stage MMU + Request req = Request(); + req.setVirt(0, descAddr, numBytes, flags | Request::PT_WALK, masterId, 0); + if (isFunctional) { + fault = stage2Tlb()->translateFunctional(&req, tc, BaseTLB::Read); + } else { + fault = stage2Tlb()->translateAtomic(&req, tc, BaseTLB::Read); + } + + // Now do the access. 
+ if (fault == NoFault && !req.getFlags().isSet(Request::NO_ACCESS)) { + Packet pkt = Packet(&req, MemCmd::ReadReq); + pkt.dataStatic(data); + if (isFunctional) { + stage1Tlb()->getWalkerPort().sendFunctional(&pkt); + } else { + stage1Tlb()->getWalkerPort().sendAtomic(&pkt); + } + assert(!pkt.isError()); + } + + // If there was a fault annotate it with the flag saying the fault occurred + // while doing a translation for a stage 1 page table walk. + if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, true); + armFault->annotate(ArmFault::OVA, oVAddr); + } + return fault; +} + +Fault +Stage2MMU::readDataTimed(ThreadContext *tc, Addr descAddr, + Stage2Translation *translation, int numBytes, Request::Flags flags, + int masterId) +{ + Fault fault; + // translate to physical address using the second stage MMU + translation->setVirt(descAddr, numBytes, flags | Request::PT_WALK, masterId); + fault = translation->translateTiming(tc); + return fault; +} + +Stage2MMU::Stage2Translation::Stage2Translation(Stage2MMU &_parent, + uint8_t *_data, Event *_event, Addr _oVAddr) + : data(_data), event(_event), parent(_parent), oVAddr(_oVAddr), + fault(NoFault) +{ +} + +void +Stage2MMU::Stage2Translation::finish(Fault _fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode) +{ + fault = _fault; + + // If there was a fault annotate it with the flag saying the fault occurred + // while doing a translation for a stage 1 page table walk.
+ if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, true); + armFault->annotate(ArmFault::OVA, oVAddr); + } + + if (_fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) { + DmaPort& port = parent.stage1Tlb()->getWalkerPort(); + port.dmaAction(MemCmd::ReadReq, req->getPaddr(), numBytes, + event, data, tc->getCpuPtr()->clockPeriod(), + req->getFlags()); + } else { + // We can't do the DMA access as there's been a problem, so tell the + // event we're done + event->process(); + } +} + +ArmISA::Stage2MMU * +ArmStage2MMUParams::create() +{ + return new ArmISA::Stage2MMU(this); +} diff --git a/src/arch/arm/stage2_mmu.hh b/src/arch/arm/stage2_mmu.hh new file mode 100755 index 000000000..d1812c4ed --- /dev/null +++ b/src/arch/arm/stage2_mmu.hh @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Thomas Grocutt + */ + +#ifndef __ARCH_ARM_STAGE2_MMU_HH__ +#define __ARCH_ARM_STAGE2_MMU_HH__ + +#include "arch/arm/faults.hh" +#include "arch/arm/tlb.hh" +#include "mem/request.hh" +#include "params/ArmStage2MMU.hh" +#include "sim/eventq.hh" + +namespace ArmISA { + +class Stage2MMU : public SimObject +{ + private: + TLB *_stage1Tlb; + /** The TLB that will cache the stage 2 look ups. 
*/ + TLB *_stage2Tlb; + + public: + /** This translation class is used to trigger the data fetch once a timing + translation returns the translated physical address */ + class Stage2Translation : public BaseTLB::Translation + { + private: + uint8_t *data; + int numBytes; + Request req; + Event *event; + Stage2MMU &parent; + Addr oVAddr; + + public: + Fault fault; + + Stage2Translation(Stage2MMU &_parent, uint8_t *_data, Event *_event, + Addr _oVAddr); + + void + markDelayed() {} + + void + finish(Fault fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode); + + void setVirt(Addr vaddr, int size, Request::Flags flags, int masterId) + { + numBytes = size; + req.setVirt(0, vaddr, size, flags, masterId, 0); + } + + Fault translateTiming(ThreadContext *tc) + { + return (parent.stage2Tlb()->translateTiming(&req, tc, this, BaseTLB::Read)); + } + }; + + typedef ArmStage2MMUParams Params; + Stage2MMU(const Params *p); + + Fault readDataUntimed(ThreadContext *tc, Addr oVAddr, Addr descAddr, + uint8_t *data, int numBytes, Request::Flags flags, int masterId, + bool isFunctional); + Fault readDataTimed(ThreadContext *tc, Addr descAddr, + Stage2Translation *translation, int numBytes, Request::Flags flags, + int masterId); + + TLB* stage1Tlb() const { return _stage1Tlb; } + TLB* stage2Tlb() const { return _stage2Tlb; } +}; + + + +} // namespace ArmISA + +#endif //__ARCH_ARM_STAGE2_MMU_HH__ + diff --git a/src/arch/arm/system.cc b/src/arch/arm/system.cc index b09784b64..00d9d7613 100644 --- a/src/arch/arm/system.cc +++ b/src/arch/arm/system.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -48,19 +48,46 @@ #include "cpu/thread_context.hh" #include "mem/physical.hh" #include "mem/fs_translating_port_proxy.hh" +#include "sim/full_system.hh" using namespace std; using namespace Linux; ArmSystem::ArmSystem(Params *p) - : 
System(p), bootldr(NULL), multiProc(p->multi_proc) + : System(p), bootldr(NULL), _haveSecurity(p->have_security), + _haveLPAE(p->have_lpae), + _haveVirtualization(p->have_virtualization), + _haveGenericTimer(p->have_generic_timer), + _highestELIs64(p->highest_el_is_64), + _resetAddr64(p->reset_addr_64), + _physAddrRange64(p->phys_addr_range_64), + _haveLargeAsid64(p->have_large_asid_64), + multiProc(p->multi_proc) { + // Check if the physical address range is valid + if (_highestELIs64 && ( + _physAddrRange64 < 32 || + _physAddrRange64 > 48 || + (_physAddrRange64 % 4 != 0 && _physAddrRange64 != 42))) { + fatal("Invalid physical address range (%d)\n", _physAddrRange64); + } + if (p->boot_loader != "") { bootldr = createObjectFile(p->boot_loader); if (!bootldr) fatal("Could not read bootloader: %s\n", p->boot_loader); + if ((bootldr->getArch() == ObjectFile::Arm64) && !_highestELIs64) { + warn("Highest ARM exception-level set to AArch32 but bootloader " + "is for AArch64. Assuming you wanted these to match.\n"); + _highestELIs64 = true; + } else if ((bootldr->getArch() == ObjectFile::Arm) && _highestELIs64) { + warn("Highest ARM exception-level set to AArch64 but bootloader " + "is for AArch32. 
Assuming you wanted these to match.\n"); + _highestELIs64 = false; + } + bootldr->loadGlobalSymbols(debugSymbolTable); } @@ -81,11 +108,21 @@ ArmSystem::initState() if (bootldr) { bootldr->loadSections(physProxy); - uint8_t jump_to_bl[] = + uint8_t jump_to_bl_32[] = + { + 0x07, 0xf0, 0xa0, 0xe1 // branch to r7 in aarch32 + }; + + uint8_t jump_to_bl_64[] = { - 0x07, 0xf0, 0xa0, 0xe1 // branch to r7 + 0xe0, 0x00, 0x1f, 0xd6 // instruction "br x7" in aarch64 }; - physProxy.writeBlob(0x0, jump_to_bl, sizeof(jump_to_bl)); + + // write the jump to branch table into address 0 + if (!_highestELIs64) + physProxy.writeBlob(0x0, jump_to_bl_32, sizeof(jump_to_bl_32)); + else + physProxy.writeBlob(0x0, jump_to_bl_64, sizeof(jump_to_bl_64)); inform("Using bootloader at address %#x\n", bootldr->entryPoint()); @@ -96,24 +133,116 @@ ArmSystem::initState() fatal("gic_cpu_addr && flags_addr must be set with bootloader\n"); for (int i = 0; i < threadContexts.size(); i++) { - threadContexts[i]->setIntReg(3, kernelEntry & loadAddrMask); + if (!_highestELIs64) + threadContexts[i]->setIntReg(3, (kernelEntry & loadAddrMask) + + loadAddrOffset); threadContexts[i]->setIntReg(4, params()->gic_cpu_addr); threadContexts[i]->setIntReg(5, params()->flags_addr); threadContexts[i]->setIntReg(7, bootldr->entryPoint()); } + inform("Using kernel entry physical address at %#x\n", + (kernelEntry & loadAddrMask) + loadAddrOffset); } else { // Set the initial PC to be at start of the kernel code - threadContexts[0]->pcState(kernelEntry & loadAddrMask); + if (!_highestELIs64) + threadContexts[0]->pcState((kernelEntry & loadAddrMask) + + loadAddrOffset); + } +} + +GenericTimer::ArchTimer * +ArmSystem::getArchTimer(int cpu_id) const +{ + if (_genericTimer) { + return _genericTimer->getArchTimer(cpu_id); } + return NULL; } +GenericTimer::SystemCounter * +ArmSystem::getSystemCounter() const +{ + if (_genericTimer) { + return _genericTimer->getSystemCounter(); + } + return NULL; +} + +bool 
+ArmSystem::haveSecurity(ThreadContext *tc) +{ + if (!FullSystem) + return false; + + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveSecurity(); +} + + ArmSystem::~ArmSystem() { if (debugPrintkEvent) delete debugPrintkEvent; } +bool +ArmSystem::haveLPAE(ThreadContext *tc) +{ + if (!FullSystem) + return false; + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveLPAE(); +} + +bool +ArmSystem::haveVirtualization(ThreadContext *tc) +{ + if (!FullSystem) + return false; + + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveVirtualization(); +} + +bool +ArmSystem::highestELIs64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->highestELIs64(); +} + +ExceptionLevel +ArmSystem::highestEL(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->highestEL(); +} + +Addr +ArmSystem::resetAddr64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->resetAddr64(); +} + +uint8_t +ArmSystem::physAddrRange(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->physAddrRange(); +} + +Addr +ArmSystem::physAddrMask(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->physAddrMask(); +} + +bool +ArmSystem::haveLargeAsid64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->haveLargeAsid64(); +} ArmSystem * ArmSystemParams::create() { diff --git a/src/arch/arm/system.hh b/src/arch/arm/system.hh index 3135c5da1..f906dc2d2 100644 --- a/src/arch/arm/system.hh +++ b/src/arch/arm/system.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -46,11 +46,14 @@ #include <string> #include <vector> +#include "dev/arm/generic_timer.hh" #include 
"kern/linux/events.hh" #include "params/ArmSystem.hh" #include "sim/sim_object.hh" #include "sim/system.hh" +class ThreadContext; + class ArmSystem : public System { protected: @@ -65,6 +68,54 @@ class ArmSystem : public System */ ObjectFile *bootldr; + /** + * True if this system implements the Security Extensions + */ + const bool _haveSecurity; + + /** + * True if this system implements the Large Physical Address Extension + */ + const bool _haveLPAE; + + /** + * True if this system implements the virtualization Extensions + */ + const bool _haveVirtualization; + + /** + * True if this system implements the Generic Timer extension + */ + const bool _haveGenericTimer; + + /** + * Pointer to the Generic Timer wrapper. + */ + GenericTimer *_genericTimer; + + /** + * True if the register width of the highest implemented exception level is + * 64 bits (ARMv8) + */ + bool _highestELIs64; + + /** + * Reset address if the highest implemented exception level is 64 bits + * (ARMv8) + */ + const Addr _resetAddr64; + + /** + * Supported physical address range in bits if the highest implemented + * exception level is 64 bits (ARMv8) + */ + const uint8_t _physAddrRange64; + + /** + * True if ASID is 16 bits in AArch64 (ARMv8) + */ + const bool _haveLargeAsid64; + public: typedef ArmSystemParams Params; const Params * @@ -101,6 +152,120 @@ class ArmSystem : public System /** true if this a multiprocessor system */ bool multiProc; + + /** Returns true if this system implements the Security Extensions */ + bool haveSecurity() const { return _haveSecurity; } + + /** Returns true if this system implements the Large Physical Address + * Extension */ + bool haveLPAE() const { return _haveLPAE; } + + /** Returns true if this system implements the virtualization + * Extensions + */ + bool haveVirtualization() const { return _haveVirtualization; } + + /** Returns true if this system implements the Generic Timer extension. 
*/ + bool haveGenericTimer() const { return _haveGenericTimer; } + + /** Sets the pointer to the Generic Timer. */ + void setGenericTimer(GenericTimer *generic_timer) + { + _genericTimer = generic_timer; + } + + /** Returns a pointer to the system counter. */ + GenericTimer::SystemCounter *getSystemCounter() const; + + /** Returns a pointer to the appropriate architected timer. */ + GenericTimer::ArchTimer *getArchTimer(int cpu_id) const; + + /** Returns true if the register width of the highest implemented exception + * level is 64 bits (ARMv8) */ + bool highestELIs64() const { return _highestELIs64; } + + /** Returns the highest implemented exception level */ + ExceptionLevel highestEL() const + { + if (_haveSecurity) + return EL3; + // @todo: uncomment this to enable Virtualization + // if (_haveVirtualization) + // return EL2; + return EL1; + } + + /** Returns the reset address if the highest implemented exception level is + * 64 bits (ARMv8) */ + Addr resetAddr64() const { return _resetAddr64; } + + /** Returns true if ASID is 16 bits in AArch64 (ARMv8) */ + bool haveLargeAsid64() const { return _haveLargeAsid64; } + + /** Returns the supported physical address range in bits if the highest + * implemented exception level is 64 bits (ARMv8) */ + uint8_t physAddrRange64() const { return _physAddrRange64; } + + /** Returns the supported physical address range in bits */ + uint8_t physAddrRange() const + { + if (_highestELIs64) + return _physAddrRange64; + if (_haveLPAE) + return 40; + return 32; + } + + /** Returns the physical address mask */ + Addr physAddrMask() const + { + return mask(physAddrRange()); + } + + /** Returns true if the system of a specific thread context implements the + * Security Extensions + */ + static bool haveSecurity(ThreadContext *tc); + + /** Returns true if the system of a specific thread context implements the + * virtualization Extensions + */ + static bool haveVirtualization(ThreadContext *tc); + + /** Returns true if the system of 
a specific thread context implements the + * Large Physical Address Extension + */ + static bool haveLPAE(ThreadContext *tc); + + /** Returns true if the register width of the highest implemented exception + * level for the system of a specific thread context is 64 bits (ARMv8) + */ + static bool highestELIs64(ThreadContext *tc); + + /** Returns the highest implemented exception level for the system of a + * specific thread context + */ + static ExceptionLevel highestEL(ThreadContext *tc); + + /** Returns the reset address if the highest implemented exception level for + * the system of a specific thread context is 64 bits (ARMv8) + */ + static Addr resetAddr64(ThreadContext *tc); + + /** Returns the supported physical address range in bits for the system of a + * specific thread context + */ + static uint8_t physAddrRange(ThreadContext *tc); + + /** Returns the physical address mask for the system of a specific thread + * context + */ + static Addr physAddrMask(ThreadContext *tc); + + /** Returns true if ASID is 16 bits for the system of a specific thread + * context while in AArch64 (ARMv8) */ + static bool haveLargeAsid64(ThreadContext *tc); + }; #endif diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index d419fdec5..7eda13b3e 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -35,9 +35,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Authors: Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/faults.hh" +#include "arch/arm/stage2_mmu.hh" +#include "arch/arm/system.hh" #include "arch/arm/table_walker.hh" #include "arch/arm/tlb.hh" #include "cpu/base.hh" @@ -51,13 +54,33 @@ using namespace ArmISA; TableWalker::TableWalker(const Params *p) - : MemObject(p), port(this, params()->sys), drainManager(NULL), - tlb(NULL), currState(NULL), pending(false), - masterId(p->sys->getMasterId(name())), + : MemObject(p), port(this, p->sys), drainManager(NULL), + stage2Mmu(NULL), isStage2(p->is_stage2), tlb(NULL), + currState(NULL), pending(false), masterId(p->sys->getMasterId(name())), numSquashable(p->num_squash_per_cycle), - doL1DescEvent(this), doL2DescEvent(this), doProcessEvent(this) + doL1DescEvent(this), doL2DescEvent(this), + doL0LongDescEvent(this), doL1LongDescEvent(this), doL2LongDescEvent(this), + doL3LongDescEvent(this), + doProcessEvent(this) { sctlr = 0; + + // Cache system-level properties + if (FullSystem) { + armSys = dynamic_cast<ArmSystem *>(p->sys); + assert(armSys); + haveSecurity = armSys->haveSecurity(); + _haveLPAE = armSys->haveLPAE(); + _haveVirtualization = armSys->haveVirtualization(); + physAddrRange = armSys->physAddrRange(); + _haveLargeAsid64 = armSys->haveLargeAsid64(); + } else { + armSys = NULL; + haveSecurity = _haveLPAE = _haveVirtualization = false; + _haveLargeAsid64 = false; + physAddrRange = 32; + } + } TableWalker::~TableWalker() @@ -65,10 +88,14 @@ TableWalker::~TableWalker() ; } +TableWalker::WalkerState::WalkerState() : stage2Tran(NULL), l2Desc(l1Desc) +{ +} + void TableWalker::completeDrain() { - if (drainManager && stateQueueL1.empty() && stateQueueL2.empty() && + if (drainManager && stateQueues[L1].empty() && stateQueues[L2].empty() && pendingQueue.empty()) { setDrainState(Drainable::Drained); DPRINTF(Drain, "TableWalker done draining, processing drain event\n"); @@ -82,21 +109,28 @@ TableWalker::drain(DrainManager *dm) { unsigned int count = 
port.drain(dm); - if (stateQueueL1.empty() && stateQueueL2.empty() && - pendingQueue.empty()) { - setDrainState(Drainable::Drained); - DPRINTF(Drain, "TableWalker free, no need to drain\n"); + bool state_queues_not_empty = false; - // table walker is drained, but its ports may still need to be drained - return count; - } else { + for (int i = 0; i < MAX_LOOKUP_LEVELS; ++i) { + if (!stateQueues[i].empty()) { + state_queues_not_empty = true; + break; + } + } + + if (state_queues_not_empty || pendingQueue.size()) { drainManager = dm; setDrainState(Drainable::Draining); DPRINTF(Drain, "TableWalker not drained\n"); // return port drain count plus the table walker itself needs to drain return count + 1; + } else { + setDrainState(Drainable::Drained); + DPRINTF(Drain, "TableWalker free, no need to drain\n"); + // table walker is drained, but its ports may still need to be drained + return count; } } @@ -120,10 +154,13 @@ TableWalker::getMasterPort(const std::string &if_name, PortID idx) } Fault -TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint8_t _cid, TLB::Mode _mode, - TLB::Translation *_trans, bool _timing, bool _functional) +TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid, + uint8_t _vmid, bool _isHyp, TLB::Mode _mode, + TLB::Translation *_trans, bool _timing, bool _functional, + bool secure, TLB::ArmTranslationType tranType) { assert(!(_functional && _timing)); + if (!currState) { // For atomic mode, a new WalkerState instance should be only created // once per TLB. For timing mode, a new instance is generated for every @@ -139,41 +176,113 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint8_t _cid, TLB::Mode _ // misprediction, in which case nothing will happen or we'll use // this fault to re-execute the faulting instruction which should clean // up everything. 
- if (currState->vaddr == _req->getVaddr()) { + if (currState->vaddr_tainted == _req->getVaddr()) { return new ReExec; } - panic("currState should always be empty in timing mode!\n"); } currState->tc = _tc; + currState->aarch64 = opModeIs64(currOpMode(_tc)); + currState->el = currEL(_tc); currState->transState = _trans; currState->req = _req; currState->fault = NoFault; - currState->contextId = _cid; + currState->asid = _asid; + currState->vmid = _vmid; + currState->isHyp = _isHyp; currState->timing = _timing; currState->functional = _functional; currState->mode = _mode; + currState->tranType = tranType; + currState->isSecure = secure; + currState->physAddrRange = physAddrRange; /** @todo These should be cached or grabbed from cached copies in the TLB, all these miscreg reads are expensive */ - currState->vaddr = currState->req->getVaddr(); - currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR); + currState->vaddr_tainted = currState->req->getVaddr(); + if (currState->aarch64) + currState->vaddr = purifyTaggedAddr(currState->vaddr_tainted, + currState->tc, currState->el); + else + currState->vaddr = currState->vaddr_tainted; + + if (currState->aarch64) { + switch (currState->el) { + case EL0: + case EL1: + currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL1); + currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL1); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(haveVirtualization); + // currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL2); + // currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL2); + // break; + case EL3: + assert(haveSecurity); + currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL3); + currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL3); + break; + default: + panic("Invalid exception level"); + break; + } + } else { + currState->sctlr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_SCTLR, currState->tc, 
!currState->isSecure)); + currState->ttbcr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBCR, currState->tc, !currState->isSecure)); + currState->htcr = currState->tc->readMiscReg(MISCREG_HTCR); + currState->hcr = currState->tc->readMiscReg(MISCREG_HCR); + currState->vtcr = currState->tc->readMiscReg(MISCREG_VTCR); + } sctlr = currState->sctlr; - currState->N = currState->tc->readMiscReg(MISCREG_TTBCR); currState->isFetch = (currState->mode == TLB::Execute); currState->isWrite = (currState->mode == TLB::Write); + // We only do a second stage of translation if we're not secure, or in + // hyp mode, the second stage MMU is enabled, and this table walker + // instance is the first stage. + currState->doingStage2 = false; + // @todo: for now disable this in AArch64 (HCR is not set) + currState->stage2Req = !currState->aarch64 && currState->hcr.vm && + !isStage2 && !currState->isSecure && !currState->isHyp; + + bool long_desc_format = currState->aarch64 || + (_haveLPAE && currState->ttbcr.eae) || + _isHyp || isStage2; + + if (long_desc_format) { + // Helper variables used for hierarchical permissions + currState->secureLookup = currState->isSecure; + currState->rwTable = true; + currState->userTable = true; + currState->xnTable = false; + currState->pxnTable = false; + } - if (!currState->timing) - return processWalk(); + if (!currState->timing) { + if (currState->aarch64) + return processWalkAArch64(); + else if (long_desc_format) + return processWalkLPAE(); + else + return processWalk(); + } if (pending || pendingQueue.size()) { pendingQueue.push_back(currState); currState = NULL; } else { pending = true; - return processWalk(); + if (currState->aarch64) + return processWalkAArch64(); + else if (long_desc_format) + return processWalkLPAE(); + else + return processWalk(); } return NoFault; @@ -186,8 +295,17 @@ TableWalker::processWalkWrapper() assert(pendingQueue.size()); currState = pendingQueue.front(); + ExceptionLevel target_el = EL0; + if 
(currState->aarch64) + target_el = currEL(currState->tc); + else + target_el = EL1; + // Check if a previous walk filled this request already - TlbEntry* te = tlb->lookup(currState->vaddr, currState->contextId, true); + // @TODO Should this always be the TLB or should we look in the stage2 TLB? + TlbEntry* te = tlb->lookup(currState->vaddr, currState->asid, + currState->vmid, currState->isHyp, currState->isSecure, true, false, + target_el); // Check if we still need to have a walk for this request. If the requesting // instruction has been squashed, or a previous walk has filled the TLB with @@ -198,7 +316,12 @@ TableWalker::processWalkWrapper() // We've got a valid request, lets process it pending = true; pendingQueue.pop_front(); - processWalk(); + if (currState->aarch64) + processWalkAArch64(); + else if ((_haveLPAE && currState->ttbcr.eae) || currState->isHyp || isStage2) + processWalkLPAE(); + else + processWalk(); return; } @@ -212,7 +335,8 @@ TableWalker::processWalkWrapper() pendingQueue.pop_front(); num_squashed++; - DPRINTF(TLB, "Squashing table walk for address %#x\n", currState->vaddr); + DPRINTF(TLB, "Squashing table walk for address %#x\n", + currState->vaddr_tainted); if (currState->transState->squashed()) { // finish the translation which will delete the translation object @@ -220,8 +344,9 @@ TableWalker::processWalkWrapper() currState->req, currState->tc, currState->mode); } else { // translate the request now that we know it will work - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + tlb->translateTiming(currState->req, currState->tc, + currState->transState, currState->mode); + } // delete the current request @@ -230,7 +355,9 @@ TableWalker::processWalkWrapper() // peak at the next one if (pendingQueue.size()) { currState = pendingQueue.front(); - te = tlb->lookup(currState->vaddr, currState->contextId, true); + te = tlb->lookup(currState->vaddr, currState->asid, + 
currState->vmid, currState->isHyp, currState->isSecure, true, + false, target_el); } else { // Terminate the loop, nothing more to do currState = NULL; @@ -249,32 +376,62 @@ TableWalker::processWalk() Addr ttbr = 0; // If translation isn't enabled, we shouldn't be here - assert(currState->sctlr.m); + assert(currState->sctlr.m || isStage2); - DPRINTF(TLB, "Begining table walk for address %#x, TTBCR: %#x, bits:%#x\n", - currState->vaddr, currState->N, mbits(currState->vaddr, 31, - 32-currState->N)); + DPRINTF(TLB, "Beginning table walk for address %#x, TTBCR: %#x, bits:%#x\n", + currState->vaddr_tainted, currState->ttbcr, mbits(currState->vaddr, 31, + 32 - currState->ttbcr.n)); - if (currState->N == 0 || !mbits(currState->vaddr, 31, 32-currState->N)) { + if (currState->ttbcr.n == 0 || !mbits(currState->vaddr, 31, + 32 - currState->ttbcr.n)) { DPRINTF(TLB, " - Selecting TTBR0\n"); - ttbr = currState->tc->readMiscReg(MISCREG_TTBR0); + // Check if table walk is allowed when Security Extensions are enabled + if (haveSecurity && currState->ttbcr.pd0) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR0, currState->tc, !currState->isSecure)); } else { DPRINTF(TLB, " - Selecting TTBR1\n"); - ttbr = currState->tc->readMiscReg(MISCREG_TTBR1); - currState->N = 0; + // Check if table walk is allowed when Security Extensions are enabled + if (haveSecurity && currState->ttbcr.pd1) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + 
ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR1, currState->tc, !currState->isSecure)); + currState->ttbcr.n = 0; } - Addr l1desc_addr = mbits(ttbr, 31, 14-currState->N) | - (bits(currState->vaddr,31-currState->N,20) << 2); - DPRINTF(TLB, " - Descriptor at address %#x\n", l1desc_addr); - + Addr l1desc_addr = mbits(ttbr, 31, 14 - currState->ttbcr.n) | + (bits(currState->vaddr, 31 - currState->ttbcr.n, 20) << 2); + DPRINTF(TLB, " - Descriptor at address %#x (%s)\n", l1desc_addr, + currState->isSecure ? "s" : "ns"); // Trickbox address check Fault f; - f = tlb->walkTrickBoxCheck(l1desc_addr, currState->vaddr, sizeof(uint32_t), - currState->isFetch, currState->isWrite, 0, true); + f = tlb->walkTrickBoxCheck(l1desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint32_t), currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, L1); if (f) { - DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr); + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); if (currState->timing) { pending = false; nextWalk(currState->tc); @@ -291,28 +448,422 @@ TableWalker::processWalk() flag = Request::UNCACHEABLE; } + bool delayed; + delayed = fetchDescriptor(l1desc_addr, (uint8_t*)&currState->l1Desc.data, + sizeof(uint32_t), flag, L1, &doL1DescEvent, + &TableWalker::doL1Descriptor); + if (!delayed) { + f = currState->fault; + } + + return f; +} + +Fault +TableWalker::processWalkLPAE() +{ + Addr ttbr, ttbr0_max, ttbr1_min, desc_addr; + int tsz, n; + LookupLevel start_lookup_level = L1; + + DPRINTF(TLB, "Beginning table walk for address %#x, TTBCR: %#x\n", + currState->vaddr_tainted, currState->ttbcr); + + Request::Flags flag = 0; + if (currState->isSecure) + flag.set(Request::SECURE); + + // work out which base address register to use, if in hyp mode we always + // use HTTBR + if (isStage2) { + DPRINTF(TLB, " - Selecting VTTBR 
(long-desc.)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_VTTBR); + tsz = sext<4>(currState->vtcr.t0sz); + start_lookup_level = currState->vtcr.sl0 ? L1 : L2; + } else if (currState->isHyp) { + DPRINTF(TLB, " - Selecting HTTBR (long-desc.)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_HTTBR); + tsz = currState->htcr.t0sz; + } else { + assert(_haveLPAE && currState->ttbcr.eae); + + // Determine boundaries of TTBR0/1 regions + if (currState->ttbcr.t0sz) + ttbr0_max = (1ULL << (32 - currState->ttbcr.t0sz)) - 1; + else if (currState->ttbcr.t1sz) + ttbr0_max = (1ULL << 32) - + (1ULL << (32 - currState->ttbcr.t1sz)) - 1; + else + ttbr0_max = (1ULL << 32) - 1; + if (currState->ttbcr.t1sz) + ttbr1_min = (1ULL << 32) - (1ULL << (32 - currState->ttbcr.t1sz)); + else + ttbr1_min = (1ULL << (32 - currState->ttbcr.t0sz)); + + // The following code snippet selects the appropriate translation table base + // address (TTBR0 or TTBR1) and the appropriate starting lookup level + // depending on the address range supported by the translation table (ARM + // ARM issue C B3.6.4) + if (currState->vaddr <= ttbr0_max) { + DPRINTF(TLB, " - Selecting TTBR0 (long-desc.)\n"); + // Check if table walk is allowed + if (currState->ttbcr.epd0) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR0, currState->tc, !currState->isSecure)); + tsz = currState->ttbcr.t0sz; + if (ttbr0_max < (1ULL << 30)) // Upper limit < 1 GB + start_lookup_level = L2; + } else if (currState->vaddr >= ttbr1_min) { + DPRINTF(TLB, " - Selecting TTBR1 (long-desc.)\n"); + // Check if table walk is allowed + if (currState->ttbcr.epd1) { + if (currState->isFetch) + return 
new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR1, currState->tc, !currState->isSecure)); + tsz = currState->ttbcr.t1sz; + if (ttbr1_min >= (1ULL << 31) + (1ULL << 30)) // Lower limit >= 3 GB + start_lookup_level = L2; + } else { + // Out of boundaries -> translation fault + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, ArmFault::TranslationLL + L1, + isStage2, ArmFault::LpaeTran); + } + + } + + // Perform lookup (ARM ARM issue C B3.6.6) + if (start_lookup_level == L1) { + n = 5 - tsz; + desc_addr = mbits(ttbr, 39, n) | + (bits(currState->vaddr, n + 26, 30) << 3); + DPRINTF(TLB, " - Descriptor at address %#x (%s) (long-desc.)\n", + desc_addr, currState->isSecure ? "s" : "ns"); + } else { + // Skip first-level lookup + n = (tsz >= 2 ? 14 - tsz : 12); + desc_addr = mbits(ttbr, 39, n) | + (bits(currState->vaddr, n + 17, 21) << 3); + DPRINTF(TLB, " - Descriptor at address %#x (%s) (long-desc.)\n", + desc_addr, currState->isSecure ? 
"s" : "ns"); + } + + // Trickbox address check + Fault f = tlb->walkTrickBoxCheck(desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint64_t), currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, + start_lookup_level); + if (f) { + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + } + + if (currState->sctlr.c == 0) { + flag = Request::UNCACHEABLE; + } + + if (currState->isSecure) + flag.set(Request::SECURE); + + currState->longDesc.lookupLevel = start_lookup_level; + currState->longDesc.aarch64 = false; + currState->longDesc.largeGrain = false; + currState->longDesc.grainSize = 12; + + Event *event = start_lookup_level == L1 ? (Event *) &doL1LongDescEvent + : (Event *) &doL2LongDescEvent; + + bool delayed = fetchDescriptor(desc_addr, (uint8_t*)&currState->longDesc.data, + sizeof(uint64_t), flag, start_lookup_level, + event, &TableWalker::doLongDescriptor); + if (!delayed) { + f = currState->fault; + } + + return f; +} + +unsigned +TableWalker::adjustTableSizeAArch64(unsigned tsz) +{ + if (tsz < 25) + return 25; + if (tsz > 48) + return 48; + return tsz; +} + +bool +TableWalker::checkAddrSizeFaultAArch64(Addr addr, int currPhysAddrRange) +{ + return (currPhysAddrRange != MaxPhysAddrRange && + bits(addr, MaxPhysAddrRange - 1, currPhysAddrRange)); +} + +Fault +TableWalker::processWalkAArch64() +{ + assert(currState->aarch64); + + DPRINTF(TLB, "Beginning table walk for address %#llx, TTBCR: %#llx\n", + currState->vaddr_tainted, currState->ttbcr); + + // Determine TTBR, table size, granule size and phys. 
address range + Addr ttbr = 0; + int tsz = 0, ps = 0; + bool large_grain = false; + bool fault = false; + switch (currState->el) { + case EL0: + case EL1: + switch (bits(currState->vaddr, 63,48)) { + case 0: + DPRINTF(TLB, " - Selecting TTBR0 (AArch64)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL1); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t0sz); + large_grain = currState->ttbcr.tg0; + if (bits(currState->vaddr, 63, tsz) != 0x0 || + currState->ttbcr.epd0) + fault = true; + break; + case 0xffff: + DPRINTF(TLB, " - Selecting TTBR1 (AArch64)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_TTBR1_EL1); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t1sz); + large_grain = currState->ttbcr.tg1; + if (bits(currState->vaddr, 63, tsz) != mask(64-tsz) || + currState->ttbcr.epd1) + fault = true; + break; + default: + // top two bytes must be all 0s or all 1s, else invalid addr + fault = true; + } + ps = currState->ttbcr.ips; + break; + case EL2: + case EL3: + switch(bits(currState->vaddr, 63,48)) { + case 0: + DPRINTF(TLB, " - Selecting TTBR0 (AArch64)\n"); + if (currState->el == EL2) + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL2); + else + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL3); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t0sz); + large_grain = currState->ttbcr.tg0; + break; + default: + // invalid addr if top two bytes are not all 0s + fault = true; + } + ps = currState->ttbcr.ps; + break; + } + + if (fault) { + Fault f; + if (currState->isFetch) + f = new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L0, isStage2, + ArmFault::LpaeTran); + else + f = new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L0, + isStage2, ArmFault::LpaeTran); + + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + + } + + // 
Determine starting lookup level + LookupLevel start_lookup_level; + int grain_size, stride; + if (large_grain) { // 64 KB granule + grain_size = 16; + stride = grain_size - 3; + if (tsz > grain_size + 2 * stride) + start_lookup_level = L1; + else if (tsz > grain_size + stride) + start_lookup_level = L2; + else + start_lookup_level = L3; + } else { // 4 KB granule + grain_size = 12; + stride = grain_size - 3; + if (tsz > grain_size + 3 * stride) + start_lookup_level = L0; + else if (tsz > grain_size + 2 * stride) + start_lookup_level = L1; + else + start_lookup_level = L2; + } + + // Determine table base address + int base_addr_lo = 3 + tsz - stride * (3 - start_lookup_level) - + grain_size; + Addr base_addr = mbits(ttbr, 47, base_addr_lo); + + // Determine physical address size and raise an Address Size Fault if + // necessary + int pa_range = decodePhysAddrRange64(ps); + // Clamp to lower limit + if (pa_range > physAddrRange) + currState->physAddrRange = physAddrRange; + else + currState->physAddrRange = pa_range; + if (checkAddrSizeFaultAArch64(base_addr, currState->physAddrRange)) { + DPRINTF(TLB, "Address size fault before any lookup\n"); + Fault f; + if (currState->isFetch) + f = new PrefetchAbort(currState->vaddr_tainted, + ArmFault::AddressSizeLL + start_lookup_level, + isStage2, + ArmFault::LpaeTran); + else + f = new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::AddressSizeLL + start_lookup_level, + isStage2, + ArmFault::LpaeTran); + + + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + + } + + // Determine descriptor address + Addr desc_addr = base_addr | + (bits(currState->vaddr, tsz - 1, + stride * (3 - start_lookup_level) + grain_size) << 3); + + // Trickbox address check + Fault f = tlb->walkTrickBoxCheck(desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint64_t), 
currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, + start_lookup_level); + if (f) { + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + } + + Request::Flags flag = 0; + if (currState->sctlr.c == 0) { + flag = Request::UNCACHEABLE; + } + + currState->longDesc.lookupLevel = start_lookup_level; + currState->longDesc.aarch64 = true; + currState->longDesc.largeGrain = large_grain; + currState->longDesc.grainSize = grain_size; + if (currState->timing) { - port.dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), - &doL1DescEvent, (uint8_t*)&currState->l1Desc.data, + Event *event; + switch (start_lookup_level) { + case L0: + event = (Event *) &doL0LongDescEvent; + break; + case L1: + event = (Event *) &doL1LongDescEvent; + break; + case L2: + event = (Event *) &doL2LongDescEvent; + break; + case L3: + event = (Event *) &doL3LongDescEvent; + break; + default: + panic("Invalid table lookup level"); + break; + } + port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t), event, + (uint8_t*) &currState->longDesc.data, currState->tc->getCpuPtr()->clockPeriod(), flag); - DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before " - "adding: %d\n", - stateQueueL1.size()); - stateQueueL1.push_back(currState); + DPRINTF(TLBVerbose, + "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[start_lookup_level].size()); + stateQueues[start_lookup_level].push_back(currState); currState = NULL; } else if (!currState->functional) { - port.dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), - NULL, (uint8_t*)&currState->l1Desc.data, + port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t), + NULL, (uint8_t*) &currState->longDesc.data, currState->tc->getCpuPtr()->clockPeriod(), flag); - doL1Descriptor(); + doLongDescriptor(); f = 
currState->fault; } else { - RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId); - req->taskId(ContextSwitchTaskId::DMA); + RequestPtr req = new Request(desc_addr, sizeof(uint64_t), flag, + masterId); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); - pkt->dataStatic((uint8_t*)&currState->l1Desc.data); + pkt->dataStatic((uint8_t*) &currState->longDesc.data); port.sendFunctional(pkt); - doL1Descriptor(); + doLongDescriptor(); delete req; delete pkt; f = currState->fault; @@ -330,38 +881,38 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, DPRINTF(TLBVerbose, "memAttrs texcb:%d s:%d\n", texcb, s); te.shareable = false; // default value te.nonCacheable = false; - bool outer_shareable = false; + te.outerShareable = false; if (sctlr.tre == 0 || ((sctlr.tre == 1) && (sctlr.m == 0))) { switch(texcb) { case 0: // Stongly-ordered te.nonCacheable = true; - te.mtype = TlbEntry::StronglyOrdered; + te.mtype = TlbEntry::MemoryType::StronglyOrdered; te.shareable = true; te.innerAttrs = 1; te.outerAttrs = 0; break; case 1: // Shareable Device te.nonCacheable = true; - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.shareable = true; te.innerAttrs = 3; te.outerAttrs = 0; break; case 2: // Outer and Inner Write-Through, no Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 6; te.outerAttrs = bits(texcb, 1, 0); break; case 3: // Outer and Inner Write-Back, no Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 7; te.outerAttrs = bits(texcb, 1, 0); break; case 4: // Outer and Inner Non-cacheable te.nonCacheable = true; - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 0; te.outerAttrs = bits(texcb, 1, 0); @@ -373,14 +924,14 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, 
panic("Implementation-defined texcb value!\n"); break; case 7: // Outer and Inner Write-Back, Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 5; te.outerAttrs = 1; break; case 8: // Non-shareable Device te.nonCacheable = true; - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.shareable = false; te.innerAttrs = 3; te.outerAttrs = 0; @@ -389,7 +940,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, panic("Reserved texcb value!\n"); break; case 16 ... 31: // Cacheable Memory - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; if (bits(texcb, 1,0) == 0 || bits(texcb, 3,2) == 0) te.nonCacheable = true; @@ -401,8 +952,10 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, } } else { assert(tc); - PRRR prrr = tc->readMiscReg(MISCREG_PRRR); - NMRR nmrr = tc->readMiscReg(MISCREG_NMRR); + PRRR prrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_PRRR, + currState->tc, !currState->isSecure)); + NMRR nmrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_NMRR, + currState->tc, !currState->isSecure)); DPRINTF(TLBVerbose, "memAttrs PRRR:%08x NMRR:%08x\n", prrr, nmrr); uint8_t curr_tr = 0, curr_ir = 0, curr_or = 0; switch(bits(texcb, 2,0)) { @@ -410,37 +963,37 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, curr_tr = prrr.tr0; curr_ir = nmrr.ir0; curr_or = nmrr.or0; - outer_shareable = (prrr.nos0 == 0); + te.outerShareable = (prrr.nos0 == 0); break; case 1: curr_tr = prrr.tr1; curr_ir = nmrr.ir1; curr_or = nmrr.or1; - outer_shareable = (prrr.nos1 == 0); + te.outerShareable = (prrr.nos1 == 0); break; case 2: curr_tr = prrr.tr2; curr_ir = nmrr.ir2; curr_or = nmrr.or2; - outer_shareable = (prrr.nos2 == 0); + te.outerShareable = (prrr.nos2 == 0); break; case 3: curr_tr = prrr.tr3; curr_ir = nmrr.ir3; curr_or = nmrr.or3; - outer_shareable = (prrr.nos3 == 0); + te.outerShareable = 
(prrr.nos3 == 0); break; case 4: curr_tr = prrr.tr4; curr_ir = nmrr.ir4; curr_or = nmrr.or4; - outer_shareable = (prrr.nos4 == 0); + te.outerShareable = (prrr.nos4 == 0); break; case 5: curr_tr = prrr.tr5; curr_ir = nmrr.ir5; curr_or = nmrr.or5; - outer_shareable = (prrr.nos5 == 0); + te.outerShareable = (prrr.nos5 == 0); break; case 6: panic("Imp defined type\n"); @@ -448,14 +1001,14 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, curr_tr = prrr.tr7; curr_ir = nmrr.ir7; curr_or = nmrr.or7; - outer_shareable = (prrr.nos7 == 0); + te.outerShareable = (prrr.nos7 == 0); break; } switch(curr_tr) { case 0: DPRINTF(TLBVerbose, "StronglyOrdered\n"); - te.mtype = TlbEntry::StronglyOrdered; + te.mtype = TlbEntry::MemoryType::StronglyOrdered; te.nonCacheable = true; te.innerAttrs = 1; te.outerAttrs = 0; @@ -464,7 +1017,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, case 1: DPRINTF(TLBVerbose, "Device ds1:%d ds0:%d s:%d\n", prrr.ds1, prrr.ds0, s); - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.nonCacheable = true; te.innerAttrs = 3; te.outerAttrs = 0; @@ -476,7 +1029,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, case 2: DPRINTF(TLBVerbose, "Normal ns1:%d ns0:%d s:%d\n", prrr.ns1, prrr.ns0, s); - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; if (prrr.ns1 && s) te.shareable = true; if (prrr.ns0 && !s) @@ -486,7 +1039,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, panic("Reserved type"); } - if (te.mtype == TlbEntry::Normal){ + if (te.mtype == TlbEntry::MemoryType::Normal){ switch(curr_ir) { case 0: te.nonCacheable = true; @@ -523,40 +1076,192 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, DPRINTF(TLBVerbose, "memAttrs: shareable: %d, innerAttrs: %d, \ outerAttrs: %d\n", te.shareable, te.innerAttrs, te.outerAttrs); + te.setAttributes(false); +} + +void +TableWalker::memAttrsLPAE(ThreadContext *tc, 
TlbEntry &te, + LongDescriptor &lDescriptor) +{ + assert(_haveLPAE); + + uint8_t attr; + uint8_t sh = lDescriptor.sh(); + // Different format and source of attributes if this is a stage 2 + // translation + if (isStage2) { + attr = lDescriptor.memAttr(); + uint8_t attr_3_2 = (attr >> 2) & 0x3; + uint8_t attr_1_0 = attr & 0x3; + + DPRINTF(TLBVerbose, "memAttrsLPAE MemAttr:%#x sh:%#x\n", attr, sh); + + if (attr_3_2 == 0) { + te.mtype = attr_1_0 == 0 ? TlbEntry::MemoryType::StronglyOrdered + : TlbEntry::MemoryType::Device; + te.outerAttrs = 0; + te.innerAttrs = attr_1_0 == 0 ? 1 : 3; + te.nonCacheable = true; + } else { + te.mtype = TlbEntry::MemoryType::Normal; + te.outerAttrs = attr_3_2 == 1 ? 0 : + attr_3_2 == 2 ? 2 : 1; + te.innerAttrs = attr_1_0 == 1 ? 0 : + attr_1_0 == 2 ? 6 : 5; + te.nonCacheable = (attr_3_2 == 1) || (attr_1_0 == 1); + } + } else { + uint8_t attrIndx = lDescriptor.attrIndx(); + + // LPAE always uses remapping of memory attributes, irrespective of the + // value of SCTLR.TRE + int reg = attrIndx & 0x4 ? MISCREG_MAIR1 : MISCREG_MAIR0; + reg = flattenMiscRegNsBanked(reg, currState->tc, !currState->isSecure); + uint32_t mair = currState->tc->readMiscReg(reg); + attr = (mair >> (8 * (attrIndx % 4))) & 0xff; + uint8_t attr_7_4 = bits(attr, 7, 4); + uint8_t attr_3_0 = bits(attr, 3, 0); + DPRINTF(TLBVerbose, "memAttrsLPAE AttrIndx:%#x sh:%#x, attr %#x\n", attrIndx, sh, attr); + + // Note: the memory subsystem only cares about the 'cacheable' memory + // attribute. 
The other attributes are only used to fill the PAR register + // accordingly to provide the illusion of full support + te.nonCacheable = false; + + switch (attr_7_4) { + case 0x0: + // Strongly-ordered or Device memory + if (attr_3_0 == 0x0) + te.mtype = TlbEntry::MemoryType::StronglyOrdered; + else if (attr_3_0 == 0x4) + te.mtype = TlbEntry::MemoryType::Device; + else + panic("Unpredictable behavior\n"); + te.nonCacheable = true; + te.outerAttrs = 0; + break; + case 0x4: + // Normal memory, Outer Non-cacheable + te.mtype = TlbEntry::MemoryType::Normal; + te.outerAttrs = 0; + if (attr_3_0 == 0x4) + // Inner Non-cacheable + te.nonCacheable = true; + else if (attr_3_0 < 0x8) + panic("Unpredictable behavior\n"); + break; + case 0x8: + case 0x9: + case 0xa: + case 0xb: + case 0xc: + case 0xd: + case 0xe: + case 0xf: + if (attr_7_4 & 0x4) { + te.outerAttrs = (attr_7_4 & 1) ? 1 : 3; + } else { + te.outerAttrs = 0x2; + } + // Normal memory, Outer Cacheable + te.mtype = TlbEntry::MemoryType::Normal; + if (attr_3_0 != 0x4 && attr_3_0 < 0x8) + panic("Unpredictable behavior\n"); + break; + default: + panic("Unpredictable behavior\n"); + break; + } + + switch (attr_3_0) { + case 0x0: + te.innerAttrs = 0x1; + break; + case 0x4: + te.innerAttrs = attr_7_4 == 0 ? 0x3 : 0; + break; + case 0x8: + case 0x9: + case 0xA: + case 0xB: + te.innerAttrs = 6; + break; + case 0xC: + case 0xD: + case 0xE: + case 0xF: + te.innerAttrs = attr_3_0 & 1 ? 0x5 : 0x7; + break; + default: + panic("Unpredictable behavior\n"); + break; + } + } + + te.outerShareable = sh == 2; + te.shareable = (sh & 0x2) ? 
true : false; + te.setAttributes(true); + te.attributes |= (uint64_t) attr << 56; +} - /** Formatting for Physical Address Register (PAR) - * Only including lower bits (TLB info here) - * PAR: - * PA [31:12] - * Reserved [11] - * TLB info [10:1] - * NOS [10] (Not Outer Sharable) - * NS [9] (Non-Secure) - * -- [8] (Implementation Defined) - * SH [7] (Sharable) - * Inner[6:4](Inner memory attributes) - * Outer[3:2](Outer memory attributes) - * SS [1] (SuperSection) - * F [0] (Fault, Fault Status in [6:1] if faulted) - */ - te.attributes = ( - ((outer_shareable ? 0:1) << 10) | - // TODO: NS Bit - ((te.shareable ? 1:0) << 7) | - (te.innerAttrs << 4) | - (te.outerAttrs << 2) - // TODO: Supersection bit - // TODO: Fault bit - ); +void +TableWalker::memAttrsAArch64(ThreadContext *tc, TlbEntry &te, uint8_t attrIndx, + uint8_t sh) +{ + DPRINTF(TLBVerbose, "memAttrsAArch64 AttrIndx:%#x sh:%#x\n", attrIndx, sh); + + // Select MAIR + uint64_t mair; + switch (currState->el) { + case EL0: + case EL1: + mair = tc->readMiscReg(MISCREG_MAIR_EL1); + break; + case EL2: + mair = tc->readMiscReg(MISCREG_MAIR_EL2); + break; + case EL3: + mair = tc->readMiscReg(MISCREG_MAIR_EL3); + break; + default: + panic("Invalid exception level"); + break; + } + + // Select attributes + uint8_t attr = bits(mair, 8 * attrIndx + 7, 8 * attrIndx); + uint8_t attr_lo = bits(attr, 3, 0); + uint8_t attr_hi = bits(attr, 7, 4); + // Memory type + te.mtype = attr_hi == 0 ? TlbEntry::MemoryType::Device : TlbEntry::MemoryType::Normal; + // Cacheability + te.nonCacheable = false; + if (te.mtype == TlbEntry::MemoryType::Device || // Device memory + attr_hi == 0x8 || // Normal memory, Outer Non-cacheable + attr_lo == 0x8) { // Normal memory, Inner Non-cacheable + te.nonCacheable = true; + } + + te.shareable = sh == 2; + te.outerShareable = (sh & 0x2) ? 
true : false; + // Attributes formatted according to the 64-bit PAR + te.attributes = ((uint64_t) attr << 56) | + (1 << 11) | // LPAE bit + (te.ns << 9) | // NS bit + (sh << 7); } void TableWalker::doL1Descriptor() { + if (currState->fault != NoFault) { + return; + } + DPRINTF(TLB, "L1 descriptor for %#x is %#x\n", - currState->vaddr, currState->l1Desc.data); + currState->vaddr_tainted, currState->l1Desc.data); TlbEntry te; switch (currState->l1Desc.type()) { @@ -569,11 +1274,17 @@ TableWalker::doL1Descriptor() DPRINTF(TLB, "L1 Descriptor Reserved/Ignore, causing fault\n"); if (currState->isFetch) currState->fault = - new PrefetchAbort(currState->vaddr, ArmFault::Translation0); + new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); else currState->fault = - new DataAbort(currState->vaddr, 0, currState->isWrite, - ArmFault::Translation0); + new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); return; case L1Descriptor::Section: if (currState->sctlr.afe && bits(currState->l1Desc.ap(), 0) == 0) { @@ -582,85 +1293,251 @@ TableWalker::doL1Descriptor() * AccessFlag0 */ - currState->fault = new DataAbort(currState->vaddr, - currState->l1Desc.domain(), currState->isWrite, - ArmFault::AccessFlag0); + currState->fault = new DataAbort(currState->vaddr_tainted, + currState->l1Desc.domain(), + currState->isWrite, + ArmFault::AccessFlagLL + L1, + isStage2, + ArmFault::VmsaTran); } if (currState->l1Desc.supersection()) { panic("Haven't implemented supersections\n"); } - te.N = 20; - te.pfn = currState->l1Desc.pfn(); - te.size = (1<<te.N) - 1; - te.global = !currState->l1Desc.global(); - te.valid = true; - te.vpn = currState->vaddr >> te.N; - te.sNp = true; - te.xn = currState->l1Desc.xn(); - te.ap = currState->l1Desc.ap(); - te.domain = currState->l1Desc.domain(); - te.asid = currState->contextId; - 
memAttrs(currState->tc, te, currState->sctlr, - currState->l1Desc.texcb(), currState->l1Desc.shareable()); - - DPRINTF(TLB, "Inserting Section Descriptor into TLB\n"); - DPRINTF(TLB, " - N:%d pfn:%#x size: %#x global:%d valid: %d\n", - te.N, te.pfn, te.size, te.global, te.valid); - DPRINTF(TLB, " - vpn:%#x sNp: %d xn:%d ap:%d domain: %d asid:%d nc:%d\n", - te.vpn, te.sNp, te.xn, te.ap, te.domain, te.asid, - te.nonCacheable); - DPRINTF(TLB, " - domain from l1 desc: %d data: %#x bits:%d\n", - currState->l1Desc.domain(), currState->l1Desc.data, - (currState->l1Desc.data >> 5) & 0xF ); + insertTableEntry(currState->l1Desc, false); + return; + case L1Descriptor::PageTable: + { + Addr l2desc_addr; + l2desc_addr = currState->l1Desc.l2Addr() | + (bits(currState->vaddr, 19, 12) << 2); + DPRINTF(TLB, "L1 descriptor points to page table at: %#x (%s)\n", + l2desc_addr, currState->isSecure ? "s" : "ns"); + + // Trickbox address check + currState->fault = tlb->walkTrickBoxCheck( + l2desc_addr, currState->isSecure, currState->vaddr, + sizeof(uint32_t), currState->isFetch, currState->isWrite, + currState->l1Desc.domain(), L2); + + if (currState->fault) { + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } + return; + } + + Request::Flags flag = 0; + if (currState->isSecure) + flag.set(Request::SECURE); + + bool delayed; + delayed = fetchDescriptor(l2desc_addr, + (uint8_t*)&currState->l2Desc.data, + sizeof(uint32_t), flag, -1, &doL2DescEvent, + &TableWalker::doL2Descriptor); + if (delayed) { + currState->delayed = true; + } + return; + } + default: + panic("A new type in a 2 bit field?\n"); + } +} + +void +TableWalker::doLongDescriptor() +{ + if (currState->fault != NoFault) { + return; + } + + DPRINTF(TLB, "L%d descriptor for %#llx is %#llx (%s)\n", + currState->longDesc.lookupLevel, currState->vaddr_tainted, + currState->longDesc.data, + currState->aarch64 ? 
"AArch64" : "long-desc."); + + if ((currState->longDesc.type() == LongDescriptor::Block) || + (currState->longDesc.type() == LongDescriptor::Page)) { + DPRINTF(TLBVerbose, "Analyzing L%d descriptor: %#llx, pxn: %d, " + "xn: %d, ap: %d, af: %d, type: %d\n", + currState->longDesc.lookupLevel, + currState->longDesc.data, + currState->longDesc.pxn(), + currState->longDesc.xn(), + currState->longDesc.ap(), + currState->longDesc.af(), + currState->longDesc.type()); + } else { + DPRINTF(TLBVerbose, "Analyzing L%d descriptor: %#llx, type: %d\n", + currState->longDesc.lookupLevel, + currState->longDesc.data, + currState->longDesc.type()); + } + + TlbEntry te; + + switch (currState->longDesc.type()) { + case LongDescriptor::Invalid: if (!currState->timing) { currState->tc = NULL; currState->req = NULL; } - tlb->insert(currState->vaddr, te); + DPRINTF(TLB, "L%d descriptor Invalid, causing fault type %d\n", + currState->longDesc.lookupLevel, + ArmFault::TranslationLL + currState->longDesc.lookupLevel); + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + ArmFault::TranslationLL + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); return; - case L1Descriptor::PageTable: - Addr l2desc_addr; - l2desc_addr = currState->l1Desc.l2Addr() | - (bits(currState->vaddr, 19,12) << 2); - DPRINTF(TLB, "L1 descriptor points to page table at: %#x\n", - l2desc_addr); - - // Trickbox address check - currState->fault = tlb->walkTrickBoxCheck(l2desc_addr, currState->vaddr, - sizeof(uint32_t), currState->isFetch, currState->isWrite, - currState->l1Desc.domain(), false); - - if (currState->fault) { - if (!currState->timing) { - currState->tc = NULL; - currState->req = NULL; + case LongDescriptor::Block: + case 
LongDescriptor::Page: + { + bool fault = false; + bool aff = false; + // Check for address size fault + if (checkAddrSizeFaultAArch64( + mbits(currState->longDesc.data, MaxPhysAddrRange - 1, + currState->longDesc.offsetBits()), + currState->physAddrRange)) { + fault = true; + DPRINTF(TLB, "L%d descriptor causing Address Size Fault\n", + currState->longDesc.lookupLevel); + // Check for access fault + } else if (currState->longDesc.af() == 0) { + fault = true; + DPRINTF(TLB, "L%d descriptor causing Access Fault\n", + currState->longDesc.lookupLevel); + aff = true; + } + if (fault) { + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + (aff ? ArmFault::AccessFlagLL : ArmFault::AddressSizeLL) + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + (aff ? ArmFault::AccessFlagLL : ArmFault::AddressSizeLL) + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + } else { + insertTableEntry(currState->longDesc, true); } - return; } + return; + case LongDescriptor::Table: + { + // Set hierarchical permission flags + currState->secureLookup = currState->secureLookup && + currState->longDesc.secureTable(); + currState->rwTable = currState->rwTable && + currState->longDesc.rwTable(); + currState->userTable = currState->userTable && + currState->longDesc.userTable(); + currState->xnTable = currState->xnTable || + currState->longDesc.xnTable(); + currState->pxnTable = currState->pxnTable || + currState->longDesc.pxnTable(); + + // Set up next level lookup + Addr next_desc_addr = currState->longDesc.nextDescAddr( + currState->vaddr); + + DPRINTF(TLB, "L%d descriptor points to L%d descriptor at: %#x (%s)\n", + currState->longDesc.lookupLevel, + currState->longDesc.lookupLevel + 1, + next_desc_addr, + currState->secureLookup ? 
"s" : "ns"); + + // Check for address size fault + if (currState->aarch64 && checkAddrSizeFaultAArch64( + next_desc_addr, currState->physAddrRange)) { + DPRINTF(TLB, "L%d descriptor causing Address Size Fault\n", + currState->longDesc.lookupLevel); + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + ArmFault::AddressSizeLL + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::AddressSizeLL + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + return; + } + // Trickbox address check + currState->fault = tlb->walkTrickBoxCheck( + next_desc_addr, currState->vaddr, + currState->vaddr, sizeof(uint64_t), + currState->isFetch, currState->isWrite, + TlbEntry::DomainType::Client, + toLookupLevel(currState->longDesc.lookupLevel +1)); + + if (currState->fault) { + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } + return; + } - if (currState->timing) { - currState->delayed = true; - port.dmaAction(MemCmd::ReadReq, l2desc_addr, sizeof(uint32_t), - &doL2DescEvent, (uint8_t*)&currState->l2Desc.data, - currState->tc->getCpuPtr()->clockPeriod()); - } else if (!currState->functional) { - port.dmaAction(MemCmd::ReadReq, l2desc_addr, sizeof(uint32_t), - NULL, (uint8_t*)&currState->l2Desc.data, - currState->tc->getCpuPtr()->clockPeriod()); - doL2Descriptor(); - } else { - RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0, - masterId); - req->taskId(ContextSwitchTaskId::DMA); - PacketPtr pkt = new Packet(req, MemCmd::ReadReq); - pkt->dataStatic((uint8_t*)&currState->l2Desc.data); - port.sendFunctional(pkt); - doL2Descriptor(); - delete req; - delete pkt; + Request::Flags flag = 0; + if (currState->secureLookup) + flag.set(Request::SECURE); + + currState->longDesc.lookupLevel = + (LookupLevel) (currState->longDesc.lookupLevel + 
1); + Event *event = NULL; + switch (currState->longDesc.lookupLevel) { + case L1: + assert(currState->aarch64); + event = &doL1LongDescEvent; + break; + case L2: + event = &doL2LongDescEvent; + break; + case L3: + event = &doL3LongDescEvent; + break; + default: + panic("Wrong lookup level in table walk\n"); + break; + } + + bool delayed; + delayed = fetchDescriptor(next_desc_addr, (uint8_t*)&currState->longDesc.data, + sizeof(uint64_t), flag, -1, event, + &TableWalker::doLongDescriptor); + if (delayed) { + currState->delayed = true; + } } return; default: @@ -671,8 +1548,12 @@ TableWalker::doL1Descriptor() void TableWalker::doL2Descriptor() { + if (currState->fault != NoFault) { + return; + } + DPRINTF(TLB, "L2 descriptor for %#x is %#x\n", - currState->vaddr, currState->l2Desc.data); + currState->vaddr_tainted, currState->l2Desc.data); TlbEntry te; if (currState->l2Desc.invalid()) { @@ -683,11 +1564,16 @@ TableWalker::doL2Descriptor() } if (currState->isFetch) currState->fault = - new PrefetchAbort(currState->vaddr, ArmFault::Translation1); + new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L2, + isStage2, + ArmFault::VmsaTran); else currState->fault = - new DataAbort(currState->vaddr, currState->l1Desc.domain(), - currState->isWrite, ArmFault::Translation1); + new DataAbort(currState->vaddr_tainted, currState->l1Desc.domain(), + currState->isWrite, ArmFault::TranslationLL + L2, + isStage2, + ArmFault::VmsaTran); return; } @@ -695,53 +1581,38 @@ TableWalker::doL2Descriptor() /** @todo: check sctlr.ha (bit[17]) if Hardware Access Flag is enabled * if set, do l2.Desc.setAp0() instead of generating AccessFlag0 */ + DPRINTF(TLB, "Generating access fault at L2, afe: %d, ap: %d\n", + currState->sctlr.afe, currState->l2Desc.ap()); currState->fault = - new DataAbort(currState->vaddr, 0, currState->isWrite, - ArmFault::AccessFlag1); - + new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + 
ArmFault::AccessFlagLL + L2, isStage2, + ArmFault::VmsaTran); } - if (currState->l2Desc.large()) { - te.N = 16; - te.pfn = currState->l2Desc.pfn(); - } else { - te.N = 12; - te.pfn = currState->l2Desc.pfn(); - } - - te.valid = true; - te.size = (1 << te.N) - 1; - te.asid = currState->contextId; - te.sNp = false; - te.vpn = currState->vaddr >> te.N; - te.global = currState->l2Desc.global(); - te.xn = currState->l2Desc.xn(); - te.ap = currState->l2Desc.ap(); - te.domain = currState->l1Desc.domain(); - memAttrs(currState->tc, te, currState->sctlr, currState->l2Desc.texcb(), - currState->l2Desc.shareable()); - - if (!currState->timing) { - currState->tc = NULL; - currState->req = NULL; - } - tlb->insert(currState->vaddr, te); + insertTableEntry(currState->l2Desc, false); } void TableWalker::doL1DescriptorWrapper() { - currState = stateQueueL1.front(); + currState = stateQueues[L1].front(); currState->delayed = false; + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } + DPRINTF(TLBVerbose, "L1 Desc object host addr: %p\n",&currState->l1Desc.data); DPRINTF(TLBVerbose, "L1 Desc object data: %08x\n",currState->l1Desc.data); - DPRINTF(TLBVerbose, "calling doL1Descriptor for vaddr:%#x\n", currState->vaddr); + DPRINTF(TLBVerbose, "calling doL1Descriptor for vaddr:%#x\n", currState->vaddr_tainted); doL1Descriptor(); - stateQueueL1.pop_front(); + stateQueues[L1].pop_front(); completeDrain(); // Check if fault was generated if (currState->fault != NoFault) { @@ -758,9 +1629,12 @@ TableWalker::doL1DescriptorWrapper() } else if (!currState->delayed) { // delay is not set so there is no L2 to do - DPRINTF(TLBVerbose, "calling translateTiming again\n"); - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + 
DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, currState->tc, + currState->transState, currState->mode); + } pending = false; nextWalk(currState->tc); @@ -771,7 +1645,7 @@ TableWalker::doL1DescriptorWrapper() delete currState; } else { // need to do L2 descriptor - stateQueueL2.push_back(currState); + stateQueues[L2].push_back(currState); } currState = NULL; } @@ -779,11 +1653,16 @@ TableWalker::doL1DescriptorWrapper() void TableWalker::doL2DescriptorWrapper() { - currState = stateQueueL2.front(); + currState = stateQueues[L2].front(); assert(currState->delayed); + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } DPRINTF(TLBVerbose, "calling doL2Descriptor for vaddr:%#x\n", - currState->vaddr); + currState->vaddr_tainted); doL2Descriptor(); // Check if fault was generated @@ -792,13 +1671,16 @@ TableWalker::doL2DescriptorWrapper() currState->tc, currState->mode); } else { - DPRINTF(TLBVerbose, "calling translateTiming again\n"); - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, + currState->tc, currState->transState, currState->mode); + } } - stateQueueL2.pop_front(); + stateQueues[L2].pop_front(); completeDrain(); pending = false; nextWalk(currState->tc); @@ -812,13 +1694,234 @@ TableWalker::doL2DescriptorWrapper() } void +TableWalker::doL0LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L0); +} + +void +TableWalker::doL1LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L1); +} + +void +TableWalker::doL2LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L2); +} + +void 
+TableWalker::doL3LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L3); +} + +void +TableWalker::doLongDescriptorWrapper(LookupLevel curr_lookup_level) +{ + currState = stateQueues[curr_lookup_level].front(); + assert(curr_lookup_level == currState->longDesc.lookupLevel); + currState->delayed = false; + + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } + + DPRINTF(TLBVerbose, "calling doLongDescriptor for vaddr:%#x\n", + currState->vaddr_tainted); + doLongDescriptor(); + + stateQueues[curr_lookup_level].pop_front(); + + if (currState->fault != NoFault) { + // A fault was generated + currState->transState->finish(currState->fault, currState->req, + currState->tc, currState->mode); + + pending = false; + nextWalk(currState->tc); + + currState->req = NULL; + currState->tc = NULL; + currState->delayed = false; + delete currState; + } else if (!currState->delayed) { + // No additional lookups required + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, currState->tc, + currState->transState, + currState->mode); + } + + pending = false; + nextWalk(currState->tc); + + currState->req = NULL; + currState->tc = NULL; + currState->delayed = false; + delete currState; + } else { + if (curr_lookup_level >= MAX_LOOKUP_LEVELS - 1) + panic("Max. 
number of lookups already reached in table walk\n"); + // Need to perform additional lookups + stateQueues[currState->longDesc.lookupLevel].push_back(currState); + } + currState = NULL; +} + + +void TableWalker::nextWalk(ThreadContext *tc) { if (pendingQueue.size()) schedule(doProcessEvent, clockEdge(Cycles(1))); } +bool +TableWalker::fetchDescriptor(Addr descAddr, uint8_t *data, int numBytes, + Request::Flags flags, int queueIndex, Event *event, + void (TableWalker::*doDescriptor)()) +{ + bool isTiming = currState->timing; + + // do the requests for the page table descriptors have to go through the + // second stage MMU + if (currState->stage2Req) { + Fault fault; + flags = flags | TLB::MustBeOne; + + if (isTiming) { + Stage2MMU::Stage2Translation *tran = new + Stage2MMU::Stage2Translation(*stage2Mmu, data, event, + currState->vaddr); + currState->stage2Tran = tran; + stage2Mmu->readDataTimed(currState->tc, descAddr, tran, numBytes, + flags, masterId); + fault = tran->fault; + } else { + fault = stage2Mmu->readDataUntimed(currState->tc, + currState->vaddr, descAddr, data, numBytes, flags, masterId, + currState->functional); + } + + if (fault != NoFault) { + currState->fault = fault; + } + if (isTiming) { + if (queueIndex >= 0) { + DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[queueIndex].size()); + stateQueues[queueIndex].push_back(currState); + currState = NULL; + } + } else { + (this->*doDescriptor)(); + } + } else { + if (isTiming) { + port.dmaAction(MemCmd::ReadReq, descAddr, numBytes, event, data, + currState->tc->getCpuPtr()->clockPeriod(), flags); + if (queueIndex >= 0) { + DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[queueIndex].size()); + stateQueues[queueIndex].push_back(currState); + currState = NULL; + } + } else if (!currState->functional) { + port.dmaAction(MemCmd::ReadReq, descAddr, numBytes, NULL, data, + currState->tc->getCpuPtr()->clockPeriod(), flags); + 
(this->*doDescriptor)(); + } else { + RequestPtr req = new Request(descAddr, numBytes, flags, masterId); + req->taskId(ContextSwitchTaskId::DMA); + PacketPtr pkt = new Packet(req, MemCmd::ReadReq); + pkt->dataStatic(data); + port.sendFunctional(pkt); + (this->*doDescriptor)(); + delete req; + delete pkt; + } + } + return (isTiming); +} + +void +TableWalker::insertTableEntry(DescriptorBase &descriptor, bool longDescriptor) +{ + TlbEntry te; + // Create and fill a new page table entry + te.valid = true; + te.longDescFormat = longDescriptor; + te.isHyp = currState->isHyp; + te.asid = currState->asid; + te.vmid = currState->vmid; + te.N = descriptor.offsetBits(); + te.vpn = currState->vaddr >> te.N; + te.size = (1<<te.N) - 1; + te.pfn = descriptor.pfn(); + te.domain = descriptor.domain(); + te.lookupLevel = descriptor.lookupLevel; + te.ns = !descriptor.secure(haveSecurity, currState) || isStage2; + te.nstid = !currState->isSecure; + te.xn = descriptor.xn(); + if (currState->aarch64) + te.el = currState->el; + else + te.el = 1; + + // ASID has no meaning for stage 2 TLB entries, so mark all stage 2 entries + // as global + te.global = descriptor.global(currState) || isStage2; + if (longDescriptor) { + LongDescriptor lDescriptor = + dynamic_cast<LongDescriptor &>(descriptor); + + te.xn |= currState->xnTable; + te.pxn = currState->pxnTable || lDescriptor.pxn(); + if (isStage2) { + // this is actually the HAP field, but its stored in the same bit + // possitions as the AP field in a stage 1 translation. 
+ te.hap = lDescriptor.ap(); + } else { + te.ap = ((!currState->rwTable || descriptor.ap() >> 1) << 1) | + (currState->userTable && (descriptor.ap() & 0x1)); + } + if (currState->aarch64) + memAttrsAArch64(currState->tc, te, currState->longDesc.attrIndx(), + currState->longDesc.sh()); + else + memAttrsLPAE(currState->tc, te, lDescriptor); + } else { + te.ap = descriptor.ap(); + memAttrs(currState->tc, te, currState->sctlr, descriptor.texcb(), + descriptor.shareable()); + } + + // Debug output + DPRINTF(TLB, descriptor.dbgHeader().c_str()); + DPRINTF(TLB, " - N:%d pfn:%#x size:%#x global:%d valid:%d\n", + te.N, te.pfn, te.size, te.global, te.valid); + DPRINTF(TLB, " - vpn:%#x xn:%d pxn:%d ap:%d domain:%d asid:%d " + "vmid:%d hyp:%d nc:%d ns:%d\n", te.vpn, te.xn, te.pxn, + te.ap, static_cast<uint8_t>(te.domain), te.asid, te.vmid, te.isHyp, + te.nonCacheable, te.ns); + DPRINTF(TLB, " - domain from L%d desc:%d data:%#x\n", + descriptor.lookupLevel, static_cast<uint8_t>(descriptor.domain()), + descriptor.getRawData()); + + // Insert the entry into the TLB + tlb->insert(currState->vaddr, te); + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } +} ArmISA::TableWalker * ArmTableWalkerParams::create() @@ -826,3 +1929,17 @@ ArmTableWalkerParams::create() return new ArmISA::TableWalker(this); } +LookupLevel +TableWalker::toLookupLevel(uint8_t lookup_level_as_int) +{ + switch (lookup_level_as_int) { + case L1: + return L1; + case L2: + return L2; + case L3: + return L3; + default: + panic("Invalid lookup level conversion"); + } +} diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh index 23464f56d..4753fe6a0 100644 --- a/src/arch/arm/table_walker.hh +++ b/src/arch/arm/table_walker.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -35,6 +35,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED 
OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Giacomo Gabrielli */ #ifndef __ARCH_ARM_TABLE_WALKER_HH__ @@ -43,6 +44,7 @@ #include <list> #include "arch/arm/miscregs.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "dev/dma_device.hh" #include "mem/mem_object.hh" @@ -56,11 +58,39 @@ class ThreadContext; namespace ArmISA { class Translation; class TLB; +class Stage2MMU; class TableWalker : public MemObject { public: - struct L1Descriptor { + class WalkerState; + + class DescriptorBase { + public: + /** Current lookup level for this descriptor */ + LookupLevel lookupLevel; + + virtual Addr pfn() const = 0; + virtual TlbEntry::DomainType domain() const = 0; + virtual bool xn() const = 0; + virtual uint8_t ap() const = 0; + virtual bool global(WalkerState *currState) const = 0; + virtual uint8_t offsetBits() const = 0; + virtual bool secure(bool have_security, WalkerState *currState) const = 0; + virtual std::string dbgHeader() const = 0; + virtual uint64_t getRawData() const = 0; + virtual uint8_t texcb() const + { + panic("texcb() not implemented for this class\n"); + } + virtual bool shareable() const + { + panic("shareable() not implemented for this class\n"); + } + }; + + class L1Descriptor : public DescriptorBase { + public: /** Type of page table entry ARM DDI 0406B: B3-8*/ enum EntryType { Ignore, @@ -76,6 +106,27 @@ class TableWalker : public MemObject * written back to memory */ bool _dirty; + /** Default ctor */ + L1Descriptor() + { + lookupLevel = L1; + } + + virtual uint64_t getRawData() const + { + return (data); + } + + virtual std::string dbgHeader() const + { + return "Inserting Section Descriptor into TLB\n"; + } + + virtual uint8_t offsetBits() const + { + return 20; + } + EntryType type() const { return (EntryType)(data & 0x3); @@ -112,9 +163,9 @@ class TableWalker : public MemObject } /** Is the translation global (no asid used)? 
*/ - bool global() const + bool global(WalkerState *currState) const { - return bits(data, 17); + return !bits(data, 17); } /** Is the translation not allow execution? */ @@ -130,9 +181,9 @@ class TableWalker : public MemObject } /** Domain Client/Manager: ARM DDI 0406B: B3-31 */ - uint8_t domain() const + TlbEntry::DomainType domain() const { - return bits(data, 8, 5); + return static_cast<TlbEntry::DomainType>(bits(data, 8, 5)); } /** Address of L2 descriptor if it exists */ @@ -171,18 +222,70 @@ class TableWalker : public MemObject { return _dirty; } + + /** + * Returns true if this entry targets the secure physical address + * map. + */ + bool secure(bool have_security, WalkerState *currState) const + { + if (have_security) { + if (type() == PageTable) + return !bits(data, 3); + else + return !bits(data, 19); + } + return false; + } }; /** Level 2 page table descriptor */ - struct L2Descriptor { - + class L2Descriptor : public DescriptorBase { + public: /** The raw bits of the entry. */ - uint32_t data; + uint32_t data; + L1Descriptor *l1Parent; /** This entry has been modified (access flag set) and needs to be * written back to memory */ bool _dirty; + /** Default ctor */ + L2Descriptor() + { + lookupLevel = L2; + } + + L2Descriptor(L1Descriptor &parent) : l1Parent(&parent) + { + lookupLevel = L2; + } + + virtual uint64_t getRawData() const + { + return (data); + } + + virtual std::string dbgHeader() const + { + return "Inserting L2 Descriptor into TLB\n"; + } + + virtual TlbEntry::DomainType domain() const + { + return l1Parent->domain(); + } + + bool secure(bool have_security, WalkerState *currState) const + { + return l1Parent->secure(have_security, currState); + } + + virtual uint8_t offsetBits() const + { + return large() ? 16 : 12; + } + /** Is the entry invalid */ bool invalid() const { @@ -202,7 +305,7 @@ class TableWalker : public MemObject } /** Is the translation global (no asid used)? 
*/ - bool global() const + bool global(WalkerState *currState) const { return !bits(data, 11); } @@ -259,49 +362,329 @@ class TableWalker : public MemObject }; - protected: + /** Long-descriptor format (LPAE) */ + class LongDescriptor : public DescriptorBase { + public: + /** Descriptor type */ + enum EntryType { + Invalid, + Table, + Block, + Page + }; - /** - * A snooping DMA port that currently does nothing besides - * extending the DMA port to accept snoops without complaining. - */ - class SnoopingDmaPort : public DmaPort - { + /** The raw bits of the entry */ + uint64_t data; - protected: + /** This entry has been modified (access flag set) and needs to be + * written back to memory */ + bool _dirty; - virtual void recvTimingSnoopReq(PacketPtr pkt) - { } + virtual uint64_t getRawData() const + { + return (data); + } - virtual Tick recvAtomicSnoop(PacketPtr pkt) - { return 0; } + virtual std::string dbgHeader() const + { + if (type() == LongDescriptor::Page) { + assert(lookupLevel == L3); + return "Inserting Page descriptor into TLB\n"; + } else { + assert(lookupLevel < L3); + return "Inserting Block descriptor into TLB\n"; + } + } - virtual void recvFunctionalSnoop(PacketPtr pkt) - { } + /** + * Returns true if this entry targets the secure physical address + * map. + */ + bool secure(bool have_security, WalkerState *currState) const + { + assert(type() == Block || type() == Page); + return have_security && (currState->secureLookup && !bits(data, 5)); + } - virtual bool isSnooping() const { return true; } + /** True if the current lookup is performed in AArch64 state */ + bool aarch64; - public: + /** True if the granule size is 64 KB (AArch64 only) */ + bool largeGrain; - /** - * A snooping DMA port merely calls the construtor of the DMA - * port. 
- */ - SnoopingDmaPort(MemObject *dev, System *s) : - DmaPort(dev, s) - { } + /** Width of the granule size in bits */ + int grainSize; + + /** Return the descriptor type */ + EntryType type() const + { + switch (bits(data, 1, 0)) { + case 0x1: + // In AArch64 blocks are not allowed at L0 for the 4 KB granule + // and at L1 for the 64 KB granule + if (largeGrain) + return lookupLevel == L2 ? Block : Invalid; + return lookupLevel == L0 || lookupLevel == L3 ? Invalid : Block; + case 0x3: + return lookupLevel == L3 ? Page : Table; + default: + return Invalid; + } + } + + /** Return the bit width of the page/block offset */ + uint8_t offsetBits() const + { + assert(type() == Block || type() == Page); + if (largeGrain) { + if (type() == Block) + return 29 /* 512 MB */; + return 16 /* 64 KB */; // type() == Page + } else { + if (type() == Block) + return lookupLevel == L1 ? 30 /* 1 GB */ : 21 /* 2 MB */; + return 12 /* 4 KB */; // type() == Page + } + } + + /** Return the physical frame, bits shifted right */ + Addr pfn() const + { + if (aarch64) + return bits(data, 47, offsetBits()); + return bits(data, 39, offsetBits()); + } + + /** Return the complete physical address given a VA */ + Addr paddr(Addr va) const + { + int n = offsetBits(); + if (aarch64) + return mbits(data, 47, n) | mbits(va, n - 1, 0); + return mbits(data, 39, n) | mbits(va, n - 1, 0); + } + + /** Return the physical address of the entry */ + Addr paddr() const + { + if (aarch64) + return mbits(data, 47, offsetBits()); + return mbits(data, 39, offsetBits()); + } + + /** Return the address of the next page table */ + Addr nextTableAddr() const + { + assert(type() == Table); + if (aarch64) + return mbits(data, 47, grainSize); + else + return mbits(data, 39, 12); + } + + /** Return the address of the next descriptor */ + Addr nextDescAddr(Addr va) const + { + assert(type() == Table); + Addr pa = 0; + if (aarch64) { + int stride = grainSize - 3; + int va_lo = stride * (3 - (lookupLevel + 1)) + grainSize; + 
int va_hi = va_lo + stride - 1; + pa = nextTableAddr() | (bits(va, va_hi, va_lo) << 3); + } else { + if (lookupLevel == L1) + pa = nextTableAddr() | (bits(va, 29, 21) << 3); + else // lookupLevel == L2 + pa = nextTableAddr() | (bits(va, 20, 12) << 3); + } + return pa; + } + + /** Is execution allowed on this mapping? */ + bool xn() const + { + assert(type() == Block || type() == Page); + return bits(data, 54); + } + + /** Is privileged execution allowed on this mapping? (LPAE only) */ + bool pxn() const + { + assert(type() == Block || type() == Page); + return bits(data, 53); + } + + /** Contiguous hint bit. */ + bool contiguousHint() const + { + assert(type() == Block || type() == Page); + return bits(data, 52); + } + + /** Is the translation global (no asid used)? */ + bool global(WalkerState *currState) const + { + assert(currState && (type() == Block || type() == Page)); + if (!currState->aarch64 && (currState->isSecure && + !currState->secureLookup)) { + return false; // ARM ARM issue C B3.6.3 + } else if (currState->aarch64) { + if (currState->el == EL2 || currState->el == EL3) { + return true; // By default translations are treated as global + // in AArch64 EL2 and EL3 + } else if (currState->isSecure && !currState->secureLookup) { + return false; + } + } + return !bits(data, 11); + } + + /** Returns true if the access flag (AF) is set. 
*/ + bool af() const + { + assert(type() == Block || type() == Page); + return bits(data, 10); + } + + /** 2-bit shareability field */ + uint8_t sh() const + { + assert(type() == Block || type() == Page); + return bits(data, 9, 8); + } + + /** 2-bit access protection flags */ + uint8_t ap() const + { + assert(type() == Block || type() == Page); + // Long descriptors only support the AP[2:1] scheme + return bits(data, 7, 6); + } + + /** Read/write access protection flag */ + bool rw() const + { + assert(type() == Block || type() == Page); + return !bits(data, 7); + } + + /** User/privileged level access protection flag */ + bool user() const + { + assert(type() == Block || type() == Page); + return bits(data, 6); + } + + /** Return the AP bits as compatible with the AP[2:0] format. Utility + * function used to simplify the code in the TLB for performing + * permission checks. */ + static uint8_t ap(bool rw, bool user) + { + return ((!rw) << 2) | (user << 1); + } + + TlbEntry::DomainType domain() const + { + // Long-desc. format only supports Client domain + assert(type() == Block || type() == Page); + return TlbEntry::DomainType::Client; + } + + /** Attribute index */ + uint8_t attrIndx() const + { + assert(type() == Block || type() == Page); + return bits(data, 4, 2); + } + + /** Memory attributes, only used by stage 2 translations */ + uint8_t memAttr() const + { + assert(type() == Block || type() == Page); + return bits(data, 5, 2); + } + + /** Set access flag that this entry has been touched. Mark the entry as + * requiring a writeback, in the future. 
*/ + void setAf() + { + data |= 1 << 10; + _dirty = true; + } + + /** This entry needs to be written back to memory */ + bool dirty() const + { + return _dirty; + } + + /** Whether the subsequent levels of lookup are secure */ + bool secureTable() const + { + assert(type() == Table); + return !bits(data, 63); + } + + /** Two bit access protection flags for subsequent levels of lookup */ + uint8_t apTable() const + { + assert(type() == Table); + return bits(data, 62, 61); + } + + /** R/W protection flag for subsequent levels of lookup */ + uint8_t rwTable() const + { + assert(type() == Table); + return !bits(data, 62); + } + + /** User/privileged mode protection flag for subsequent levels of + * lookup */ + uint8_t userTable() const + { + assert(type() == Table); + return !bits(data, 61); + } + + /** Is execution allowed on subsequent lookup levels? */ + bool xnTable() const + { + assert(type() == Table); + return bits(data, 60); + } + + /** Is privileged execution allowed on subsequent lookup levels? 
*/ + bool pxnTable() const + { + assert(type() == Table); + return bits(data, 59); + } }; - struct WalkerState //: public SimObject + class WalkerState { + public: /** Thread context that we're doing the walk for */ ThreadContext *tc; + /** If the access is performed in AArch64 state */ + bool aarch64; + + /** Current exception level */ + ExceptionLevel el; + + /** Current physical address range in bits */ + int physAddrRange; + /** Request that is currently being serviced */ RequestPtr req; - /** Context ID that we're servicing the request under */ - uint8_t contextId; + /** ASID that we're servicing the request under */ + uint16_t asid; + uint8_t vmid; + bool isHyp; /** Translation state for delayed requests */ TLB::Translation *transState; @@ -309,14 +692,32 @@ class TableWalker : public MemObject /** The fault that we are going to return */ Fault fault; - /** The virtual address that is being translated */ + /** The virtual address that is being translated with tagging removed.*/ Addr vaddr; + /** The virtual address that is being translated */ + Addr vaddr_tainted; + /** Cached copy of the sctlr as it existed when translation began */ SCTLR sctlr; - /** Width of the base address held in TTRB0 */ - uint32_t N; + /** Cached copy of the scr as it existed when translation began */ + SCR scr; + + /** Cached copy of the cpsr as it existed when translation began */ + CPSR cpsr; + + /** Cached copy of the ttbcr as it existed when translation began. */ + TTBCR ttbcr; + + /** Cached copy of the htcr as it existed when translation began. */ + HTCR htcr; + + /** Cached copy of the hcr as it existed when translation began. */ + HCR hcr; + + /** Cached copy of the vtcr as it existed when translation began. */ + VTCR_t vtcr; /** If the access is a write */ bool isWrite; @@ -324,6 +725,28 @@ class TableWalker : public MemObject /** If the access is a fetch (for execution, and no-exec) must be checked?*/ bool isFetch; + /** If the access comes from the secure state. 
*/ + bool isSecure; + + /** Helper variables used to implement hierarchical access permissions + * when the long-desc. format is used (LPAE only) */ + bool secureLookup; + bool rwTable; + bool userTable; + bool xnTable; + bool pxnTable; + + /** Flag indicating if a second stage of lookup is required */ + bool stage2Req; + + /** Indicates whether the translation has been passed onto the second + * stage mmu, and no more work is required from the first stage. + */ + bool doingStage2; + + /** A pointer to the stage 2 translation that's in progress */ + TLB::Translation *stage2Tran; + /** If the mode is timing or atomic */ bool timing; @@ -333,10 +756,18 @@ class TableWalker : public MemObject /** Save mode for use in delayed response */ BaseTLB::Mode mode; + /** The translation type that has been requested */ + TLB::ArmTranslationType tranType; + + /** Short-format descriptors */ L1Descriptor l1Desc; L2Descriptor l2Desc; - /** Whether L1/L2 descriptor response is delayed in timing mode */ + /** Long-format descriptor (LPAE and AArch64) */ + LongDescriptor longDesc; + + /** Whether the response is delayed in timing mode due to additional + * lookups */ bool delayed; TableWalker *tableWalker; @@ -344,16 +775,48 @@ class TableWalker : public MemObject void doL1Descriptor(); void doL2Descriptor(); - std::string name() const {return tableWalker->name();} + void doLongDescriptor(); + + WalkerState(); + + std::string name() const { return tableWalker->name(); } }; + protected: + + /** + * A snooping DMA port that currently does nothing besides + * extending the DMA port to accept snoops without complaining. + */ + class SnoopingDmaPort : public DmaPort + { - /** Queue of requests that need processing first level translation */ - std::list<WalkerState *> stateQueueL1; + protected: - /** Queue of requests that have passed first level translation and - * require an additional level. 
*/ - std::list<WalkerState *> stateQueueL2; + virtual void recvTimingSnoopReq(PacketPtr pkt) + { } + + virtual Tick recvAtomicSnoop(PacketPtr pkt) + { return 0; } + + virtual void recvFunctionalSnoop(PacketPtr pkt) + { } + + virtual bool isSnooping() const { return true; } + + public: + + /** + * A snooping DMA port merely calls the constructor of the DMA + * port. + */ + SnoopingDmaPort(MemObject *dev, System *s) : + DmaPort(dev, s) + { } + }; + + /** Queues of requests for all the different lookup levels */ + std::list<WalkerState *> stateQueues[MAX_LOOKUP_LEVELS]; /** Queue of requests that have passed are waiting because the walker is * currently busy. */ @@ -366,6 +829,12 @@ class TableWalker : public MemObject /** If we're draining keep the drain event around until we're drained */ DrainManager *drainManager; + /** The MMU to forward second stage lookups to */ + Stage2MMU *stage2Mmu; + + /** Indicates whether this table walker is part of the stage 2 mmu */ + const bool isStage2; + /** TLB that is initiating these table walks */ TLB *tlb; @@ -384,8 +853,16 @@ class TableWalker : public MemObject * removed from the pendingQueue per cycle. 
*/ unsigned numSquashable; + /** Cached copies of system-level properties */ + bool haveSecurity; + bool _haveLPAE; + bool _haveVirtualization; + uint8_t physAddrRange; + bool _haveLargeAsid64; + ArmSystem *armSys; + public: - typedef ArmTableWalkerParams Params; + typedef ArmTableWalkerParams Params; TableWalker(const Params *p); virtual ~TableWalker(); @@ -395,38 +872,90 @@ class TableWalker : public MemObject return dynamic_cast<const Params *>(_params); } + bool haveLPAE() const { return _haveLPAE; } + bool haveVirtualization() const { return _haveVirtualization; } + bool haveLargeAsid64() const { return _haveLargeAsid64; } /** Checks if all state is cleared and if so, completes drain */ void completeDrain(); unsigned int drain(DrainManager *dm); - void drainResume(); + virtual void drainResume(); virtual BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx = InvalidPortID); - Fault walk(RequestPtr req, ThreadContext *tc, uint8_t cid, TLB::Mode mode, - TLB::Translation *_trans, bool timing, bool functional = false); + /** + * Allow the MMU (overseeing both stage 1 and stage 2 TLBs) to + * access the table walker port through the TLB so that it can + * orchestrate staged translations. 
+ * + * @return Our DMA port + */ + DmaPort& getWalkerPort() { return port; } + + Fault walk(RequestPtr req, ThreadContext *tc, uint16_t asid, uint8_t _vmid, + bool _isHyp, TLB::Mode mode, TLB::Translation *_trans, + bool timing, bool functional, bool secure, + TLB::ArmTranslationType tranType); void setTlb(TLB *_tlb) { tlb = _tlb; } + TLB* getTlb() { return tlb; } + void setMMU(Stage2MMU *m) { stage2Mmu = m; } void memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, uint8_t texcb, bool s); + void memAttrsLPAE(ThreadContext *tc, TlbEntry &te, + LongDescriptor &lDescriptor); + void memAttrsAArch64(ThreadContext *tc, TlbEntry &te, uint8_t attrIndx, + uint8_t sh); + + static LookupLevel toLookupLevel(uint8_t lookup_level_as_int); private: void doL1Descriptor(); void doL1DescriptorWrapper(); - EventWrapper<TableWalker, &TableWalker::doL1DescriptorWrapper> doL1DescEvent; + EventWrapper<TableWalker, + &TableWalker::doL1DescriptorWrapper> doL1DescEvent; void doL2Descriptor(); void doL2DescriptorWrapper(); - EventWrapper<TableWalker, &TableWalker::doL2DescriptorWrapper> doL2DescEvent; + EventWrapper<TableWalker, + &TableWalker::doL2DescriptorWrapper> doL2DescEvent; + + void doLongDescriptor(); + + void doL0LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL0LongDescriptorWrapper> doL0LongDescEvent; + void doL1LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL1LongDescriptorWrapper> doL1LongDescEvent; + void doL2LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL2LongDescriptorWrapper> doL2LongDescEvent; + void doL3LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL3LongDescriptorWrapper> doL3LongDescEvent; + + void doLongDescriptorWrapper(LookupLevel curr_lookup_level); + + bool fetchDescriptor(Addr descAddr, uint8_t *data, int numBytes, + Request::Flags flags, int queueIndex, Event *event, + void (TableWalker::*doDescriptor)()); + + void insertTableEntry(DescriptorBase &descriptor, 
bool longDescriptor); Fault processWalk(); + Fault processWalkLPAE(); + static unsigned adjustTableSizeAArch64(unsigned tsz); + /// Returns true if the address exceeds the range permitted by the + /// system-wide setting or by the TCR_ELx IPS/PS setting + static bool checkAddrSizeFaultAArch64(Addr addr, int currPhysAddrRange); + Fault processWalkAArch64(); void processWalkWrapper(); EventWrapper<TableWalker, &TableWalker::processWalkWrapper> doProcessEvent; void nextWalk(ThreadContext *tc); }; - } // namespace ArmISA #endif //__ARCH_ARM_TABLE_WALKER_HH__ diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index 805898576..037f7490e 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -49,6 +49,8 @@ #include "arch/arm/pagetable.hh" #include "arch/arm/system.hh" #include "arch/arm/table_walker.hh" +#include "arch/arm/stage2_lookup.hh" +#include "arch/arm/stage2_mmu.hh" #include "arch/arm/tlb.hh" #include "arch/arm/utility.hh" #include "base/inifile.hh" @@ -67,28 +69,51 @@ using namespace std; using namespace ArmISA; -TLB::TLB(const Params *p) - : BaseTLB(p), size(p->size) , tableWalker(p->walker), - rangeMRU(1), bootUncacheability(false), miscRegValid(false) +TLB::TLB(const ArmTLBParams *p) + : BaseTLB(p), table(new TlbEntry[p->size]), size(p->size), + isStage2(p->is_stage2), tableWalker(p->walker), stage2Tlb(NULL), + stage2Mmu(NULL), rangeMRU(1), bootUncacheability(false), + miscRegValid(false), curTranType(NormalTran) { - table = new TlbEntry[size]; - memset(table, 0, sizeof(TlbEntry) * size); - tableWalker->setTlb(this); + + // Cache system-level properties + haveLPAE = tableWalker->haveLPAE(); + haveVirtualization = tableWalker->haveVirtualization(); + haveLargeAsid64 = tableWalker->haveLargeAsid64(); } TLB::~TLB() { - if (table) - delete [] table; + delete[] 
table; +} + +void +TLB::init() +{ + if (stage2Mmu && !isStage2) + stage2Tlb = stage2Mmu->stage2Tlb(); +} + +void +TLB::setMMU(Stage2MMU *m) +{ + stage2Mmu = m; + tableWalker->setMMU(m); } bool TLB::translateFunctional(ThreadContext *tc, Addr va, Addr &pa) { - if (!miscRegValid) - updateMiscReg(tc); - TlbEntry *e = lookup(va, contextId, true); + updateMiscReg(tc); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateFunctional(tc, va, pa); + } + + TlbEntry *e = lookup(va, asid, vmid, isHyp, isSecure, true, false, + aarch64 ? aarch64EL : EL1); if (!e) return false; pa = e->pAddr(va); @@ -102,22 +127,24 @@ TLB::finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const } TlbEntry* -TLB::lookup(Addr va, uint8_t cid, bool functional) +TLB::lookup(Addr va, uint16_t asn, uint8_t vmid, bool hyp, bool secure, + bool functional, bool ignore_asn, uint8_t target_el) { TlbEntry *retval = NULL; - // Maitaining LRU array - + // Maintaining LRU array int x = 0; while (retval == NULL && x < size) { - if (table[x].match(va, cid)) { - - // We only move the hit entry ahead when the position is higher than rangeMRU + if ((!ignore_asn && table[x].match(va, asn, vmid, hyp, secure, false, + target_el)) || + (ignore_asn && table[x].match(va, vmid, hyp, secure, target_el))) { + // We only move the hit entry ahead when the position is higher + // than rangeMRU if (x > rangeMRU && !functional) { TlbEntry tmp_entry = table[x]; for(int i = x; i > 0; i--) - table[i] = table[i-1]; + table[i] = table[i - 1]; table[0] = tmp_entry; retval = &table[0]; } else { @@ -125,14 +152,19 @@ TLB::lookup(Addr va, uint8_t cid, bool functional) } break; } - x++; + ++x; } - DPRINTF(TLBVerbose, "Lookup %#x, cid %#x -> %s ppn %#x size: %#x pa: %#x ap:%d\n", - va, cid, retval ? "hit" : "miss", retval ? retval->pfn : 0, - retval ? retval->size : 0, retval ? retval->pAddr(va) : 0, - retval ? 
retval->ap : 0); - ; + DPRINTF(TLBVerbose, "Lookup %#x, asn %#x -> %s vmn 0x%x hyp %d secure %d " + "ppn %#x size: %#x pa: %#x ap:%d ns:%d nstid:%d g:%d asid: %d " + "el: %d\n", + va, asn, retval ? "hit" : "miss", vmid, hyp, secure, + retval ? retval->pfn : 0, retval ? retval->size : 0, + retval ? retval->pAddr(va) : 0, retval ? retval->ap : 0, + retval ? retval->ns : 0, retval ? retval->nstid : 0, + retval ? retval->global : 0, retval ? retval->asid : 0, + retval ? retval->el : 0, retval ? retval->el : 0); + return retval; } @@ -141,122 +173,176 @@ void TLB::insert(Addr addr, TlbEntry &entry) { DPRINTF(TLB, "Inserting entry into TLB with pfn:%#x size:%#x vpn: %#x" - " asid:%d N:%d global:%d valid:%d nc:%d sNp:%d xn:%d ap:%#x" - " domain:%#x\n", entry.pfn, entry.size, entry.vpn, entry.asid, - entry.N, entry.global, entry.valid, entry.nonCacheable, entry.sNp, - entry.xn, entry.ap, entry.domain); - - if (table[size-1].valid) - DPRINTF(TLB, " - Replacing Valid entry %#x, asn %d ppn %#x size: %#x ap:%d\n", + " asid:%d vmid:%d N:%d global:%d valid:%d nc:%d xn:%d" + " ap:%#x domain:%#x ns:%d nstid:%d isHyp:%d\n", entry.pfn, + entry.size, entry.vpn, entry.asid, entry.vmid, entry.N, + entry.global, entry.valid, entry.nonCacheable, entry.xn, + entry.ap, static_cast<uint8_t>(entry.domain), entry.ns, entry.nstid, + entry.isHyp); + + if (table[size - 1].valid) + DPRINTF(TLB, " - Replacing Valid entry %#x, asn %d vmn %d ppn %#x " + "size: %#x ap:%d ns:%d nstid:%d g:%d isHyp:%d el: %d\n", table[size-1].vpn << table[size-1].N, table[size-1].asid, - table[size-1].pfn << table[size-1].N, table[size-1].size, - table[size-1].ap); + table[size-1].vmid, table[size-1].pfn << table[size-1].N, + table[size-1].size, table[size-1].ap, table[size-1].ns, + table[size-1].nstid, table[size-1].global, table[size-1].isHyp, + table[size-1].el); //inserting to MRU position and evicting the LRU one - for(int i = size-1; i > 0; i--) - table[i] = table[i-1]; + for (int i = size - 1; i > 0; --i) + 
table[i] = table[i-1]; table[0] = entry; inserts++; } void -TLB::printTlb() +TLB::printTlb() const { int x = 0; TlbEntry *te; DPRINTF(TLB, "Current TLB contents:\n"); while (x < size) { - te = &table[x]; - if (te->valid) - DPRINTF(TLB, " * %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - x++; + te = &table[x]; + if (te->valid) + DPRINTF(TLB, " * %s\n", te->print()); + ++x; } } - void -TLB::flushAll() +TLB::flushAllSecurity(bool secure_lookup, uint8_t target_el, bool ignore_el) { - DPRINTF(TLB, "Flushing all TLB entries\n"); + DPRINTF(TLB, "Flushing all TLB entries (%s lookup)\n", + (secure_lookup ? "secure" : "non-secure")); int x = 0; TlbEntry *te; while (x < size) { - te = &table[x]; - if (te->valid) { - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - flushedEntries++; - } - x++; - } + te = &table[x]; + if (te->valid && secure_lookup == !te->nstid && + (te->vmid == vmid || secure_lookup) && + checkELMatch(target_el, te->el, ignore_el)) { - memset(table, 0, sizeof(TlbEntry) * size); + DPRINTF(TLB, " - %s\n", te->print()); + te->valid = false; + flushedEntries++; + } + ++x; + } flushTlb++; -} + // If there's a second stage TLB (and we're not it) then flush it as well + // if we're currently in hyp mode + if (!isStage2 && isHyp) { + stage2Tlb->flushAllSecurity(secure_lookup, true); + } +} void -TLB::flushMvaAsid(Addr mva, uint64_t asn) +TLB::flushAllNs(bool hyp, uint8_t target_el, bool ignore_el) { - DPRINTF(TLB, "Flushing mva %#x asid: %#x\n", mva, asn); + DPRINTF(TLB, "Flushing all NS TLB entries (%s lookup)\n", + (hyp ? 
"hyp" : "non-hyp")); + int x = 0; TlbEntry *te; + while (x < size) { + te = &table[x]; + if (te->valid && te->nstid && te->isHyp == hyp && + checkELMatch(target_el, te->el, ignore_el)) { - te = lookup(mva, asn); - while (te != NULL) { - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - te->valid = false; - flushedEntries++; - te = lookup(mva,asn); + DPRINTF(TLB, " - %s\n", te->print()); + flushedEntries++; + te->valid = false; + } + ++x; + } + + flushTlb++; + + // If there's a second stage TLB (and we're not it) then flush it as well + if (!isStage2 && !hyp) { + stage2Tlb->flushAllNs(false, true); } +} + +void +TLB::flushMvaAsid(Addr mva, uint64_t asn, bool secure_lookup, uint8_t target_el) +{ + DPRINTF(TLB, "Flushing TLB entries with mva: %#x, asid: %#x " + "(%s lookup)\n", mva, asn, (secure_lookup ? + "secure" : "non-secure")); + _flushMva(mva, asn, secure_lookup, false, false, target_el); flushTlbMvaAsid++; } void -TLB::flushAsid(uint64_t asn) +TLB::flushAsid(uint64_t asn, bool secure_lookup, uint8_t target_el) { - DPRINTF(TLB, "Flushing all entries with asid: %#x\n", asn); + DPRINTF(TLB, "Flushing TLB entries with asid: %#x (%s lookup)\n", asn, + (secure_lookup ? 
"secure" : "non-secure")); - int x = 0; + int x = 0 ; TlbEntry *te; while (x < size) { te = &table[x]; - if (te->asid == asn) { + if (te->valid && te->asid == asn && secure_lookup == !te->nstid && + (te->vmid == vmid || secure_lookup) && + checkELMatch(target_el, te->el, false)) { + te->valid = false; - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); + DPRINTF(TLB, " - %s\n", te->print()); flushedEntries++; } - x++; + ++x; } flushTlbAsid++; } void -TLB::flushMva(Addr mva) +TLB::flushMva(Addr mva, bool secure_lookup, bool hyp, uint8_t target_el) { - DPRINTF(TLB, "Flushing all entries with mva: %#x\n", mva); + DPRINTF(TLB, "Flushing TLB entries with mva: %#x (%s lookup)\n", mva, + (secure_lookup ? "secure" : "non-secure")); + _flushMva(mva, 0xbeef, secure_lookup, hyp, true, target_el); + flushTlbMva++; +} - int x = 0; +void +TLB::_flushMva(Addr mva, uint64_t asn, bool secure_lookup, bool hyp, + bool ignore_asn, uint8_t target_el) +{ TlbEntry *te; - - while (x < size) { - te = &table[x]; - Addr v = te->vpn << te->N; - if (mva >= v && mva < v + te->size) { + // D5.7.2: Sign-extend address to 64 bits + mva = sext<56>(mva); + te = lookup(mva, asn, vmid, hyp, secure_lookup, false, ignore_asn, + target_el); + while (te != NULL) { + if (secure_lookup == !te->nstid) { + DPRINTF(TLB, " - %s\n", te->print()); te->valid = false; - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); flushedEntries++; } - x++; + te = lookup(mva, asn, vmid, hyp, secure_lookup, false, ignore_asn, + target_el); } - flushTlbMva++; +} + +bool +TLB::checkELMatch(uint8_t target_el, uint8_t tentry_el, bool ignore_el) +{ + bool elMatch = true; + if (!ignore_el) { + if (target_el == 2 || target_el == 3) { + elMatch = (tentry_el == target_el); + } else { + elMatch = (tentry_el == 0) || (tentry_el == 1); + } + } + return elMatch; } void @@ -273,6 +359,10 @@ 
TLB::serialize(ostream &os) DPRINTF(Checkpoint, "Serializing Arm TLB\n"); SERIALIZE_SCALAR(_attr); + SERIALIZE_SCALAR(haveLPAE); + SERIALIZE_SCALAR(directToStage2); + SERIALIZE_SCALAR(stage2Req); + SERIALIZE_SCALAR(bootUncacheability); int num_entries = size; SERIALIZE_SCALAR(num_entries); @@ -288,6 +378,11 @@ TLB::unserialize(Checkpoint *cp, const string &section) DPRINTF(Checkpoint, "Unserializing Arm TLB\n"); UNSERIALIZE_SCALAR(_attr); + UNSERIALIZE_SCALAR(haveLPAE); + UNSERIALIZE_SCALAR(directToStage2); + UNSERIALIZE_SCALAR(stage2Req); + UNSERIALIZE_SCALAR(bootUncacheability); + int num_entries; UNSERIALIZE_SCALAR(num_entries); for(int i = 0; i < min(size, num_entries); i++){ @@ -413,11 +508,15 @@ TLB::regStats() Fault TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, - Translation *translation, bool &delay, bool timing) + Translation *translation, bool &delay, bool timing) { - if (!miscRegValid) - updateMiscReg(tc); - Addr vaddr = req->getVaddr(); + updateMiscReg(tc); + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + if (aarch64) + vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + else + vaddr = vaddr_tainted; uint32_t flags = req->getFlags(); bool is_fetch = (mode == Execute); @@ -426,8 +525,12 @@ TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, if (!is_fetch) { assert(flags & MustBeOne); if (sctlr.a || !(flags & AllowUnaligned)) { - if (vaddr & flags & AlignmentMask) { - return new DataAbort(vaddr, 0, is_write, ArmFault::AlignmentFault); + if (vaddr & mask(flags & AlignmentMask)) { + // LPAE is always disabled in SE mode + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + ArmFault::VmsaTran); } } } @@ -436,56 +539,411 @@ TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, Process *p = tc->getProcessPtr(); if (!p->pTable->translate(vaddr, paddr)) - return Fault(new GenericPageTableFault(vaddr)); + return Fault(new 
GenericPageTableFault(vaddr_tainted)); req->setPaddr(paddr); return NoFault; } Fault -TLB::trickBoxCheck(RequestPtr req, Mode mode, uint8_t domain, bool sNp) +TLB::trickBoxCheck(RequestPtr req, Mode mode, TlbEntry::DomainType domain) { return NoFault; } Fault -TLB::walkTrickBoxCheck(Addr pa, Addr va, Addr sz, bool is_exec, - bool is_write, uint8_t domain, bool sNp) +TLB::walkTrickBoxCheck(Addr pa, bool is_secure, Addr va, Addr sz, bool is_exec, + bool is_write, TlbEntry::DomainType domain, LookupLevel lookup_level) +{ + return NoFault; +} + +Fault +TLB::checkPermissions(TlbEntry *te, RequestPtr req, Mode mode) +{ + Addr vaddr = req->getVaddr(); // 32-bit don't have to purify + uint32_t flags = req->getFlags(); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool is_priv = isPriv && !(flags & UserMode); + + // Get the translation type from the actual table entry + ArmFault::TranMethod tranMethod = te->longDescFormat ? ArmFault::LpaeTran + : ArmFault::VmsaTran; + + // If this is the second stage of translation and the request is for a + // stage 1 page table walk then we need to check the HCR.PTW bit. This + // allows us to generate a fault if the request targets an area marked + // as a device or strongly ordered. + if (isStage2 && req->isPTWalk() && hcr.ptw && + (te->mtype != TlbEntry::MemoryType::Normal)) { + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, tranMethod); + } + + // Generate an alignment fault for unaligned data accesses to device or + // strongly ordered memory + if (!is_fetch) { + if (te->mtype != TlbEntry::MemoryType::Normal) { + if (vaddr & mask(flags & AlignmentMask)) { + alignFaults++; + return new DataAbort(vaddr, TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); + } + } + } + + if (te->nonCacheable) { + // Prevent prefetching from I/O devices. 
+ if (req->isPrefetch()) { + // Here we can safely use the fault status for the short + // desc. format in all cases + return new PrefetchAbort(vaddr, ArmFault::PrefetchUncacheable, + isStage2, tranMethod); + } + } + + if (!te->longDescFormat) { + switch ((dacr >> (static_cast<uint8_t>(te->domain) * 2)) & 0x3) { + case 0: + domainFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on domain. DACR: %#x" + " domain: %#x write:%d\n", dacr, + static_cast<uint8_t>(te->domain), is_write); + if (is_fetch) + return new PrefetchAbort(vaddr, + ArmFault::DomainLL + te->lookupLevel, + isStage2, tranMethod); + else + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::DomainLL + te->lookupLevel, + isStage2, tranMethod); + case 1: + // Continue with permissions check + break; + case 2: + panic("UNPRED domain\n"); + case 3: + return NoFault; + } + } + + // The 'ap' variable is AP[2:0] or {AP[2:1],1'b0}, i.e. always three bits + uint8_t ap = te->longDescFormat ? te->ap << 1 : te->ap; + uint8_t hap = te->hap; + + if (sctlr.afe == 1 || te->longDescFormat) + ap |= 1; + + bool abt; + bool isWritable = true; + // If this is a stage 2 access (eg for reading stage 1 page table entries) + // then don't perform the AP permissions check, we still do the HAP check + // below. 
+ if (isStage2) { + abt = false; + } else { + switch (ap) { + case 0: + DPRINTF(TLB, "Access permissions 0, checking rs:%#x\n", + (int)sctlr.rs); + if (!sctlr.xp) { + switch ((int)sctlr.rs) { + case 2: + abt = is_write; + break; + case 1: + abt = is_write || !is_priv; + break; + case 0: + case 3: + default: + abt = true; + break; + } + } else { + abt = true; + } + break; + case 1: + abt = !is_priv; + break; + case 2: + abt = !is_priv && is_write; + isWritable = is_priv; + break; + case 3: + abt = false; + break; + case 4: + panic("UNPRED premissions\n"); + case 5: + abt = !is_priv || is_write; + isWritable = false; + break; + case 6: + case 7: + abt = is_write; + isWritable = false; + break; + default: + panic("Unknown permissions %#x\n", ap); + } + } + + bool hapAbt = is_write ? !(hap & 2) : !(hap & 1); + bool xn = te->xn || (isWritable && sctlr.wxn) || + (ap == 3 && sctlr.uwxn && is_priv); + if (is_fetch && (abt || xn || + (te->longDescFormat && te->pxn && !is_priv) || + (isSecure && te->ns && scr.sif))) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. AP:%d " + "priv:%d write:%d ns:%d sif:%d sctlr.afe: %d \n", + ap, is_priv, is_write, te->ns, scr.sif,sctlr.afe); + return new PrefetchAbort(vaddr, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, tranMethod); + } else if (abt | hapAbt) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on permission check. 
AP:%d priv:%d" + " write:%d\n", ap, is_priv, is_write); + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2 | !abt, tranMethod); + } + return NoFault; +} + + +Fault +TLB::checkPermissions64(TlbEntry *te, RequestPtr req, Mode mode, + ThreadContext *tc) { + assert(aarch64); + + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + + uint32_t flags = req->getFlags(); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool is_priv M5_VAR_USED = isPriv && !(flags & UserMode); + + updateMiscReg(tc, curTranType); + + // If this is the second stage of translation and the request is for a + // stage 1 page table walk then we need to check the HCR.PTW bit. This + // allows us to generate a fault if the request targets an area marked + // as a device or strongly ordered. + if (isStage2 && req->isPTWalk() && hcr.ptw && + (te->mtype != TlbEntry::MemoryType::Normal)) { + return new DataAbort(vaddr_tainted, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } + + // Generate an alignment fault for unaligned accesses to device or + // strongly ordered memory + if (!is_fetch) { + if (te->mtype != TlbEntry::MemoryType::Normal) { + if (vaddr & mask(flags & AlignmentMask)) { + alignFaults++; + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + ArmFault::LpaeTran); + } + } + } + + if (te->nonCacheable) { + // Prevent prefetching from I/O devices. + if (req->isPrefetch()) { + // Here we can safely use the fault status for the short + // desc. 
format in all cases + return new PrefetchAbort(vaddr_tainted, + ArmFault::PrefetchUncacheable, + isStage2, ArmFault::LpaeTran); + } + } + + uint8_t ap = 0x3 & (te->ap); // 2-bit access protection field + bool grant = false; + + uint8_t xn = te->xn; + uint8_t pxn = te->pxn; + bool r = !is_write && !is_fetch; + bool w = is_write; + bool x = is_fetch; + DPRINTF(TLBVerbose, "Checking permissions: ap:%d, xn:%d, pxn:%d, r:%d, " + "w:%d, x:%d\n", ap, xn, pxn, r, w, x); + + if (isStage2) { + panic("Virtualization in AArch64 state is not supported yet"); + } else { + switch (aarch64EL) { + case EL0: + { + uint8_t perm = (ap << 2) | (xn << 1) | pxn; + switch (perm) { + case 0: + case 1: + case 8: + case 9: + grant = x; + break; + case 4: + case 5: + grant = r || w || (x && !sctlr.wxn); + break; + case 6: + case 7: + grant = r || w; + break; + case 12: + case 13: + grant = r || x; + break; + case 14: + case 15: + grant = r; + break; + default: + grant = false; + } + } + break; + case EL1: + { + uint8_t perm = (ap << 2) | (xn << 1) | pxn; + switch (perm) { + case 0: + case 2: + grant = r || w || (x && !sctlr.wxn); + break; + case 1: + case 3: + case 4: + case 5: + case 6: + case 7: + // regions that are writeable at EL0 should not be + // executable at EL1 + grant = r || w; + break; + case 8: + case 10: + case 12: + case 14: + grant = r || x; + break; + case 9: + case 11: + case 13: + case 15: + grant = r; + break; + default: + grant = false; + } + } + break; + case EL2: + case EL3: + { + uint8_t perm = (ap & 0x2) | xn; + switch (perm) { + case 0: + grant = r || w || (x && !sctlr.wxn) ; + break; + case 1: + grant = r || w; + break; + case 2: + grant = r || x; + break; + case 3: + grant = r; + break; + default: + grant = false; + } + } + break; + } + } + + if (!grant) { + if (is_fetch) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. 
" + "AP:%d priv:%d write:%d ns:%d sif:%d " + "sctlr.afe: %d\n", + ap, is_priv, is_write, te->ns, scr.sif, sctlr.afe); + // Use PC value instead of vaddr because vaddr might be aligned to + // cache line and should not be the address reported in FAR + return new PrefetchAbort(req->getPC(), + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } else { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on permission check. AP:%d " + "priv:%d write:%d\n", ap, is_priv, is_write); + return new DataAbort(vaddr_tainted, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } + } + return NoFault; } Fault TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, - Translation *translation, bool &delay, bool timing, bool functional) + Translation *translation, bool &delay, bool timing, + TLB::ArmTranslationType tranType, bool functional) { // No such thing as a functional timing access assert(!(timing && functional)); - if (!miscRegValid) { - updateMiscReg(tc); - DPRINTF(TLBVerbose, "TLB variables changed!\n"); - } + updateMiscReg(tc, tranType); - Addr vaddr = req->getVaddr(); + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + if (aarch64) + vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + else + vaddr = vaddr_tainted; uint32_t flags = req->getFlags(); - bool is_fetch = (mode == Execute); - bool is_write = (mode == Write); - bool is_priv = isPriv && !(flags & UserMode); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool long_desc_format = aarch64 || (haveLPAE && ttbcr.eae); + ArmFault::TranMethod tranMethod = long_desc_format ? 
ArmFault::LpaeTran + : ArmFault::VmsaTran; + + req->setAsid(asid); - req->setAsid(contextId.asid); - if (is_priv) - req->setFlags(Request::PRIVILEGED); + DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d secure:%d S1S2NsTran:%d\n", + isPriv, flags & UserMode, isSecure, tranType & S1S2NsTran); - req->taskId(tc->getCpuPtr()->taskId()); + DPRINTF(TLB, "translateFs addr %#x, mode %d, st2 %d, scr %#x sctlr %#x " + "flags %#x tranType 0x%x\n", vaddr_tainted, mode, isStage2, + scr, sctlr, flags, tranType); + + // Generate an alignment fault for unaligned PC + if (aarch64 && is_fetch && (req->getPC() & mask(2))) { + return new PCAlignmentFault(req->getPC()); + } - DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n", - isPriv, flags & UserMode); // If this is a clrex instruction, provide a PA of 0 with no fault // This will force the monitor to set the tracked address to 0 // a bit of a hack but this effectively clrears this processors monitor if (flags & Request::CLEAR_LL){ + // @todo: check implications of security extensions req->setPaddr(0); req->setFlags(Request::UNCACHEABLE); req->setFlags(Request::CLEAR_LL); @@ -498,209 +956,139 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, if (!is_fetch) { assert(flags & MustBeOne); if (sctlr.a || !(flags & AllowUnaligned)) { - if (vaddr & flags & AlignmentMask) { + if (vaddr & mask(flags & AlignmentMask)) { alignFaults++; - return new DataAbort(vaddr, 0, is_write, ArmFault::AlignmentFault); + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); } } } - Fault fault; + // If guest MMU is off or hcr.vm=0 go straight to stage2 + if ((isStage2 && !hcr.vm) || (!isStage2 && !sctlr.m)) { - if (!sctlr.m) { req->setPaddr(vaddr); - if (sctlr.tre == 0) { + // When the MMU is off the security attribute corresponds to the + // security state of the processor + if (isSecure) + req->setFlags(Request::SECURE); + + // @todo: double check this (ARM ARM 
issue C B3.2.1) + if (long_desc_format || sctlr.tre == 0) { req->setFlags(Request::UNCACHEABLE); } else { if (nmrr.ir0 == 0 || nmrr.or0 == 0 || prrr.tr0 != 0x2) - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE); } // Set memory attributes TlbEntry temp_te; - tableWalker->memAttrs(tc, temp_te, sctlr, 0, 1); - temp_te.shareable = true; + temp_te.ns = !isSecure; + if (isStage2 || hcr.dc == 0 || isSecure || + (isHyp && !(tranType & S1CTran))) { + + temp_te.mtype = is_fetch ? TlbEntry::MemoryType::Normal + : TlbEntry::MemoryType::StronglyOrdered; + temp_te.innerAttrs = 0x0; + temp_te.outerAttrs = 0x0; + temp_te.shareable = true; + temp_te.outerShareable = true; + } else { + temp_te.mtype = TlbEntry::MemoryType::Normal; + temp_te.innerAttrs = 0x3; + temp_te.outerAttrs = 0x3; + temp_te.shareable = false; + temp_te.outerShareable = false; + } + temp_te.setAttributes(long_desc_format); DPRINTF(TLBVerbose, "(No MMU) setting memory attributes: shareable:\ - %d, innerAttrs: %d, outerAttrs: %d\n", temp_te.shareable, - temp_te.innerAttrs, temp_te.outerAttrs); + %d, innerAttrs: %d, outerAttrs: %d, isStage2: %d\n", + temp_te.shareable, temp_te.innerAttrs, temp_te.outerAttrs, + isStage2); setAttr(temp_te.attributes); - return trickBoxCheck(req, mode, 0, false); + return trickBoxCheck(req, mode, TlbEntry::DomainType::NoAccess); } - DPRINTF(TLBVerbose, "Translating vaddr=%#x context=%d\n", vaddr, contextId); + DPRINTF(TLBVerbose, "Translating %s=%#x context=%d\n", + isStage2 ? 
"IPA" : "VA", vaddr_tainted, asid); // Translation enabled - TlbEntry *te = lookup(vaddr, contextId); - if (te == NULL) { - if (req->isPrefetch()){ - //if the request is a prefetch don't attempt to fill the TLB - //or go any further with the memory access - prefetchFaults++; - return new PrefetchAbort(vaddr, ArmFault::PrefetchTLBMiss); - } - - if (is_fetch) - instMisses++; - else if (is_write) - writeMisses++; - else - readMisses++; + TlbEntry *te = NULL; + TlbEntry mergeTe; + Fault fault = getResultTe(&te, req, tc, mode, translation, timing, + functional, &mergeTe); + // only proceed if we have a valid table entry + if ((te == NULL) && (fault == NoFault)) delay = true; - // start translation table walk, pass variables rather than - // re-retreaving in table walker for speed - DPRINTF(TLB, "TLB Miss: Starting hardware table walker for %#x(%d)\n", - vaddr, contextId); - fault = tableWalker->walk(req, tc, contextId, mode, translation, - timing, functional); - if (timing && fault == NoFault) { - delay = true; - // for timing mode, return and wait for table walk - return fault; + // If we have the table entry transfer some of the attributes to the + // request that triggered the translation + if (te != NULL) { + // Set memory attributes + DPRINTF(TLBVerbose, + "Setting memory attributes: shareable: %d, innerAttrs: %d, \ + outerAttrs: %d, mtype: %d, isStage2: %d\n", + te->shareable, te->innerAttrs, te->outerAttrs, + static_cast<uint8_t>(te->mtype), isStage2); + setAttr(te->attributes); + if (te->nonCacheable) { + req->setFlags(Request::UNCACHEABLE); } - if (fault) - return fault; - - te = lookup(vaddr, contextId); - if (!te) - printTlb(); - assert(te); - } else { - if (is_fetch) - instHits++; - else if (is_write) - writeHits++; - else - readHits++; - } - - // Set memory attributes - DPRINTF(TLBVerbose, - "Setting memory attributes: shareable: %d, innerAttrs: %d, \ - outerAttrs: %d\n", - te->shareable, te->innerAttrs, te->outerAttrs); - setAttr(te->attributes); - if 
(te->nonCacheable) { - req->setFlags(Request::UNCACHEABLE); - // Prevent prefetching from I/O devices. - if (req->isPrefetch()) { - return new PrefetchAbort(vaddr, ArmFault::PrefetchUncacheable); + if (!bootUncacheability && + ((ArmSystem*)tc->getSystemPtr())->adderBootUncacheable(vaddr)) { + req->setFlags(Request::UNCACHEABLE); } - } - - if (!bootUncacheability && - ((ArmSystem*)tc->getSystemPtr())->adderBootUncacheable(vaddr)) - req->setFlags(Request::UNCACHEABLE); - switch ( (dacr >> (te->domain * 2)) & 0x3) { - case 0: - domainFaults++; - DPRINTF(TLB, "TLB Fault: Data abort on domain. DACR: %#x domain: %#x" - " write:%d sNp:%d\n", dacr, te->domain, is_write, te->sNp); - if (is_fetch) - return new PrefetchAbort(vaddr, - (te->sNp ? ArmFault::Domain0 : ArmFault::Domain1)); - else - return new DataAbort(vaddr, te->domain, is_write, - (te->sNp ? ArmFault::Domain0 : ArmFault::Domain1)); - case 1: - // Continue with permissions check - break; - case 2: - panic("UNPRED domain\n"); - case 3: req->setPaddr(te->pAddr(vaddr)); - fault = trickBoxCheck(req, mode, te->domain, te->sNp); - if (fault) - return fault; - return NoFault; - } - - uint8_t ap = te->ap; - - if (sctlr.afe == 1) - ap |= 1; - - bool abt; + if (isSecure && !te->ns) { + req->setFlags(Request::SECURE); + } + if ((!is_fetch) && (vaddr & mask(flags & AlignmentMask)) && + (te->mtype != TlbEntry::MemoryType::Normal)) { + // Unaligned accesses to Device memory should always cause an + // abort regardless of sctlr.a + alignFaults++; + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); + } - /* if (!sctlr.xp) - ap &= 0x3; -*/ - switch (ap) { - case 0: - DPRINTF(TLB, "Access permissions 0, checking rs:%#x\n", (int)sctlr.rs); - if (!sctlr.xp) { - switch ((int)sctlr.rs) { - case 2: - abt = is_write; - break; - case 1: - abt = is_write || !is_priv; - break; - case 0: - case 3: - default: - abt = true; - break; - } - } else { - abt = true; 
+ // Check for a trickbox generated address fault + if (fault == NoFault) { + fault = trickBoxCheck(req, mode, te->domain); } - break; - case 1: - abt = !is_priv; - break; - case 2: - abt = !is_priv && is_write; - break; - case 3: - abt = false; - break; - case 4: - panic("UNPRED premissions\n"); - case 5: - abt = !is_priv || is_write; - break; - case 6: - case 7: - abt = is_write; - break; - default: - panic("Unknown permissions\n"); - } - if ((is_fetch) && (abt || te->xn)) { - permsFaults++; - DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. AP:%d priv:%d" - " write:%d sNp:%d\n", ap, is_priv, is_write, te->sNp); - return new PrefetchAbort(vaddr, - (te->sNp ? ArmFault::Permission0 : - ArmFault::Permission1)); - } else if (abt) { - permsFaults++; - DPRINTF(TLB, "TLB Fault: Data abort on permission check. AP:%d priv:%d" - " write:%d sNp:%d\n", ap, is_priv, is_write, te->sNp); - return new DataAbort(vaddr, te->domain, is_write, - (te->sNp ? ArmFault::Permission0 : - ArmFault::Permission1)); } - req->setPaddr(te->pAddr(vaddr)); - // Check for a trickbox generated address fault - fault = trickBoxCheck(req, mode, te->domain, te->sNp); - if (fault) - return fault; + // Generate Illegal Inst Set State fault if IL bit is set in CPSR + if (fault == NoFault) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + if (aarch64 && is_fetch && cpsr.il == 1) { + return new IllegalInstSetStateFault(); + } + } - return NoFault; + return fault; } Fault -TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode) +TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, + TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateAtomic(req, tc, mode, tranType); + } + bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, NULL, delay, false); + fault = translateFs(req, tc, mode, NULL, delay, false, tranType); else fault = translateSe(req, tc, mode, 
NULL, delay, false); assert(!delay); @@ -708,13 +1096,21 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode) } Fault -TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode) +TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode, + TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateFunctional(req, tc, mode, tranType); + } + bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, NULL, delay, false, true); - else + fault = translateFs(req, tc, mode, NULL, delay, false, tranType, true); + else fault = translateSe(req, tc, mode, NULL, delay, false); assert(!delay); return fault; @@ -722,21 +1118,45 @@ TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode) Fault TLB::translateTiming(RequestPtr req, ThreadContext *tc, - Translation *translation, Mode mode) + Translation *translation, Mode mode, TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateTiming(req, tc, translation, mode, tranType); + } + assert(translation); + + return translateComplete(req, tc, translation, mode, tranType, isStage2); +} + +Fault +TLB::translateComplete(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, TLB::ArmTranslationType tranType, + bool callFromS2) +{ bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, translation, delay, true); + fault = translateFs(req, tc, mode, translation, delay, true, tranType); else fault = translateSe(req, tc, mode, translation, delay, true); DPRINTF(TLBVerbose, "Translation returning delay=%d fault=%d\n", delay, fault != NoFault); - if (!delay) - translation->finish(fault, req, tc, mode); - else - translation->markDelayed(); + // If we have a translation, and we're not in the middle of doing a stage + // 2 translation tell the 
translation that we've either finished or its + // going to take a while. By not doing this when we're in the middle of a + // stage 2 translation we prevent marking the translation as delayed twice, + // one when the translation starts and again when the stage 1 translation + // completes. + if (translation && (callFromS2 || !stage2Req || req->hasPaddr() || fault != NoFault)) { + if (!delay) + translation->finish(fault, req, tc, mode); + else + translation->markDelayed(); + } return fault; } @@ -746,7 +1166,229 @@ TLB::getMasterPort() return &tableWalker->getMasterPort("port"); } +DmaPort& +TLB::getWalkerPort() +{ + return tableWalker->getWalkerPort(); +} + +void +TLB::updateMiscReg(ThreadContext *tc, ArmTranslationType tranType) +{ + // check if the regs have changed, or the translation mode is different. + // NOTE: the tran type doesn't affect stage 2 TLB's as they only handle + // one type of translation anyway + if (miscRegValid && ((tranType == curTranType) || isStage2)) { + return; + } + + DPRINTF(TLBVerbose, "TLB variables changed!\n"); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + // Dependencies: SCR/SCR_EL3, CPSR + isSecure = inSecureState(tc); + isSecure &= (tranType & HypMode) == 0; + isSecure &= (tranType & S1S2NsTran) == 0; + aarch64 = !cpsr.width; + if (aarch64) { // AArch64 + aarch64EL = (ExceptionLevel) (uint8_t) cpsr.el; + switch (aarch64EL) { + case EL0: + case EL1: + { + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL1); + uint64_t ttbr_asid = ttbcr.a1 ? + tc->readMiscReg(MISCREG_TTBR1_EL1) : + tc->readMiscReg(MISCREG_TTBR0_EL1); + asid = bits(ttbr_asid, + (haveLargeAsid64 && ttbcr.as) ? 
63 : 55, 48); + } + break; + case EL2: + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL2); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL2); + asid = -1; + break; + case EL3: + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL3); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL3); + asid = -1; + break; + } + scr = tc->readMiscReg(MISCREG_SCR_EL3); + isPriv = aarch64EL != EL0; + // @todo: modify this behaviour to support Virtualization in + // AArch64 + vmid = 0; + isHyp = false; + directToStage2 = false; + stage2Req = false; + } else { // AArch32 + sctlr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_SCTLR, tc, + !isSecure)); + ttbcr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_TTBCR, tc, + !isSecure)); + scr = tc->readMiscReg(MISCREG_SCR); + isPriv = cpsr.mode != MODE_USER; + if (haveLPAE && ttbcr.eae) { + // Long-descriptor translation table format in use + uint64_t ttbr_asid = tc->readMiscReg( + flattenMiscRegNsBanked(ttbcr.a1 ? MISCREG_TTBR1 + : MISCREG_TTBR0, + tc, !isSecure)); + asid = bits(ttbr_asid, 55, 48); + } else { + // Short-descriptor translation table format in use + CONTEXTIDR context_id = tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_CONTEXTIDR, tc,!isSecure)); + asid = context_id.asid; + } + prrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_PRRR, tc, + !isSecure)); + nmrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_NMRR, tc, + !isSecure)); + dacr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_DACR, tc, + !isSecure)); + hcr = tc->readMiscReg(MISCREG_HCR); + + if (haveVirtualization) { + vmid = bits(tc->readMiscReg(MISCREG_VTTBR), 55, 48); + isHyp = cpsr.mode == MODE_HYP; + isHyp |= tranType & HypMode; + isHyp &= (tranType & S1S2NsTran) == 0; + isHyp &= (tranType & S1CTran) == 0; + if (isHyp) { + sctlr = tc->readMiscReg(MISCREG_HSCTLR); + } + // Work out if we should skip the first stage of translation and go + // directly to stage 2. This value is cached so we don't have to + // compute it for every translation. 
+ stage2Req = hcr.vm && !isStage2 && !isHyp && !isSecure && + !(tranType & S1CTran); + directToStage2 = stage2Req && !sctlr.m; + } else { + vmid = 0; + stage2Req = false; + isHyp = false; + directToStage2 = false; + } + } + miscRegValid = true; + curTranType = tranType; +} + +Fault +TLB::getTE(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + bool is_secure, TLB::ArmTranslationType tranType) +{ + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + ExceptionLevel target_el = aarch64 ? aarch64EL : EL1; + if (aarch64) { + vaddr = purifyTaggedAddr(vaddr_tainted, tc, target_el); + } else { + vaddr = vaddr_tainted; + } + *te = lookup(vaddr, asid, vmid, isHyp, is_secure, false, false, target_el); + if (*te == NULL) { + if (req->isPrefetch()) { + // if the request is a prefetch don't attempt to fill the TLB or go + // any further with the memory access (here we can safely use the + // fault status for the short desc. 
format in all cases) + prefetchFaults++; + return new PrefetchAbort(vaddr_tainted, ArmFault::PrefetchTLBMiss, isStage2); + } + + if (is_fetch) + instMisses++; + else if (is_write) + writeMisses++; + else + readMisses++; + + // start translation table walk, pass variables rather than + // re-retreaving in table walker for speed + DPRINTF(TLB, "TLB Miss: Starting hardware table walker for %#x(%d:%d)\n", + vaddr_tainted, asid, vmid); + Fault fault; + fault = tableWalker->walk(req, tc, asid, vmid, isHyp, mode, + translation, timing, functional, is_secure, + tranType); + // for timing mode, return and wait for table walk, + if (timing || fault != NoFault) { + return fault; + } + + *te = lookup(vaddr, asid, vmid, isHyp, is_secure, false, false, target_el); + if (!*te) + printTlb(); + assert(*te); + } else { + if (is_fetch) + instHits++; + else if (is_write) + writeHits++; + else + readHits++; + } + return NoFault; +} +Fault +TLB::getResultTe(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + TlbEntry *mergeTe) +{ + Fault fault; + TlbEntry *s1Te = NULL; + + Addr vaddr_tainted = req->getVaddr(); + + // Get the stage 1 table entry + fault = getTE(&s1Te, req, tc, mode, translation, timing, functional, + isSecure, curTranType); + // only proceed if we have a valid table entry + if ((s1Te != NULL) && (fault == NoFault)) { + // Check stage 1 permissions before checking stage 2 + if (aarch64) + fault = checkPermissions64(s1Te, req, mode, tc); + else + fault = checkPermissions(s1Te, req, mode); + if (stage2Req & (fault == NoFault)) { + Stage2LookUp *s2Lookup = new Stage2LookUp(this, stage2Tlb, *s1Te, + req, translation, mode, timing, functional, curTranType); + fault = s2Lookup->getTe(tc, mergeTe); + if (s2Lookup->isComplete()) { + *te = mergeTe; + // We've finished with the lookup so delete it + delete s2Lookup; + } else { + // The lookup hasn't completed, so we can't delete it now. 
We + // get round this by asking the object to self delete when the + // translation is complete. + s2Lookup->setSelfDelete(); + } + } else { + // This case deals with an S1 hit (or bypass), followed by + // an S2 hit-but-perms issue + if (isStage2) { + DPRINTF(TLBVerbose, "s2TLB: reqVa %#x, reqPa %#x, fault %p\n", + vaddr_tainted, req->hasPaddr() ? req->getPaddr() : ~0, fault); + if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, false); + armFault->annotate(ArmFault::OVA, vaddr_tainted); + } + } + *te = s1Te; + } + } + return fault; +} ArmISA::TLB * ArmTLBParams::create() diff --git a/src/arch/arm/tlb.hh b/src/arch/arm/tlb.hh index a66e28b06..ac8c672bf 100644 --- a/src/arch/arm/tlb.hh +++ b/src/arch/arm/tlb.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,13 +43,13 @@ #ifndef __ARCH_ARM_TLB_HH__ #define __ARCH_ARM_TLB_HH__ -#include <map> #include "arch/arm/isa_traits.hh" #include "arch/arm/pagetable.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" #include "base/statistics.hh" +#include "dev/dma_device.hh" #include "mem/request.hh" #include "params/ArmTLB.hh" #include "sim/fault_fwd.hh" @@ -60,36 +60,51 @@ class ThreadContext; namespace ArmISA { class TableWalker; +class Stage2LookUp; +class Stage2MMU; class TLB : public BaseTLB { public: enum ArmFlags { - AlignmentMask = 0x1f, + AlignmentMask = 0x7, AlignByte = 0x0, AlignHalfWord = 0x1, - AlignWord = 0x3, - AlignDoubleWord = 0x7, - AlignQuadWord = 0xf, - AlignOctWord = 0x1f, + AlignWord = 0x2, + AlignDoubleWord = 0x3, + AlignQuadWord = 0x4, + AlignOctWord = 0x5, - AllowUnaligned = 0x20, + AllowUnaligned = 0x8, // Priv code operating as if it wasn't - UserMode = 0x40, + UserMode = 0x10, // Because zero otherwise looks like a valid setting and may be used // 
accidentally, this bit must be non-zero to show it was used on // purpose. - MustBeOne = 0x80 + MustBeOne = 0x40 }; - protected: - - TlbEntry *table; // the Page Table - int size; // TLB Size - uint32_t _attr; // Memory attributes for last accessed TLB entry + enum ArmTranslationType { + NormalTran = 0, + S1CTran = 0x1, + HypMode = 0x2, + // Secure code operating as if it wasn't (required by some Address + // Translate operations) + S1S2NsTran = 0x4 + }; + protected: + TlbEntry* table; // the Page Table + int size; // TLB Size + bool isStage2; // Indicates this TLB is part of the second stage MMU + bool stage2Req; // Indicates whether a stage 2 lookup is also required + uint64_t _attr; // Memory attributes for last accessed TLB entry + bool directToStage2; // Indicates whether all translation requests should + // be routed directly to the stage 2 TLB TableWalker *tableWalker; + TLB *stage2Tlb; + Stage2MMU *stage2Mmu; // Access Stats mutable Stats::Scalar instHits; @@ -121,51 +136,101 @@ class TLB : public BaseTLB bool bootUncacheability; public: - typedef ArmTLBParams Params; - TLB(const Params *p); + TLB(const ArmTLBParams *p); + TLB(const Params *p, int _size, TableWalker *_walker); /** Lookup an entry in the TLB * @param vpn virtual address * @param asn context id/address space id to use + * @param vmid The virtual machine ID used for stage 2 translation + * @param secure if the lookup is secure + * @param hyp if the lookup is done from hyp mode * @param functional if the lookup should modify state - * @return pointer to TLB entrry if it exists + * @param ignore_asn if on lookup asn should be ignored + * @return pointer to TLB entry if it exists */ - TlbEntry *lookup(Addr vpn, uint8_t asn, bool functional = false); + TlbEntry *lookup(Addr vpn, uint16_t asn, uint8_t vmid, bool hyp, + bool secure, bool functional, + bool ignore_asn, uint8_t target_el); virtual ~TLB(); + + /// setup all the back pointers + virtual void init(); + + void setMMU(Stage2MMU *m); + int 
getsize() const { return size; } void insert(Addr vaddr, TlbEntry &pte); - /** Reset the entire TLB */ - void flushAll(); + Fault getTE(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + bool is_secure, ArmTranslationType tranType); + + Fault getResultTe(TlbEntry **te, RequestPtr req, ThreadContext *tc, + Mode mode, Translation *translation, bool timing, + bool functional, TlbEntry *mergeTe); + + Fault checkPermissions(TlbEntry *te, RequestPtr req, Mode mode); + Fault checkPermissions64(TlbEntry *te, RequestPtr req, Mode mode, + ThreadContext *tc); + + + /** Reset the entire TLB + * @param secure_lookup if the operation affects the secure world + */ + void flushAllSecurity(bool secure_lookup, uint8_t target_el, + bool ignore_el = false); + + /** Remove all entries in the non secure world, depending on whether they + * were allocated in hyp mode or not + * @param hyp if the opperation affects hyp mode + */ + void flushAllNs(bool hyp, uint8_t target_el, bool ignore_el = false); + + + /** Reset the entire TLB. 
Used for CPU switching to prevent stale + * translations after multiple switches + */ + void flushAll() + { + flushAllSecurity(false, 0, true); + flushAllSecurity(true, 0, true); + } /** Remove any entries that match both a va and asn * @param mva virtual address to flush * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world */ - void flushMvaAsid(Addr mva, uint64_t asn); + void flushMvaAsid(Addr mva, uint64_t asn, bool secure_lookup, + uint8_t target_el); /** Remove any entries that match the asn * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world */ - void flushAsid(uint64_t asn); + void flushAsid(uint64_t asn, bool secure_lookup, uint8_t target_el); /** Remove all entries that match the va regardless of asn * @param mva address to flush from cache + * @param secure_lookup if the operation affects the secure world + * @param hyp if the operation affects hyp mode */ - void flushMva(Addr mva); + void flushMva(Addr mva, bool secure_lookup, bool hyp, uint8_t target_el); - Fault trickBoxCheck(RequestPtr req, Mode mode, uint8_t domain, bool sNp); - Fault walkTrickBoxCheck(Addr pa, Addr va, Addr sz, bool is_exec, - bool is_write, uint8_t domain, bool sNp); + Fault trickBoxCheck(RequestPtr req, Mode mode, TlbEntry::DomainType domain); + Fault walkTrickBoxCheck(Addr pa, bool is_secure, Addr va, Addr sz, bool is_exec, + bool is_write, TlbEntry::DomainType domain, LookupLevel lookup_level); - void printTlb(); + void printTlb() const; void allCpusCaching() { bootUncacheability = true; } void demapPage(Addr vaddr, uint64_t asn) { - flushMvaAsid(vaddr, asn); + // needed for x86 only + panic("demapPage() is not implemented.\n"); } static bool validVirtualAddress(Addr vaddr); @@ -184,16 +249,18 @@ class TLB : public BaseTLB * Do a functional lookup on the TLB (for checker cpu) that * behaves like a normal lookup without modifying any page table state. 
*/ - Fault translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode); + Fault translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode, + ArmTranslationType tranType = NormalTran); /** Accessor functions for memory attributes for last accessed TLB entry */ void - setAttr(uint32_t attr) + setAttr(uint64_t attr) { _attr = attr; } - uint32_t + + uint64_t getAttr() const { return _attr; @@ -201,12 +268,17 @@ class TLB : public BaseTLB Fault translateFs(RequestPtr req, ThreadContext *tc, Mode mode, Translation *translation, bool &delay, - bool timing, bool functional = false); + bool timing, ArmTranslationType tranType, bool functional = false); Fault translateSe(RequestPtr req, ThreadContext *tc, Mode mode, Translation *translation, bool &delay, bool timing); - Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode); + Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, + ArmTranslationType tranType = NormalTran); Fault translateTiming(RequestPtr req, ThreadContext *tc, - Translation *translation, Mode mode); + Translation *translation, Mode mode, + ArmTranslationType tranType = NormalTran); + Fault translateComplete(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, ArmTranslationType tranType, + bool callFromS2); Fault finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const; void drainResume(); @@ -229,29 +301,45 @@ class TLB : public BaseTLB */ virtual BaseMasterPort* getMasterPort(); + /** + * Allow the MMU (overseeing both stage 1 and stage 2 TLBs) to + * access the table walker port of this TLB so that it can + * orchestrate staged translations. + * + * @return The table walker DMA port + */ + DmaPort& getWalkerPort(); + // Caching misc register values here. // Writing to misc registers needs to invalidate them. // translateFunctional/translateSe/translateFs checks if they are // invalid and call updateMiscReg if necessary. 
protected: + bool aarch64; + ExceptionLevel aarch64EL; SCTLR sctlr; + SCR scr; bool isPriv; - CONTEXTIDR contextId; + bool isSecure; + bool isHyp; + TTBCR ttbcr; + uint16_t asid; + uint8_t vmid; PRRR prrr; NMRR nmrr; + HCR hcr; uint32_t dacr; bool miscRegValid; - void updateMiscReg(ThreadContext *tc) - { - sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); - isPriv = cpsr.mode != MODE_USER; - contextId = tc->readMiscReg(MISCREG_CONTEXTIDR); - prrr = tc->readMiscReg(MISCREG_PRRR); - nmrr = tc->readMiscReg(MISCREG_NMRR); - dacr = tc->readMiscReg(MISCREG_DACR); - miscRegValid = true; - } + ArmTranslationType curTranType; + + // Cached copies of system-level properties + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + + void updateMiscReg(ThreadContext *tc, + ArmTranslationType tranType = NormalTran); + public: const Params * params() const @@ -259,6 +347,19 @@ public: return dynamic_cast<const Params *>(_params); } inline void invalidateMiscReg() { miscRegValid = false; } + +private: + /** Remove any entries that match both a va and asn + * @param mva virtual address to flush + * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world + * @param hyp if the operation affects hyp mode + * @param ignore_asn if the flush should ignore the asn + */ + void _flushMva(Addr mva, uint64_t asn, bool secure_lookup, + bool hyp, bool ignore_asn, uint8_t target_el); + + bool checkELMatch(uint8_t target_el, uint8_t tentry_el, bool ignore_el); }; } // namespace ArmISA diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index cd0b74b2d..7b736492b 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -82,6 +82,7 @@ namespace ArmISA // Bitfields to select mode. 
Bitfield<36> thumb; Bitfield<35> bigThumb; + Bitfield<34> aarch64; // Made up bitfields that make life easier. Bitfield<33> sevenAndFour; @@ -143,9 +144,9 @@ namespace ArmISA Bitfield<3, 0> immedLo3_0; Bitfield<15, 0> regList; - + Bitfield<23, 0> offset; - + Bitfield<23, 0> immed23_0; Bitfield<11, 8> cpNum; @@ -213,7 +214,8 @@ namespace ArmISA enum FlagBits { ThumbBit = (1 << 0), - JazelleBit = (1 << 1) + JazelleBit = (1 << 1), + AArch64Bit = (1 << 2) }; uint8_t flags; uint8_t nextFlags; @@ -304,6 +306,37 @@ namespace ArmISA nextFlags &= ~JazelleBit; } + bool + aarch64() const + { + return flags & AArch64Bit; + } + + void + aarch64(bool val) + { + if (val) + flags |= AArch64Bit; + else + flags &= ~AArch64Bit; + } + + bool + nextAArch64() const + { + return nextFlags & AArch64Bit; + } + + void + nextAArch64(bool val) + { + if (val) + nextFlags |= AArch64Bit; + else + nextFlags &= ~AArch64Bit; + } + + uint8_t itstate() const { @@ -374,9 +407,15 @@ namespace ArmISA } void - instNPC(uint32_t val) + instNPC(Addr val) { - npc(val &~ mask(nextThumb() ? 1 : 2)); + // @todo: review this when AArch32/64 interprocessing is + // supported + if (aarch64()) + npc(val); // AArch64 doesn't force PC alignment, a PC + // Alignment Fault can be raised instead + else + npc(val &~ mask(nextThumb() ? 1 : 2)); } Addr @@ -387,7 +426,7 @@ namespace ArmISA // Perform an interworking branch. void - instIWNPC(uint32_t val) + instIWNPC(Addr val) { bool thumbEE = (thumb() && jazelle()); @@ -417,7 +456,7 @@ namespace ArmISA // Perform an interworking branch in ARM mode, a regular branch // otherwise. 
void - instAIWNPC(uint32_t val) + instAIWNPC(Addr val) { if (!thumb() && !jazelle()) instIWNPC(val); @@ -470,6 +509,18 @@ namespace ArmISA ROR }; + // Extension types for ARM instructions + enum ArmExtendType { + UXTB = 0, + UXTH = 1, + UXTW = 2, + UXTX = 3, + SXTB = 4, + SXTH = 5, + SXTW = 6, + SXTX = 7 + }; + typedef uint64_t LargestRead; // Need to use 64 bits to make sure that read requests get handled properly @@ -508,28 +559,163 @@ namespace ArmISA RND_NEAREST }; + enum ExceptionLevel { + EL0 = 0, + EL1, + EL2, + EL3 + }; + enum OperatingMode { + MODE_EL0T = 0x0, + MODE_EL1T = 0x4, + MODE_EL1H = 0x5, + MODE_EL2T = 0x8, + MODE_EL2H = 0x9, + MODE_EL3T = 0xC, + MODE_EL3H = 0xD, MODE_USER = 16, MODE_FIQ = 17, MODE_IRQ = 18, MODE_SVC = 19, MODE_MON = 22, MODE_ABORT = 23, + MODE_HYP = 26, MODE_UNDEFINED = 27, MODE_SYSTEM = 31, MODE_MAXMODE = MODE_SYSTEM }; + enum ExceptionClass { + EC_INVALID = -1, + EC_UNKNOWN = 0x0, + EC_TRAPPED_WFI_WFE = 0x1, + EC_TRAPPED_CP15_MCR_MRC = 0x3, + EC_TRAPPED_CP15_MCRR_MRRC = 0x4, + EC_TRAPPED_CP14_MCR_MRC = 0x5, + EC_TRAPPED_CP14_LDC_STC = 0x6, + EC_TRAPPED_HCPTR = 0x7, + EC_TRAPPED_SIMD_FP = 0x7, // AArch64 alias + EC_TRAPPED_CP10_MRC_VMRS = 0x8, + EC_TRAPPED_BXJ = 0xA, + EC_TRAPPED_CP14_MCRR_MRRC = 0xC, + EC_ILLEGAL_INST = 0xE, + EC_SVC_TO_HYP = 0x11, + EC_SVC = 0x11, // AArch64 alias + EC_HVC = 0x12, + EC_SMC_TO_HYP = 0x13, + EC_SMC = 0x13, // AArch64 alias + EC_SVC_64 = 0x15, + EC_HVC_64 = 0x16, + EC_SMC_64 = 0x17, + EC_TRAPPED_MSR_MRS_64 = 0x18, + EC_PREFETCH_ABORT_TO_HYP = 0x20, + EC_PREFETCH_ABORT_LOWER_EL = 0x20, // AArch64 alias + EC_PREFETCH_ABORT_FROM_HYP = 0x21, + EC_PREFETCH_ABORT_CURR_EL = 0x21, // AArch64 alias + EC_PC_ALIGNMENT = 0x22, + EC_DATA_ABORT_TO_HYP = 0x24, + EC_DATA_ABORT_LOWER_EL = 0x24, // AArch64 alias + EC_DATA_ABORT_FROM_HYP = 0x25, + EC_DATA_ABORT_CURR_EL = 0x25, // AArch64 alias + EC_STACK_PTR_ALIGNMENT = 0x26, + EC_FP_EXCEPTION = 0x28, + EC_FP_EXCEPTION_64 = 0x2C, + EC_SERROR = 0x2F + }; + + 
BitUnion8(OperatingMode64) + Bitfield<0> spX; + Bitfield<3, 2> el; + Bitfield<4> width; + EndBitUnion(OperatingMode64) + + static bool inline + opModeIs64(OperatingMode mode) + { + return ((OperatingMode64)(uint8_t)mode).width == 0; + } + + static bool inline + opModeIsH(OperatingMode mode) + { + return (mode == MODE_EL1H || mode == MODE_EL2H || mode == MODE_EL3H); + } + + static bool inline + opModeIsT(OperatingMode mode) + { + return (mode == MODE_EL0T || mode == MODE_EL1T || mode == MODE_EL2T || + mode == MODE_EL3T); + } + + static ExceptionLevel inline + opModeToEL(OperatingMode mode) + { + bool aarch32 = ((mode >> 4) & 1) ? true : false; + if (aarch32) { + switch (mode) { + case MODE_USER: + return EL0; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + return EL1; + case MODE_HYP: + return EL2; + case MODE_MON: + return EL3; + default: + panic("Invalid operating mode: %d", mode); + break; + } + } else { + // aarch64 + return (ExceptionLevel) ((mode >> 2) & 3); + } + } + static inline bool badMode(OperatingMode mode) { switch (mode) { + case MODE_EL0T: + case MODE_EL1T: + case MODE_EL1H: + case MODE_EL2T: + case MODE_EL2H: + case MODE_EL3T: + case MODE_EL3H: + case MODE_USER: + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_MON: + case MODE_ABORT: + case MODE_HYP: + case MODE_UNDEFINED: + case MODE_SYSTEM: + return false; + default: + return true; + } + } + + + static inline bool + badMode32(OperatingMode mode) + { + switch (mode) { case MODE_USER: case MODE_FIQ: case MODE_IRQ: case MODE_SVC: case MODE_MON: case MODE_ABORT: + case MODE_HYP: case MODE_UNDEFINED: case MODE_SYSTEM: return false; diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index cddc2c5c4..3d7d9c4fc 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 ARM Limited + * Copyright (c) 2009-2013 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -40,6 +40,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/isa_traits.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" @@ -70,51 +71,68 @@ getArgument(ThreadContext *tc, int &number, uint16_t size, bool fp) M5_DUMMY_RETURN } - if (size == (uint16_t)(-1)) - size = ArmISA::MachineBytes; if (fp) panic("getArgument(): Floating point arguments not implemented\n"); - if (number < NumArgumentRegs) { - // If the argument is 64 bits, it must be in an even regiser - // number. Increment the number here if it isn't even. - if (size == sizeof(uint64_t)) { - if ((number % 2) != 0) - number++; - // Read the two halves of the data. Number is inc here to - // get the second half of the 64 bit reg. - uint64_t tmp; - tmp = tc->readIntReg(number++); - tmp |= tc->readIntReg(number) << 32; - return tmp; + if (inAArch64(tc)) { + if (size == (uint16_t)(-1)) + size = sizeof(uint64_t); + + if (number < 8 /*NumArgumentRegs64*/) { + return tc->readIntReg(number); } else { - return tc->readIntReg(number); + panic("getArgument(): No support reading stack args for AArch64\n"); } } else { - Addr sp = tc->readIntReg(StackPointerReg); - FSTranslatingPortProxy &vp = tc->getVirtProxy(); - uint64_t arg; - if (size == sizeof(uint64_t)) { - // If the argument is even it must be aligned - if ((number % 2) != 0) - number++; - arg = vp.read<uint64_t>(sp + - (number-NumArgumentRegs) * sizeof(uint32_t)); - // since two 32 bit args == 1 64 bit arg, increment number - number++; + if (size == (uint16_t)(-1)) + size = ArmISA::MachineBytes; + + if (number < NumArgumentRegs) { + // If the argument is 64 bits, it must be in an even regiser + // number. Increment the number here if it isn't even. + if (size == sizeof(uint64_t)) { + if ((number % 2) != 0) + number++; + // Read the two halves of the data. 
Number is inc here to + // get the second half of the 64 bit reg. + uint64_t tmp; + tmp = tc->readIntReg(number++); + tmp |= tc->readIntReg(number) << 32; + return tmp; + } else { + return tc->readIntReg(number); + } } else { - arg = vp.read<uint32_t>(sp + - (number-NumArgumentRegs) * sizeof(uint32_t)); + Addr sp = tc->readIntReg(StackPointerReg); + FSTranslatingPortProxy &vp = tc->getVirtProxy(); + uint64_t arg; + if (size == sizeof(uint64_t)) { + // If the argument is even it must be aligned + if ((number % 2) != 0) + number++; + arg = vp.read<uint64_t>(sp + + (number-NumArgumentRegs) * sizeof(uint32_t)); + // since two 32 bit args == 1 64 bit arg, increment number + number++; + } else { + arg = vp.read<uint32_t>(sp + + (number-NumArgumentRegs) * sizeof(uint32_t)); + } + return arg; } - return arg; } + panic("getArgument() should always return\n"); } void skipFunction(ThreadContext *tc) { PCState newPC = tc->pcState(); - newPC.set(tc->readIntReg(ReturnAddressReg) & ~ULL(1)); + if (inAArch64(tc)) { + newPC.set(tc->readIntReg(INTREG_X30)); + } else { + newPC.set(tc->readIntReg(ReturnAddressReg) & ~ULL(1)); + } CheckerCPU *checker = tc->getCheckerCpuPtr(); if (checker) { @@ -151,6 +169,128 @@ copyRegs(ThreadContext *src, ThreadContext *dest) dest->getDTBPtr()->invalidateMiscReg(); } +bool +inSecureState(ThreadContext *tc) +{ + SCR scr = inAArch64(tc) ? 
tc->readMiscReg(MISCREG_SCR_EL3) : + tc->readMiscReg(MISCREG_SCR); + return ArmSystem::haveSecurity(tc) && inSecureState( + scr, tc->readMiscReg(MISCREG_CPSR)); +} + +bool +inAArch64(ThreadContext *tc) +{ + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return opModeIs64((OperatingMode) (uint8_t) cpsr.mode); +} + +bool +longDescFormatInUse(ThreadContext *tc) +{ + TTBCR ttbcr = tc->readMiscReg(MISCREG_TTBCR); + return ArmSystem::haveLPAE(tc) && ttbcr.eae; +} + +uint32_t +getMPIDR(ArmSystem *arm_sys, ThreadContext *tc) +{ + if (arm_sys->multiProc) { + return 0x80000000 | // multiprocessor extensions available + tc->cpuId(); + } else { + return 0x80000000 | // multiprocessor extensions available + 0x40000000 | // in up system + tc->cpuId(); + } +} + +bool +ELIs64(ThreadContext *tc, ExceptionLevel el) +{ + if (ArmSystem::highestEL(tc) == el) + // Register width is hard-wired + return ArmSystem::highestELIs64(tc); + + switch (el) { + case EL0: + return opModeIs64(currOpMode(tc)); + case EL1: + { + // @todo: uncomment this to enable Virtualization + // if (ArmSystem::haveVirtualization(tc)) { + // HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + // return hcr.rw; + // } + assert(ArmSystem::haveSecurity(tc)); + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + return scr.rw; + } + case EL2: + { + assert(ArmSystem::haveSecurity(tc)); + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + return scr.rw; + } + default: + panic("Invalid exception level"); + break; + } +} + +bool +isBigEndian64(ThreadContext *tc) +{ + switch (opModeToEL(currOpMode(tc))) { + case EL3: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL3)).ee; + case EL2: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL2)).ee; + case EL1: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).ee; + case EL0: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).e0e; + default: + panic("Invalid exception level"); + break; + } +} + +Addr +purifyTaggedAddr(Addr addr, ThreadContext *tc, ExceptionLevel el) +{ + TTBCR tcr; + + switch 
(el) { + case EL0: + case EL1: + tcr = tc->readMiscReg(MISCREG_TCR_EL1); + if (bits(addr, 55, 48) == 0xFF && tcr.tbi1) + return addr | mask(63, 55); + else if (!bits(addr, 55, 48) && tcr.tbi0) + return bits(addr,55, 0); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // tcr = tc->readMiscReg(MISCREG_TCR_EL2); + // if (tcr.tbi) + // return addr & mask(56); + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + tcr = tc->readMiscReg(MISCREG_TCR_EL3); + if (tcr.tbi) + return addr & mask(56); + break; + default: + panic("Invalid exception level"); + break; + } + + return addr; // Nothing to do if this is not a tagged address +} + Addr truncPage(Addr addr) { @@ -163,4 +303,667 @@ roundPage(Addr addr) return (addr + PageBytes - 1) & ~(PageBytes - 1); } +bool +mcrMrc15TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss) +{ + bool isRead; + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool trapToHype = false; + + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + mcrMrcIssExtract(iss, isRead, crm, rt, crn, opc1, opc2); + trapToHype = ((uint32_t) hstr) & (1 << crn); + trapToHype |= hdcr.tpm && (crn == 9) && (crm >= 12); + trapToHype |= hcr.tidcp && ( + ((crn == 9) && ((crm <= 2) || ((crm >= 5) && (crm <= 8)))) || + ((crn == 10) && ((crm <= 1) || (crm == 4) || (crm == 8))) || + ((crn == 11) && ((crm <= 8) || (crm == 15))) ); + + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_CPACR: + trapToHype = hcptr.tcpac; + break; + case MISCREG_REVIDR: + case MISCREG_TCMTR: + case MISCREG_TLBTR: + case MISCREG_AIDR: + trapToHype = hcr.tid1; + break; + case MISCREG_CTR: + case MISCREG_CCSIDR: + case MISCREG_CLIDR: + case MISCREG_CSSELR: + trapToHype = hcr.tid2; + break; + case MISCREG_ID_PFR0: + case MISCREG_ID_PFR1: + case MISCREG_ID_DFR0: + case 
MISCREG_ID_AFR0: + case MISCREG_ID_MMFR0: + case MISCREG_ID_MMFR1: + case MISCREG_ID_MMFR2: + case MISCREG_ID_MMFR3: + case MISCREG_ID_ISAR0: + case MISCREG_ID_ISAR1: + case MISCREG_ID_ISAR2: + case MISCREG_ID_ISAR3: + case MISCREG_ID_ISAR4: + case MISCREG_ID_ISAR5: + trapToHype = hcr.tid3; + break; + case MISCREG_DCISW: + case MISCREG_DCCSW: + case MISCREG_DCCISW: + trapToHype = hcr.tsw; + break; + case MISCREG_DCIMVAC: + case MISCREG_DCCIMVAC: + case MISCREG_DCCMVAC: + trapToHype = hcr.tpc; + break; + case MISCREG_ICIMVAU: + case MISCREG_ICIALLU: + case MISCREG_ICIALLUIS: + case MISCREG_DCCMVAU: + trapToHype = hcr.tpu; + break; + case MISCREG_TLBIALLIS: + case MISCREG_TLBIMVAIS: + case MISCREG_TLBIASIDIS: + case MISCREG_TLBIMVAAIS: + case MISCREG_DTLBIALL: + case MISCREG_ITLBIALL: + case MISCREG_DTLBIMVA: + case MISCREG_ITLBIMVA: + case MISCREG_DTLBIASID: + case MISCREG_ITLBIASID: + case MISCREG_TLBIMVAA: + case MISCREG_TLBIALL: + case MISCREG_TLBIMVA: + case MISCREG_TLBIASID: + trapToHype = hcr.ttlb; + break; + case MISCREG_ACTLR: + trapToHype = hcr.tac; + break; + case MISCREG_SCTLR: + case MISCREG_TTBR0: + case MISCREG_TTBR1: + case MISCREG_TTBCR: + case MISCREG_DACR: + case MISCREG_DFSR: + case MISCREG_IFSR: + case MISCREG_DFAR: + case MISCREG_IFAR: + case MISCREG_ADFSR: + case MISCREG_AIFSR: + case MISCREG_PRRR: + case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: + case MISCREG_CONTEXTIDR: + trapToHype = hcr.tvm & !isRead; + break; + case MISCREG_PMCR: + trapToHype = hdcr.tpmcr; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + + +bool +mcrMrc14TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss) +{ + bool isRead; + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool trapToHype = false; + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + mcrMrcIssExtract(iss, isRead, crm, rt, crn, 
opc1, opc2); + inform("trap check M:%x N:%x 1:%x 2:%x hdcr %x, hcptr %x, hstr %x\n", + crm, crn, opc1, opc2, hdcr, hcptr, hstr); + trapToHype = hdcr.tda && (opc1 == 0); + trapToHype |= hcptr.tta && (opc1 == 1); + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_DBGOSLSR: + case MISCREG_DBGOSLAR: + case MISCREG_DBGOSDLR: + case MISCREG_DBGPRCR: + trapToHype = hdcr.tdosa; + break; + case MISCREG_DBGDRAR: + case MISCREG_DBGDSAR: + trapToHype = hdcr.tdra; + break; + case MISCREG_JIDR: + trapToHype = hcr.tid0; + break; + case MISCREG_JOSCR: + case MISCREG_JMCR: + trapToHype = hstr.tjdbx; + break; + case MISCREG_TEECR: + case MISCREG_TEEHBR: + trapToHype = hstr.ttee; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + +bool +mcrrMrrc15TrapToHyp(const MiscRegIndex miscReg, CPSR cpsr, SCR scr, HSTR hstr, + HCR hcr, uint32_t iss) +{ + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool isRead; + bool trapToHype = false; + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + // This is technically the wrong function, but we can re-use it for + // the moment because we only need one field, which overlaps with the + // mcrmrc layout + mcrMrcIssExtract(iss, isRead, crm, rt, crn, opc1, opc2); + trapToHype = ((uint32_t) hstr) & (1 << crm); + + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_SCTLR: + case MISCREG_TTBR0: + case MISCREG_TTBR1: + case MISCREG_TTBCR: + case MISCREG_DACR: + case MISCREG_DFSR: + case MISCREG_IFSR: + case MISCREG_DFAR: + case MISCREG_IFAR: + case MISCREG_ADFSR: + case MISCREG_AIFSR: + case MISCREG_PRRR: + case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: + case MISCREG_CONTEXTIDR: + trapToHype = hcr.tvm & !isRead; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + +bool +msrMrs64TrapToSup(const MiscRegIndex miscReg, ExceptionLevel el, + CPACR cpacr /* 
CPACR_EL1 */) +{ + bool trapToSup = false; + switch (miscReg) { + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + if ((el == EL0 && cpacr.fpen != 0x3) || + (el == EL1 && !(cpacr.fpen & 0x1))) + trapToSup = true; + break; + default: + break; + } + return trapToSup; +} + +bool +msrMrs64TrapToHyp(const MiscRegIndex miscReg, bool isRead, + CPTR cptr /* CPTR_EL2 */, + HCR hcr /* HCR_EL2 */, + bool * isVfpNeon) +{ + bool trapToHyp = false; + *isVfpNeon = false; + + switch (miscReg) { + // FP/SIMD regs + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + trapToHyp = cptr.tfp; + *isVfpNeon = true; + break; + // CPACR + case MISCREG_CPACR_EL1: + trapToHyp = cptr.tcpac; + break; + // Virtual memory control regs + case MISCREG_SCTLR_EL1: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TCR_EL1: + case MISCREG_ESR_EL1: + case MISCREG_FAR_EL1: + case MISCREG_AFSR0_EL1: + case MISCREG_AFSR1_EL1: + case MISCREG_MAIR_EL1: + case MISCREG_AMAIR_EL1: + case MISCREG_CONTEXTIDR_EL1: + trapToHyp = (hcr.trvm && isRead) || (hcr.tvm && !isRead); + break; + // TLB maintenance instructions + case MISCREG_TLBI_VMALLE1: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + case MISCREG_TLBI_VAAE1_Xt: + case MISCREG_TLBI_VALE1_Xt: + case MISCREG_TLBI_VAALE1_Xt: + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_VAAE1IS_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VAALE1IS_Xt: + trapToHyp = hcr.ttlb; + break; + // Cache maintenance instructions to the point of unification + case MISCREG_IC_IVAU_Xt: + case MISCREG_ICIALLU: + case MISCREG_ICIALLUIS: + case MISCREG_DC_CVAU_Xt: + trapToHyp = hcr.tpu; + break; + // Data/Unified cache maintenance instructions to the point of coherency + case MISCREG_DC_IVAC_Xt: + case MISCREG_DC_CIVAC_Xt: + case MISCREG_DC_CVAC_Xt: + trapToHyp = hcr.tpc; + break; + // Data/Unified cache maintenance instructions by set/way + case 
MISCREG_DC_ISW_Xt: + case MISCREG_DC_CSW_Xt: + case MISCREG_DC_CISW_Xt: + trapToHyp = hcr.tsw; + break; + // ACTLR + case MISCREG_ACTLR_EL1: + trapToHyp = hcr.tacr; + break; + + // @todo: Trap implementation-dependent functionality based on + // hcr.tidcp + + // ID regs, group 3 + case MISCREG_ID_PFR0_EL1: + case MISCREG_ID_PFR1_EL1: + case MISCREG_ID_DFR0_EL1: + case MISCREG_ID_AFR0_EL1: + case MISCREG_ID_MMFR0_EL1: + case MISCREG_ID_MMFR1_EL1: + case MISCREG_ID_MMFR2_EL1: + case MISCREG_ID_MMFR3_EL1: + case MISCREG_ID_ISAR0_EL1: + case MISCREG_ID_ISAR1_EL1: + case MISCREG_ID_ISAR2_EL1: + case MISCREG_ID_ISAR3_EL1: + case MISCREG_ID_ISAR4_EL1: + case MISCREG_ID_ISAR5_EL1: + case MISCREG_MVFR0_EL1: + case MISCREG_MVFR1_EL1: + case MISCREG_MVFR2_EL1: + case MISCREG_ID_AA64PFR0_EL1: + case MISCREG_ID_AA64PFR1_EL1: + case MISCREG_ID_AA64DFR0_EL1: + case MISCREG_ID_AA64DFR1_EL1: + case MISCREG_ID_AA64ISAR0_EL1: + case MISCREG_ID_AA64ISAR1_EL1: + case MISCREG_ID_AA64MMFR0_EL1: + case MISCREG_ID_AA64MMFR1_EL1: + case MISCREG_ID_AA64AFR0_EL1: + case MISCREG_ID_AA64AFR1_EL1: + assert(isRead); + trapToHyp = hcr.tid3; + break; + // ID regs, group 2 + case MISCREG_CTR_EL0: + case MISCREG_CCSIDR_EL1: + case MISCREG_CLIDR_EL1: + case MISCREG_CSSELR_EL1: + trapToHyp = hcr.tid2; + break; + // ID regs, group 1 + case MISCREG_AIDR_EL1: + case MISCREG_REVIDR_EL1: + assert(isRead); + trapToHyp = hcr.tid1; + break; + default: + break; + } + return trapToHyp; +} + +bool +msrMrs64TrapToMon(const MiscRegIndex miscReg, CPTR cptr /* CPTR_EL3 */, + ExceptionLevel el, bool * isVfpNeon) +{ + bool trapToMon = false; + *isVfpNeon = false; + + switch (miscReg) { + // FP/SIMD regs + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + trapToMon = cptr.tfp; + *isVfpNeon = true; + break; + // CPACR, CPTR + case MISCREG_CPACR_EL1: + if (el == EL1) { + trapToMon = cptr.tcpac; + } + break; + case MISCREG_CPTR_EL2: + if (el == EL2) { + trapToMon = cptr.tcpac; + } + break; + default: + 
break; + } + return trapToMon; +} + +bool +decodeMrsMsrBankedReg(uint8_t sysM, bool r, bool &isIntReg, int ®Idx, + CPSR cpsr, SCR scr, NSACR nsacr, bool checkSecurity) +{ + OperatingMode mode; + bool ok = true; + + // R mostly indicates if its a int register or a misc reg, we override + // below if the few corner cases + isIntReg = !r; + // Loosely based on ARM ARM issue C section B9.3.10 + if (r) { + switch (sysM) + { + case 0xE: + regIdx = MISCREG_SPSR_FIQ; + mode = MODE_FIQ; + break; + case 0x10: + regIdx = MISCREG_SPSR_IRQ; + mode = MODE_IRQ; + break; + case 0x12: + regIdx = MISCREG_SPSR_SVC; + mode = MODE_SVC; + break; + case 0x14: + regIdx = MISCREG_SPSR_ABT; + mode = MODE_ABORT; + break; + case 0x16: + regIdx = MISCREG_SPSR_UND; + mode = MODE_UNDEFINED; + break; + case 0x1C: + regIdx = MISCREG_SPSR_MON; + mode = MODE_MON; + break; + case 0x1E: + regIdx = MISCREG_SPSR_HYP; + mode = MODE_HYP; + break; + default: + ok = false; + break; + } + } else { + int sysM4To3 = bits(sysM, 4, 3); + + if (sysM4To3 == 0) { + mode = MODE_USER; + regIdx = intRegInMode(mode, bits(sysM, 2, 0) + 8); + } else if (sysM4To3 == 1) { + mode = MODE_FIQ; + regIdx = intRegInMode(mode, bits(sysM, 2, 0) + 8); + } else if (sysM4To3 == 3) { + if (bits(sysM, 1) == 0) { + mode = MODE_MON; + regIdx = intRegInMode(mode, 14 - bits(sysM, 0)); + } else { + mode = MODE_HYP; + if (bits(sysM, 0) == 1) { + regIdx = intRegInMode(mode, 13); // R13 in HYP + } else { + isIntReg = false; + regIdx = MISCREG_ELR_HYP; + } + } + } else { // Other Banked registers + int sysM2 = bits(sysM, 2); + int sysM1 = bits(sysM, 1); + + mode = (OperatingMode) ( ((sysM2 || sysM1) << 0) | + (1 << 1) | + ((sysM2 && !sysM1) << 2) | + ((sysM2 && sysM1) << 3) | + (1 << 4) ); + regIdx = intRegInMode(mode, 14 - bits(sysM, 0)); + // Don't flatten the register here. 
This is going to go through + // setIntReg() which will do the flattening + ok &= mode != cpsr.mode; + } + } + + // Check that the requested register is accessable from the current mode + if (ok && checkSecurity && mode != cpsr.mode) { + switch (cpsr.mode) + { + case MODE_USER: + ok = false; + break; + case MODE_FIQ: + ok &= mode != MODE_HYP; + ok &= (mode != MODE_MON) || !scr.ns; + break; + case MODE_HYP: + ok &= mode != MODE_MON; + ok &= (mode != MODE_FIQ) || !nsacr.rfr; + break; + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + ok &= mode != MODE_HYP; + ok &= (mode != MODE_MON) || !scr.ns; + ok &= (mode != MODE_FIQ) || !nsacr.rfr; + break; + // can access everything, no further checks required + case MODE_MON: + break; + default: + panic("unknown Mode 0x%x\n", cpsr.mode); + break; + } + } + return (ok); +} + +bool +vfpNeonEnabled(uint32_t &seq, HCPTR hcptr, NSACR nsacr, CPACR cpacr, CPSR cpsr, + uint32_t &iss, bool &trap, ThreadContext *tc, FPEXC fpexc, + bool isSIMD) +{ + iss = 0; + trap = false; + bool undefined = false; + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool haveVirtualization = ArmSystem::haveVirtualization(tc); + bool isSecure = inSecureState(tc); + + // Non-secure view of CPACR and HCPTR determines behavior + // Copy register values + uint8_t cpacr_cp10 = cpacr.cp10; + bool cpacr_asedis = cpacr.asedis; + bool hcptr_cp10 = false; + bool hcptr_tase = false; + + bool cp10_enabled = cpacr.cp10 == 0x3 + || (cpacr.cp10 == 0x1 && inPrivilegedMode(cpsr)); + + bool cp11_enabled = cpacr.cp11 == 0x3 + || (cpacr.cp11 == 0x1 && inPrivilegedMode(cpsr)); + + if (cp11_enabled) { + undefined |= !(fpexc.en && cp10_enabled); + } else { + undefined |= !(fpexc.en && cp10_enabled && (cpacr.cp11 == cpacr.cp10)); + } + + if (haveVirtualization) { + hcptr_cp10 = hcptr.tcp10; + undefined |= hcptr.tcp10 != hcptr.tcp11; + hcptr_tase = hcptr.tase; + } + + if (haveSecurity) { + undefined |= nsacr.cp10 != nsacr.cp11; + 
if (!isSecure) { + // Modify register values to the Non-secure view + if (!nsacr.cp10) { + cpacr_cp10 = 0; + if (haveVirtualization) { + hcptr_cp10 = true; + } + } + if (nsacr.nsasedis) { + cpacr_asedis = true; + if (haveVirtualization) { + hcptr_tase = true; + } + } + } + } + + // Check Coprocessor Access Control Register for permission to use CP10/11. + if (!haveVirtualization || (cpsr.mode != MODE_HYP)) { + switch (cpacr_cp10) + { + case 0: + undefined = true; + break; + case 1: + undefined |= inUserMode(cpsr); + break; + } + + // Check if SIMD operations are disabled + if (isSIMD && cpacr_asedis) undefined = true; + } + + // If required, check FPEXC enabled bit. + undefined |= !fpexc.en; + + if (haveSecurity && haveVirtualization && !isSecure) { + if (hcptr_cp10 || (isSIMD && hcptr_tase)) { + iss = isSIMD ? (1 << 5) : 0xA; + trap = true; + } + } + + return (!undefined); +} + +bool +SPAlignmentCheckEnabled(ThreadContext* tc) +{ + switch (opModeToEL(currOpMode(tc))) { + case EL3: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL3)).sa; + case EL2: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL2)).sa; + case EL1: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).sa; + case EL0: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).sa0; + default: + panic("Invalid exception level"); + break; + } +} + +int +decodePhysAddrRange64(uint8_t pa_enc) +{ + switch (pa_enc) { + case 0x0: + return 32; + case 0x1: + return 36; + case 0x2: + return 40; + case 0x3: + return 42; + case 0x4: + return 44; + case 0x5: + case 0x6: + case 0x7: + return 48; + default: + panic("Invalid phys. address range encoding"); + } +} + +uint8_t +encodePhysAddrRange64(int pa_size) +{ + switch (pa_size) { + case 32: + return 0x0; + case 36: + return 0x1; + case 40: + return 0x2; + case 42: + return 0x3; + case 44: + return 0x4; + case 48: + return 0x5; + default: + panic("Invalid phys. 
address range"); + } +} + } // namespace ArmISA diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh index e4fc658e0..1eea743bb 100644 --- a/src/arch/arm/utility.hh +++ b/src/arch/arm/utility.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -54,6 +54,8 @@ #include "cpu/static_inst.hh" #include "cpu/thread_context.hh" +class ArmSystem; + namespace ArmISA { inline PCState @@ -118,7 +120,7 @@ void initCPU(ThreadContext *tc, int cpuId); static inline bool inUserMode(CPSR cpsr) { - return cpsr.mode == MODE_USER; + return cpsr.mode == MODE_USER || cpsr.mode == MODE_EL0T; } static inline bool @@ -139,30 +141,139 @@ inPrivilegedMode(ThreadContext *tc) return !inUserMode(tc); } -static inline bool -vfpEnabled(CPACR cpacr, CPSR cpsr) +bool inAArch64(ThreadContext *tc); + +static inline OperatingMode +currOpMode(ThreadContext *tc) +{ + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return (OperatingMode) (uint8_t) cpsr.mode; +} + +static inline ExceptionLevel +currEL(ThreadContext *tc) { - return cpacr.cp10 == 0x3 || - (cpacr.cp10 == 0x1 && inPrivilegedMode(cpsr)); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return (ExceptionLevel) (uint8_t) cpsr.el; } +bool ELIs64(ThreadContext *tc, ExceptionLevel el); + +bool isBigEndian64(ThreadContext *tc); + +/** + * Removes the tag from tagged addresses if that mode is enabled. + * @param addr The address to be purified. + * @param tc The thread context. + * @param el The controlled exception level. + * @return The purified address. 
+ */ +Addr purifyTaggedAddr(Addr addr, ThreadContext *tc, ExceptionLevel el); + static inline bool -vfpEnabled(CPACR cpacr, CPSR cpsr, FPEXC fpexc) +inSecureState(SCR scr, CPSR cpsr) +{ + switch ((OperatingMode) (uint8_t) cpsr.mode) { + case MODE_MON: + case MODE_EL3T: + case MODE_EL3H: + return true; + case MODE_HYP: + case MODE_EL2T: + case MODE_EL2H: + return false; + default: + return !scr.ns; + } +} + +bool longDescFormatInUse(ThreadContext *tc); + +bool inSecureState(ThreadContext *tc); + +uint32_t getMPIDR(ArmSystem *arm_sys, ThreadContext *tc); + +static inline uint32_t +mcrMrcIssBuild(bool isRead, uint32_t crm, IntRegIndex rt, uint32_t crn, + uint32_t opc1, uint32_t opc2) +{ + return (isRead << 0) | + (crm << 1) | + (rt << 5) | + (crn << 10) | + (opc1 << 14) | + (opc2 << 17); +} + +static inline void +mcrMrcIssExtract(uint32_t iss, bool &isRead, uint32_t &crm, IntRegIndex &rt, + uint32_t &crn, uint32_t &opc1, uint32_t &opc2) +{ + isRead = (iss >> 0) & 0x1; + crm = (iss >> 1) & 0xF; + rt = (IntRegIndex) ((iss >> 5) & 0xF); + crn = (iss >> 10) & 0xF; + opc1 = (iss >> 14) & 0x7; + opc2 = (iss >> 17) & 0x7; +} + +static inline uint32_t +mcrrMrrcIssBuild(bool isRead, uint32_t crm, IntRegIndex rt, IntRegIndex rt2, + uint32_t opc1) { - if ((cpacr.cp11 == 0x3) || - ((cpacr.cp11 == 0x1) && inPrivilegedMode(cpsr))) - return fpexc.en && vfpEnabled(cpacr, cpsr); - else - return fpexc.en && vfpEnabled(cpacr, cpsr) && - (cpacr.cp11 == cpacr.cp10); + return (isRead << 0) | + (crm << 1) | + (rt << 5) | + (rt2 << 10) | + (opc1 << 16); } +static inline uint32_t +msrMrs64IssBuild(bool isRead, uint32_t op0, uint32_t op1, uint32_t crn, + uint32_t crm, uint32_t op2, IntRegIndex rt) +{ + return isRead | + (crm << 1) | + (rt << 5) | + (crn << 10) | + (op1 << 14) | + (op2 << 17) | + (op0 << 20); +} + +bool +mcrMrc15TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss); +bool +mcrMrc14TrapToHyp(const MiscRegIndex 
miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss); +bool +mcrrMrrc15TrapToHyp(const MiscRegIndex miscReg, CPSR cpsr, SCR scr, HSTR hstr, + HCR hcr, uint32_t iss); + +bool msrMrs64TrapToSup(const MiscRegIndex miscReg, ExceptionLevel el, + CPACR cpacr); +bool msrMrs64TrapToHyp(const MiscRegIndex miscReg, bool isRead, CPTR cptr, + HCR hcr, bool * isVfpNeon); +bool msrMrs64TrapToMon(const MiscRegIndex miscReg, CPTR cptr, + ExceptionLevel el, bool * isVfpNeon); + +bool +vfpNeonEnabled(uint32_t &seq, HCPTR hcptr, NSACR nsacr, CPACR cpacr, CPSR cpsr, + uint32_t &iss, bool &trap, ThreadContext *tc, + FPEXC fpexc = (1<<30), bool isSIMD = false); + static inline bool -neonEnabled(CPACR cpacr, CPSR cpsr, FPEXC fpexc) +vfpNeon64Enabled(CPACR cpacr, ExceptionLevel el) { - return !cpacr.asedis && vfpEnabled(cpacr, cpsr, fpexc); + if ((el == EL0 && cpacr.fpen != 0x3) || + (el == EL1 && !(cpacr.fpen & 0x1))) + return false; + return true; } +bool SPAlignmentCheckEnabled(ThreadContext* tc); + uint64_t getArgument(ThreadContext *tc, int &number, uint16_t size, bool fp); void skipFunction(ThreadContext *tc); @@ -182,6 +293,36 @@ getExecutingAsid(ThreadContext *tc) return tc->readMiscReg(MISCREG_CONTEXTIDR); } +// Decodes the register index to access based on the fields used in a MSR +// or MRS instruction +bool +decodeMrsMsrBankedReg(uint8_t sysM, bool r, bool &isIntReg, int ®Idx, + CPSR cpsr, SCR scr, NSACR nsacr, + bool checkSecurity = true); + +// This wrapper function is used to turn the register index into a source +// parameter for the instruction. See Operands.isa +static inline int +decodeMrsMsrBankedIntRegIndex(uint8_t sysM, bool r) +{ + int regIdx; + bool isIntReg; + bool validReg; + + validReg = decodeMrsMsrBankedReg(sysM, r, isIntReg, regIdx, 0, 0, 0, false); + return (validReg && isIntReg) ? regIdx : INTREG_DUMMY; +} + +/** + * Returns the n. of PA bits corresponding to the specified encoding. 
+ */ +int decodePhysAddrRange64(uint8_t pa_enc); + +/** + * Returns the encoding corresponding to the specified n. of PA bits. + */ +uint8_t encodePhysAddrRange64(int pa_size); + } #endif diff --git a/src/arch/arm/vtophys.cc b/src/arch/arm/vtophys.cc index 7c26962cb..bed76acbd 100644 --- a/src/arch/arm/vtophys.cc +++ b/src/arch/arm/vtophys.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #include <string> +#include "arch/arm/faults.hh" #include "arch/arm/table_walker.hh" #include "arch/arm/tlb.hh" #include "arch/arm/vtophys.hh" @@ -65,66 +66,30 @@ ArmISA::vtophys(Addr vaddr) Addr ArmISA::vtophys(ThreadContext *tc, Addr addr) { - SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - if (!sctlr.m) { - // Translation is currently disabled PA == VA - return addr; - } - bool success; - Addr pa; + Fault fault; + // Set up a functional memory Request to pass to the TLB + // to get it to translate the vaddr to a paddr + Request req(0, addr, 64, 0x40, -1, 0, 0, 0); ArmISA::TLB *tlb; - // Check the TLBs far a translation - // It's possible that there is a validy translation in the tlb + // Check the TLBs for a translation + // It's possible that there is a valid translation in the tlb // that is no loger valid in the page table in memory // so we need to check here first + // + // Calling translateFunctional invokes a table-walk if required + // so we should always succeed tlb = static_cast<ArmISA::TLB*>(tc->getDTBPtr()); - success = tlb->translateFunctional(tc, addr, pa); - if (success) - return pa; + fault = tlb->translateFunctional(&req, tc, BaseTLB::Read, TLB::NormalTran); + if (fault == NoFault) + return req.getPaddr(); tlb = static_cast<ArmISA::TLB*>(tc->getITBPtr()); - success = tlb->translateFunctional(tc, addr, pa); - if (success) - return pa; + fault = tlb->translateFunctional(&req, tc, BaseTLB::Read, 
TLB::NormalTran); + if (fault == NoFault) + return req.getPaddr(); - // We've failed everything, so we need to do a - // hardware tlb walk without messing with any - // state - - uint32_t N = tc->readMiscReg(MISCREG_TTBCR); - Addr ttbr; - if (N == 0 || !mbits(addr, 31, 32-N)) { - ttbr = tc->readMiscReg(MISCREG_TTBR0); - } else { - ttbr = tc->readMiscReg(MISCREG_TTBR1); - N = 0; - } - - PortProxy &port = tc->getPhysProxy(); - Addr l1desc_addr = mbits(ttbr, 31, 14-N) | (bits(addr,31-N,20) << 2); - - TableWalker::L1Descriptor l1desc; - l1desc.data = port.read<uint32_t>(l1desc_addr); - if (l1desc.type() == TableWalker::L1Descriptor::Ignore || - l1desc.type() == TableWalker::L1Descriptor::Reserved) { - warn("Unable to translate virtual address: %#x\n", addr); - return -1; - } - if (l1desc.type() == TableWalker::L1Descriptor::Section) - return l1desc.paddr(addr); - - // Didn't find it at the first level, try againt - Addr l2desc_addr = l1desc.l2Addr() | (bits(addr, 19, 12) << 2); - TableWalker::L2Descriptor l2desc; - l2desc.data = port.read<uint32_t>(l2desc_addr); - - if (l2desc.invalid()) { - warn("Unable to translate virtual address: %#x\n", addr); - return -1; - } - - return l2desc.paddr(addr); + panic("Table walkers support functional accesses. We should never get here\n"); } bool diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc index 6d4c29776..9445f1df9 100644 --- a/src/base/loader/elf_object.cc +++ b/src/base/loader/elf_object.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * All rights reserved. * @@ -61,7 +73,7 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) assert(elf != NULL); // Check that we actually have a elf file - if (gelf_getehdr(elf, &ehdr) ==0) { + if (gelf_getehdr(elf, &ehdr) == 0) { DPRINTFR(Loader, "Not ELF\n"); elf_end(elf); return NULL; @@ -94,23 +106,27 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) } else if (ehdr.e_machine == EM_386 && ehdr.e_ident[EI_CLASS] == ELFCLASS32) { arch = ObjectFile::I386; - } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { - arch = ObjectFile::Alpha; - } else if (ehdr.e_machine == EM_ARM) { + } else if (ehdr.e_machine == EM_ARM && + ehdr.e_ident[EI_CLASS] == ELFCLASS32) { if (bits(ehdr.e_entry, 0)) { arch = ObjectFile::Thumb; } else { arch = ObjectFile::Arm; } + } else if ((ehdr.e_machine == EM_AARCH64) && + ehdr.e_ident[EI_CLASS] == ELFCLASS64) { + arch = ObjectFile::Arm64; + } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { + arch = ObjectFile::Alpha; } else if (ehdr.e_machine == EM_PPC && ehdr.e_ident[EI_CLASS] == ELFCLASS32) { - if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { - arch = ObjectFile::Power; - } else { - fatal("The binary you're trying to load is compiled for " + if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { + arch = ObjectFile::Power; + } else { + fatal("The binary you're trying to load is compiled for " "little endian Power.\nM5 only supports big " "endian Power. Please recompile your binary.\n"); - } + } } else if (ehdr.e_machine == EM_PPC64) { fatal("The binary you're trying to load is compiled for 64-bit " "Power. M5\n only supports 32-bit Power. 
Please " @@ -121,9 +137,7 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) } //Detect the operating system - switch (ehdr.e_ident[EI_OSABI]) - { - + switch (ehdr.e_ident[EI_OSABI]) { case ELFOSABI_LINUX: opSys = ObjectFile::Linux; break; @@ -206,7 +220,8 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) if(phdr.p_offset <= e_phoff && phdr.p_offset + phdr.p_filesz > e_phoff) { - result->_programHeaderTable = phdr.p_paddr + e_phoff; + result->_programHeaderTable = + phdr.p_paddr + (e_phoff - phdr.p_offset); break; } } @@ -423,15 +438,15 @@ ElfObject::loadWeakSymbols(SymbolTable *symtab, Addr addrMask) } bool -ElfObject::loadSections(PortProxy& memProxy, Addr addrMask) +ElfObject::loadSections(PortProxy& memProxy, Addr addrMask, Addr offset) { - if (!ObjectFile::loadSections(memProxy, addrMask)) + if (!ObjectFile::loadSections(memProxy, addrMask, offset)) return false; vector<Segment>::iterator extraIt; for (extraIt = extraSegments.begin(); extraIt != extraSegments.end(); extraIt++) { - if (!loadSection(&(*extraIt), memProxy, addrMask)) { + if (!loadSection(&(*extraIt), memProxy, addrMask, offset)) { return false; } } diff --git a/src/base/loader/elf_object.hh b/src/base/loader/elf_object.hh index d3d3e5197..84b73b0a8 100644 --- a/src/base/loader/elf_object.hh +++ b/src/base/loader/elf_object.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * All rights reserved. * @@ -66,7 +78,8 @@ class ElfObject : public ObjectFile virtual ~ElfObject() {} bool loadSections(PortProxy& memProxy, - Addr addrMask = std::numeric_limits<Addr>::max()); + Addr addrMask = std::numeric_limits<Addr>::max(), + Addr offset = 0); virtual bool loadGlobalSymbols(SymbolTable *symtab, Addr addrMask = std::numeric_limits<Addr>::max()); virtual bool loadLocalSymbols(SymbolTable *symtab, Addr addrMask = diff --git a/src/base/loader/object_file.cc b/src/base/loader/object_file.cc index b9f84283b..170e18d5e 100644 --- a/src/base/loader/object_file.cc +++ b/src/base/loader/object_file.cc @@ -66,10 +66,10 @@ ObjectFile::~ObjectFile() bool -ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask) +ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask, Addr offset) { if (sec->size != 0) { - Addr addr = sec->baseAddr & addrMask; + Addr addr = (sec->baseAddr & addrMask) + offset; if (sec->fileImage) { memProxy.writeBlob(addr, sec->fileImage, sec->size); } @@ -83,11 +83,11 @@ ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask) bool -ObjectFile::loadSections(PortProxy& memProxy, Addr addrMask) +ObjectFile::loadSections(PortProxy& memProxy, Addr addrMask, Addr offset) { - return (loadSection(&text, memProxy, addrMask) - && loadSection(&data, memProxy, addrMask) - && loadSection(&bss, memProxy, addrMask)); + return (loadSection(&text, memProxy, addrMask, offset) + && loadSection(&data, memProxy, addrMask, offset) + && loadSection(&bss, memProxy, addrMask, offset)); } diff --git a/src/base/loader/object_file.hh b/src/base/loader/object_file.hh index bdc9a31a1..09cde5b53 
100644 --- a/src/base/loader/object_file.hh +++ b/src/base/loader/object_file.hh @@ -52,6 +52,7 @@ class ObjectFile Mips, X86_64, I386, + Arm64, Arm, Thumb, Power @@ -84,7 +85,8 @@ class ObjectFile void close(); virtual bool loadSections(PortProxy& memProxy, Addr addrMask = - std::numeric_limits<Addr>::max()); + std::numeric_limits<Addr>::max(), + Addr offset = 0); virtual bool loadGlobalSymbols(SymbolTable *symtab, Addr addrMask = std::numeric_limits<Addr>::max()) = 0; virtual bool loadLocalSymbols(SymbolTable *symtab, Addr addrMask = @@ -114,7 +116,8 @@ class ObjectFile Section data; Section bss; - bool loadSection(Section *sec, PortProxy& memProxy, Addr addrMask); + bool loadSection(Section *sec, PortProxy& memProxy, Addr addrMask, + Addr offset = 0); void setGlobalPointer(Addr global_ptr) { globalPtr = global_ptr; } public: diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index cd82207cd..652af0b80 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -76,7 +76,7 @@ elif buildEnv['TARGET_ISA'] == 'mips': from MipsISA import MipsISA isa_class = MipsISA elif buildEnv['TARGET_ISA'] == 'arm': - from ArmTLB import ArmTLB + from ArmTLB import ArmTLB, ArmStage2IMMU, ArmStage2DMMU from ArmInterrupts import ArmInterrupts from ArmISA import ArmISA isa_class = ArmISA @@ -171,6 +171,8 @@ class BaseCPU(MemObject): elif buildEnv['TARGET_ISA'] == 'arm': dtb = Param.ArmTLB(ArmTLB(), "Data TLB") itb = Param.ArmTLB(ArmTLB(), "Instruction TLB") + istage2_mmu = Param.ArmStage2MMU(ArmStage2IMMU(), "Stage 2 trans") + dstage2_mmu = Param.ArmStage2MMU(ArmStage2DMMU(), "Stage 2 trans") interrupts = Param.ArmInterrupts( NULL, "Interrupt Controller") isa = VectorParam.ArmISA([ isa_class() ], "ISA instance") @@ -211,6 +213,9 @@ class BaseCPU(MemObject): if buildEnv['TARGET_ISA'] in ['x86', 'arm']: _cached_ports += ["itb.walker.port", "dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + _cached_ports += ["istage2_mmu.stage2_tlb.walker.port", + "dstage2_mmu.stage2_tlb.walker.port"] _uncached_slave_ports = [] _uncached_master_ports = [] @@ -267,18 +272,35 @@ class BaseCPU(MemObject): if iwc and dwc: self.itb_walker_cache = iwc self.dtb_walker_cache = dwc - self.itb.walker.port = iwc.cpu_side - self.dtb.walker.port = dwc.cpu_side + if buildEnv['TARGET_ISA'] in ['arm']: + self.itb_walker_cache_bus = CoherentBus() + self.dtb_walker_cache_bus = CoherentBus() + self.itb_walker_cache_bus.master = iwc.cpu_side + self.dtb_walker_cache_bus.master = dwc.cpu_side + self.itb.walker.port = self.itb_walker_cache_bus.slave + self.dtb.walker.port = self.dtb_walker_cache_bus.slave + self.istage2_mmu.stage2_tlb.walker.port = self.itb_walker_cache_bus.slave + self.dstage2_mmu.stage2_tlb.walker.port = self.dtb_walker_cache_bus.slave + else: + self.itb.walker.port = iwc.cpu_side + self.dtb.walker.port = dwc.cpu_side self._cached_ports += ["itb_walker_cache.mem_side", \ 
"dtb_walker_cache.mem_side"] else: self._cached_ports += ["itb.walker.port", "dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + self._cached_ports += ["istage2_mmu.stage2_tlb.walker.port", \ + "dstage2_mmu.stage2_tlb.walker.port"] + # Checker doesn't need its own tlb caches because it does # functional accesses only if self.checker != NULL: self._cached_ports += ["checker.itb.walker.port", \ "checker.dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + self._cached_ports += ["checker.istage2_mmu.stage2_tlb.walker.port", \ + "checker.dstage2_mmu.stage2_tlb.walker.port"] def addTwoLevelCacheHierarchy(self, ic, dc, l2c, iwc = None, dwc = None): self.addPrivateSplitL1Caches(ic, dc, iwc, dwc) diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py index b3c14580e..3c9c22ecc 100644 --- a/src/dev/arm/RealView.py +++ b/src/dev/arm/RealView.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009-2012 ARM Limited +# Copyright (c) 2009-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -88,6 +88,17 @@ class RealViewCtrl(BasicPioDevice): proc_id1 = Param.UInt32(0x0C000222, "Processor ID, SYS_PROCID1") idreg = Param.UInt32(0x00000000, "ID Register, SYS_ID") +class VGic(PioDevice): + type = 'VGic' + cxx_header = "dev/arm/vgic.hh" + gic = Param.BaseGic(Parent.any, "Gic to use for interrupting") + platform = Param.Platform(Parent.any, "Platform this device is part of.") + vcpu_addr = Param.Addr(0, "Address for vcpu interfaces") + hv_addr = Param.Addr(0, "Address for hv control") + pio_delay = Param.Latency('10ns', "Delay for PIO r/w") + # The number of list registers is not currently configurable at runtime. 
+ ppint = Param.UInt32("HV maintenance interrupt number") + class AmbaFake(AmbaPioDevice): type = 'AmbaFake' cxx_header = "dev/arm/amba_fake.hh" @@ -119,6 +130,15 @@ class CpuLocalTimer(BasicPioDevice): int_num_timer = Param.UInt32("Interrrupt number used per-cpu to GIC") int_num_watchdog = Param.UInt32("Interrupt number for per-cpu watchdog to GIC") +class GenericTimer(SimObject): + type = 'GenericTimer' + cxx_header = "dev/arm/generic_timer.hh" + system = Param.System(Parent.any, "system") + gic = Param.BaseGic(Parent.any, "GIC to use for interrupting") + int_num = Param.UInt32("Interrupt number used per-cpu to GIC") + # @todo: for now only one timer per CPU is supported, which is the + # normal behaviour when Security and Virt. extensions are disabled. + class PL031(AmbaIntDevice): type = 'PL031' cxx_header = "dev/arm/rtc_pl031.hh" @@ -166,6 +186,9 @@ class RealView(Platform): conf_table_reported = False) self.nvmem.port = mem_bus.master cur_sys.boot_loader = loc('boot.arm') + cur_sys.atags_addr = 0x100 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0 # Reference for memory map and interrupt number @@ -340,12 +363,14 @@ class VExpress_EMM(RealView): realview_io = RealViewCtrl(proc_id0=0x14000000, proc_id1=0x14000000, pio_addr=0x1C010000) gic = Pl390(dist_addr=0x2C001000, cpu_addr=0x2C002000) local_cpu_timer = CpuLocalTimer(int_num_timer=29, int_num_watchdog=30, pio_addr=0x2C080000) + generic_timer = GenericTimer(int_num=29) timer0 = Sp804(int_num0=34, int_num1=34, pio_addr=0x1C110000, clock0='1MHz', clock1='1MHz') timer1 = Sp804(int_num0=35, int_num1=35, pio_addr=0x1C120000, clock0='1MHz', clock1='1MHz') clcd = Pl111(pio_addr=0x1c1f0000, int_num=46) hdlcd = HDLcd(pio_addr=0x2b000000, int_num=117) kmi0 = Pl050(pio_addr=0x1c060000, int_num=44) kmi1 = Pl050(pio_addr=0x1c070000, int_num=45, is_mouse=True) + vgic = VGic(vcpu_addr=0x2c006000, hv_addr=0x2c004000, ppint=25) cf_ctrl = IdeController(disks=[], pci_func=0, pci_dev=0, pci_bus=2, io_shift = 2, 
ctrl_offset = 2, Command = 0x1, BAR0 = 0x1C1A0000, BAR0Size = '256B', @@ -380,7 +405,9 @@ class VExpress_EMM(RealView): conf_table_reported = False) self.nvmem.port = mem_bus.master cur_sys.boot_loader = loc('boot_emm.arm') - cur_sys.atags_addr = 0x80000100 + cur_sys.atags_addr = 0x8000000 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0x80000000 # Attach I/O devices that are on chip and also set the appropriate # ranges for the bridge @@ -396,6 +423,8 @@ class VExpress_EMM(RealView): AddrRange(0x40000000, size='512MB'), AddrRange(0x18000000, size='64MB'), AddrRange(0x1C000000, size='64MB')] + self.vgic.pio = bus.master + # Attach I/O devices to specified bus object. Can't do this # earlier, since the bus object itself is typically defined at the @@ -435,3 +464,13 @@ class VExpress_EMM(RealView): self.usb_fake.pio = bus.master self.mmc_fake.pio = bus.master +class VExpress_EMM64(VExpress_EMM): + def setupBootLoader(self, mem_bus, cur_sys, loc): + self.nvmem = SimpleMemory(range = AddrRange(0, size = '64MB')) + self.nvmem.port = mem_bus.master + cur_sys.boot_loader = loc('boot_emm.arm64') + cur_sys.atags_addr = 0x8000000 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0x80000000 + + diff --git a/src/dev/arm/SConscript b/src/dev/arm/SConscript index 68779ec64..419e2f471 100644 --- a/src/dev/arm/SConscript +++ b/src/dev/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ if env['TARGET_ISA'] == 'arm': Source('amba_device.cc') Source('amba_fake.cc') Source('base_gic.cc') + Source('generic_timer.cc') Source('gic_pl390.cc') Source('pl011.cc') Source('pl111.cc') @@ -57,6 +58,7 @@ if env['TARGET_ISA'] == 'arm': Source('realview.cc') Source('rtc_pl031.cc') Source('timer_cpulocal.cc') + Source('vgic.cc') DebugFlag('AMBA') DebugFlag('HDLcd') @@ -64,3 +66,4 @@ if env['TARGET_ISA'] == 'arm': DebugFlag('Pl050') DebugFlag('GIC') DebugFlag('RVCTRL') + DebugFlag('VGIC') diff --git a/src/dev/arm/generic_timer.cc b/src/dev/arm/generic_timer.cc new file mode 100644 index 000000000..555c1050f --- /dev/null +++ b/src/dev/arm/generic_timer.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +#include "arch/arm/system.hh" +#include "debug/Checkpoint.hh" +#include "debug/Timer.hh" +#include "dev/arm/base_gic.hh" +#include "dev/arm/generic_timer.hh" + +void +GenericTimer::SystemCounter::setFreq(uint32_t freq) +{ + if (_freq != 0) { + // Altering the frequency after boot shouldn't be done in practice. 
+ warn_once("The frequency of the system counter has already been set"); + } + _freq = freq; + _period = (1.0 / freq) * SimClock::Frequency; + _resetTick = curTick(); +} + +void +GenericTimer::SystemCounter::serialize(std::ostream &os) +{ + SERIALIZE_SCALAR(_freq); + SERIALIZE_SCALAR(_period); + SERIALIZE_SCALAR(_resetTick); +} + +void +GenericTimer::SystemCounter::unserialize(Checkpoint *cp, + const std::string &section) +{ + UNSERIALIZE_SCALAR(_freq); + UNSERIALIZE_SCALAR(_period); + UNSERIALIZE_SCALAR(_resetTick); +} + +void +GenericTimer::ArchTimer::counterLimitReached() +{ + _control.istatus = 1; + + if (!_control.enable) + return; + + // DPRINTF(Timer, "Counter limit reached\n"); + + if (!_control.imask) { + // DPRINTF(Timer, "Causing interrupt\n"); + _parent->_gic->sendPPInt(_intNum, _cpuNum); + } +} + +void +GenericTimer::ArchTimer::setCompareValue(uint64_t val) +{ + _counterLimit = val; + if (_counterLimitReachedEvent.scheduled()) + _parent->deschedule(_counterLimitReachedEvent); + if (counterValue() >= _counterLimit) { + counterLimitReached(); + } else { + _control.istatus = 0; + _parent->schedule(_counterLimitReachedEvent, + curTick() + (_counterLimit - counterValue()) * _counter->period()); + } +} + +void +GenericTimer::ArchTimer::setTimerValue(uint32_t val) +{ + setCompareValue(counterValue() + sext<32>(val)); +} + +void +GenericTimer::ArchTimer::setControl(uint32_t val) +{ + ArchTimerCtrl new_ctl = val; + if ((new_ctl.enable && !new_ctl.imask) && + !(_control.enable && !_control.imask)) { + // Re-evalute the timer condition + if (_counterLimit >= counterValue()) { + _control.istatus = 1; + + DPRINTF(Timer, "Causing interrupt in control\n"); + //_parent->_gic->sendPPInt(_intNum, _cpuNum); + } + } + _control.enable = new_ctl.enable; + _control.imask = new_ctl.imask; +} + +void +GenericTimer::ArchTimer::serialize(std::ostream &os) +{ + SERIALIZE_SCALAR(_cpuNum); + SERIALIZE_SCALAR(_intNum); + uint32_t control_serial = _control; +
SERIALIZE_SCALAR(control_serial); + SERIALIZE_SCALAR(_counterLimit); + bool event_scheduled = _counterLimitReachedEvent.scheduled(); + SERIALIZE_SCALAR(event_scheduled); + Tick event_time; + if (event_scheduled) { + event_time = _counterLimitReachedEvent.when(); + SERIALIZE_SCALAR(event_time); + } +} + +void +GenericTimer::ArchTimer::unserialize(Checkpoint *cp, const std::string &section) +{ + UNSERIALIZE_SCALAR(_cpuNum); + UNSERIALIZE_SCALAR(_intNum); + uint32_t control_serial; + UNSERIALIZE_SCALAR(control_serial); + _control = control_serial; + bool event_scheduled; + UNSERIALIZE_SCALAR(event_scheduled); + Tick event_time; + if (event_scheduled) { + UNSERIALIZE_SCALAR(event_time); + _parent->schedule(_counterLimitReachedEvent, event_time); + } +} + +GenericTimer::GenericTimer(Params *p) + : SimObject(p), _gic(p->gic) +{ + for (int i = 0; i < CPU_MAX; ++i) { + std::stringstream oss; + oss << name() << ".arch_timer" << i; + _archTimers[i]._name = oss.str(); + _archTimers[i]._parent = this; + _archTimers[i]._counter = &_systemCounter; + _archTimers[i]._cpuNum = i; + _archTimers[i]._intNum = p->int_num; + } + + ((ArmSystem *) p->system)->setGenericTimer(this); +} + +void +GenericTimer::serialize(std::ostream &os) +{ + nameOut(os, csprintf("%s.sys_counter", name())); + _systemCounter.serialize(os); + for (int i = 0; i < CPU_MAX; ++i) { + nameOut(os, csprintf("%s.arch_timer%d", name(), i)); + _archTimers[i].serialize(os); + } +} + +void +GenericTimer::unserialize(Checkpoint *cp, const std::string &section) +{ + _systemCounter.unserialize(cp, csprintf("%s.sys_counter", section)); + for (int i = 0; i < CPU_MAX; ++i) { + _archTimers[i].unserialize(cp, csprintf("%s.arch_timer%d", section, i)); + } +} + +GenericTimer * +GenericTimerParams::create() +{ + return new GenericTimer(this); +} diff --git a/src/dev/arm/generic_timer.hh b/src/dev/arm/generic_timer.hh new file mode 100644 index 000000000..bc43f8b3b --- /dev/null +++ b/src/dev/arm/generic_timer.hh @@ -0,0 +1,199 @@ +/* + *
Copyright (c) 2013 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +#ifndef __DEV_ARM_GENERIC_TIMER_HH__ +#define __DEV_ARM_GENERIC_TIMER_HH__ + +#include "base/bitunion.hh" +#include "params/GenericTimer.hh" +#include "sim/core.hh" +#include "sim/sim_object.hh" + +/// @file +/// This module implements the global system counter and the local per-CPU +/// architected timers as specified by the ARM Generic Timer extension (ARM +/// ARM, Issue C, Chapter 17). + +class Checkpoint; +class BaseGic; + +/// Wrapper around the actual counters and timers of the Generic Timer +/// extension. +class GenericTimer : public SimObject +{ + public: + + /// Global system counter. It is shared by the architected timers. + /// @todo: implement memory-mapped controls + class SystemCounter + { + protected: + /// Counter frequency (as specified by CNTFRQ). + uint64_t _freq; + /// Cached copy of the counter period (inverse of the frequency). + Tick _period; + /// Tick when the counter was reset. + Tick _resetTick; + + public: + /// Ctor. + SystemCounter() + : _freq(0), _period(0), _resetTick(0) + { + setFreq(0x01800000); + } + + /// Returns the current value of the physical counter. + uint64_t value() const + { + if (_freq == 0) + return 0; // Counter is still off. + return (curTick() - _resetTick) / _period; + } + + /// Returns the counter frequency. + uint64_t freq() const { return _freq; } + /// Sets the counter frequency. + /// @param freq frequency in Hz. 
+ void setFreq(uint32_t freq); + + /// Returns the counter period. + Tick period() const { return _period; } + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string &section); + }; + + /// Per-CPU architected timer. + class ArchTimer + { + protected: + /// Control register. + BitUnion32(ArchTimerCtrl) + Bitfield<0> enable; + Bitfield<1> imask; + Bitfield<2> istatus; + EndBitUnion(ArchTimerCtrl) + + /// Name of this timer. + std::string _name; + /// Pointer to parent class. + GenericTimer *_parent; + /// Pointer to the global system counter. + SystemCounter *_counter; + /// ID of the CPU this timer is attached to. + int _cpuNum; + /// ID of the interrupt to be triggered. + int _intNum; + /// Cached value of the control register ({CNTP/CNTHP/CNTV}_CTL). + ArchTimerCtrl _control; + /// Programmed limit value for the upcounter ({CNTP/CNTHP/CNTV}_CVAL). + uint64_t _counterLimit; + + /// Called when the upcounter reaches the programmed value. + void counterLimitReached(); + EventWrapper<ArchTimer, &ArchTimer::counterLimitReached> + _counterLimitReachedEvent; + + /// Returns the value of the counter which this timer relies on. + uint64_t counterValue() const { return _counter->value(); } + + public: + /// Ctor. + ArchTimer() + : _control(0), _counterLimit(0), _counterLimitReachedEvent(this) + {} + + /// Returns the timer name. + std::string name() const { return _name; } + + /// Returns the CompareValue view of the timer. + uint64_t compareValue() const { return _counterLimit; } + /// Sets the CompareValue view of the timer. + void setCompareValue(uint64_t val); + + /// Returns the TimerValue view of the timer. + uint32_t timerValue() const { return _counterLimit - counterValue(); } + /// Sets the TimerValue view of the timer. + void setTimerValue(uint32_t val); + + /// Sets the control register.
+ uint32_t control() const { return _control; } + void setControl(uint32_t val); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); + + friend class GenericTimer; + }; + + protected: + + static const int CPU_MAX = 8; + + /// Pointer to the GIC, needed to trigger timer interrupts. + BaseGic *_gic; + /// System counter. + SystemCounter _systemCounter; + /// Per-CPU architected timers. + // @todo: this would become a 2-dim. array with Security and Virt. + ArchTimer _archTimers[CPU_MAX]; + + public: + typedef GenericTimerParams Params; + const Params * + params() const + { + return dynamic_cast<const Params *>(_params); + } + + /// Ctor. + GenericTimer(Params *p); + + /// Returns a pointer to the system counter. + SystemCounter *getSystemCounter() { return &_systemCounter; } + + /// Returns a pointer to the architected timer for cpu_id. + ArchTimer *getArchTimer(int cpu_id) { return &_archTimers[cpu_id]; } + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); +}; + +#endif // __DEV_ARM_GENERIC_TIMER_HH__ diff --git a/src/dev/arm/gic_pl390.cc b/src/dev/arm/gic_pl390.cc index d2a660e88..7fc65b2b7 100644 --- a/src/dev/arm/gic_pl390.cc +++ b/src/dev/arm/gic_pl390.cc @@ -56,7 +56,8 @@ Pl390::Pl390(const Params *p) : BaseGic(p), distAddr(p->dist_addr), cpuAddr(p->cpu_addr), distPioDelay(p->dist_pio_delay), cpuPioDelay(p->cpu_pio_delay), intLatency(p->int_latency), - enabled(false), itLines(p->it_lines), msixRegAddr(p->msix_addr), + enabled(false), itLines(p->it_lines), irqEnable(false), + msixRegAddr(p->msix_addr), msixReg(0x0) { itLinesLog2 = ceilLog2(itLines); diff --git a/src/dev/arm/vgic.cc b/src/dev/arm/vgic.cc new file mode 100644 index 000000000..2faf2030e --- /dev/null +++ b/src/dev/arm/vgic.cc @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and
shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Matt Evans + */ + +#include "base/trace.hh" +#include "debug/Checkpoint.hh" +#include "debug/VGIC.hh" +#include "dev/arm/base_gic.hh" +#include "dev/arm/vgic.hh" +#include "dev/terminal.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" + +VGic::VGic(const Params *p) + : PioDevice(p), platform(p->platform), gic(p->gic), vcpuAddr(p->vcpu_addr), + hvAddr(p->hv_addr), pioDelay(p->pio_delay), + maintInt(p->ppint) +{ + for (int x = 0; x < VGIC_CPU_MAX; x++) { + postVIntEvent[x] = new PostVIntEvent(x, p->platform); + maintIntPosted[x] = false; + vIntPosted[x] = false; + } + for (int c = 0; c < VGIC_CPU_MAX; c++) { + memset(&vcpuData[c], 0, sizeof(struct vcpuIntData)); + } + assert(sys->numRunningContexts() <= VGIC_CPU_MAX); +} + +Tick +VGic::read(PacketPtr pkt) +{ + Addr addr = pkt->getAddr(); + + if (addr >= vcpuAddr && addr < vcpuAddr + GICV_SIZE) + return readVCpu(pkt); + else if (addr >= hvAddr && addr < hvAddr + GICH_REG_SIZE) + return readCtrl(pkt); + else + panic("Read to unknown address %#x\n", pkt->getAddr()); +} + +Tick +VGic::write(PacketPtr pkt) +{ + Addr addr = pkt->getAddr(); + + if (addr >= vcpuAddr && addr < vcpuAddr + GICV_SIZE) + return writeVCpu(pkt); + else if (addr >= hvAddr && addr < hvAddr + GICH_REG_SIZE) + return writeCtrl(pkt); + else + panic("Write to unknown address %#x\n", pkt->getAddr()); +} + +Tick +VGic::readVCpu(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - vcpuAddr; + 
pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + DPRINTF(VGIC, "VGIC VCPU read register %#x\n", daddr); + + switch (daddr) { + case GICV_CTLR: + pkt->set<uint32_t>(vid->vctrl); + break; + case GICV_IAR: { + int i = findHighestPendingLR(vid); + if (i < 0 || !vid->vctrl.En) { + pkt->set<uint32_t>(1023); // "No int" marker + } else { + ListReg *lr = &vid->LR[i]; + + pkt->set<uint32_t>(lr->VirtualID | + (((int)lr->CpuID) << 10)); + // We don't support auto-EOI of HW interrupts via real GIC! + // Fortunately, KVM doesn't use this. How about Xen...? Ulp! + if (lr->HW) + panic("VGIC does not support 'HW' List Register feature (LR %#x)!\n", + *lr); + lr->State = LR_ACTIVE; + DPRINTF(VGIC, "Consumed interrupt %d (cpu%d) from LR%d (EOI%d)\n", + lr->VirtualID, lr->CpuID, i, lr->EOI); + } + } break; + default: + panic("VGIC VCPU read of bad address %#x\n", daddr); + } + + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::readCtrl(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - hvAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + + DPRINTF(VGIC, "VGIC HVCtrl read register %#x\n", daddr); + + /* Munge the address: 0-0xfff is the usual space banked by requester CPU. + * Anything > that is 0x200-sized slices of 'per CPU' regs. 
+ */ + if (daddr & ~0x1ff) { + ctx_id = (daddr >> 9); + if (ctx_id > 8) + panic("VGIC: Weird unbanked hv ctrl address %#x!\n", daddr); + daddr &= ~0x1ff; + } + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + switch (daddr) { + case GICH_HCR: + pkt->set<uint32_t>(vid->hcr); + break; + + case GICH_VTR: + pkt->set<uint32_t>(0x44000000 | (NUM_LR - 1)); + break; + + case GICH_VMCR: + pkt->set<uint32_t>( + ((uint32_t)vid->VMPriMask << 27) | + ((uint32_t)vid->VMBP << 21) | + ((uint32_t)vid->VMABP << 18) | + ((uint32_t)vid->VEM << 9) | + ((uint32_t)vid->VMCBPR << 4) | + ((uint32_t)vid->VMFiqEn << 3) | + ((uint32_t)vid->VMAckCtl << 2) | + ((uint32_t)vid->VMGrp1En << 1) | + ((uint32_t)vid->VMGrp0En << 0) + ); + break; + + case GICH_MISR: + pkt->set<uint32_t>(getMISR(vid)); + break; + + case GICH_EISR0: + pkt->set<uint32_t>(vid->eisr & 0xffffffff); + break; + + case GICH_EISR1: + pkt->set<uint32_t>(vid->eisr >> 32); + break; + + case GICH_ELSR0: { + uint32_t bm = 0; + for (int i = 0; i < ((NUM_LR < 32) ? 
NUM_LR : 32); i++) { + if (!vid->LR[i].State) + bm |= 1 << i; + } + pkt->set<uint32_t>(bm); + } break; + + case GICH_ELSR1: { + uint32_t bm = 0; + for (int i = 32; i < NUM_LR; i++) { + if (!vid->LR[i].State) + bm |= 1 << (i-32); + } + pkt->set<uint32_t>(bm); + } break; + + case GICH_APR0: + warn_once("VGIC GICH_APR read!\n"); + pkt->set<uint32_t>(0); + break; + + case GICH_LR0: + case GICH_LR1: + case GICH_LR2: + case GICH_LR3: + pkt->set<uint32_t>(vid->LR[(daddr - GICH_LR0) >> 2]); + break; + + default: + panic("VGIC HVCtrl read of bad address %#x\n", daddr); + } + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::writeVCpu(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - vcpuAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + DPRINTF(VGIC, "VGIC VCPU write register %#x <= %#x\n", daddr, pkt->get<uint32_t>()); + + switch (daddr) { + case GICV_CTLR: + vid->vctrl = pkt->get<uint32_t>(); + break; + case GICV_PMR: + vid->VMPriMask = pkt->get<uint32_t>(); + break; + case GICV_EOIR: { + // We don't handle the split EOI-then-DIR mode. Linux (guest) + // doesn't need it though. + assert(!vid->vctrl.EOImode); + uint32_t w = pkt->get<uint32_t>(); + unsigned int virq = w & 0x3ff; + unsigned int vcpu = (w >> 10) & 7; + int i = findLRForVIRQ(vid, virq, vcpu); + if (i < 0) { + DPRINTF(VGIC, "EOIR: No LR for irq %d(cpu%d)\n", virq, vcpu); + } else { + DPRINTF(VGIC, "EOIR: Found LR%d for irq %d(cpu%d)\n", i, virq, vcpu); + ListReg *lr = &vid->LR[i]; + lr->State = 0; + // Maintenance interrupt -- via eisr -- is flagged when + // LRs have EOI=1 and State=INVALID! 
+ } + } break; + default: + panic("VGIC VCPU write %#x to unk address %#x\n", pkt->get<uint32_t>(), daddr); + } + + // This updates the EISRs and flags IRQs: + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::writeCtrl(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - hvAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + + DPRINTF(VGIC, "VGIC HVCtrl write register %#x <= %#x\n", daddr, pkt->get<uint32_t>()); + + /* Munge the address: 0-0xfff is the usual space banked by requester CPU. + * Anything > that is 0x200-sized slices of 'per CPU' regs. + */ + if (daddr & ~0x1ff) { + ctx_id = (daddr >> 9); + if (ctx_id > 8) + panic("VGIC: Weird unbanked hv ctrl address %#x!\n", daddr); + daddr &= ~0x1ff; + } + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + switch (daddr) { + case GICH_HCR: + vid->hcr = pkt->get<uint32_t>(); + // update int state + break; + + case GICH_VMCR: { + uint32_t d = pkt->get<uint32_t>(); + vid->VMPriMask = d >> 27; + vid->VMBP = (d >> 21) & 7; + vid->VMABP = (d >> 18) & 7; + vid->VEM = (d >> 9) & 1; + vid->VMCBPR = (d >> 4) & 1; + vid->VMFiqEn = (d >> 3) & 1; + vid->VMAckCtl = (d >> 2) & 1; + vid->VMGrp1En = (d >> 1) & 1; + vid->VMGrp0En = d & 1; + } break; + + case GICH_APR0: + warn_once("VGIC GICH_APR0 written, ignored\n"); + break; + + case GICH_LR0: + case GICH_LR1: + case GICH_LR2: + case GICH_LR3: + vid->LR[(daddr - GICH_LR0) >> 2] = pkt->get<uint32_t>(); + // update int state + break; + + default: + panic("VGIC HVCtrl write to bad address %#x\n", daddr); + } + + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + + +uint32_t +VGic::getMISR(struct vcpuIntData *vid) +{ + return (!!vid->hcr.VGrp1DIE && !vid->VMGrp1En ? 0x80 : 0) | + (!!vid->hcr.VGrp1EIE && vid->VMGrp1En ? 0x40 : 0) | + (!!vid->hcr.VGrp0DIE && !vid->VMGrp0En ? 0x20 : 0) | + (!!vid->hcr.VGrp0EIE && vid->VMGrp0En ? 
0x10 : 0) | + (!!vid->hcr.NPIE && !lrPending(vid) ? 0x08 : 0) | + (!!vid->hcr.LRENPIE && vid->hcr.EOICount ? 0x04 : 0) | + (!!vid->hcr.UIE && lrValid(vid) <= 1 ? 0x02 : 0) | + (vid->eisr ? 0x01 : 0); +} + +void +VGic::postVInt(uint32_t cpu, Tick when) +{ + DPRINTF(VGIC, "Posting VIRQ to %d\n", cpu); + if (!(postVIntEvent[cpu]->scheduled())) + eventq->schedule(postVIntEvent[cpu], when); +} + +void +VGic::unPostVInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Unposting VIRQ to %d\n", cpu); + platform->intrctrl->clear(cpu, ArmISA::INT_VIRT_IRQ, 0); +} + +void +VGic::postMaintInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Posting maintenance PPI to GIC/cpu%d\n", cpu); + // Linux DT configures this as Level. + gic->sendPPInt(maintInt, cpu); +} + +void +VGic::unPostMaintInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Unposting maintenance PPI to GIC/cpu%d\n", cpu); + gic->clearPPInt(maintInt, cpu); +} + +/* Update state (in general); something concerned with ctx_id has changed. + * This may raise a maintenance interrupt. + */ +void +VGic::updateIntState(int ctx_id) +{ + // @todo This should update APRs! + + // Build EISR contents: + // (Cached so that regs can read them without messing about again) + struct vcpuIntData *tvid = &vcpuData[ctx_id]; + + tvid->eisr = 0; + for (int i = 0; i < NUM_LR; i++) { + if (!tvid->LR[i].State && tvid->LR[i].EOI) { + tvid->eisr |= 1 << i; + } + } + + assert(sys->numRunningContexts() <= VGIC_CPU_MAX); + for (int i = 0; i < sys->numRunningContexts(); i++) { + struct vcpuIntData *vid = &vcpuData[i]; + // Are any LRs active that weren't before? + if (!vIntPosted[i]) { + if (lrPending(vid) && vid->vctrl.En) { + vIntPosted[i] = true; + postVInt(i, curTick() + 1); + } + } else if (!lrPending(vid)) { + vIntPosted[i] = false; + unPostVInt(i); + } + + // Any maintenance ints to send? 
+ if (!maintIntPosted[i]) { + if (vid->hcr.En && getMISR(vid)) { + maintIntPosted[i] = true; + postMaintInt(i); + } + } else { + if (!vid->hcr.En || !getMISR(vid)) { + unPostMaintInt(i); + maintIntPosted[i] = false; + } + } + } +} + +AddrRangeList +VGic::getAddrRanges() const +{ + AddrRangeList ranges; + ranges.push_back(RangeSize(hvAddr, GICH_REG_SIZE)); + ranges.push_back(RangeSize(vcpuAddr, GICV_SIZE)); + return ranges; +} + +void +VGic::serialize(std::ostream &os) +{ + Tick interrupt_time[VGIC_CPU_MAX]; + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + interrupt_time[cpu] = 0; + if (postVIntEvent[cpu]->scheduled()) { + interrupt_time[cpu] = postVIntEvent[cpu]->when(); + } + } + + DPRINTF(Checkpoint, "Serializing VGIC\n"); + + SERIALIZE_ARRAY(interrupt_time, VGIC_CPU_MAX); + SERIALIZE_ARRAY(maintIntPosted, VGIC_CPU_MAX); + SERIALIZE_ARRAY(vIntPosted, VGIC_CPU_MAX); + SERIALIZE_SCALAR(vcpuAddr); + SERIALIZE_SCALAR(hvAddr); + SERIALIZE_SCALAR(pioDelay); + SERIALIZE_SCALAR(maintInt); + + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + nameOut(os, csprintf("%s.vcpuData%d", name(), cpu)); + uint32_t vctrl_val = vcpuData[cpu].vctrl; + SERIALIZE_SCALAR(vctrl_val); + uint32_t hcr_val = vcpuData[cpu].hcr; + SERIALIZE_SCALAR(hcr_val); + uint64_t eisr_val = vcpuData[cpu].eisr; + SERIALIZE_SCALAR(eisr_val); + uint8_t VMGrp0En_val = vcpuData[cpu].VMGrp0En; + SERIALIZE_SCALAR(VMGrp0En_val); + uint8_t VMGrp1En_val = vcpuData[cpu].VMGrp1En; + SERIALIZE_SCALAR(VMGrp1En_val); + uint8_t VMAckCtl_val = vcpuData[cpu].VMAckCtl; + SERIALIZE_SCALAR(VMAckCtl_val); + uint8_t VMFiqEn_val = vcpuData[cpu].VMFiqEn; + SERIALIZE_SCALAR(VMFiqEn_val); + uint8_t VMCBPR_val = vcpuData[cpu].VMCBPR; + SERIALIZE_SCALAR(VMCBPR_val); + uint8_t VEM_val = vcpuData[cpu].VEM; + SERIALIZE_SCALAR(VEM_val); + uint8_t VMABP_val = vcpuData[cpu].VMABP; + SERIALIZE_SCALAR(VMABP_val); + uint8_t VMBP_val = vcpuData[cpu].VMBP; + SERIALIZE_SCALAR(VMBP_val); + uint8_t VMPriMask_val = 
vcpuData[cpu].VMPriMask; + SERIALIZE_SCALAR(VMPriMask_val); + + for (int i = 0; i < NUM_LR; i++) { + uint32_t lr = vcpuData[cpu].LR[i]; + nameOut(os, csprintf("%s.vcpuData%d.LR%d", name(), cpu, i)); + SERIALIZE_SCALAR(lr); + } + } +} + +void VGic::unserialize(Checkpoint *cp, const std::string &section) +{ + DPRINTF(Checkpoint, "Unserializing Arm GIC\n"); + + Tick interrupt_time[VGIC_CPU_MAX]; + UNSERIALIZE_ARRAY(interrupt_time, VGIC_CPU_MAX); + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + if (interrupt_time[cpu]) + schedule(postVIntEvent[cpu], interrupt_time[cpu]); + + uint32_t tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "vctrl_val", tmp); + vcpuData[cpu].vctrl = tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "hcr_val", tmp); + vcpuData[cpu].hcr = tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "eisr_val", vcpuData[cpu].eisr); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMGrp0En_val", vcpuData[cpu].VMGrp0En); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMGrp1En_val", vcpuData[cpu].VMGrp1En); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMAckCtl_val", vcpuData[cpu].VMAckCtl); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMFiqEn_val", vcpuData[cpu].VMFiqEn); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMCBPR_val", vcpuData[cpu].VMCBPR); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VEM_val", vcpuData[cpu].VEM); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMABP_val", vcpuData[cpu].VMABP); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMPriMask_val", vcpuData[cpu].VMPriMask); + + for (int i = 0; i < NUM_LR; i++) { + paramIn(cp, csprintf("%s.vcpuData%d.LR%d", section, cpu, i), + "lr", tmp); + vcpuData[cpu].LR[i] = tmp; + } + } + UNSERIALIZE_ARRAY(maintIntPosted, VGIC_CPU_MAX); + UNSERIALIZE_ARRAY(vIntPosted, VGIC_CPU_MAX); + UNSERIALIZE_SCALAR(vcpuAddr); + UNSERIALIZE_SCALAR(hvAddr); + UNSERIALIZE_SCALAR(pioDelay);
+ UNSERIALIZE_SCALAR(maintInt); +} + +VGic * +VGicParams::create() +{ + return new VGic(this); +} diff --git a/src/dev/arm/vgic.hh b/src/dev/arm/vgic.hh new file mode 100644 index 000000000..e1c4960e9 --- /dev/null +++ b/src/dev/arm/vgic.hh @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Matt Evans + */ + + +/** @file + * Implementation of a GIC-400 List Register-based VGIC interface. + * The VGIC is, in this implementation, completely separate from the GIC itself. + * Only a VIRQ line to the CPU and a PPI line to the GIC (for a HV maintenance IRQ) + * are required. + * + * The mode in which the List Registers may flag (via LR.HW) that a hardware EOI + * is to be performed is NOT supported. (This requires tighter integration with + * the GIC.)
+ */ + +#ifndef __DEV_ARM_VGIC_H__ +#define __DEV_ARM_VGIC_H__ + +#include "base/addr_range.hh" +#include "base/bitunion.hh" +#include "cpu/intr_control.hh" +#include "dev/io_device.hh" +#include "dev/platform.hh" +#include "params/VGic.hh" + +class VGic : public PioDevice +{ + private: + static const int VGIC_CPU_MAX = 256; + static const int NUM_LR = 4; + + static const int GICH_SIZE = 0x200; + static const int GICH_REG_SIZE = 0x2000; + + static const int GICH_HCR = 0x000; + static const int GICH_VTR = 0x004; + static const int GICH_VMCR = 0x008; + static const int GICH_MISR = 0x010; + static const int GICH_EISR0 = 0x020; + static const int GICH_EISR1 = 0x024; + static const int GICH_ELSR0 = 0x030; + static const int GICH_ELSR1 = 0x034; + static const int GICH_APR0 = 0x0f0; + static const int GICH_LR0 = 0x100; + static const int GICH_LR1 = 0x104; + static const int GICH_LR2 = 0x108; + static const int GICH_LR3 = 0x10c; + + static const int GICV_SIZE = 0x2000; + static const int GICV_CTLR = 0x000; + static const int GICV_PMR = 0x004; + static const int GICV_BPR = 0x008; + static const int GICV_IAR = 0x00c; + static const int GICV_EOIR = 0x010; + static const int GICV_RPR = 0x014; + static const int GICV_HPPIR = 0x018; + static const int GICV_ABPR = 0x01c; + static const int GICV_AIAR = 0x020; + static const int GICV_AEOIR = 0x024; + static const int GICV_AHPPIR = 0x028; + static const int GICV_APR0 = 0x0d0; + static const int GICV_IIDR = 0x0fc; + static const int GICV_DIR = 0x1000; + + static const uint32_t LR_PENDING = 1; + static const uint32_t LR_ACTIVE = 2; + + /** Event definition to post interrupt to CPU after a delay + */ + class PostVIntEvent : public Event + { + private: + uint32_t cpu; + Platform *platform; + public: + PostVIntEvent( uint32_t c, Platform* p) + : cpu(c), platform(p) + { } + void process() { platform->intrctrl->post(cpu, ArmISA::INT_VIRT_IRQ, 0);} + const char *description() const { return "Post VInterrupt to CPU"; } + }; + + PostVIntEvent 
*postVIntEvent[VGIC_CPU_MAX]; + bool maintIntPosted[VGIC_CPU_MAX]; + bool vIntPosted[VGIC_CPU_MAX]; + + Platform *platform; + BaseGic *gic; + + Addr vcpuAddr; + Addr hvAddr; + Tick pioDelay; + int maintInt; + + BitUnion32(ListReg) + Bitfield<31> HW; + Bitfield<30> Grp1; + Bitfield<29,28> State; + Bitfield<27,23> Priority; + Bitfield<19> EOI; + Bitfield<12,10> CpuID; + Bitfield<9,0> VirtualID; + EndBitUnion(ListReg) + + BitUnion32(HCR) + Bitfield<31,27> EOICount; + Bitfield<7> VGrp1DIE; + Bitfield<6> VGrp1EIE; + Bitfield<5> VGrp0DIE; + Bitfield<4> VGrp0EIE; + Bitfield<3> NPIE; + Bitfield<2> LRENPIE; + Bitfield<1> UIE; + Bitfield<0> En; + EndBitUnion(HCR) + + BitUnion32(VCTLR) + Bitfield<9> EOImode; + Bitfield<4> CPBR; + Bitfield<3> FIQEn; + Bitfield<2> AckCtl; + Bitfield<1> EnGrp1; + Bitfield<0> En; // This gets written to enable, not group 1. + EndBitUnion(VCTLR) + + /* State per CPU. EVERYTHING should be in this struct and simply replicated + * N times. + */ + struct vcpuIntData { + ListReg LR[NUM_LR]; + VCTLR vctrl; + + HCR hcr; + uint64_t eisr; + + /* Host info, guest info (should be 100% accessible via GICH_* regs!) 
*/ + uint8_t VMGrp0En; + uint8_t VMGrp1En; + uint8_t VMAckCtl; + uint8_t VMFiqEn; + uint8_t VMCBPR; + uint8_t VEM; + uint8_t VMABP; + uint8_t VMBP; + uint8_t VMPriMask; + }; + + struct vcpuIntData vcpuData[VGIC_CPU_MAX]; + + public: + typedef VGicParams Params; + const Params * + params() const + { + return dynamic_cast<const Params *>(_params); + } + VGic(const Params *p); + + virtual AddrRangeList getAddrRanges() const; + + virtual Tick read(PacketPtr pkt); + virtual Tick write(PacketPtr pkt); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); + + private: + Tick readVCpu(PacketPtr pkt); + Tick readCtrl(PacketPtr pkt); + + Tick writeVCpu(PacketPtr pkt); + Tick writeCtrl(PacketPtr pkt); + + void updateIntState(int ctx_id); + uint32_t getMISR(struct vcpuIntData *vid); + void postVInt(uint32_t cpu, Tick when); + void unPostVInt(uint32_t cpu); + void postMaintInt(uint32_t cpu); + void unPostMaintInt(uint32_t cpu); + + unsigned int lrPending(struct vcpuIntData *vid) + { + unsigned int pend = 0; + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State & LR_PENDING) + pend++; + } + return pend; + } + unsigned int lrValid(struct vcpuIntData *vid) + { + unsigned int valid = 0; + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State) + valid++; + } + return valid; + } + + /** Returns LR index or -1 if none pending */ + int findHighestPendingLR(struct vcpuIntData *vid) + { + unsigned int prio = 0xff; + int p = -1; + for (int i = 0; i < NUM_LR; i++) { + if ((vid->LR[i].State & LR_PENDING) && (vid->LR[i].Priority < prio)) { + p = i; + prio = vid->LR[i].Priority; + } + } + return p; + } + + int findLRForVIRQ(struct vcpuIntData *vid, int virq, int vcpu) + { + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State && + vid->LR[i].VirtualID == virq && + vid->LR[i].CpuID == vcpu) + return i; + } + return -1; + } +}; + +#endif diff --git a/src/sim/System.py b/src/sim/System.py index 302e2fa60..95162be89 100644
--- a/src/sim/System.py +++ b/src/sim/System.py @@ -86,4 +86,5 @@ class System(MemObject): readfile = Param.String("", "file to read startup script from") symbolfile = Param.String("", "file to get the symbols from") load_addr_mask = Param.UInt64(0xffffffffff, - "Address to mask loading binaries with"); + "Address to mask loading binaries with") + load_offset = Param.UInt64(0, "Address to offset loading binaries with") diff --git a/src/sim/process.cc b/src/sim/process.cc index 1654ea5c5..ccaac2096 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -695,15 +695,22 @@ LiveProcess::create(LiveProcessParams * params) fatal("Unknown/unsupported operating system."); } #elif THE_ISA == ARM_ISA - if (objFile->getArch() != ObjectFile::Arm && - objFile->getArch() != ObjectFile::Thumb) + ObjectFile::Arch arch = objFile->getArch(); + if (arch != ObjectFile::Arm && arch != ObjectFile::Thumb && + arch != ObjectFile::Arm64) fatal("Object file architecture does not match compiled ISA (ARM)."); switch (objFile->getOpSys()) { case ObjectFile::UnknownOpSys: warn("Unknown operating system; assuming Linux."); // fall through case ObjectFile::Linux: - process = new ArmLinuxProcess(params, objFile, objFile->getArch()); + if (arch == ObjectFile::Arm64) { + process = new ArmLinuxProcess64(params, objFile, + objFile->getArch()); + } else { + process = new ArmLinuxProcess32(params, objFile, + objFile->getArch()); + } break; case ObjectFile::LinuxArmOABI: fatal("M5 does not support ARM OABI binaries. Please recompile with an" diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 6d4207090..bbf759cf6 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -58,7 +58,7 @@ class EventQueue; * SimObject shouldn't cause the version number to increase, only changes to * existing objects such as serializing/unserializing more state, changing sizes * of serialized arrays, etc. 
*/ -static const uint64_t gem5CheckpointVersion = 0x0000000000000008; +static const uint64_t gem5CheckpointVersion = 0x0000000000000009; template <class T> void paramOut(std::ostream &os, const std::string &name, const T &param); diff --git a/src/sim/system.cc b/src/sim/system.cc index 7de483216..e2bf0a3d2 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -79,6 +79,7 @@ System::System(Params *p) init_param(p->init_param), physProxy(_systemPort, p->cache_line_size), loadAddrMask(p->load_addr_mask), + loadAddrOffset(p->load_offset), nextPID(0), physmem(name() + ".physmem", p->memories), memoryMode(p->mem_mode), @@ -274,14 +275,15 @@ System::initState() */ if (params()->kernel != "") { // Validate kernel mapping before loading binary - if (!(isMemAddr(kernelStart & loadAddrMask) && - isMemAddr(kernelEnd & loadAddrMask))) { + if (!(isMemAddr((kernelStart & loadAddrMask) + loadAddrOffset) && + isMemAddr((kernelEnd & loadAddrMask) + loadAddrOffset))) { fatal("Kernel is mapped to invalid location (not memory). " - "kernelStart 0x(%x) - kernelEnd 0x(%x)\n", kernelStart, - kernelEnd); + "kernelStart 0x(%x) - kernelEnd 0x(%x) %#x:%#x\n", kernelStart, + kernelEnd, (kernelStart & loadAddrMask) + loadAddrOffset, + (kernelEnd & loadAddrMask) + loadAddrOffset); } // Load program sections into memory - kernel->loadSections(physProxy, loadAddrMask); + kernel->loadSections(physProxy, loadAddrMask, loadAddrOffset); DPRINTF(Loader, "Kernel start = %#x\n", kernelStart); DPRINTF(Loader, "Kernel end = %#x\n", kernelEnd); diff --git a/src/sim/system.hh b/src/sim/system.hh index c8945c8c1..ecef2c4f2 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -237,6 +237,13 @@ class System : public MemObject */ Addr loadAddrMask; + /** Offset that should be used for binary/symbol loading. + * This further allows more flexibility than the loadAddrMask allows alone in + * loading kernels and similar. The loadAddrOffset is applied after the + * loadAddrMask.
+ */ + Addr loadAddrOffset; + protected: uint64_t nextPID; @@ -321,7 +328,7 @@ class System : public MemObject * Called by pseudo_inst to track the number of work items completed by * this system. */ - uint64_t + uint64_t incWorkItemsEnd() { return ++workItemsEnd; @@ -332,13 +339,13 @@ class System : public MemObject * Returns the total number of cpus that have executed work item begin or * ends. */ - int + int markWorkItem(int index) { int count = 0; assert(index < activeCpus.size()); activeCpus[index] = true; - for (std::vector<bool>::iterator i = activeCpus.begin(); + for (std::vector<bool>::iterator i = activeCpus.begin(); i < activeCpus.end(); i++) { if (*i) count++; } |