7 files changed, 252 insertions, 9 deletions
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
index 5771242e6..5884d68c2 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
@@ -58,8 +58,9 @@ def macroop POP_R {
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
 
-    ld reg, ss, [1, t0, rsp]
+    ld t1, ss, [1, t0, rsp]
     addi rsp, rsp, dsz
+    mov reg, reg, t1
 };
 
 def macroop POP_M {
@@ -67,7 +68,7 @@ def macroop POP_M {
     .adjust_env oszIn64Override
 
     ld t1, ss, [1, t0, rsp]
-    # Check stack address
+    cda seg, sib, disp
     addi rsp, rsp, dsz
     st t1, seg, sib, disp
 };
@@ -78,7 +79,7 @@ def macroop POP_P {
 
     rdip t7
     ld t1, ss, [1, t0, rsp]
-    # Check stack address
+    cda seg, riprel, disp
     addi rsp, rsp, dsz
     st t1, seg, riprel, disp
 };
@@ -87,8 +88,6 @@ def macroop PUSH_R {
     # Make the default data size of pops 64 bits in 64 bit mode
     .adjust_env oszIn64Override
 
-    # This needs to work slightly differently from the other versions of push
-    # because the -original- version of the stack pointer is what gets pushed
     stupd reg, ss, [1, t0, rsp], "-env.dataSize"
 };
 
@@ -118,7 +117,10 @@ def macroop PUSH_P {
 };
 
 def macroop PUSHA {
-    # Check all the stack addresses.
+    # Check all the stack addresses. We'll assume that if the beginning and
+    # end are ok, then the stuff in the middle should be as well.
+    cda ss, [1, t0, rsp], "-env.dataSize"
+    cda ss, [1, t0, rsp], "-8 * env.dataSize"
     stupd rax, ss, [1, t0, rsp], "-env.dataSize"
     stupd rcx, ss, [1, t0, rsp], "-env.dataSize"
     stupd rdx, ss, [1, t0, rsp], "-env.dataSize"
@@ -130,14 +132,17 @@ def macroop PUSHA {
 };
 
 def macroop POPA {
-    # Check all the stack addresses.
-    ld rdi, ss, [1, t0, rsp], "0 * env.dataSize"
+    # Probe both ends of the popped area into temporaries before writing any
+    # real register; the slots in between are assumed ok if the ends are.
+    ld t1, ss, [1, t0, rsp], "0 * env.dataSize"
+    ld t2, ss, [1, t0, rsp], "7 * env.dataSize"
+    mov rdi, rdi, t1
     ld rsi, ss, [1, t0, rsp], "1 * env.dataSize"
     ld rbp, ss, [1, t0, rsp], "2 * env.dataSize"
     ld rbx, ss, [1, t0, rsp], "4 * env.dataSize"
     ld rdx, ss, [1, t0, rsp], "5 * env.dataSize"
     ld rcx, ss, [1, t0, rsp], "6 * env.dataSize"
-    ld rax, ss, [1, t0, rsp], "7 * env.dataSize"
+    mov rax, rax, t2
     addi rsp, rsp, "8 * env.dataSize"
 };
 
diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa
index c9ace4790..61adde8d1 100644
--- a/src/arch/x86/isa/microops/ldstop.isa
+++ b/src/arch/x86/isa/microops/ldstop.isa
@@ -445,5 +445,27 @@ let {{
             self.mnemonic = "lea"
 
     microopClasses["lea"] = LeaOp
+
+    # cda's code just asks xc to translate EA as a data write (paddr unused).
+    iop = InstObjParams("cda", "Cda", 'X86ISA::LdStOp',
+            {"code": '''
+            Addr paddr;
+            fault = xc->translateDataWriteAddr(EA, paddr,
+                dataSize, (1 << segment));
+            ''',
+            "ea_code": calculateEA})
+    header_output += MicroLeaDeclare.subst(iop)
+    decoder_output += MicroLdStOpConstructor.subst(iop)
+    exec_output += MicroLeaExecute.subst(iop)
+
+    class CdaOp(LdStOp):
+        def __init__(self, segment, addr, disp = 0,
+                dataSize="env.dataSize", addressSize="env.addressSize"):
+            super(CdaOp, self).__init__("NUM_INTREGS", segment,
+                    addr, disp, dataSize, addressSize)
+            self.className = "Cda"
+            self.mnemonic = "cda"
+
+    microopClasses["cda"] = CdaOp
 }};
 
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 0f2a90bf6..74b250207 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -102,6 +102,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     template <class T>
     Fault read(Addr addr, T &data, unsigned flags);
 
+    Fault translateDataReadAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
+
     /**
      * Does a write to a given address.
      * @param data The data to be written.
@@ -114,6 +117,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
     Fault write(T data, Addr addr, unsigned flags,
                         uint64_t *res);
 
+    Fault translateDataWriteAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
+
     void prefetch(Addr addr, unsigned flags);
     void writeHint(Addr addr, int size, unsigned flags);
     Fault copySrcTranslate(Addr src);
@@ -838,6 +844,29 @@ class BaseDynInst : public FastAlloc, public RefCounted
 };
 
 template<class Impl>
+Fault
+BaseDynInst<Impl>::translateDataReadAddr(Addr vaddr, Addr &paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data read: a temporary request is built,
+    // translated and freed; paddr is only written on success.
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+    // NOTE(review): req is a local freed below, yet reqMade is set -- confirm.
+    reqMade = true;
+    Request *req = new Request();
+    req->setVirt(asid, vaddr, size, flags, PC);
+    req->setThreadContext(thread->readCpuId(), threadNumber);
+
+    fault = cpu->translateDataReadReq(req, thread);
+    if (fault == NoFault)
+        paddr = req->getPaddr();
+    delete req;
+    return fault;
+}
+
+template<class Impl>
 template<class T>
 inline Fault
 BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
@@ -889,6 +918,29 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
 }
 
 template<class Impl>
+Fault
+BaseDynInst<Impl>::translateDataWriteAddr(Addr vaddr, Addr &paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data write: a temporary request is built,
+    // translated and freed; paddr is only written on success.
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+    // NOTE(review): req is a local freed below, yet reqMade is set -- confirm.
+    reqMade = true;
+    Request *req = new Request();
+    req->setVirt(asid, vaddr, size, flags, PC);
+    req->setThreadContext(thread->readCpuId(), threadNumber);
+
+    fault = cpu->translateDataWriteReq(req, thread);
+    if (fault == NoFault)
+        paddr = req->getPaddr();
+    delete req;
+    return fault;
+}
+
+template<class Impl>
 template<class T>
 inline Fault
 BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 9f574e8be..4e52f9b33 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -366,6 +366,61 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
     }
 }
 
+Fault
+AtomicSimpleCPU::translateDataReadAddr(Addr vaddr, Addr & paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data read; no packet is sent from here.
+    // Use the CPU's statically allocated read request object.
+    Request *req = &data_read_req;
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+
+    //The block size of our peer.
+    int blockSize = dcachePort.peerBlockSize();
+    //The size of the piece of the read translated on the current pass.
+    int dataSize = size;
+
+    bool firstTimeThrough = true;
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(vaddr + dataSize - 1, blockSize);
+
+    if(secondAddr > vaddr)
+        dataSize = secondAddr - vaddr;
+
+    while(1) {
+        req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
+
+        // translate to physical address
+        Fault fault = thread->translateDataReadReq(req);
+
+        //If there's a fault, return it
+        if (fault != NoFault)
+            return fault;
+
+        if (firstTimeThrough) {
+            paddr = req->getPaddr();
+            firstTimeThrough = false;
+        }
+
+        //If we don't need to access a second cache line, stop now.
+        if (secondAddr <= vaddr)
+            return fault;
+
+        /*
+         * Set up for accessing the second cache line.
+         */
+
+        //Adjust the size to get the remaining bytes.
+        dataSize = vaddr + size - secondAddr;
+        //And access the right address.
+        vaddr = secondAddr;
+    }
+}
+
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 
 template
@@ -524,6 +579,64 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
     }
 }
 
+Fault
+AtomicSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data write; no packet is sent from here.
+    // Use the CPU's statically allocated write request object.
+    Request *req = &data_write_req;
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+
+    //The block size of our peer.
+    int blockSize = dcachePort.peerBlockSize();
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(vaddr + size - 1, blockSize);
+
+    //The size of the piece of the write translated on the current pass.
+    int dataSize = size;
+
+    bool firstTimeThrough = true;
+
+    if(secondAddr > vaddr)
+        dataSize = secondAddr - vaddr;
+
+    dcache_latency = 0;
+
+    while(1) {
+        //Set up and translate a request covering just this piece.
+        req->setVirt(0, vaddr, dataSize, flags, thread->readPC());
+        Fault fault = thread->translateDataWriteReq(req);
+
+        //If there's a fault, return it. paddr is only written once the
+        //first piece has translated successfully.
+        if (fault != NoFault)
+            return fault;
+
+        if (firstTimeThrough) {
+            paddr = req->getPaddr();
+            firstTimeThrough = false;
+        }
+
+        //If we don't need to access a second cache line, stop now.
+        if (secondAddr <= vaddr)
+            return fault;
+
+        /*
+         * Set up for accessing the second cache line.
+         */
+
+        //Adjust the size to get the remaining bytes.
+        dataSize = vaddr + size - secondAddr;
+        //And access the right address.
+        vaddr = secondAddr;
+    }
+}
+
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 96429e5b1..f68f41a90 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -151,6 +151,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU
 
     template <class T>
     Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+    Fault translateDataReadAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
+    Fault translateDataWriteAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
 };
 
 #endif // __CPU_SIMPLE_ATOMIC_HH__
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index f1e51ac70..998d0b017 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -293,6 +293,26 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
     return fault;
 }
 
+Fault
+TimingSimpleCPU::translateDataReadAddr(Addr vaddr, Addr &paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data read: no packet is sent, and paddr
+    // is only written when the translation succeeds.
+    Request *req =
+        new Request(0, vaddr, size, flags, thread->readPC(), cpuId, 0);
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+
+    // Use the read translation path to match this function's purpose.
+    Fault fault = thread->translateDataReadReq(req);
+    if (fault == NoFault)
+        paddr = req->getPaddr();
+    delete req;
+    return fault;
+}
+
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 
 template
@@ -411,6 +431,26 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
     return fault;
 }
 
+Fault
+TimingSimpleCPU::translateDataWriteAddr(Addr vaddr, Addr &paddr,
+        int size, unsigned flags)
+{
+    // Translation-only check for a data write: no packet is sent, and paddr
+    // is only written when the translation succeeds.
+    Request *req =
+        new Request(0, vaddr, size, flags, thread->readPC(), cpuId, 0);
+    if (traceData) {
+        traceData->setAddr(vaddr);
+    }
+
+    Fault fault = thread->translateDataWriteReq(req);
+    if (fault == NoFault)
+        paddr = req->getPaddr();
+
+    delete req;
+    return fault;
+}
+
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 template
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 668b6ddaf..d7554f6de 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -189,9 +189,15 @@ class TimingSimpleCPU : public BaseSimpleCPU
     template <class T>
     Fault read(Addr addr, T &data, unsigned flags);
 
+    Fault translateDataReadAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
+
     template <class T>
     Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
 
+    Fault translateDataWriteAddr(Addr vaddr, Addr &paddr,
+            int size, unsigned flags);
+
     void fetch();
     void completeIfetch(PacketPtr );
     void completeDataAccess(PacketPtr );