From 0cede15d6c5213d83e4cd143014321b2ee7ec5eb Mon Sep 17 00:00:00 2001
From: Nilay Vaish
Date: Sat, 12 Feb 2011 11:41:20 -0600
Subject: Ruby: Reorder Cache Lookup in Protocol Files

The patch changes the order in which the L1 dcache and icache are looked
up when a request comes in. Earlier, if a request came in for an
instruction fetch, the dcache was looked up before the icache, to
correctly handle self-modifying code. But in the common case, the dcache
is going to report a miss and the subsequent icache lookup is going to
report a hit. Given the invariant that caches under the same controller
keep track of disjoint sets of cache blocks, we can move the icache
lookup before the dcache lookup. In case of a hit in the icache, our
invariant tells us that the dcache would have reported a miss. In case of
a miss in the icache, we know that the icache would have missed even if
the dcache had been looked up first. Effectively, we are doing the same
thing as before, though in the common case we expect a reduction in the
number of lookups. This was empirically confirmed for MOESI hammer: the
ratio of lookups to access requests is now about 1.1 to 1.
---
 src/mem/protocol/MESI_CMP_directory-L1cache.sm  | 35 ++++++++-------
 src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 32 +++++++-------
 src/mem/protocol/MOESI_CMP_token-L1cache.sm     | 35 +++++++--------
 src/mem/protocol/MOESI_hammer-cache.sm          | 57 +++++++++++++------------
 4 files changed, 82 insertions(+), 77 deletions(-)

diff --git a/src/mem/protocol/MESI_CMP_directory-L1cache.sm b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
index 8744a7122..4442cee41 100644
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm
@@ -287,20 +287,21 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
       if (in_msg.Type == CacheRequestType:IFETCH) {
         // ** INSTRUCTION ACCESS ***
 
-        // Check to see if it is in the OTHER L1
-        Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-        if (is_valid(L1Dcache_entry)) {
-          // The block is in the wrong L1, put the request on the queue to the shared L2
-          trigger(Event:L1_Replacement, in_msg.LineAddress,
-                  L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
-        }
-
         Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
         if (is_valid(L1Icache_entry)) {
           // The tag matches for the L1, so the L1 asks the L2 for it.
          trigger(mandatory_request_type_to_event(in_msg.Type),
                  in_msg.LineAddress, L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
        } else {
+
+          // Check to see if it is in the OTHER L1
+          Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
+          if (is_valid(L1Dcache_entry)) {
+            // The block is in the wrong L1, put the request on the queue to the shared L2
+            trigger(Event:L1_Replacement, in_msg.LineAddress,
+                    L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
+          }
+
           if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
             // L1 doesn't have the line, but we have space for it in the L1 so let's see if the L2 has it
             trigger(mandatory_request_type_to_event(in_msg.Type),
                     in_msg.LineAddress,
@@ -313,21 +314,23 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
         }
       }
     } else {
-      // *** DATA ACCESS ***
-      // Check to see if it is in the OTHER L1
-      Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
-      if (is_valid(L1Icache_entry)) {
-        // The block is in the wrong L1, put the request on the queue to the shared L2
-        trigger(Event:L1_Replacement, in_msg.LineAddress,
-                L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
-      }
+      // *** DATA ACCESS ***
       Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
       if (is_valid(L1Dcache_entry)) {
         // The tag matches for the L1, so the L1 asks the L2 for it
         trigger(mandatory_request_type_to_event(in_msg.Type),
                 in_msg.LineAddress, L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
       } else {
+
+        // Check to see if it is in the OTHER L1
+        Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
+        if (is_valid(L1Icache_entry)) {
+          // The block is in the wrong L1, put the request on the queue to the shared L2
+          trigger(Event:L1_Replacement, in_msg.LineAddress,
+                  L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
+        }
+
         if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
           // L1 doesn't have the line, but we have space for it in the L1 so let's see if the L2 has it
           trigger(mandatory_request_type_to_event(in_msg.Type),
                   in_msg.LineAddress,
diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
index 4082f23c9..7f0ab62a8 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -338,14 +338,6 @@ machine(L1Cache, "Directory protocol")
       if (in_msg.Type == CacheRequestType:IFETCH) {
         // ** INSTRUCTION ACCESS ***
 
-        Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
-        // Check to see if it is in the OTHER L1
-        if (is_valid(L1Dcache_entry)) {
-          // The block is in the wrong L1, put the request on the queue to the shared L2
-          trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry,
-                  TBEs[in_msg.LineAddress]);
-        }
-
        Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
        if (is_valid(L1Icache_entry)) {
          // The tag matches for the L1, so the L1 asks the L2 for it.
@@ -353,6 +345,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Icache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry, + TBEs[in_msg.LineAddress]); + } if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), @@ -369,14 +369,6 @@ machine(L1Cache, "Directory protocol") } else { // *** DATA ACCESS *** - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - // Check to see if it is in the OTHER L1 - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, put the request on the queue to the shared L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, TBEs[in_msg.LineAddress]); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 ask the L2 for it @@ -384,6 +376,14 @@ machine(L1Cache, "Directory protocol") in_msg.LineAddress, L1Dcache_entry, TBEs[in_msg.LineAddress]); } else { + + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + // Check to see if it is in the OTHER L1 + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, put the request on the queue to the shared L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, TBEs[in_msg.LineAddress]); + } if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm index 00e9404c9..226f21374 100644 --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -647,20 +647,21 @@ machine(L1Cache, "Token protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Dcache_entry, tbe); - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Dcache_entry, tbe); + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), @@ -676,21 +677,21 @@ machine(L1Cache, "Token protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - trigger(Event:L1_Replacement, in_msg.LineAddress, - L1Icache_entry, tbe); - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + trigger(Event:L1_Replacement, in_msg.LineAddress, + L1Icache_entry, tbe); + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 trigger(mandatory_request_type_to_event(in_msg.Type), diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm index 78bc9e3e7..ab2a6acf4 100644 --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -377,26 +377,26 @@ machine(L1Cache, "AMD Hammer-like protocol") if (in_msg.Type == CacheRequestType:IFETCH) { // ** INSTRUCTION ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); - if (is_valid(L1Dcache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); if (is_valid(L1Icache_entry)) { // The tag matches for the L1, so the L1 fetches the line. 
We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Icache_entry, tbe); } else { + // Check to see if it is in the OTHER L1 + Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); + if (is_valid(L1Dcache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Dcache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 @@ -430,26 +430,27 @@ machine(L1Cache, "AMD Hammer-like protocol") } else { // *** DATA ACCESS *** - // Check to see if it is in the OTHER L1 - Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); - if (is_valid(L1Icache_entry)) { - // The block is in the wrong L1, try to write it to the L2 - if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { - trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); - } else { - trigger(Event:L2_Replacement, - L2cacheMemory.cacheProbe(in_msg.LineAddress), - getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), - TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); - } - } - Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress); if (is_valid(L1Dcache_entry)) { // The tag matches for the L1, so the L1 fetches the line. We know it can't be in the L2 due to exclusion trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress, L1Dcache_entry, tbe); } else { + + // Check to see if it is in the OTHER L1 + Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress); + if (is_valid(L1Icache_entry)) { + // The block is in the wrong L1, try to write it to the L2 + if (L2cacheMemory.cacheAvail(in_msg.LineAddress)) { + trigger(Event:L1_to_L2, in_msg.LineAddress, L1Icache_entry, tbe); + } else { + trigger(Event:L2_Replacement, + L2cacheMemory.cacheProbe(in_msg.LineAddress), + getL2CacheEntry(L2cacheMemory.cacheProbe(in_msg.LineAddress)), + TBEs[L2cacheMemory.cacheProbe(in_msg.LineAddress)]); + } + } + if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) { // L1 does't have the line, but we have space for it in the L1 Entry L2cache_entry := getL2CacheEntry(in_msg.LineAddress); -- cgit v1.2.3 From 7c763b34c9fc69a4e13a8f74df0f981fdf71f221 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Sun, 13 Feb 2011 16:51:15 -0500 Subject: O3: Fix GCC 4.2.4 complaint --- src/cpu/o3/inst_queue_impl.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d6da4b818..aa21a0edc 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -749,7 +749,7 @@ InstructionQueue::scheduleReadyInsts() DynInstPtr deferred_mem_inst; int total_deferred_mem_issued = 0; while (total_deferred_mem_issued < totalWidth && - (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) { issueToExecuteQueue->access(0)->size++; instsToExecute.push_back(deferred_mem_inst); total_deferred_mem_issued++; -- cgit v1.2.3 From f036fd97481081afce7f757231ab69ba212f7f2a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:40:07 -0800 Subject: O3: Fetch from the microcode ROM when 
needed. --- src/cpu/o3/fetch_impl.hh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 2e4e4819e..d2cde496e 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -1070,6 +1070,8 @@ DefaultFetch::fetch(bool &status_change) Addr pcOffset = fetchOffset[tid]; Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + bool inRom = isRomMicroPC(thisPC.microPC()); + // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. @@ -1083,7 +1085,7 @@ DefaultFetch::fetch(bool &status_change) Addr block_PC = icacheBlockAlignPC(fetchAddr); // Unless buffer already got the block, fetch it from icache. - if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) { + if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) && !inRom) { DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " "instruction, starting at PC %s.\n", tid, thisPC); @@ -1155,7 +1157,7 @@ DefaultFetch::fetch(bool &status_change) !predictedBranch) { // If we need to process more memory, do it now. - if (!curMacroop && !predecoder.extMachInstReady()) { + if (!(curMacroop || inRom) && !predecoder.extMachInstReady()) { if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { // Walk past any annulled delay slot instructions. Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; @@ -1181,7 +1183,7 @@ DefaultFetch::fetch(bool &status_change) // Extract as many instructions and/or microops as we can from // the memory we've processed so far. do { - if (!curMacroop) { + if (!(curMacroop || inRom)) { if (predecoder.extMachInstReady()) { ExtMachInst extMachInst; @@ -1202,8 +1204,13 @@ DefaultFetch::fetch(bool &status_change) break; } } - if (curMacroop) { - staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + if (curMacroop || inRom) { + if (inRom) { + staticInst = cpu->microcodeRom.fetchMicroop( + thisPC.microPC(), curMacroop); + } else { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + } if (staticInst->isLastMicroop()) { curMacroop = NULL; pcOffset = 0; -- cgit v1.2.3 From 5ee94f4a3dadda357c6d28b60c19b3638146f9a7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:41:10 -0800 Subject: X86: Only reset npc to reflect instruction length once. When redirecting fetch to handle branches, the npc of the current pc state needs to be left alone. This change makes the pc state record whether or not the npc already reflects a real value by making it keep track of the current instruction size, or if no size has been set. 
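For illustration, a minimal sketch (not part of the patch) of the idea behind
this change, using a simplified standalone type in place of gem5's real
PCState:

    #include <cstdint>

    struct SimplePCState {
        uint64_t pc = 0, npc = 0;
        uint8_t size = 0;   // 0 means "npc does not yet reflect a real size"

        // Called once the predecoder knows the instruction's length; later
        // calls leave an npc that was already set (e.g. by a redirect) alone.
        void setSize(uint8_t instSize) {
            if (size == 0) {
                size = instSize;
                npc = pc + instSize;
            }
        }

        // Moving to the next instruction invalidates the recorded size.
        void advance() { pc = npc; size = 0; }
    };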
--- src/arch/x86/predecoder.hh | 6 +++++- src/arch/x86/types.hh | 50 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/arch/x86/predecoder.hh b/src/arch/x86/predecoder.hh index c06ec18bc..5c67e28e1 100644 --- a/src/arch/x86/predecoder.hh +++ b/src/arch/x86/predecoder.hh @@ -225,7 +225,11 @@ namespace X86ISA { assert(emiIsReady); emiIsReady = false; - nextPC.npc(nextPC.pc() + getInstSize()); + if (!nextPC.size()) { + Addr size = getInstSize(); + nextPC.size(size); + nextPC.npc(nextPC.pc() + size); + } return emi; } }; diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index 5a208446a..d78af1b81 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -222,7 +222,55 @@ namespace X86ISA return true; } - typedef GenericISA::UPCState PCState; + class PCState : public GenericISA::UPCState + { + protected: + typedef GenericISA::UPCState Base; + + uint8_t _size; + + public: + void + set(Addr val) + { + Base::set(val); + _size = 0; + } + + PCState() {} + PCState(Addr val) { set(val); } + + uint8_t size() const { return _size; } + void size(uint8_t newSize) { _size = newSize; } + + void + advance() + { + Base::advance(); + _size = 0; + } + + void + uEnd() + { + Base::uEnd(); + _size = 0; + } + + void + serialize(std::ostream &os) + { + Base::serialize(os); + SERIALIZE_SCALAR(_size); + } + + void + unserialize(Checkpoint *cp, const std::string §ion) + { + Base::unserialize(cp, section); + UNSERIALIZE_SCALAR(_size); + } + }; struct CoreSpecific { int core_type; -- cgit v1.2.3 From 1aa9698fa00e8ffce9b8d3c90b3bd76c3c9e950e Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:42:05 -0800 Subject: X86: Define fault objects to carry debug messages. These faults can panic/warn/warn_once, etc., instead of instructions doing that themselves directly. That way, instructions can be speculatively executed, and only if they're actually going to commit will their fault be invoked and the panic, etc., happen. --- src/arch/generic/debugfaults.hh | 111 ++++++++++++++++++++++++++++++++++++ src/arch/x86/isa/includes.isa | 1 + src/arch/x86/isa/microops/debug.isa | 67 ++++++++++------------ 3 files changed, 142 insertions(+), 37 deletions(-) create mode 100644 src/arch/generic/debugfaults.hh (limited to 'src') diff --git a/src/arch/generic/debugfaults.hh b/src/arch/generic/debugfaults.hh new file mode 100644 index 000000000..acffadc34 --- /dev/null +++ b/src/arch/generic/debugfaults.hh @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2010 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_GENERIC_DEBUGFAULTS_HH__ +#define __ARCH_GENERIC_DEBUGFAULTS_HH__ + +#include "base/misc.hh" +#include "sim/faults.hh" + +#include + +namespace GenericISA +{ +class M5DebugFault : public FaultBase +{ + public: + enum DebugFunc + { + PanicFunc, + FatalFunc, + WarnFunc, + WarnOnceFunc + }; + + protected: + std::string message; + DebugFunc func; + + public: + M5DebugFault(DebugFunc _func, std::string _message) : + message(_message), func(_func) + {} + + FaultName + name() const + { + switch (func) { + case PanicFunc: + return "panic fault"; + case FatalFunc: + return "fatal fault"; + case WarnFunc: + return "warn fault"; + case WarnOnceFunc: + return "warn_once fault"; + default: + panic("unrecognized debug function number\n"); + } + } + + void + invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr) + { + switch (func) { + case PanicFunc: + panic(message); + break; + case FatalFunc: + fatal(message); + break; + case WarnFunc: + warn(message); + break; + case WarnOnceFunc: + warn_once(message); + break; + default: + panic("unrecognized debug function number\n"); + } + } +}; +} // namespace GenericISA + +#endif // __ARCH_GENERIC_DEBUGFAULTS_HH__ diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 58b1fbc62..32708043e 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -53,6 +53,7 @@ output header {{ #include #include +#include "arch/generic/debugfaults.hh" #include "arch/x86/emulenv.hh" #include "arch/x86/insts/macroop.hh" #include "arch/x86/insts/microfpop.hh" diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa index 4b2ecdd5a..c2735565d 100644 --- a/src/arch/x86/isa/microops/debug.isa +++ b/src/arch/x86/isa/microops/debug.isa @@ -45,16 +45,29 @@ output header {{ class MicroDebugBase : public X86ISA::X86MicroopBase { protected: + typedef GenericISA::M5DebugFault::DebugFunc DebugFunc; + DebugFunc func; std::string message; uint8_t cc; public: - 
MicroDebugBase(ExtMachInst _machInst, const char * mnem, + MicroDebugBase(ExtMachInst machInst, const char * mnem, const char * instMnem, uint64_t setFlags, - std::string _message, uint8_t _cc); + DebugFunc _func, std::string _message, uint8_t _cc) : + X86MicroopBase(machInst, mnem, instMnem, setFlags, No_OpClass), + func(_func), message(_message), cc(_cc) + {} - std::string generateDisassembly(Addr pc, - const SymbolTable *symtab) const; + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + std::stringstream response; + + printMnemonic(response, instMnem, mnemonic); + response << "\"" << message << "\""; + + return response.str(); + } }; }}; @@ -70,53 +83,31 @@ def template MicroDebugDeclare {{ }}; def template MicroDebugExecute {{ - Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Fault + %(class_name)s::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { %(op_decl)s %(op_rd)s if (%(cond_test)s) { - %(func)s("%s\n", message); + return new GenericISA::M5DebugFault(func, message); + } else { + return NoFault; } - return NoFault; } }}; -output decoder {{ - inline MicroDebugBase::MicroDebugBase( - ExtMachInst machInst, const char * mnem, const char * instMnem, - uint64_t setFlags, std::string _message, uint8_t _cc) : - X86MicroopBase(machInst, mnem, instMnem, - setFlags, No_OpClass), - message(_message), cc(_cc) - { - } -}}; - def template MicroDebugConstructor {{ inline %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, uint64_t setFlags, std::string _message, uint8_t _cc) : %(base_class)s(machInst, "%(func)s", instMnem, - setFlags, _message, _cc) + setFlags, %(func_num)s, _message, _cc) { %(constructor)s; } }}; -output decoder {{ - std::string MicroDebugBase::generateDisassembly(Addr pc, - const SymbolTable *symtab) const - { - std::stringstream response; - - printMnemonic(response, instMnem, mnemonic); - response << "\"" << message << "\""; - - return response.str(); - } -}}; - let {{ class MicroDebug(X86Microop): def __init__(self, message, flags=None): @@ -142,13 +133,14 @@ let {{ header_output = "" decoder_output = "" - def buildDebugMicro(func): + def buildDebugMicro(func, func_num): global exec_output, header_output, decoder_output iop = InstObjParams(func, "Micro%sFlags" % func.capitalize(), "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "checkCondition(ccFlagBits, cc)"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -158,6 +150,7 @@ let {{ "MicroDebugBase", {"code": "", "func": func, + "func_num": "GenericISA::M5DebugFault::%s" % func_num, "cond_test": "true"}) exec_output += MicroDebugExecute.subst(iop) header_output += MicroDebugDeclare.subst(iop) @@ -169,8 +162,8 @@ let {{ global microopClasses microopClasses[func] = MicroDebugChild - buildDebugMicro("panic") - buildDebugMicro("fatal") - buildDebugMicro("warn") - buildDebugMicro("warn_once") + buildDebugMicro("panic", "PanicFunc") + buildDebugMicro("fatal", "FatalFunc") + buildDebugMicro("warn", "WarnFunc") + buildDebugMicro("warn_once", "WarnOnceFunc") }}; -- cgit v1.2.3 From 399e095510ff6bc469c45b1e5afa96567d757004 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:42:56 -0800 Subject: X86: On a bad microopc, return a microop that returns a fault that panics. This way a bad micropc will have to get all the way to commit before killing the simulation. This accounts for misspeculated branches. 
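For illustration, a minimal sketch (not part of the patch) of the fetch-side
behavior this enables, with hypothetical simplified types standing in for
gem5's StaticInstPtr machinery:

    #include <cstddef>
    #include <vector>

    struct Microop { /* would carry the deferred faulting behavior */ };

    // Shared sentinel microop whose fault only panics if it reaches commit.
    Microop *const badMicroop = new Microop;

    Microop *fetchMicroop(const std::vector<Microop *> &ops, size_t microPC)
    {
        // Previously an out-of-range micropc hit an assert and killed the
        // simulation immediately, even on a misspeculated path.
        if (microPC >= ops.size())
            return badMicroop;  // defer the error; squashed if misspeculated
        return ops[microPC];
    }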
--- src/arch/x86/SConscript | 1 + src/arch/x86/insts/badmicroop.cc | 55 ++++++++++++++++++++++++++++++++++++++++ src/arch/x86/insts/badmicroop.hh | 52 +++++++++++++++++++++++++++++++++++++ src/arch/x86/insts/macroop.hh | 7 +++-- src/arch/x86/microcode_rom.hh | 7 +++-- 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 src/arch/x86/insts/badmicroop.cc create mode 100644 src/arch/x86/insts/badmicroop.hh (limited to 'src') diff --git a/src/arch/x86/SConscript b/src/arch/x86/SConscript index 27de9da11..9cb774647 100644 --- a/src/arch/x86/SConscript +++ b/src/arch/x86/SConscript @@ -46,6 +46,7 @@ if env['TARGET_ISA'] == 'x86': Source('cpuid.cc') Source('emulenv.cc') Source('faults.cc') + Source('insts/badmicroop.cc') Source('insts/microfpop.cc') Source('insts/microldstop.cc') Source('insts/micromediaop.cc') diff --git a/src/arch/x86/insts/badmicroop.cc b/src/arch/x86/insts/badmicroop.cc new file mode 100644 index 000000000..ef493f250 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/x86/insts/badmicroop.hh" +#include "arch/x86/isa_traits.hh" +#include "arch/x86/decoder.hh" + +namespace X86ISA +{ + +// This microop needs to be allocated on the heap even though it could +// theoretically be statically allocated. 
The reference counted pointer would +// try to delete the static memory when it was destructed. +const StaticInstPtr badMicroop = + new X86ISAInst::MicroPanic(NoopMachInst, "BAD", + StaticInst::IsMicroop | StaticInst::IsLastMicroop, + "Invalid microop!", 0); + +} // namespace X86ISA diff --git a/src/arch/x86/insts/badmicroop.hh b/src/arch/x86/insts/badmicroop.hh new file mode 100644 index 000000000..57fe242c4 --- /dev/null +++ b/src/arch/x86/insts/badmicroop.hh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011 Advanced Micro Devices + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#ifndef __ARCH_X86_INSTS_BADMICROOP_HH__ +#define __ARCH_X86_INSTS_BADMICROOP_HH__ + +class StaticInstPtr; + +namespace X86ISA +{ + +extern const StaticInstPtr badMicroop; + +} // namespace X86ISA + +#endif //__ARCH_X86_INSTS_BADMICROOP_HH__ diff --git a/src/arch/x86/insts/macroop.hh b/src/arch/x86/insts/macroop.hh index fcf051a37..4f4176b77 100644 --- a/src/arch/x86/insts/macroop.hh +++ b/src/arch/x86/insts/macroop.hh @@ -41,6 +41,7 @@ #define __ARCH_X86_INSTS_MACROOP_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "arch/x86/types.hh" #include "arch/x86/insts/static_inst.hh" @@ -76,8 +77,10 @@ class MacroopBase : public X86StaticInst StaticInstPtr fetchMicroop(MicroPC microPC) const { - assert(microPC < numMicroops); - return microops[microPC]; + if (microPC >= numMicroops) + return badMicroop; + else + return microops[microPC]; } std::string diff --git a/src/arch/x86/microcode_rom.hh b/src/arch/x86/microcode_rom.hh index f8ad410ce..84c503bb9 100644 --- a/src/arch/x86/microcode_rom.hh +++ b/src/arch/x86/microcode_rom.hh @@ -32,6 +32,7 @@ #define __ARCH_X86_MICROCODE_ROM_HH__ #include "arch/x86/emulenv.hh" +#include "arch/x86/insts/badmicroop.hh" #include "cpu/static_inst.hh" namespace X86ISAInst @@ -60,8 +61,10 @@ namespace X86ISAInst fetchMicroop(MicroPC microPC, StaticInstPtr curMacroop) { microPC = normalMicroPC(microPC); - assert(microPC < numMicroops); - return genFuncs[microPC](curMacroop); + if (microPC >= numMicroops) + return X86ISA::badMicroop; + else + return genFuncs[microPC](curMacroop); } }; } -- cgit v1.2.3 From 4e1adf85f77edf761466af3568576d3f9134a14c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:44:24 -0800 Subject: X86: Don't read in dest regs if all bits are replaced. In x86, 32 and 64 bit writes to registers in which registers appear to be 32 or 64 bits wide overwrite all bits of the destination register. This change removes false dependencies in these cases where the previous value of a register doesn't need to be read to write a new value. New versions of most microops are created that have a "Big" suffix which simply overwrite their destination, and the right version to use is selected during microop allocation based on the selected data size. This does not change the performance of the O3 CPU model significantly, I assume because there are other false dependencies from the condition code bits in the flags register. 
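For illustration, a minimal sketch (not part of the patch) of the difference
between the merging write and the full overwrite; merge() and mask() here are
simplified stand-ins for gem5's helpers:

    #include <cstdint>

    static uint64_t mask(int nbits)
    {
        return nbits >= 64 ? ~0ULL : (1ULL << nbits) - 1;
    }

    // 8- and 16-bit writes keep the untouched upper bits, so the old
    // destination value is a genuine input: a read-modify-write, and
    // therefore a real dependency.
    uint64_t merge(uint64_t dest, uint64_t val, int dataSize)
    {
        uint64_t m = mask(dataSize * 8);
        return (dest & ~m) | (val & m);
    }

    // 32- and 64-bit writes replace every bit of the register (32-bit
    // results zero-extend), so reading the old value first would only
    // create a false dependency; the "Big" microops skip the read.
    uint64_t overwrite(uint64_t val, int dataSize)
    {
        return val & mask(dataSize * 8);
    }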
--- src/arch/x86/isa/microops/ldstop.isa | 80 +++++++++-- src/arch/x86/isa/microops/limmop.isa | 27 ++-- src/arch/x86/isa/microops/regop.isa | 269 ++++++++++++++++++++++++----------- 3 files changed, 267 insertions(+), 109 deletions(-) (limited to 'src') diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 216a74c6c..cd649d644 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -301,6 +301,46 @@ let {{ "dataSize" : self.dataSize, "addressSize" : self.addressSize, "memFlags" : self.memFlags} return allocator + + class BigLdStOp(X86Microop): + def __init__(self, data, segment, addr, disp, + dataSize, addressSize, baseFlags, atCPL0, prefetch): + self.data = data + [self.scale, self.index, self.base] = addr + self.disp = disp + self.segment = segment + self.dataSize = dataSize + self.addressSize = addressSize + self.memFlags = baseFlags + if atCPL0: + self.memFlags += " | (CPL0FlagBit << FlagShift)" + if prefetch: + self.memFlags += " | Request::PREFETCH" + self.memFlags += " | (machInst.legacy.addr ? " + \ + "(AddrSizeFlagBit << FlagShift) : 0)" + + def getAllocator(self, microFlags): + allocString = ''' + (%(dataSize)s >= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(scale)s, %(index)s, + %(base)s, %(disp)s, %(segment)s, %(data)s, + %(dataSize)s, %(addressSize)s, %(memFlags)s)) + ''' + allocator = allocString % { + "class_name" : self.className, + "flags" : self.microFlagsText(microFlags), + "scale" : self.scale, "index" : self.index, + "base" : self.base, + "disp" : self.disp, + "segment" : self.segment, "data" : self.data, + "dataSize" : self.dataSize, "addressSize" : self.addressSize, + "memFlags" : self.memFlags} + return allocator }}; let {{ @@ -315,7 +355,8 @@ let {{ EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0); ''' - def defineMicroLoadOp(mnemonic, code, mem_flags="0"): + def defineMicroLoadOp(mnemonic, code, bigCode='', + mem_flags="0", big=True): global header_output global decoder_output global exec_output @@ -324,16 +365,22 @@ let {{ name = mnemonic.lower() # Build up the all register version of this micro op - iop = InstObjParams(name, Name, 'X86ISA::LdStOp', - {"code": code, - "ea_code": calculateEA}) - header_output += MicroLdStOpDeclare.subst(iop) - decoder_output += MicroLdStOpConstructor.subst(iop) - exec_output += MicroLoadExecute.subst(iop) - exec_output += MicroLoadInitiateAcc.subst(iop) - exec_output += MicroLoadCompleteAcc.subst(iop) - - class LoadOp(LdStOp): + iops = [InstObjParams(name, Name, 'X86ISA::LdStOp', + {"code": code, "ea_code": calculateEA})] + if big: + iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp', + {"code": bigCode, "ea_code": calculateEA})] + for iop in iops: + header_output += MicroLdStOpDeclare.subst(iop) + decoder_output += MicroLdStOpConstructor.subst(iop) + exec_output += MicroLoadExecute.subst(iop) + exec_output += MicroLoadInitiateAcc.subst(iop) + exec_output += MicroLoadCompleteAcc.subst(iop) + + base = LdStOp + if big: + base = BigLdStOp + class LoadOp(base): def __init__(self, data, segment, addr, disp = 0, dataSize="env.dataSize", addressSize="env.addressSize", @@ -346,12 +393,15 @@ let {{ microopClasses[name] = LoadOp - defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);') + 
defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);', + 'Data = Mem & mask(dataSize * 8);') defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift)') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift)') defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);', - '(StoreCheck << FlagShift) | Request::LOCKED') - defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;') + 'Data = Mem & mask(dataSize * 8);', + '(StoreCheck << FlagShift) | Request::LOCKED') + defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False) def defineMicroStoreOp(mnemonic, code, \ postCode="", completeCode="", mem_flags="0"): diff --git a/src/arch/x86/isa/microops/limmop.isa b/src/arch/x86/isa/microops/limmop.isa index 2871d5a89..ac78b090d 100644 --- a/src/arch/x86/isa/microops/limmop.isa +++ b/src/arch/x86/isa/microops/limmop.isa @@ -114,8 +114,16 @@ let {{ self.dataSize = dataSize def getAllocator(self, microFlags): - allocator = '''new %(class_name)s(machInst, macrocodeBlock, - %(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % { + allocString = ''' + (%(dataSize)s >= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(dest)s, %(imm)s, + %(dataSize)s)) + ''' + allocator = allocString % { "class_name" : self.className, "mnemonic" : self.mnemonic, "flags" : self.microFlagsText(microFlags), @@ -152,12 +160,15 @@ let {{ let {{ # Build up the all register version of this micro op - iop = InstObjParams("limm", "Limm", 'X86MicroopBase', - {"code" : "DestReg = merge(DestReg, imm, dataSize);"}) - header_output += MicroLimmOpDeclare.subst(iop) - decoder_output += MicroLimmOpConstructor.subst(iop) - decoder_output += MicroLimmOpDisassembly.subst(iop) - exec_output += MicroLimmOpExecute.subst(iop) + iops = [InstObjParams("limm", "Limm", 'X86MicroopBase', + {"code" : "DestReg = merge(DestReg, imm, dataSize);"}), + InstObjParams("limm", "LimmBig", 'X86MicroopBase', + {"code" : "DestReg = imm & mask(dataSize * 8);"})] + for iop in iops: + header_output += MicroLimmOpDeclare.subst(iop) + decoder_output += MicroLimmOpConstructor.subst(iop) + decoder_output += MicroLimmOpDisassembly.subst(iop) + exec_output += MicroLimmOpExecute.subst(iop) iop = InstObjParams("lfpimm", "Lfpimm", 'X86MicroopBase', {"code" : "FpDestReg.uqw = imm"}) diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index ccfcb3a69..158bfdd59 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -224,8 +224,8 @@ let {{ MicroRegOpExecute) class RegOpMeta(type): - def buildCppClasses(self, name, Name, suffix, \ - code, flag_code, cond_check, else_code, cond_control_flag_init): + def buildCppClasses(self, name, Name, suffix, code, big_code, \ + flag_code, cond_check, else_code, cond_control_flag_init): # Globals to stick the output in global header_output @@ -235,11 +235,13 @@ let {{ # Stick all the code together so it can be searched at once allCode = "|".join((code, flag_code, cond_check, else_code, cond_control_flag_init)) + allBigCode = "|".join((big_code, flag_code, cond_check, else_code, + cond_control_flag_init)) # If op2 is used anywhere, make register and immediate versions # of this code. 
matcher = re.compile("(?s?)op2(?P\\.\\w+)?") - match = matcher.search(allCode) + match = matcher.search(allCode + allBigCode) if match: typeQual = "" if match.group("typeQual"): @@ -247,6 +249,7 @@ let {{ src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual) self.buildCppClasses(name, Name, suffix, matcher.sub(src2_name, code), + matcher.sub(src2_name, big_code), matcher.sub(src2_name, flag_code), matcher.sub(src2_name, cond_check), matcher.sub(src2_name, else_code), @@ -254,6 +257,7 @@ let {{ imm_name = "%simm8" % match.group("prefix") self.buildCppClasses(name + "i", Name, suffix + "Imm", matcher.sub(imm_name, code), + matcher.sub(imm_name, big_code), matcher.sub(imm_name, flag_code), matcher.sub(imm_name, cond_check), matcher.sub(imm_name, else_code), @@ -264,27 +268,32 @@ let {{ # a version without it and fix up this version to use it. if flag_code != "" or cond_check != "true": self.buildCppClasses(name, Name, suffix, - code, "", "true", else_code, "") + code, big_code, "", "true", else_code, "") suffix = "Flags" + suffix # If psrc1 or psrc2 is used, we need to actually insert code to # compute it. - matcher = re.compile("(?= 4) ? + (StaticInstPtr)(new %(class_name)sBig(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) : + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s, %(src1)s, %(op2)s, + %(dest)s, %(dataSize)s, %(ext)s)) + ''' + allocator = allocString % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator + else: + className = self.className + if self.mnemonic == self.base_mnemonic + 'i': + className += "Imm" + allocator = '''new %(class_name)s(machInst, macrocodeBlock, + %(flags)s, %(src1)s, %(op2)s, %(dest)s, + %(dataSize)s, %(ext)s)''' % { + "class_name" : className, + "flags" : self.microFlagsText(microFlags), + "src1" : self.src1, "op2" : self.op2, + "dest" : self.dest, + "dataSize" : self.dataSize, + "ext" : self.ext} + return allocator class LogicRegOp(RegOp): abstract = True @@ -429,30 +472,43 @@ let {{ class Add(FlagRegOp): code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' + big_code = 'DestReg = (psrc1 + op2) & mask(dataSize * 8);' class Or(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' + big_code = 'DestReg = (psrc1 | op2) & mask(dataSize * 8);' class Adc(FlagRegOp): code = ''' CCFlagBits flags = ccFlagBits; DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); + ''' class Sbb(SubRegOp): code = ''' CCFlagBits flags = ccFlagBits; DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); ''' + big_code = ''' + CCFlagBits flags = ccFlagBits; + DestReg = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); + ''' class And(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' + big_code = 'DestReg = (psrc1 & op2) & mask(dataSize * 8)' class Sub(SubRegOp): code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' + big_code = 'DestReg = (psrc1 - op2) & mask(dataSize * 8)' class Xor(LogicRegOp): code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' + big_code = 'DestReg = (psrc1 ^ op2) & mask(dataSize * 8)' class Mul1s(WrRegOp): code = ''' @@ -505,6 +561,7 @@ let {{ class Mulel(RdRegOp): code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);' + big_code = 'DestReg = ProdLow & mask(dataSize * 
8);' class Muleh(RdRegOp): def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"): @@ -513,6 +570,7 @@ let {{ super(RdRegOp, self).__init__(dest, src1, \ "InstRegIndex(NUM_INTREGS)", flags, dataSize) code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);' + big_code = 'DestReg = ProdHi & mask(dataSize * 8);' # One or two bit divide class Div1(WrRegOp): @@ -540,7 +598,7 @@ let {{ # Step divide class Div2(RegOp): - code = ''' + divCode = ''' uint64_t dividend = Remainder; uint64_t divisor = Divisor; uint64_t quotient = Quotient; @@ -587,11 +645,13 @@ let {{ } } //Keep track of how many bits there are still to pull in. - DestReg = merge(DestReg, remaining, dataSize); + %s //Record the final results Remainder = remainder; Quotient = quotient; ''' + code = divCode % "DestReg = merge(DestReg, remaining, dataSize);" + big_code = divCode % "DestReg = remaining & mask(dataSize * 8);" flag_code = ''' if (remaining == 0) ccFlagBits = ccFlagBits | (ext & EZFBit); @@ -601,9 +661,11 @@ let {{ class Divq(RdRegOp): code = 'DestReg = merge(SrcReg1, Quotient, dataSize);' + big_code = 'DestReg = Quotient & mask(dataSize * 8);' class Divr(RdRegOp): code = 'DestReg = merge(SrcReg1, Remainder, dataSize);' + big_code = 'DestReg = Remainder & mask(dataSize * 8);' class Mov(CondRegOp): code = 'DestReg = merge(SrcReg1, op2, dataSize)' @@ -616,6 +678,10 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + DestReg = (psrc1 << shiftAmt) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -641,14 +707,19 @@ let {{ ''' class Srl(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to mask them out + # to be sure they're zero. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to mask them out - // to be sure they're zero. uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); + DestReg = (psrc1 >> shiftAmt) & logicalMask; + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -671,15 +742,21 @@ let {{ ''' class Sra(RegOp): + # Because what happens to the bits shift -in- on a right shift + # is not defined in the C/C++ standard, we have to sign extend + # them manually to be sure. code = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); - // Because what happens to the bits shift -in- on a right shift - // is not defined in the C/C++ standard, we have to sign extend - // them manually to be sure. uint64_t arithMask = (shiftAmt == 0) ? 0 : -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize); ''' + big_code = ''' + uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); + uint64_t arithMask = (shiftAmt == 0) ? 
0 : + -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); + DestReg = ((psrc1 >> shiftAmt) | arithMask) & mask(dataSize * 8); + ''' flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -704,13 +781,11 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt); uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -739,16 +814,14 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt); if (realShiftAmt > 1) top |= psrc1 << (dataSize * 8 - realShiftAmt + 1); uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -780,14 +853,12 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8); - if(realShiftAmt) - { + if (realShiftAmt) { uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -816,8 +887,7 @@ let {{ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); - if(realShiftAmt) - { + if (realShiftAmt) { CCFlagBits flags = ccFlagBits; uint64_t top = psrc1 << realShiftAmt; uint64_t bottom = flags.cf << (realShiftAmt - 1); @@ -826,8 +896,7 @@ let {{ bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt + 1); DestReg = merge(DestReg, top | bottom, dataSize); - } - else + } else DestReg = merge(DestReg, DestReg, dataSize); ''' flag_code = ''' @@ -853,10 +922,10 @@ let {{ ''' class Sld(RegOp): - code = ''' + sldCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -867,8 +936,10 @@ let {{ result = (DoubleBits << (realShiftAmt - dataBits)) | (psrc1 >> (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = sldCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = sldCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -899,10 +970,10 @@ let {{ ''' class Srd(RegOp): - code = ''' + srdCode = ''' uint8_t shiftAmt = (op2 & ((dataSize == 8) ? 
mask(6) : mask(5))); uint8_t dataBits = dataSize * 8; - uint8_t realShiftAmt = shiftAmt % (2 * dataBits); + uint8_t realShiftAmt = shiftAmt %% (2 * dataBits); uint64_t result; if (realShiftAmt == 0) { result = psrc1; @@ -919,8 +990,10 @@ let {{ logicalMask) | (psrc1 << (2 * dataBits - realShiftAmt)); } - DestReg = merge(DestReg, result, dataSize); + %s ''' + code = srdCode % "DestReg = merge(DestReg, result, dataSize);" + big_code = srdCode % "DestReg = result & mask(dataSize * 8);" flag_code = ''' // If the shift amount is zero, no flags should be modified. if (shiftAmt) { @@ -986,6 +1059,12 @@ let {{ ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + int flag = bits(ccFlagBits, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Ruflag, self).__init__(dest, \ @@ -1000,6 +1079,14 @@ let {{ ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : (ccFlagBits & ~EZFBit); ''' + big_code = ''' + MiscReg flagMask = 0x3F7FDD5; + MiscReg flags = (nccFlagBits | ccFlagBits) & flagMask; + int flag = bits(flags, imm8); + DestReg = flag & mask(dataSize * 8); + ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : + (ccFlagBits & ~EZFBit); + ''' def __init__(self, dest, imm, flags=None, \ dataSize="env.dataSize"): super(Rflag, self).__init__(dest, \ @@ -1015,6 +1102,15 @@ let {{ val = sign_bit ? (val | ~maskVal) : (val & maskVal); DestReg = merge(DestReg, val, dataSize); ''' + big_code = ''' + IntReg val = psrc1; + // Mask the bit position so that it wraps. + int bitPos = op2 & (dataSize * 8 - 1); + int sign_bit = bits(val, bitPos, bitPos); + uint64_t maskVal = mask(bitPos+1); + val = sign_bit ? 
(val | ~maskVal) : (val & maskVal); + DestReg = val & mask(dataSize * 8); + ''' flag_code = ''' if (!sign_bit) ccFlagBits = ccFlagBits & @@ -1026,12 +1122,13 @@ let {{ class Zext(RegOp): code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);' + big_code = 'DestReg = bits(psrc1, op2, 0) & mask(dataSize * 8);' class Rddr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rddr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdrCode = ''' CR4 cr4 = CR4Op; DR7 dr7 = DR7Op; if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) { @@ -1039,9 +1136,11 @@ let {{ } else if (dr7.gd) { fault = new DebugException(); } else { - DestReg = merge(DestReg, DebugSrc1, dataSize); + %s } ''' + code = rdrCode % "DestReg = merge(DestReg, DebugSrc1, dataSize);" + big_code = rdrCode % "DestReg = DebugSrc1 & mask(dataSize * 8);" class Wrdr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1066,13 +1165,15 @@ let {{ def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): super(Rdcr, self).__init__(dest, \ src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) - code = ''' + rdcrCode = ''' if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) { fault = new InvalidOpcode(); } else { - DestReg = merge(DestReg, ControlSrc1, dataSize); + %s } ''' + code = rdcrCode % "DestReg = merge(DestReg, ControlSrc1, dataSize);" + big_code = rdcrCode % "DestReg = ControlSrc1 & mask(dataSize * 8);" class Wrcr(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): @@ -1154,24 +1255,20 @@ let {{ ''' class Rdbase(SegOp): - code = ''' - DestReg = merge(DestReg, SegBaseSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegBaseSrc1, dataSize);' + big_code = 'DestReg = SegBaseSrc1 & mask(dataSize * 8);' class Rdlimit(SegOp): - code = ''' - DestReg = merge(DestReg, SegLimitSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegLimitSrc1, dataSize);' + big_code = 'DestReg = SegLimitSrc1 & mask(dataSize * 8);' class RdAttr(SegOp): - code = ''' - DestReg = merge(DestReg, SegAttrSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegAttrSrc1, dataSize);' + big_code = 'DestReg = SegAttrSrc1 & mask(dataSize * 8);' class Rdsel(SegOp): - code = ''' - DestReg = merge(DestReg, SegSelSrc1, dataSize); - ''' + code = 'DestReg = merge(DestReg, SegSelSrc1, dataSize);' + big_code = 'DestReg = SegSelSrc1 & mask(dataSize * 8);' class Rdval(RegOp): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): -- cgit v1.2.3 From bce2be525d0ff05af914515bc2d9e9ac9d66a123 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:45:12 -0800 Subject: X86: Put the result used for flags in an intermediate variable. Using the destination register directly causes the ISA parser to treat it as a source even if none of the original bits are used. 
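For illustration, a minimal self-contained analogue (not gem5 code) of the
dependency problem the temporary avoids. A parser that marks any register
name appearing in the code as an operand would see destReg read in the first
version but only written in the second:

    #include <cstdint>

    using IntReg = uint64_t;

    IntReg genFlags(IntReg old, IntReg val) { return old ^ val; /* stand-in */ }

    void beforeFix(IntReg &destReg, IntReg &flags, IntReg src1, IntReg op2)
    {
        destReg = src1 + op2;
        flags = genFlags(flags, destReg);   // destReg read: a false source
    }

    void afterFix(IntReg &destReg, IntReg &flags, IntReg src1, IntReg op2)
    {
        IntReg result = src1 + op2;
        destReg = result;                   // destReg only ever written
        flags = genFlags(flags, result);
    }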
--- src/arch/x86/insts/microregop.cc | 3 --- src/arch/x86/isa/includes.isa | 1 + src/arch/x86/isa/microops/regop.isa | 38 ++++++++++++++++++++----------------- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc index 6aee87449..dedea0f3d 100644 --- a/src/arch/x86/insts/microregop.cc +++ b/src/arch/x86/insts/microregop.cc @@ -50,9 +50,6 @@ namespace X86ISA bool subtract) const { DPRINTF(X86, "flagMask = %#x\n", flagMask); - if (_destRegIdx[0] & IntFoldBit) { - _dest >>= 8; - } uint64_t flags = oldFlags & ~flagMask; if(flagMask & (ECFBit | CFBit)) { diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa index 32708043e..674e69e98 100644 --- a/src/arch/x86/isa/includes.isa +++ b/src/arch/x86/isa/includes.isa @@ -114,6 +114,7 @@ output exec {{ #include "arch/x86/regs/misc.hh" #include "arch/x86/tlb.hh" #include "base/bigint.hh" +#include "base/compiler.hh" #include "base/condcodes.hh" #include "cpu/base.hh" #include "cpu/exetrace.hh" diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa index 158bfdd59..e2a51c127 100644 --- a/src/arch/x86/isa/microops/regop.isa +++ b/src/arch/x86/isa/microops/regop.isa @@ -51,6 +51,8 @@ def template MicroRegOpExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -79,6 +81,8 @@ def template MicroRegOpImmExecute {{ %(op_decl)s; %(op_rd)s; + IntReg result M5_VAR_USED; + if(%(cond_check)s) { %(code)s; @@ -434,7 +438,7 @@ let {{ flag_code = ''' //Don't have genFlags handle the OF or CF bits uint64_t mask = CFBit | ECFBit | OFBit; - ccFlagBits = genFlags(ccFlagBits, ext & ~mask, DestReg, psrc1, op2); + ccFlagBits = genFlags(ccFlagBits, ext & ~mask, result, psrc1, op2); //If a logic microop wants to set these, it wants to set them to 0. 
ccFlagBits &= ~(CFBit & ext); ccFlagBits &= ~(ECFBit & ext); @@ -444,12 +448,12 @@ let {{ class FlagRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, op2);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, op2);" class SubRegOp(RegOp): abstract = True flag_code = \ - "ccFlagBits = genFlags(ccFlagBits, ext, DestReg, psrc1, ~op2, true);" + "ccFlagBits = genFlags(ccFlagBits, ext, result, psrc1, ~op2, true);" class CondRegOp(RegOp): abstract = True @@ -471,44 +475,44 @@ let {{ src1, src2, flags, dataSize) class Add(FlagRegOp): - code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' - big_code = 'DestReg = (psrc1 + op2) & mask(dataSize * 8);' + code = 'DestReg = merge(DestReg, result = (psrc1 + op2), dataSize);' + big_code = 'DestReg = result = (psrc1 + op2) & mask(dataSize * 8);' class Or(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' - big_code = 'DestReg = (psrc1 | op2) & mask(dataSize * 8);' + code = 'DestReg = merge(DestReg, result = (psrc1 | op2), dataSize);' + big_code = 'DestReg = result = (psrc1 | op2) & mask(dataSize * 8);' class Adc(FlagRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 + op2 + flags.cf), dataSize); ''' big_code = ''' CCFlagBits flags = ccFlagBits; - DestReg = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); + DestReg = result = (psrc1 + op2 + flags.cf) & mask(dataSize * 8); ''' class Sbb(SubRegOp): code = ''' CCFlagBits flags = ccFlagBits; - DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); + DestReg = merge(DestReg, result = (psrc1 - op2 - flags.cf), dataSize); ''' big_code = ''' CCFlagBits flags = ccFlagBits; - DestReg = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); + DestReg = result = (psrc1 - op2 - flags.cf) & mask(dataSize * 8); ''' class And(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' - big_code = 'DestReg = (psrc1 & op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 & op2), dataSize)' + big_code = 'DestReg = result = (psrc1 & op2) & mask(dataSize * 8)' class Sub(SubRegOp): - code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' - big_code = 'DestReg = (psrc1 - op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 - op2), dataSize)' + big_code = 'DestReg = result = (psrc1 - op2) & mask(dataSize * 8)' class Xor(LogicRegOp): - code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' - big_code = 'DestReg = (psrc1 ^ op2) & mask(dataSize * 8)' + code = 'DestReg = merge(DestReg, result = (psrc1 ^ op2), dataSize)' + big_code = 'DestReg = result = (psrc1 ^ op2) & mask(dataSize * 8)' class Mul1s(WrRegOp): code = ''' -- cgit v1.2.3 From 77b4a370670bed84d1c000a58d3e668334fdc86b Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 13 Feb 2011 17:45:47 -0800 Subject: X86: Detect branches taking into account instruction size. The size of the current instruction determines what the npc should be if there's no branching. 
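The predicate added below is small enough to restate in full: an instruction branched exactly when the next PC is not the fall-through PC, and the fall-through PC is the current PC plus the current instruction's size. A toy, compilable rendering (field names assumed; gem5's real PCState also tracks micro-PCs and more state):

    #include <cstdint>
    #include <cstdio>

    struct PCState
    {
        uint64_t _pc, _npc;
        uint8_t _size;  // bytes occupied by the current instruction

        uint64_t pc() const { return _pc; }
        uint64_t npc() const { return _npc; }
        uint8_t size() const { return _size; }

        // A branch was taken iff the next PC is not plain fall-through.
        bool branching() const { return npc() != pc() + size(); }
    };

    int main()
    {
        PCState fallThrough{0x1000, 0x1003, 3};  // npc == pc + size
        PCState taken{0x1000, 0x2000, 3};        // npc redirected elsewhere
        printf("%d %d\n", fallThrough.branching(), taken.branching());
        return 0;
    }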
--- src/arch/x86/types.hh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/arch/x86/types.hh b/src/arch/x86/types.hh index d78af1b81..4641141d3 100644 --- a/src/arch/x86/types.hh +++ b/src/arch/x86/types.hh @@ -243,6 +243,12 @@ namespace X86ISA uint8_t size() const { return _size; } void size(uint8_t newSize) { _size = newSize; } + bool + branching() const + { + return this->npc() != this->pc() + size(); + } + void advance() { -- cgit v1.2.3

From 343e94a257baa94575adf0d0def18ffe8da0c4f8 Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Mon, 14 Feb 2011 16:14:54 -0600 Subject: Ruby: Improve PerfectSwitch's wakeup function

Currently the wakeup function for the PerfectSwitch contains three loops -

loop on number of virtual networks
loop on number of incoming links
loop till all messages for this (link, network) have been routed

With an 8 processor mesh network and the Hammer protocol, about 11-12% of the total execution time was observed to have been spent in this function, which is the highest amongst all functions. It was found that the innermost loop executes about 45 times per invocation of the wakeup function, even though each invocation processes just about one message.

The patch does away with the redundant executions of the innermost loop. Counters have been added for each virtual network that record the number of messages that need to be routed for that virtual network. The inner loops are only executed when the message count for that particular virtual network is > 0. This eliminates almost 80% of the executions of the innermost loop, and the function now consumes about 5-6% of the total execution time.

--- src/mem/ruby/buffers/MessageBuffer.cc | 3 + src/mem/ruby/buffers/MessageBuffer.hh | 6 + src/mem/ruby/common/Consumer.hh | 1 + src/mem/ruby/network/simple/PerfectSwitch.cc | 281 +++++++++++++------------ src/mem/ruby/network/simple/PerfectSwitch.hh | 2 + src/mem/ruby/slicc_interface/Message.hh | 2 + src/mem/ruby/slicc_interface/NetworkMessage.hh | 7 + 7 files changed, 170 insertions(+), 132 deletions(-) (limited to 'src')

diff --git a/src/mem/ruby/buffers/MessageBuffer.cc b/src/mem/ruby/buffers/MessageBuffer.cc index f6b79c580..225595005 100644 --- a/src/mem/ruby/buffers/MessageBuffer.cc +++ b/src/mem/ruby/buffers/MessageBuffer.cc @@ -58,6 +58,8 @@ MessageBuffer::MessageBuffer(const string &name) m_name = name; m_stall_msg_map.clear(); + m_input_link_id = 0; + m_vnet_id = 0; } int @@ -228,6 +230,7 @@ MessageBuffer::enqueue(MsgPtr message, Time delta) // Schedule the wakeup if (m_consumer_ptr != NULL) { g_eventQueue_ptr->scheduleEventAbsolute(m_consumer_ptr, arrival_time); + m_consumer_ptr->storeEventInfo(m_vnet_id); } else { panic("No consumer: %s name: %s\n", *this, m_name); }

diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh index 62cc65670..88df5b788 100644 --- a/src/mem/ruby/buffers/MessageBuffer.hh +++ b/src/mem/ruby/buffers/MessageBuffer.hh @@ -142,6 +142,9 @@ class MessageBuffer void printStats(std::ostream& out); void clearStats() { m_not_avail_count = 0; m_msg_counter = 0; } + void setIncomingLink(int link_id) { m_input_link_id = link_id; } + void setVnet(int net) { m_vnet_id = net; } + private: //added by SS int m_recycle_latency; @@ -184,6 +187,9 @@ class MessageBuffer bool m_ordering_set; bool m_randomization; Time m_last_arrival_time; + + int m_input_link_id; + int m_vnet_id; }; inline std::ostream&
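Condensed, the mechanism reads: MessageBuffer::enqueue() bumps a per-vnet counter through the new Consumer::storeEventInfo() hook, and PerfectSwitch::wakeup() skips any virtual network whose counter is zero. A runnable C++ sketch under deliberately simplified types - one plain queue per vnet instead of a MessageBuffer per (in-port, vnet), and int payloads instead of messages:

    #include <cstdio>
    #include <queue>
    #include <vector>

    struct ToySwitch
    {
        std::vector<std::queue<int>> in;  // pending messages per vnet
        std::vector<int> pending;         // the new per-vnet counters

        explicit ToySwitch(int vnets) : in(vnets), pending(vnets, 0) {}

        // In the patch, MessageBuffer::enqueue() reaches this through
        // Consumer::storeEventInfo(m_vnet_id).
        void storeEventInfo(int vnet) { pending[vnet]++; }

        void enqueue(int vnet, int msg)
        {
            in[vnet].push(msg);
            storeEventInfo(vnet);
        }

        void wakeup()
        {
            for (size_t vnet = 0; vnet < in.size(); vnet++) {
                if (pending[vnet] == 0)
                    continue;  // skip idle vnets: the bulk of the saving
                while (!in[vnet].empty()) {  // the old innermost loop
                    printf("route vnet %zu msg %d\n", vnet, in[vnet].front());
                    in[vnet].pop();
                    pending[vnet]--;  // mirror every pop
                }
            }
        }
    };

    int main()
    {
        ToySwitch s(4);
        s.enqueue(2, 42);  // only vnet 2 has work
        s.wakeup();        // vnets 0, 1 and 3 are skipped outright
        return 0;
    }

The invariant that makes the skip safe is that the counter is decremented exactly once per popped message, so a zero counter proves the inner loops would find nothing to route.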
diff --git a/src/mem/ruby/common/Consumer.hh b/src/mem/ruby/common/Consumer.hh index c1f8bc42e..a119abb39 100644 --- a/src/mem/ruby/common/Consumer.hh +++ b/src/mem/ruby/common/Consumer.hh @@ -67,6 +67,7 @@ class Consumer virtual void wakeup() = 0; virtual void print(std::ostream& out) const = 0; + virtual void storeEventInfo(int info) {} const Time& getLastScheduledWakeup() const

diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 7229c724f..5c461c63f 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -54,6 +54,11 @@ PerfectSwitch::PerfectSwitch(SwitchID sid, SimpleNetwork* network_ptr) m_round_robin_start = 0; m_network_ptr = network_ptr; m_wakeups_wo_switch = 0; + + for(int i = 0;i < m_virtual_networks;++i) + { + m_pending_message_count.push_back(0); + } } void @@ -62,12 +67,15 @@ PerfectSwitch::addInPort(const vector& in) assert(in.size() == m_virtual_networks); NodeID port = m_in.size(); m_in.push_back(in); + for (int j = 0; j < m_virtual_networks; j++) { m_in[port][j]->setConsumer(this); string desc = csprintf("[Queue from port %s %s %s to PerfectSwitch]", NodeIDToString(m_switch_id), NodeIDToString(port), NodeIDToString(j)); m_in[port][j]->setDescription(desc); + m_in[port][j]->setIncomingLink(port); + m_in[port][j]->setVnet(j); } } @@ -154,160 +162,169 @@ PerfectSwitch::wakeup() m_round_robin_start = 0; } - // for all input ports, use round robin scheduling - for (int counter = 0; counter < m_in.size(); counter++) { - // Round robin scheduling - incoming++; - if (incoming >= m_in.size()) { - incoming = 0; - } + if(m_pending_message_count[vnet] > 0) { + // for all input ports, use round robin scheduling + for (int counter = 0; counter < m_in.size(); counter++) { + // Round robin scheduling + incoming++; + if (incoming >= m_in.size()) { + incoming = 0; + } - // temporary vectors to store the routing results - vector output_links; - vector output_link_destinations; - - // Is there a message waiting? - while (m_in[incoming][vnet]->isReady()) { - DPRINTF(RubyNetwork, "incoming: %d\n", incoming); - - // Peek at message - msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); - net_msg_ptr = safe_cast(msg_ptr.get()); - DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); - - output_links.clear(); - output_link_destinations.clear(); - NetDest msg_dsts = - net_msg_ptr->getInternalDestination(); - - // Unfortunately, the token-protocol sends some - // zero-destination messages, so this assert isn't valid - // assert(msg_dsts.count() > 0); - - assert(m_link_order.size() == m_routing_table.size()); - assert(m_link_order.size() == m_out.size()); - - if (m_network_ptr->getAdaptiveRouting()) { - if (m_network_ptr->isVNetOrdered(vnet)) { - // Don't adaptively route - for (int out = 0; out < m_out.size(); out++) { - m_link_order[out].m_link = out; - m_link_order[out].m_value = 0; - } - } else { - // Find how clogged each link is - for (int out = 0; out < m_out.size(); out++) { - int out_queue_length = 0; - for (int v = 0; v < m_virtual_networks; v++) { - out_queue_length += m_out[out][v]->getSize(); + // temporary vectors to store the routing results + vector output_links; + vector output_link_destinations; + + // Is there a message waiting?
+ while (m_in[incoming][vnet]->isReady()) { + DPRINTF(RubyNetwork, "incoming: %d\n", incoming); + + // Peek at message + msg_ptr = m_in[incoming][vnet]->peekMsgPtr(); + net_msg_ptr = safe_cast(msg_ptr.get()); + DPRINTF(RubyNetwork, "Message: %s\n", (*net_msg_ptr)); + + output_links.clear(); + output_link_destinations.clear(); + NetDest msg_dsts = + net_msg_ptr->getInternalDestination(); + + // Unfortunately, the token-protocol sends some + // zero-destination messages, so this assert isn't valid + // assert(msg_dsts.count() > 0); + + assert(m_link_order.size() == m_routing_table.size()); + assert(m_link_order.size() == m_out.size()); + + if (m_network_ptr->getAdaptiveRouting()) { + if (m_network_ptr->isVNetOrdered(vnet)) { + // Don't adaptively route + for (int out = 0; out < m_out.size(); out++) { + m_link_order[out].m_link = out; + m_link_order[out].m_value = 0; + } + } else { + // Find how clogged each link is + for (int out = 0; out < m_out.size(); out++) { + int out_queue_length = 0; + for (int v = 0; v < m_virtual_networks; v++) { + out_queue_length += m_out[out][v]->getSize(); + } + int value = + (out_queue_length << 8) | (random() & 0xff); + m_link_order[out].m_link = out; + m_link_order[out].m_value = value; } - int value = - (out_queue_length << 8) | (random() & 0xff); - m_link_order[out].m_link = out; - m_link_order[out].m_value = value; + + // Look at the most empty link first + sort(m_link_order.begin(), m_link_order.end()); } + } - // Look at the most empty link first - sort(m_link_order.begin(), m_link_order.end()); + for (int i = 0; i < m_routing_table.size(); i++) { + // pick the next link to look at + int link = m_link_order[i].m_link; + NetDest dst = m_routing_table[link]; + DPRINTF(RubyNetwork, "dst: %s\n", dst); + + if (!msg_dsts.intersectionIsNotEmpty(dst)) + continue; + + // Remember what link we're using + output_links.push_back(link); + + // Need to remember which destinations need this + // message in another vector. This Set is the + // intersection of the routing_table entry and the + // current destination set. The intersection must + // not be empty, since we are inside "if" + output_link_destinations.push_back(msg_dsts.AND(dst)); + + // Next, we update the msg_destination not to + // include those nodes that were already handled + // by this link + msg_dsts.removeNetDest(dst); } - } - for (int i = 0; i < m_routing_table.size(); i++) { - // pick the next link to look at - int link = m_link_order[i].m_link; - NetDest dst = m_routing_table[link]; - DPRINTF(RubyNetwork, "dst: %s\n", dst); - - if (!msg_dsts.intersectionIsNotEmpty(dst)) - continue; - - // Remember what link we're using - output_links.push_back(link); - - // Need to remember which destinations need this - // message in another vector. This Set is the - // intersection of the routing_table entry and the - // current destination set. 
The intersection must - // not be empty, since we are inside "if" - output_link_destinations.push_back(msg_dsts.AND(dst)); - - // Next, we update the msg_destination not to - // include those nodes that were already handled - // by this link - msg_dsts.removeNetDest(dst); - } + assert(msg_dsts.count() == 0); + //assert(output_links.size() > 0); + + // Check for resources - for all outgoing queues + bool enough = true; + for (int i = 0; i < output_links.size(); i++) { + int outgoing = output_links[i]; + if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) + enough = false; + DPRINTF(RubyNetwork, "Checking if node is blocked\n" + "outgoing: %d, vnet: %d, enough: %d\n", + outgoing, vnet, enough); + } - assert(msg_dsts.count() == 0); - //assert(output_links.size() > 0); - - // Check for resources - for all outgoing queues - bool enough = true; - for (int i = 0; i < output_links.size(); i++) { - int outgoing = output_links[i]; - if (!m_out[outgoing][vnet]->areNSlotsAvailable(1)) - enough = false; - DPRINTF(RubyNetwork, "Checking if node is blocked\n" - "outgoing: %d, vnet: %d, enough: %d\n", - outgoing, vnet, enough); - } + // There were not enough resources + if (!enough) { + g_eventQueue_ptr->scheduleEvent(this, 1); + DPRINTF(RubyNetwork, "Can't deliver message since a node " + "is blocked\n" + "Message: %s\n", (*net_msg_ptr)); + break; // go to next incoming port + } - // There were not enough resources - if (!enough) { - g_eventQueue_ptr->scheduleEvent(this, 1); - DPRINTF(RubyNetwork, "Can't deliver message since a node " - "is blocked\n" - "Message: %s\n", (*net_msg_ptr)); - break; // go to next incoming port - } + MsgPtr unmodified_msg_ptr; - MsgPtr unmodified_msg_ptr; + if (output_links.size() > 1) { + // If we are sending this message down more than + // one link (size>1), we need to make a copy of + // the message so each branch can have a different + // internal destination we need to create an + // unmodified MsgPtr because the MessageBuffer + // enqueue func will modify the message - if (output_links.size() > 1) { - // If we are sending this message down more than - // one link (size>1), we need to make a copy of - // the message so each branch can have a different - // internal destination we need to create an - // unmodified MsgPtr because the MessageBuffer - // enqueue func will modify the message + // This magic line creates a private copy of the + // message + unmodified_msg_ptr = msg_ptr->clone(); + } - // This magic line creates a private copy of the - // message - unmodified_msg_ptr = msg_ptr->clone(); - } + // Enqueue it - for all outgoing queues + for (int i=0; i 0) { + // create a private copy of the unmodified + // message + msg_ptr = unmodified_msg_ptr->clone(); + } - if (i > 0) { - // create a private copy of the unmodified - // message - msg_ptr = unmodified_msg_ptr->clone(); - } + // Change the internal destination set of the + // message so it knows which destinations this + // link is responsible for. + net_msg_ptr = safe_cast(msg_ptr.get()); + net_msg_ptr->getInternalDestination() = + output_link_destinations[i]; - // Change the internal destination set of the - // message so it knows which destinations this - // link is responsible for. 
- net_msg_ptr = safe_cast(msg_ptr.get()); - net_msg_ptr->getInternalDestination() = - output_link_destinations[i]; + // Enqeue msg + DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " + "inport[%d][%d] to outport [%d][%d] time: %lld.\n", + m_switch_id, incoming, vnet, outgoing, vnet, + g_eventQueue_ptr->getTime()); - // Enqeue msg - DPRINTF(RubyNetwork, "Switch: %d enqueuing net msg from " - "inport[%d][%d] to outport [%d][%d] time: %lld.\n", - m_switch_id, incoming, vnet, outgoing, vnet, - g_eventQueue_ptr->getTime()); + m_out[outgoing][vnet]->enqueue(msg_ptr); + } - m_out[outgoing][vnet]->enqueue(msg_ptr); + // Dequeue msg + m_in[incoming][vnet]->pop(); + m_pending_message_count[vnet]--; } - - // Dequeue msg - m_in[incoming][vnet]->pop(); } } } } +void +PerfectSwitch::storeEventInfo(int info) +{ + m_pending_message_count[info]++; +} + void PerfectSwitch::printStats(std::ostream& out) const { diff --git a/src/mem/ruby/network/simple/PerfectSwitch.hh b/src/mem/ruby/network/simple/PerfectSwitch.hh index a7e577df0..cd0219fd9 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.hh +++ b/src/mem/ruby/network/simple/PerfectSwitch.hh @@ -69,6 +69,7 @@ class PerfectSwitch : public Consumer int getOutLinks() const { return m_out.size(); } void wakeup(); + void storeEventInfo(int info); void printStats(std::ostream& out) const; void clearStats(); @@ -92,6 +93,7 @@ class PerfectSwitch : public Consumer int m_round_robin_start; int m_wakeups_wo_switch; SimpleNetwork* m_network_ptr; + std::vector m_pending_message_count; }; inline std::ostream& diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index ff94fdd40..7fcfabe9c 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -57,6 +57,8 @@ class Message : public RefCounted virtual Message* clone() const = 0; virtual void print(std::ostream& out) const = 0; + virtual void setIncomingLink(int) {} + virtual void setVnet(int) {} void setDelayedCycles(const int& cycles) { m_DelayedCycles = cycles; } const int& getDelayedCycles() const {return m_DelayedCycles;} diff --git a/src/mem/ruby/slicc_interface/NetworkMessage.hh b/src/mem/ruby/slicc_interface/NetworkMessage.hh index 082481e05..a8f9c625b 100644 --- a/src/mem/ruby/slicc_interface/NetworkMessage.hh +++ b/src/mem/ruby/slicc_interface/NetworkMessage.hh @@ -82,9 +82,16 @@ class NetworkMessage : public Message virtual void print(std::ostream& out) const = 0; + int getIncomingLink() const { return incoming_link; } + void setIncomingLink(int link) { incoming_link = link; } + int getVnet() const { return vnet; } + void setVnet(int net) { vnet = net; } + private: NetDest m_internal_dest; bool m_internal_dest_valid; + int incoming_link; + int vnet; }; inline std::ostream& -- cgit v1.2.3 From 989138970e3512aa9c1b37810ba64a742c00543e Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 14 Feb 2011 21:36:37 -0800 Subject: Info: Clean up some info files. Get rid of RELEASE_NOTES since we no longer do releases, update some of the information in README, and update the date in LICENSE. 
--- src/SConscript | 2 +- src/python/m5/main.py | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'src') diff --git a/src/SConscript b/src/SConscript index cad0736c5..0ee144747 100755 --- a/src/SConscript +++ b/src/SConscript @@ -446,7 +446,7 @@ def makeInfoPyFile(target, source, env): # Generate a file that wraps the basic top level files env.Command('python/m5/info.py', - [ '#/AUTHORS', '#/LICENSE', '#/README', '#/RELEASE_NOTES' ], + [ '#/AUTHORS', '#/LICENSE', '#/README', ], MakeAction(makeInfoPyFile, Transform("INFO"))) PySource('m5', 'python/m5/info.py') diff --git a/src/python/m5/main.py b/src/python/m5/main.py index cd139ccb3..23a012166 100644 --- a/src/python/m5/main.py +++ b/src/python/m5/main.py @@ -61,8 +61,6 @@ add_option('-C', "--copyright", action="store_true", default=False, help="Show full copyright information") add_option('-R', "--readme", action="store_true", default=False, help="Show the readme") -add_option('-N', "--release-notes", action="store_true", default=False, - help="Show the release notes") # Options for configuring the base simulator add_option('-d', "--outdir", metavar="DIR", default="m5out", @@ -207,13 +205,6 @@ def main(): print info.README print - if options.release_notes: - done = True - print 'Release Notes:' - print - print info.RELEASE_NOTES - print - if options.trace_help: done = True check_tracing() -- cgit v1.2.3 From fde8b5c3876ad431b193989ab64c802d1cec1ed0 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 15 Feb 2011 15:58:16 -0800 Subject: X86: Get rid of "inline" on the MicroPanic constructor in decoder.cc. This was making certain versions of gcc omit the function from the object file which would break the build. --- src/arch/x86/isa/microops/debug.isa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/arch/x86/isa/microops/debug.isa b/src/arch/x86/isa/microops/debug.isa index c2735565d..220c1af97 100644 --- a/src/arch/x86/isa/microops/debug.isa +++ b/src/arch/x86/isa/microops/debug.isa @@ -98,7 +98,7 @@ def template MicroDebugExecute {{ }}; def template MicroDebugConstructor {{ - inline %(class_name)s::%(class_name)s( + %(class_name)s::%(class_name)s( ExtMachInst machInst, const char * instMnem, uint64_t setFlags, std::string _message, uint8_t _cc) : %(base_class)s(machInst, "%(func)s", instMnem, -- cgit v1.2.3
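A footnote on the linkage rule behind this last fix: 'inline' gives the compiler license to emit no standalone symbol for a function, so when the only definition lives in one .cc file, other translation units that call it can fail at link time with an undefined reference. A hypothetical sketch of the failure mode (file and type names invented for illustration):

    // widget.hh -- declaration that every translation unit includes
    struct Widget
    {
        Widget(const char *message);  // note: not declared inline
        const char *msg;
    };

    // widget.cc -- the only definition. Marking it 'inline', as decoder.cc
    // did for the MicroPanic constructor, permits the compiler to emit no
    // out-of-line symbol at all; any other .cc file that constructs a
    // Widget can then fail to link with "undefined reference to
    // Widget::Widget(char const*)". Dropping 'inline' forces emission.
    Widget::Widget(const char *message) : msg(message) {}

    // user.cc -- a separate translation unit that needs the symbol
    // #include "widget.hh"
    // void trip() { Widget w("oops"); }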