15 files changed, 434 insertions, 56 deletions
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index f3981a6eb..a0d671da1 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -311,12 +311,19 @@ def p_output_exec(t):
 def p_global_let(t):
     'global_let : LET CODELIT SEMI'
     updateExportContext()
+    exportContext["header_output"] = ''   # cleared so a let block can set them
+    exportContext["decoder_output"] = ''
+    exportContext["exec_output"] = ''
+    exportContext["decode_block"] = ''
     try:
         exec fixPythonIndentation(t[2]) in exportContext
     except Exception, exc:
         error(t.lineno(1),
               'error: %s in global let block "%s".' % (exc, t[2]))
-    t[0] = GenCode() # contributes nothing to the output C++ file
+    t[0] = GenCode(header_output = exportContext["header_output"],  # as left by the block
+                   decoder_output = exportContext["decoder_output"],
+                   exec_output = exportContext["exec_output"],
+                   decode_block = exportContext["decode_block"])
 
 # Define the mapping from operand type extensions to C++ types and bit
 # widths (stored in operandTypeMap).
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
index c56a8bf92..b4aeece07 100644
--- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -64,7 +64,7 @@
             0x6: push_ES();
             0x7: pop_ES();
             default: MultiOp::add(
-                {{out1 = in1 + in2}},
+                {{Add %0 %0 %1}},
                 OPCODE_OP_BOTTOM3,
                 [[Eb,Gb],[Ev,Gv],
                  [Gb,Eb],[Gv,Ev],
diff --git a/src/arch/x86/isa/formats/multi.isa b/src/arch/x86/isa/formats/multi.isa
index 3e80f9cfb..c14e80095 100644
--- a/src/arch/x86/isa/formats/multi.isa
+++ b/src/arch/x86/isa/formats/multi.isa
@@ -74,33 +74,81 @@ let {{
 }};
 
 def format MultiOp(code, switchVal, opTags, *opt_flags) {{
-    # Loads and stores that bring in and write out values from the
-    # instructions. These are determined by the input and output tags,
-    # and the resulting instruction will have the right number of micro ops,
-    # or could be implemented as an atomic macro op.
-    instNames = []
+    # These are C++ statements to create each type of static inst. Since we
+    # don't know what will be microcoded and what won't, we can't assume a
+    # particular set of arguments for the constructor.
+    instNew = []
+    orig_code = code
+    opRe = re.compile(r"%(?P<operandNum>[0-9]+)")
+    # Get all the labels out of the code and make a dict for them. We'll do
+    # this once since the position of labels shouldn't need to change at all.
+    ops = assembleMicro(code)
+    labels = buildLabelDict(ops)
     for tagSet in opTags:
-        loads = []
-        stores = []
+        # A list of strings which either have the register number to use, or
+        # a piece of code for calculating it.
+        regNums = []
+        code = orig_code
+        # Build up a name for this instruction's class using the argument
+        # types. Each variation will get its own name this way.
         postfix = ''
         for tag in tagSet:
             postfix += '_' + tag
-        gather_inputs = ''
-        if len(loads) + len(stores) == 0:
-            # If there are no loads or stores, make this a single instruction.
-            iop = InstObjParams(name, Name + postfix, 'X86StaticInst',
-                    {"code": code, "gather_inputs": gather_inputs},
-                    opt_flags)
+
+        # Figure out what register indexes to use for each operand. This
+        # is where loads/stores could be set up. I need to distinguish
+        # between inputs and outputs.
+        # For right now, the indexes are just an increasing sequence
+        counter = 0
+        for tag in tagSet:
+            regNums.append("%d" % counter)
+            counter += 1
+
+        # Replace the placeholders %0, %1, etc., with the right register
+        # indexes.
+        opMatch = opRe.search(code)
+        while opMatch:
+            opNum = opMatch.group("operandNum")
+            opNum = int(opNum)
+            if opNum >= len(regNums):
+                print "No operand type specified for operand %d!" % opNum
+                print "I should bail out here too!"
+            regNum = regNums[opNum]
+            code = opRe.sub(regNum, code, 1)
+            opMatch = opRe.search(code)
+
+        # All the loads which feed this instruction
+        loads = []
+        # All the ops that make up the instruction proper.
+        ops = assembleMicro(code)
+        # Get all the labels out and make a dict for them
+        # All the stores for this instruction's results
+        stores = []
+
+        # Various counts
+        numLoads = len(loads)
+        numOps = len(ops)
+        numStores = len(stores)
+        totalOps = numLoads + numOps + numStores
+        print "There are %d total ops" % totalOps
+
+        # If we can implement this instruction with exactly one microop, just
+        # use that directly.
+        newStmnt = ''
+        if totalOps == 1:
+            newStmnt = ops[0].getAllocator(labels)
         else:
             # Build up a macro op. We'll punt on this for now
             pass
 
+        instNew.append(newStmnt)
+
     decodeBlob = 'switch(%s) {\n' % switchVal
     counter = 0
-    for inst in instNames:
-        decodeBlob += '%d: return (X86StaticInst *)(new %s(machInst));\n' % \
-                      (counter, inst)
+    for newStmnt in instNew:
+        decodeBlob += 'case %d: return (X86StaticInst *)(%s);\n' % \
+                      (counter, newStmnt)
         counter += 1
     decodeBlob += '}\n'
-    # decode_block = BasicDecodeWithMnemonic.subst(iop)
+    decode_block = decodeBlob
 }};
diff --git a/src/arch/x86/isa/main.isa b/src/arch/x86/isa/main.isa
index 146f714a7..fe1d4e515 100644
--- a/src/arch/x86/isa/main.isa
+++ b/src/arch/x86/isa/main.isa
@@ -72,6 +72,9 @@
 
 namespace X86ISA;
 
+//Include the simple microcode assembler
+##include "microasm.isa"
+
 //Include the bitfield definitions
 ##include "bitfields.isa"
 
@@ -84,5 +87,8 @@ namespace X86ISA;
 //Include the definitions for the instruction formats
 ##include "formats/formats.isa"
 
+//Include the definitions of the micro ops
+##include "microops/microops.isa"
+
 //Include the decoder definition
 ##include "decoder/decoder.isa"
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
new file mode 100644
index 000000000..2abce6e7f
--- /dev/null
+++ b/src/arch/x86/isa/microasm.isa
@@ -0,0 +1,172 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+////////////////////////////////////////////////////////////////////
+//
+//  Code to "assemble" microcode sequences
+//
+
+let {{
+    class MicroOpStatement:  # One parsed statement of microcode assembly.
+        def __init__(self):
+            self.className = ''  # name of the C++ class named by the statement
+            self.label = ''      # label attached to the statement, if any
+            self.args = []       # one dict per operand, keyed by operand kind
+
+        def getAllocator(self, labelDict = {}):  # returns a C++ "new" expression
+            args = ''
+            for arg in self.args:
+                if arg.has_key("operandConst"):
+                    args += ", %s" % arg["operandConst"]
+                elif arg.has_key("operandCode"):
+                    args += ", %s" % arg["operandCode"]
+                elif arg.has_key("operandLabel"):
+                    if not labelDict.has_key(arg["operandLabel"]):
+                        print "Unrecognized label %s!" % arg["operandLabel"]
+                    args += ", %s" % labelDict[arg["operandLabel"]]  # KeyError if unknown
+                else:
+                    print "Unrecognized operand type!"
+            return 'new %s(machInst %s)' % (self.className, args)
+
+
+    def assembleMicro(code):
+        # This function takes in a block of microcode assembly and returns
+        # a python list of objects which describe it.
+
+        # Keep this around in case we need it later
+        orig_code = code
+        # A list of the statements we've found thus far
+        statements = []
+
+        # Regular expressions to pull each piece of the statement out at a
+        # time. Each expression expects the thing it's looking for to be at
+        # the beginning of the line, so the previous component is stripped
+        # before continuing. lineRe is multi-line so "^"/"$" match per line.
+        labelRe = re.compile(r'^[ \t]*(?P<label>[a-zA-Z_]\w*)[ \t]*:')
+        lineRe = re.compile(r'^(?P<line>[^\n][^\n]*)$', re.MULTILINE)
+        classRe = re.compile(r'^[ \t]*(?P<className>[a-zA-Z_]\w*)')
+        # This recognizes three different flavors of operands:
+        # 1. Raw decimal numbers composed of digits between 0 and 9
+        # 2. Code beginning with "{" and continuing until the first "}"
+        #         ^ This one might need revising
+        # 3. A label, which starts with a capital or small letter, or
+        #    underscore, which is optionally followed by a sequence of
+        #    capital or small letters, underscores, or digits between 0 and 9
+        opRe = re.compile( \
+            r'^[ \t]*((?P<operandLabel>[a-zA-Z_]\w*)|(?P<operandConst>[0-9][0-9]*)|(\{(?P<operandCode>[^}]*)\}))')
+        lineMatch = lineRe.search(code)
+        while lineMatch != None:
+            statement = MicroOpStatement()
+            # Get a line and separate it from the rest of the code
+            line = lineMatch.group("line")
+            print "Parsing line %s" % line
+            code = lineRe.sub('', code, 1)
+
+            # Find the label, if any
+            labelMatch = labelRe.search(line)
+            if labelMatch != None:
+                statement.label = labelMatch.group("label")
+                print "Found label %s." % statement.label
+            # Clear the label from the statement
+            line = labelRe.sub('', line, 1)
+
+            # Find the class name which is roughly equivalent to the op name
+            classMatch = classRe.search(line)
+            if classMatch == None:
+                print "Oh no! I can't find what instruction you want!"
+                print "I should really bail out here, but I don't know how!"
+            else:
+                statement.className = classMatch.group("className")
+                print "Found class name %s." % statement.className
+
+            # Clear the class name from the statement
+            line = classRe.sub('', line, 1)
+
+            #Find as many arguments as you can
+            statement.args = []
+            opMatch = opRe.search(line)
+            while opMatch is not None:
+                statement.args.append({})
+                # args is a list of dicts which collect different
+                # representations of operand values. Different forms might be
+                # needed in different places, for instance to replace a label
+                # with an offset.
+                for opType in ("operandLabel", "operandConst", "operandCode"):
+                    if opMatch.group(opType):
+                        statement.args[-1][opType] = opMatch.group(opType)
+                if len(statement.args[-1]) == 0:
+                    print "I had a problem parsing an operand!"
+                line = opRe.sub('', line, 1)
+                print "Found operand %s." % statement.args[-1]
+                opMatch = opRe.search(line)
+            print "Found operands", statement.args
+
+            # Add this statement to our collection
+            statements.append(statement)
+
+            # Get the next line
+            lineMatch = lineRe.search(code)
+        return statements
+
+    def buildLabelDict(ops):
+        # Map each label to the position of the micro op which carries it.
+        labels = {}
+        for count, op in enumerate(ops):
+            if op.label:
+                labels[op.label] = count
+        return labels
+}};
diff --git a/src/arch/x86/isa/microops/microops.isa b/src/arch/x86/isa/microops/microops.isa
new file mode 100644
index 000000000..bbf26f605
--- /dev/null
+++ b/src/arch/x86/isa/microops/microops.isa
@@ -0,0 +1,57 @@
+// Copyright (c) 2007 The Hewlett-Packard Development Company
+// All rights reserved.
+//
+// Redistribution and use of this software in source and binary forms,
+// with or without modification, are permitted provided that the
+// following conditions are met:
+//
+// The software must be used only for Non-Commercial Use which means any
+// use which is NOT directed to receiving any direct monetary
+// compensation for, or commercial advantage from such use.  Illustrative
+// examples of non-commercial use are academic research, personal study,
+// teaching, education and corporate research & development.
+// Illustrative examples of commercial use are distributing products for
+// commercial advantage and providing services using the software for
+// commercial advantage.
+//
+// If you wish to use this software or functionality therein that may be
+// covered by patents for commercial use, please contact:
+//     Director of Intellectual Property Licensing
+//     Office of Strategy and Technology
+//     Hewlett-Packard Company
+//     1501 Page Mill Road
+//     Palo Alto, California  94304
+//
+// Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.  Redistributions
+// in binary form must reproduce the above copyright notice, this list of
+// conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.  Neither the name of
+// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.  No right of
+// sublicense is granted herewith.  Derivatives of the software and
+// output created using the software may be prepared, but only for
+// Non-Commercial Uses.  Derivatives of the software may be shared with
+// others provided: (i) the others agree to abide by the list of
+// conditions herein which includes the Non-Commercial Use restrictions;
+// and (ii) such Derivatives of the software include the above copyright
+// notice to acknowledge the contribution from this software where
+// applicable, this list of conditions and the disclaimer below.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Gabe Black
+
+//Micro ops
+##include "int.isa"
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 20376f38f..36b0ee4df 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -96,7 +96,7 @@ def operand_types {{
 }};
 
 def operands {{
-        # This is just copied from SPARC, because having no operands confuses
-        # the parser.
-        'Rd':               ('IntReg', 'udw', 'RD', 'IsInteger', 1)
+        'IntRegOp0':     ('IntReg', 'udw', 'regIndex0', 'IsInteger', 1),
+        'IntRegOp1':     ('IntReg', 'udw', 'regIndex1', 'IsInteger', 2),
+        'IntRegOp2':     ('IntReg', 'udw', 'regIndex2', 'IsInteger', 2),
 }};
diff --git a/src/base/SConscript b/src/base/SConscript
index 788aa3e6f..6fc140145 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -30,10 +30,26 @@
 
 Import('*')
 
-# base/traceflags.{cc,hh} are generated from base/traceflags.py.
-# $TARGET.base will expand to "<build-dir>/base/traceflags".
-env.Command(['traceflags.hh', 'traceflags.cc'], 'traceflags.py',
-            'python $SOURCE $TARGET.base')
+def make_cc(target, source, env):
+    # SCons action: run gen_cc() from the source script to emit the target.
+    assert len(source) == 1 and len(target) == 1
+
+    namespace = {}
+    execfile(str(source[0]), namespace)
+    generate = namespace['gen_cc']
+    generate(str(target[0]))
+
+def make_hh(target, source, env):
+    # SCons action: run gen_hh() from the source script to emit the target.
+    assert len(source) == 1 and len(target) == 1
+
+    namespace = {}
+    execfile(str(source[0]), namespace)
+    generate = namespace['gen_hh']
+    generate(str(target[0]))
+
+env.Command('traceflags.hh', 'traceflags.py', make_hh)
+env.Command('traceflags.cc', 'traceflags.py', make_cc)
 
 Source('annotate.cc')
 Source('bigint.cc')
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index e57bfa350..6b241c410 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -350,16 +350,3 @@ const char *Trace::flagStrings[] =
     print >>ccfile, '};'
 
     ccfile.close()
-
-if __name__ == '__main__':
-    # This file generates the header and source files for the flags
-    # that control the tracing facility.
-
-    import sys
-
-    if len(sys.argv) != 2:
-        print "%s: Need argument (basename of cc/hh files)" % sys.argv[0]
-        sys.exit(1)
-
-    gen_hh(sys.argv[1] + '.hh')
-    gen_cc(sys.argv[1] + '.cc')
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index da7ce00f5..811f4d2bc 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -86,6 +86,8 @@ class DefaultFetch
 
         bool snoopRangeSent;
 
+        virtual void setPeer(Port *port);
+
       protected:
         /** Atomic version of receive.  Panics. */
         virtual Tick recvAtomic(PacketPtr pkt);
@@ -184,6 +186,9 @@ class DefaultFetch
     /** Initialize stage. */
     void initStage();
 
+    /** Tells the fetch stage that the Icache is set. */
+    void setIcache();
+
     /** Processes cache completion event. */
     void processCacheCompletion(PacketPtr pkt);
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 663cd3142..34b06420d 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -51,6 +51,15 @@
 #include <algorithm>
 
 template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
+{
+    Port::setPeer(port);  // base class handles the peer first
+
+    fetch->setIcache();   // fetch then queries the peer's block size
+}
+
+template<class Impl>
 Tick
 DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
 {
@@ -323,12 +332,6 @@ DefaultFetch<Impl>::initStage()
         nextNPC[tid] = cpu->readNextNPC(tid);
     }
 
-    // Size of cache block.
-    cacheBlkSize = icachePort->peerBlockSize();
-
-    // Create mask to get rid of offset bits.
-    cacheBlkMask = (cacheBlkSize - 1);
-
     for (int tid=0; tid < numThreads; tid++) {
 
         fetchStatus[tid] = Running;
@@ -337,11 +340,6 @@ DefaultFetch<Impl>::initStage()
 
         memReq[tid] = NULL;
 
-        // Create space to store a cache line.
-        cacheData[tid] = new uint8_t[cacheBlkSize];
-        cacheDataPC[tid] = 0;
-        cacheDataValid[tid] = false;
-
         stalls[tid].decode = false;
         stalls[tid].rename = false;
         stalls[tid].iew = false;
@@ -351,6 +349,24 @@ DefaultFetch<Impl>::initStage()
 
 template<class Impl>
 void
+DefaultFetch<Impl>::setIcache()
+{
+    // Size of a cache block, as reported by the icache port's peer.
+    cacheBlkSize = icachePort->peerBlockSize();
+
+    // Mask for the offset bits. NOTE(review): presumes a power-of-two size.
+    cacheBlkMask = (cacheBlkSize - 1);
+
+    for (int tid=0; tid < numThreads; tid++) {
+        // Space for one cache line. NOTE(review): an old buffer isn't freed.
+        cacheData[tid] = new uint8_t[cacheBlkSize];
+        cacheDataPC[tid] = 0;
+        cacheDataValid[tid] = false;
+    }
+}
+
+template<class Impl>
+void
 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 {
     unsigned tid = pkt->req->getThreadNum();
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 722ce216b..3e45c85d2 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -227,6 +227,12 @@ class Cache : public BaseCache
     BlkType* handleAccess(PacketPtr &pkt, int & lat,
                           PacketList & writebacks, bool update = true);
 
+
+    /**
+     * Handle the compare-and-swap operation for SPARC.
+     */
+    void cmpAndSwap(BlkType *blk, PacketPtr &pkt);
+
     /**
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes an MSHR pointer and uses its
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index fc4660269..ec0ef1be4 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -122,12 +122,15 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
     if (blk != NULL) {
 
         if (!update) {
+
             if (pkt->isWrite()){
                 assert(offset < blkSize);
                 assert(pkt->getSize() <= blkSize);
                 assert(offset+pkt->getSize() <= blkSize);
                 std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
                        pkt->getSize());
+            } else if (pkt->isReadWrite()) {
+                cmpAndSwap(blk, pkt);
             } else if (!(pkt->flags & SATISFIED)) {
                 pkt->flags |= SATISFIED;
                 pkt->result = Packet::Success;
@@ -154,7 +157,8 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
             }
         }
 
-        if ((pkt->isWrite() && blk->isWritable()) ||
+        if ((pkt->isReadWrite() && blk->isWritable()) ||
+            (pkt->isWrite() && blk->isWritable()) ||
             (pkt->isRead() && blk->isValid())) {
 
             // We are satisfying the request
@@ -180,13 +184,15 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
                     std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
                            pkt->getSize());
                 }
+            } else if (pkt->isReadWrite()) {
+                cmpAndSwap(blk, pkt);
             } else {
                 assert(pkt->isRead());
                 if (pkt->req->isLocked()) {
                     blk->trackLoadLocked(pkt->req);
                 }
                 std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                       pkt->getSize());
+                            pkt->getSize());
             }
 
             if (write_data ||
@@ -215,6 +221,44 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
 }
 
 template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr &pkt)
+{
+    // Swap the packet's data with the block's data. For a conditional
+    // swap, memory is only written when it matches the request's
+    // extra data; the packet always receives the old memory value.
+    int offset = tags->extractBlkOffset(pkt->getAddr());
+
+    assert(sizeof(uint64_t) >= pkt->getSize());
+
+    // Save the value to be written before the packet is overwritten.
+    uint64_t overwrite_val;
+    std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
+    std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
+                pkt->getSize());
+
+    bool overwrite_mem = true;
+    if (pkt->req->isCondSwap()) {
+        if (pkt->getSize() == sizeof(uint64_t)) {
+            uint64_t condition_val64 = pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val64,
+                                         blk->data + offset,
+                                         sizeof(uint64_t));
+        } else if (pkt->getSize() == sizeof(uint32_t)) {
+            uint32_t condition_val32 = (uint32_t)pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val32,
+                                         blk->data + offset,
+                                         sizeof(uint32_t));
+        } else {
+            panic("Invalid size for conditional read/write\n");
+        }
+    }
+
+    if (overwrite_mem)
+        std::memcpy(blk->data + offset, &overwrite_val, pkt->getSize());
+}
+
+template<class TagStore, class Coherence>
 typename Cache<TagStore,Coherence>::BlkType*
 Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
                                       CacheBlk::State new_state,
@@ -244,8 +288,9 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
             blk = NULL;
         }
 
-        if (blk && (target->isWrite() ? blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isRead());
+        if (blk && ((target->isWrite() || target->isReadWrite()) ?
+                    blk->isWritable() : blk->isValid())) {
+            assert(target->isWrite() || target->isReadWrite() || target->isRead());
             assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
             if (target->isWrite()) {
                 if (blk->checkWrite(pkt->req)) {
@@ -253,6 +298,8 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
                     std::memcpy(blk->data + target->getOffset(blkSize),
                            target->getPtr<uint8_t>(), target->getSize());
                 }
+            } else if (target->isReadWrite()) {
+                cmpAndSwap(blk, target);
             } else {
                 if (pkt->req->isLocked()) {
                     blk->trackLoadLocked(pkt->req);
@@ -332,8 +379,9 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
             continue;
         }
 
-        if (blk && (target->isWrite() ? blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isRead());
+        if (blk && ((target->isWrite() || target->isReadWrite()) ?
+            blk->isWritable() : blk->isValid())) {
+            assert(target->isWrite() || target->isRead() || target->isReadWrite() );
             assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
             if (target->isWrite()) {
                 if (blk->checkWrite(pkt->req)) {
@@ -341,6 +389,8 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
                     std::memcpy(blk->data + target->getOffset(blkSize),
                            target->getPtr<uint8_t>(), target->getSize());
                 }
+            } else if (target->isReadWrite()) {
+                cmpAndSwap(blk, target);
             } else {
                 if (target->req->isLocked()) {
                     blk->trackLoadLocked(target->req);
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index e8520401d..33a8a4e63 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -295,9 +295,12 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq);
     // we only support write allocate right now
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
+    tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
     if (hasOwned) {
         tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd);
+        tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd);
     }
 
     // Prefetching causes a read
diff --git a/src/python/m5/objects/O3CPU.py b/src/python/m5/objects/O3CPU.py
index 20eef383f..5fba4e96f 100644
--- a/src/python/m5/objects/O3CPU.py
+++ b/src/python/m5/objects/O3CPU.py
@@ -116,3 +116,8 @@ class DerivO3CPU(BaseCPU):
     smtROBPolicy   = Param.String("SMT ROB Sharing Policy")
     smtROBThreshold = Param.String("SMT ROB Threshold Sharing Parameter")
     smtCommitPolicy = Param.String("SMT Commit Policy")
+
+    def addPrivateSplitL1Caches(self, ic, dc):  # override of the BaseCPU method
+        BaseCPU.addPrivateSplitL1Caches(self, ic, dc)  # base class runs first
+        self.icache.tgts_per_mshr = 20  # then both caches get 20 targets per MSHR
+        self.dcache.tgts_per_mshr = 20