commit    0f27d70e90c20ba21d3f6a3360a11d9d5e9ee133 (patch)
author    Alexandru Dutu <alexandru.dutu@amd.com>  2016-02-06 17:21:20 -0800
committer Alexandru Dutu <alexandru.dutu@amd.com>  2016-02-06 17:21:20 -0800
tree      b9d33a3e92e6d20e5f82d238bf6e94dfc4e2301e  /src/arch/x86/isa/insts
parent    5200e04e92b487181d4a678231564272730e04a2 (diff)
download  gem5-0f27d70e90c20ba21d3f6a3360a11d9d5e9ee133.tar.xz
x86: revamp cmpxchg8b/cmpxchg16b implementation
The previous implementation did a pair of nested RMW operations,
which isn't compatible with the way that locked RMW operations are
implemented in the cache models. It was convenient though in that
it didn't require any new micro-ops, and supported cmpxchg16b using
64-bit memory ops. It also worked in AtomicSimpleCPU where
atomicity was guaranteed by the core and not by the memory system.
It did not work with timing CPU models though.
This new implementation defines new 'split' load and store micro-ops
which allow a single memory operation to use a pair of registers as
the source or destination, then uses a single ldsplit/stsplit RMW
pair to implement cmpxchg. This patch requires support for 128-bit
memory accesses in the ISA (added via a separate patch) to support
cmpxchg16b.
Diffstat (limited to 'src/arch/x86/isa/insts')
-rw-r--r-- | src/arch/x86/isa/insts/general_purpose/semaphores.py | 10 |
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py
index 17bee7fb7..9f751b3ae 100644
--- a/src/arch/x86/isa/insts/general_purpose/semaphores.py
+++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2007 The Hewlett-Packard Development Company
+# Copyright (c) 2015 Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -126,12 +127,14 @@ def macroop XADD_R_R {
 '''

+# Despite the name, this microcode sequence implements both
+# cmpxchg8b and cmpxchg16b, depending on the dynamic value
+# of dataSize.
 cmpxchg8bCode = '''
 def macroop CMPXCHG8B_%(suffix)s {
     %(rdip)s
     lea t1, seg, %(sib)s, disp, dataSize=asz
-    ldst%(l)s t2, seg, [1, t0, t1], 0
-    ldst%(l)s t3, seg, [1, t0, t1], dsz
+    ldsplit%(l)s (t2, t3), seg, [1, t0, t1], disp=0

     sub t0, rax, t2, flags=(ZF,)
     br label("doneComparing"), flags=(nCZF,)
@@ -147,8 +150,7 @@ doneComparing:
     mov rdx, rdx, t3, flags=(nCZF,)

     # Write to memory
-    st%(ul)s t3, seg, [1, t0, t1], dsz
-    st%(ul)s t2, seg, [1, t0, t1], 0
+    stsplit%(ul)s (t2, t3), seg, [1, t0, t1], disp=0
 };
 '''