Diffstat (limited to 'src/mem/ruby')
-rw-r--r--  src/mem/ruby/buffers/MessageBuffer.hh                         5
-rw-r--r--  src/mem/ruby/config/MESI_CMP_directory.rb                     4
-rw-r--r--  src/mem/ruby/config/MI_example-homogeneous.rb                 9
-rw-r--r--  src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb             90
-rw-r--r--  src/mem/ruby/config/assert.rb                                18
-rw-r--r--  src/mem/ruby/config/cfg.rb                                  352
-rw-r--r--  src/mem/ruby/config/config.hh                               236
-rw-r--r--  src/mem/ruby/config/defaults.rb                             124
-rw-r--r--  src/mem/ruby/config/rubyconfig.defaults                     405
-rw-r--r--  src/mem/ruby/config/tester.defaults                          50
-rw-r--r--  src/mem/ruby/libruby.cc                                       3
-rw-r--r--  src/mem/ruby/libruby.hh                                       8
-rw-r--r--  src/mem/ruby/network/simple/PerfectSwitch.cc                  2
-rw-r--r--  src/mem/ruby/network/simple/Topology.cc                       1
-rw-r--r--  src/mem/ruby/slicc_interface/AbstractController.hh            5
-rw-r--r--  src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh   11
-rw-r--r--  src/mem/ruby/system/CacheMemory.cc                           28
-rw-r--r--  src/mem/ruby/system/CacheMemory.hh                           10
-rw-r--r--  src/mem/ruby/system/DMASequencer.hh                           1
-rw-r--r--  src/mem/ruby/system/DirectoryMemory.cc                        2
-rw-r--r--  src/mem/ruby/system/DirectoryMemory.hh                        2
-rw-r--r--  src/mem/ruby/system/MemoryVector.hh                          72
-rw-r--r--  src/mem/ruby/system/Sequencer.cc                            119
-rw-r--r--  src/mem/ruby/system/Sequencer.hh                             10
-rw-r--r--  src/mem/ruby/system/System.cc                                 4
25 files changed, 410 insertions, 1161 deletions
diff --git a/src/mem/ruby/buffers/MessageBuffer.hh b/src/mem/ruby/buffers/MessageBuffer.hh
index 8440c3335..950423ee5 100644
--- a/src/mem/ruby/buffers/MessageBuffer.hh
+++ b/src/mem/ruby/buffers/MessageBuffer.hh
@@ -64,6 +64,11 @@ public:
(m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime()));
}
+ void delayHead() {
+ MessageBufferNode node = m_prio_heap.extractMin();
+ enqueue(node.m_msgptr, 1);
+ }
+
bool areNSlotsAvailable(int n);
int getPriority() { return m_priority_rank; }
void setPriority(int rank) { m_priority_rank = rank; }
diff --git a/src/mem/ruby/config/MESI_CMP_directory.rb b/src/mem/ruby/config/MESI_CMP_directory.rb
index 4d9ff30b3..7a9d47f24 100644
--- a/src/mem/ruby/config/MESI_CMP_directory.rb
+++ b/src/mem/ruby/config/MESI_CMP_directory.rb
@@ -12,8 +12,8 @@ class MESI_CMP_directory_L2CacheController < CacheController
def argv()
vec = super()
vec += " cache " + cache.obj_name
- vec += " l2_request_latency "+l2_request_latency.to_s
- vec += " l2_response_latency "+l2_response_latency.to_s
+ vec += " l2_request_latency "+request_latency.to_s
+ vec += " l2_response_latency "+response_latency.to_s
vec += " to_l1_latency "+to_L1_latency.to_s
return vec
end
diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb b/src/mem/ruby/config/MI_example-homogeneous.rb
index 1ed81ee42..d409e6782 100644
--- a/src/mem/ruby/config/MI_example-homogeneous.rb
+++ b/src/mem/ruby/config/MI_example-homogeneous.rb
@@ -13,7 +13,7 @@ RubySystem.reset
# default values
num_cores = 2
-l1_cache_size_kb = 32768
+l1_cache_size_bytes = 32768
l1_cache_assoc = 8
l1_cache_latency = 1
num_memories = 2
@@ -34,6 +34,13 @@ for i in 0..$*.size-1 do
elsif $*[i] == "-m"
num_memories = $*[i+1].to_i
i = i+1
+ elsif $*[i] == "-R"
+ if $*[i+1] == "rand"
+ RubySystem.random_seed = "rand"
+ else
+ RubySystem.random_seed = $*[i+1].to_i
+ end
+ i = i+ 1
elsif $*[i] == "-s"
memory_size_mb = $*[i+1].to_i
i = i + 1
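Note: the new -R flag takes either the literal string "rand" or an integer seed; a hypothetical invocation of this script (the -m flag is the existing memory-count override shown above):

    ruby MI_example-homogeneous.rb -m 2 -R rand      # pick a random seed on every run
    ruby MI_example-homogeneous.rb -m 2 -R 12345     # fixed seed, reproducible runs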
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
index 566055f74..ee22df656 100644
--- a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -12,13 +12,13 @@ RubySystem.reset
# default values
num_cores = 2
-l1_icache_size_bytes = 32768
+l1_icache_size_kb = 64
l1_icache_assoc = 8
l1_icache_latency = 1
-l1_dcache_size_bytes = 32768
+l1_dcache_size_kb = 32
l1_dcache_assoc = 8
l1_dcache_latency = 1
-l2_cache_size_bytes = 2048 # total size (sum of all banks)
+l2_cache_size_kb = 8192 # total size (sum of all banks)
l2_cache_assoc = 16
l2_cache_latency = 12
num_l2_banks = num_cores
@@ -26,7 +26,8 @@ num_memories = 1
memory_size_mb = 1024
num_dma = 1
-protocol = "MOESI_CMP_token"
+#default protocol
+protocol = "MOESI_CMP_directory"
# check for overrides
@@ -34,59 +35,50 @@ for i in 0..$*.size-1 do
if $*[i] == "-c" or $*[i] == "--protocol"
i += 1
protocol = $*[i]
+ elsif $*[i] == "-A"
+ l1_dcache_size_kb = $*[i+1].to_i
+ i = i+1
+ elsif $*[i] == "-B"
+ num_l2_banks = $*[i+1].to_i
+ i = i+1
elsif $*[i] == "-m"
num_memories = $*[i+1].to_i
i = i+1
elsif $*[i] == "-p"
num_cores = $*[i+1].to_i
i = i+1
+ elsif $*[i] == "-R"
+ if $*[i+1] == "rand"
+ RubySystem.random_seed = "rand"
+ else
+ RubySystem.random_seed = $*[i+1].to_i
+ end
+ i = i+ 1
elsif $*[i] == "-s"
memory_size_mb = $*[i+1].to_i
i = i + 1
- elsif $*[i] == "-C"
- l1_dcache_size_bytes = $*[i+1].to_i
- i = i + 1
- elsif $*[i] == "-A"
- l1_dcache_assoc = $*[i+1].to_i
- i = i + 1
- elsif $*[i] == "-D"
- num_dma = $*[i+1].to_i
- i = i + 1
end
end
-n_tokens = num_cores + 1
-
net_ports = Array.new
iface_ports = Array.new
-#assert(protocol == "MESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol);
+assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"), __FILE__+" cannot be used with protocol '#{protocol}'");
require protocol+".rb"
num_cores.times { |n|
- icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_bytes, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
- dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_bytes, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+ icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+ dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
iface_ports << sequencer
- if protocol == "MOESI_CMP_token"
- net_ports << MOESI_CMP_token_L1CacheController.new("L1CacheController_"+n.to_s,
- "L1Cache",
- icache, dcache,
- sequencer,
- num_l2_banks,
- n_tokens)
- end
-
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
icache, dcache,
sequencer,
num_l2_banks)
- end
-
- if protocol == "MESI_CMP_directory"
+ elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
icache, dcache,
@@ -95,47 +87,29 @@ num_cores.times { |n|
end
}
num_l2_banks.times { |n|
- cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_bytes/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
- if protocol == "MOESI_CMP_token"
- net_ports << MOESI_CMP_token_L2CacheController.new("L2CacheController_"+n.to_s,
- "L2Cache",
- cache,
- n_tokens)
- end
-
+ cache = SetAssociativeCache.new("l2u_"+n.to_s, (l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
cache)
- end
-
- if protocol == "MESI_CMP_directory"
+ elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
cache)
end
-
+ net_ports.last.request_latency = l2_cache_latency + 2
+ net_ports.last.response_latency = l2_cache_latency + 2
}
num_memories.times { |n|
directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories)
memory_control = MemoryControl.new("MemoryControl_"+n.to_s)
- if protocol == "MOESI_CMP_token"
- net_ports << MOESI_CMP_token_DirectoryController.new("DirectoryController_"+n.to_s,
- "Directory",
- directory,
- memory_control,
- num_l2_banks)
- end
-
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
directory,
memory_control)
- end
-
- if protocol == "MESI_CMP_directory"
+ elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
directory,
@@ -146,19 +120,11 @@ num_memories.times { |n|
num_dma.times { |n|
dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
iface_ports << dma_sequencer
- if protocol == "MOESI_CMP_token"
- net_ports << MOESI_CMP_token_DMAController.new("DMAController_"+n.to_s,
- "DMA",
- dma_sequencer)
- end
-
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
"DMA",
dma_sequencer)
- end
-
- if protocol == "MESI_CMP_directory"
+ elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
"DMA",
dma_sequencer)
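Note: cache sizes in this script are now expressed in KB and converted to bytes when the caches are constructed; a worked example with the defaults above (a minimal sketch, any value beyond the script's defaults is hypothetical):

    l2_cache_size_kb = 8192                        # total L2 size, summed over all banks
    num_l2_banks     = 2                           # defaults to num_cores
    per_bank_bytes   = (l2_cache_size_kb * 1024) / num_l2_banks
    # => 4194304 bytes (4 MB) passed to each L2 bank's SetAssociativeCache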
diff --git a/src/mem/ruby/config/assert.rb b/src/mem/ruby/config/assert.rb
new file mode 100644
index 000000000..cc3e43214
--- /dev/null
+++ b/src/mem/ruby/config/assert.rb
@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby
+
+class AssertionFailure < RuntimeError
+ attr_reader :msg, :output
+ def initialize(message, out=nil)
+ @msg = message
+ @output = out
+ end
+end
+
+class NotImplementedException < Exception
+end
+
+def assert(condition,message)
+ unless condition
+ raise AssertionFailure.new(message), "\n\nAssertion failed: \n\n #{message}\n\n"
+ end
+end
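Note: a minimal usage sketch of the new assert helper, assuming cfg.rb has already required it (the values are hypothetical):

    num_cores = 3
    # raises AssertionFailure and prints "Assertion failed: ..." with this message
    assert(num_cores % 2 == 0, "num_cores (#{num_cores}) must be a multiple of 2")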
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
index c470ca92f..a20562243 100644
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -1,7 +1,7 @@
#!/usr/bin/ruby
-class AssertionFailure < RuntimeError
-end
+root = File.dirname(File.expand_path(__FILE__))
+require root+'/assert.rb'
class Boolean
def self.is_a?(obj)
@@ -9,22 +9,46 @@ class Boolean
end
end
-def assert(condition,message)
- unless condition
- raise AssertionFailure, "\n\nAssertion failed: \n\n #{message}\n\n"
- end
-end
-
class LibRubyObject
@@all_objs = Array.new
- attr_reader :obj_name
@@default_params = Hash.new
+ @@param_types = Hash.new
+
+ attr_reader :obj_name
def initialize(obj_name)
assert obj_name.is_a?(String), "Obj_Name must be a string"
@obj_name = obj_name
@@all_objs << self
@params = Hash.new
+
+ # add all parent parameter accessors if they don't exist
+ self.class.ancestors.each { |ancestor|
+ if @@default_params.key?(ancestor.name.to_sym)
+ @@default_params[ancestor.name.to_sym].each { |p, default|
+ p = p.to_sym
+ @params[p] = default
+ if ! respond_to?(p)
+ self.class.send(:define_method, p) {
+ @params[p] = @@default_params[ancestor.name.to_sym][p] if ! @params.key?(p)
+ return @params[p]
+ }
+ end
+ setter_method_name = (p.to_s + "=").to_sym
+ if ! respond_to?(setter_method_name)
+ self.class.send(:define_method, setter_method_name) { |val|
+ type = @@param_types[ancestor.name.to_sym][p]
+ if val.is_a?(FalseClass) || val.is_a?(TrueClass)
+ assert type.is_a?(Boolean), "default value of param \"#{p}\" must be either true or false"
+ else
+ assert val.is_a?(type), "default value of param \"#{p}\", which is of type #{val.class.name} does not match expected type #{type}"
+ end
+ @params[p] = val
+ }
+ end
+ }
+ end
+ }
end
def cppClassName()
@@ -35,40 +59,24 @@ class LibRubyObject
idx = self.name.to_sym
@@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
@@default_params[idx][param_name] = nil
- send :define_method, param_name do
- @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
- @params[param_name]
- end
- method_name = (param_name.to_s + "=").to_sym
- send :define_method, method_name do |val|
- if val.is_a?(FalseClass) || val.is_a?(TrueClass)
- assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
- else
- assert val.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
- end
-# assert val.is_a?(type), "#{param_name} must be of type #{type}"
- @params[param_name] = val
- end
+ @@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
+ @@param_types[idx][param_name] = type
end
def self.default_param(param_name, type, default)
- idx = self.name.to_sym
- @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
+
if default.is_a?(FalseClass) || default.is_a?(TrueClass)
assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
else
assert default.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
end
+
+ idx = self.name.to_sym
+ @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
@@default_params[idx][param_name] = default
- send :define_method, param_name do
- @params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
- @params[param_name]
- end
- method_name = (param_name.to_s + "=").to_sym
- send :define_method, method_name do |val|
- assert val.is_a?(type), "#{param_name} must be of type #{type}"
- @params[param_name] = val
- end
+ @@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
+ @@param_types[idx][param_name] = type
+
end
def applyDefaults()
@@ -86,6 +94,7 @@ class LibRubyObject
@params.each { |key, val|
str += key.id2name + " "
+ assert(val != nil, "parameter #{key} is nil")
if val.is_a?(LibRubyObject)
str += val.obj_name + " "
else
@@ -123,36 +132,32 @@ end
class NetPort < LibRubyObject
attr :mach_type
- attr_reader :version
+ param :version, Integer
@@type_cnt = Hash.new
- @type_id
def initialize(obj_name, mach_type)
super(obj_name)
@mach_type = mach_type
@@type_cnt[mach_type] ||= 0
- @type_id = @@type_cnt[mach_type]
+ self.version= @@type_cnt[mach_type] # sets the version parameter
+
@@type_cnt[mach_type] += 1
- idx = "NetPort".to_sym
- @@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
- @@default_params[idx].each { |key, val|
- @params[key] = val if ! @params.key?(key)
- }
end
def port_name
mach_type
end
def port_num
- @type_id
- end
- def cppClassName
- "NetPort"
+ version
end
def self.totalOfType(mach_type)
return @@type_cnt[mach_type]
end
+ def cppClassName()
+ "generated:"+@mach_type
+ end
+
end
class MemoryVector < LibRubyObject
@@ -161,7 +166,7 @@ class MemoryVector < LibRubyObject
end
def cppClassName
- "MemoryController"
+ "MemoryVector"
end
end
@@ -296,37 +301,13 @@ private
end
-
-
-
class CacheController < NetPort
- @@total_cache_controllers = Hash.new
def initialize(obj_name, mach_type, caches)
super(obj_name, mach_type)
caches.each { |cache|
cache.controller = self
}
-
- if !@@total_cache_controllers.key?(mach_type)
- @@total_cache_controllers[mach_type] = 0
- end
- @version = @@total_cache_controllers[mach_type]
- @@total_cache_controllers[mach_type] += 1
-
- # call inhereted parameters
- transitions_per_cycle
- buffer_size
- number_of_TBEs
- recycle_latency
- end
-
- def argv()
- vec = "version "+@version.to_s
- vec += " transitions_per_cycle "+@params[:transitions_per_cycle].to_s
- vec += " buffer_size "+@params[:buffer_size].to_s
- vec += " number_of_TBEs "+@params[:number_of_TBEs].to_s
- vec += " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
@@ -334,89 +315,92 @@ class CacheController < NetPort
end
end
+class Sequencer < IfacePort
+end
+
class L1CacheController < CacheController
- attr :sequencer
+ param :sequencer, Sequencer
def initialize(obj_name, mach_type, caches, sequencer)
super(obj_name, mach_type, caches)
- @sequencer = sequencer
- @sequencer.controller = self
- @sequencer.version = @version
+ sequencer.controller = self
+ sequencer.version = version
+ self.sequencer= sequencer
end
- def argv()
- vec = super()
- vec += " sequencer "+@sequencer.obj_name
- end
+# def argv()
+# vec = super()
+# vec += " sequencer "+@sequencer.obj_name
+# end
+end
+
+class DirectoryMemory < LibRubyObject
+end
+class MemoryControl < LibRubyObject
end
class DirectoryController < NetPort
@@total_directory_controllers = 0
- attr :directory
- attr :memory_control
+ param :directory, DirectoryMemory
+ param :memory_control, MemoryControl
def initialize(obj_name, mach_type, directory, memory_control)
super(obj_name, mach_type)
- @directory = directory
directory.controller = self
-
- @memory_control = memory_control
+ directory.version = @@total_directory_controllers
+ self.directory = directory
+ self.memory_control = memory_control
@version = @@total_directory_controllers
@@total_directory_controllers += 1
buffer_size()
end
- def argv()
- "version "+@version.to_s+" directory_name "+@directory.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " memory_controller_name "+@memory_control.obj_name + " recycle_latency "+@params[:recycle_latency].to_s
- end
-
def cppClassName()
"generated:"+@mach_type
end
end
+class DMASequencer < IfacePort
+end
+
class DMAController < NetPort
@@total_dma_controllers = 0
- attr :dma_sequencer
+ param :dma_sequencer, DMASequencer
+ param :version, Integer
+
def initialize(obj_name, mach_type, dma_sequencer)
super(obj_name, mach_type)
- @dma_sequencer = dma_sequencer
- @version = @@total_dma_controllers
- @@total_dma_controllers += 1
dma_sequencer.controller = self
- buffer_size
- end
+ dma_sequencer.version = @@total_dma_controllers
+ self.dma_sequencer = dma_sequencer
- def argv()
- "version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " recycle_latency "+@params[:recycle_latency].to_s
+ self.version = @@total_dma_controllers
+ @@total_dma_controllers += 1
end
- def cppClassName()
- "generated:"+@mach_type
- end
end
class Cache < LibRubyObject
- attr :size, :latency
- attr_writer :controller
+ param :size, Integer
+ param :latency, Integer
+ param :controller, NetPort
def initialize(obj_name, size, latency)
super(obj_name)
- assert size.is_a?(Integer), "Cache size must be an integer"
- @size = size
- @latency = latency
+ self.size = size
+ self.latency = latency
+ # controller must be set manually by the configuration script
+ # because there is a cyclic dependence
end
- def args
- "controller "+@controller.obj_name+" size "+@size.to_s+" latency "+@latency.to_s
- end
end
class SetAssociativeCache < Cache
- attr :assoc, :replacement_policy
+ param :assoc, Integer
+ param :replacement_policy, String
# latency can be either an integer, a float, or the string "auto"
# when an integer, it represents the number of cycles for a hit
@@ -424,74 +408,68 @@ class SetAssociativeCache < Cache
# when set to "auto", libruby will attempt to find a realistic latency by running CACTI
def initialize(obj_name, size, latency, assoc, replacement_policy)
super(obj_name, size, latency)
- @assoc = assoc
- @replacement_policy = replacement_policy
+ self.assoc = assoc
+ self.replacement_policy = replacement_policy
end
def calculateLatency()
- if @latency == "auto"
+ if self.latency == "auto"
cacti_args = Array.new()
- cacti_args << (@size) << RubySystem.block_size_bytes << @assoc
+ cacti_args << (self.size*1024) << RubySystem.block_size_bytes << self.assoc
cacti_args << 1 << 0 << 0 << 0 << 1
cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1
cacti_args << 360 << 0 << 0 << 0 << 0 << 1 << 1 << 1 << 1 << 0 << 0
cacti_args << 50 << 10 << 10 << 0 << 1 << 1
-# cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
-
-# IO.popen(cacti_cmd) { |pipe|
-# str1 = pipe.readline
-# str2 = pipe.readline
-# results = str2.split(", ")
-# if results.size != 61
-# print "CACTI ERROR: CACTI produced unexpected output.\n"
-# print "Are you using the version shipped with libruby?\n"
-# raise Exception
-# end
-# latency_ns = results[5].to_f
-# if (latency_ns == "1e+39")
-# print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n"
-# print "Either change the cache parameters or manually set the latency values\n"
-# raise Exception
-# end
-# clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
-# latency_cycles = (latency_ns / clk_period_ns).ceil
-# @latency = latency_cycles
-# }
- elsif @latency.is_a?(Float)
+ cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
+
+ IO.popen(cacti_cmd) { |pipe|
+ str1 = pipe.readline
+ str2 = pipe.readline
+ results = str2.split(", ")
+ if results.size != 61
+ print "CACTI ERROR: CACTI produced unexpected output.\n"
+ print "Are you using the version shipped with libruby?\n"
+ raise Exception
+ end
+ latency_ns = results[5].to_f
+ if (latency_ns == "1e+39")
+ print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n"
+ print "Either change the cache parameters or manually set the latency values\n"
+ raise Exception
+ end
+ clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
+ latency_cycles = (latency_ns / clk_period_ns).ceil
+ self.latency = latency_cycles
+ }
+ elsif self.latency.is_a?(Float)
clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
- latency_cycles = (@latency / clk_period_ns).ceil
- @latency = latency_cycles
- elsif ! @latency.is_a?(Integer)
+ latency_cycles = (self.latency / clk_period_ns).ceil
+ self.latency = latency_cycles
+ elsif ! self.latency.is_a?(Integer)
raise Exception
end
end
- def argv()
- args+" assoc "+@assoc.to_s+" replacement_policy "+@replacement_policy
- end
-
def cppClassName()
"SetAssociativeCache"
end
end
class DirectoryMemory < LibRubyObject
- attr :size_mb
- attr_writer :controller
+ param :size_mb, Integer
+ param :controller, NetPort
+ param :version, Integer
+
@@total_size_mb = 0
def initialize(obj_name, size_mb)
super(obj_name)
- @size_mb = size_mb
+ self.size_mb = size_mb
@@total_size_mb += size_mb
end
- def argv()
- "version "+@controller.version.to_s+" size_mb "+@size_mb.to_s+" controller "+@controller.obj_name
- end
-
def cppClassName()
"DirectoryMemory"
end
@@ -501,43 +479,17 @@ class DirectoryMemory < LibRubyObject
end
end
-#added by SS
class MemoryControl < LibRubyObject
- attr :name
def initialize(obj_name)
super(obj_name)
- @name = obj_name
end
- def argv()
- vec = super()
- vec += " mem_bus_cycle_multiplier "+mem_bus_cycle_multiplier.to_s
- vec += " banks_per_rank "+banks_per_rank.to_s
- vec += " ranks_per_dimm "+ranks_per_dimm.to_s
- vec += " dimms_per_channel "+dimms_per_channel.to_s
- vec += " bank_bit_0 "+bank_bit_0.to_s
- vec += " rank_bit_0 "+rank_bit_0.to_s
- vec += " dimm_bit_0 "+dimm_bit_0.to_s
- vec += " bank_queue_size "+bank_queue_size.to_s
- vec += " bank_busy_time "+bank_busy_time.to_s
- vec += " rank_rank_delay "+rank_rank_delay.to_s
- vec += " read_write_delay "+read_write_delay.to_s
- vec += " basic_bus_busy_time "+basic_bus_busy_time.to_s
- vec += " mem_ctl_latency "+mem_ctl_latency.to_s
- vec += " refresh_period "+refresh_period.to_s
- vec += " tFaw "+tFaw.to_s
- vec += " mem_random_arbitrate "+mem_random_arbitrate.to_s
- vec += " mem_fixed_delay "+mem_fixed_delay.to_s
- vec += " memory_controller_name "+@name
-
- end
-
-
def cppClassName()
"MemoryControl"
end
end
+
class Sequencer < IfacePort
def cppClassName()
@@ -564,17 +516,11 @@ end
class DMASequencer < IfacePort
+ param :controller, NetPort
+ param :version, Integer
+
def initialize(obj_name)
super(obj_name)
- @params = {
- :controller => nil,
- :version => nil
- }
- end
-
- def controller=(controller)
- @params[:controller] = controller.obj_name
- @params[:version] = controller.version
end
def cppClassName()
@@ -582,7 +528,7 @@ class DMASequencer < IfacePort
end
def bochsConnType()
- return "dma"+@params[:version].to_s
+ return "dma"+self.version.to_s
end
end
@@ -613,22 +559,8 @@ class Network < LibRubyObject
param :topology, Topology
def initialize(name, topo)
super(name)
- @params[:topology] = topo
topo.network= self
- end
-
- def argv()
- vec = super()
-
- vec += " endpoint_bandwidth "+endpoint_bandwidth.to_s
- vec += " adaptive_routing "+adaptive_routing.to_s
- vec += " number_of_virtual_networks "+number_of_virtual_networks.to_s
- vec += " fan_out_degree "+fan_out_degree.to_s
-
- vec += " buffer_size "+buffer_size.to_s
- vec += " link_latency "+adaptive_routing.to_s
- vec += " on_chip_latency "+on_chip_latency.to_s
- vec += " control_msg_size "+control_msg_size.to_s
+ self.topology = topo
end
def printTopology()
@@ -689,7 +621,6 @@ class CrossbarTopology < Topology
end
end
-#added by SS
class Tracer < LibRubyObject
def initialize(obj_name)
super(obj_name)
@@ -712,20 +643,10 @@ class Profiler < LibRubyObject
end
-#added by SS
class GarnetNetwork < Network
def initialize(name, topo)
super(name, topo)
end
- def argv()
- vec = super()
- vec += " flit_size "+flit_size.to_s
- vec += " number_of_pipe_stages "+number_of_pipe_stages.to_s
- vec += " vcs_per_class "+vcs_per_class.to_s
- vec += " buffer_size "+buffer_size.to_s
- vec += " using_network_testing "+using_network_testing.to_s
- end
-
end
class GarnetFixedPipeline < GarnetNetwork
@@ -733,10 +654,6 @@ class GarnetFixedPipeline < GarnetNetwork
super(name, net_ports)
end
- def argv()
- super()
- end
-
def cppClassName()
"GarnetNetwork_d"
end
@@ -747,14 +664,9 @@ class GarnetFlexiblePipeline < GarnetNetwork
super(name, net_ports)
end
- def argv()
- super()
- end
-
def cppClassName()
"GarnetNetwork"
end
end
-#added by SS
require "defaults.rb"
diff --git a/src/mem/ruby/config/config.hh b/src/mem/ruby/config/config.hh
deleted file mode 100644
index ad91cd73d..000000000
--- a/src/mem/ruby/config/config.hh
+++ /dev/null
@@ -1,236 +0,0 @@
-
-// FOR MOESI_CMP_token
-//PARAM_BOOL( FilteringEnabled, false, false );
-//PARAM_BOOL( DistributedPersistentEnabled, true, false );
-//PARAM_BOOL( DynamicTimeoutEnabled, true, false );
-//PARAM( RetryThreshold, 1, false );
-//PARAM( FixedTimeoutLatency, 300, false );
-
-//PARAM( TraceWarmupLength, 1000000, false );
-
-//PARAM( callback_counter, 0, false );
-//PARAM( NUM_COMPLETIONS_BEFORE_PASS, 0, false );
-
-//PARAM( tester_length, 0, false );
-//PARAM( synthetic_locks, 2048, false );
-//PARAM( think_time, 5, false );
-//PARAM( wait_time, 5, false );
-//PARAM( hold_time, 5, false );
-//PARAM( deterministic_addrs, 1, false );
-//PARAM_STRING( SpecifiedGenerator, "DetermInvGenerator", false );
-
-// For debugging purposes, one can enable a trace of all the protocol
-// state machine changes. Unfortunately, the code to generate the
-// trace is protocol specific. To enable the code for some of the
-// standard protocols,
-// 1. change "PROTOCOL_DEBUG_TRACE = true"
-// 2. enable debug in Makefile
-// 3. use the "--start 1" command line parameter or
-// "g_debug_ptr->setDebugTime(1)" to beging the following to set the
-// debug begin time
-//
-// this use to be ruby/common/Global.hh
-
-//PARAM_BOOL( ProtocolDebugTrace, true, false );
-// a string for filtering debugging output (for all g_debug vars see Debug.hh)
-//PARAM_STRING( DEBUG_FILTER_STRING, "", false );
-// filters debugging messages based on priority (low, med, high)
-//PARAM_STRING( DEBUG_VERBOSITY_STRING, "", false );
-// filters debugging messages based on a ruby time
-//PARAM_ULONG( DEBUG_START_TIME, 0, false );
-// sends debugging messages to a output filename
-//PARAM_STRING( DEBUG_OUTPUT_FILENAME, "", false );
-
-//PARAM_BOOL( ProfileHotLines, false, false );
-
-// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed
-// The following need to be true for this to work correctly:
-// 1. Disable istc and dstc for this simulation run
-// 2. Add the following line to the object "sim" in the checkpoint you run from:
-// instruction_profile_line_size: 4
-// This is used to have simics report back all instruction requests
-
-// For more details on how to find out how to interpret the output physical instruction
-// address, please read the document in the simics-howto directory
-//PARAM_BOOL( ProfileAllInstructions, false, false );
-
-// Set the following variable to true if you want a complete trace of
-// PCs (physical address of program counters, with executing processor IDs)
-// to be printed to stdout. Make sure to direct the simics output to a file.
-// Otherwise, the run will take a really long time!
-// A long run may write a file that can exceed the OS limit on file length
-//PARAM_BOOL( PRINT_INSTRUCTION_TRACE, false, false );
-//PARAM( DEBUG_CYCLE, 0, false );
-
-// Make the entire memory system perfect
-//PARAM_BOOL( PERFECT_MEMORY_SYSTEM, false, false );
-//PARAM( PERFECT_MEMORY_SYSTEM_LATENCY, 0, false );
-
-// *********************************************
-// SYSTEM PARAMETERS
-// *********************************************
-
-//PARAM( NumberOfChips, 1, false );
-//PARAM( NumberOfCores, 2, false );
-//PARAM_ARRAY( NumberOfCoresPerChip, int, m_NumberOfChips, 2, false);
-
-// *********************************************
-// CACHE PARAMETERS
-// *********************************************
-
-//PARAM( NumberOfCaches, m_NumberOfCores, false );
-//PARAM( NumberOfCacheLevels, 1, false );
-/* this returns the number of discrete CacheMemories per level (i.e. a split L1 counts for 2) */
-//PARAM_ARRAY( NumberOfCachesPerLevel, int, m_NumberOfCacheLevels, m_NumberOfCores, false ); // this is the number of discrete caches if the level is private
- // or the number of banks if the level is shared
-//PARAM( CacheIDFromParams, 1, true ); // returns a unique CacheID from the parameters (level, num, split_type)
-//PARAM_ARRAY( CacheLatency, int, m_NumberOfCaches, 1, false ); // returns the latency for cache, indexed by CacheID
-//PARAM_ARRAY( CacheSplitType, string, m_NumberOfCaches, "unified", false ); // returns "data", "instruction", or "unified", indexed by CacheID
-//PARAM_ARRAY( CacheType, string, m_NumberOfCaches, "SetAssociative", false ); // returns the type of a cache, indexed by CacheID
-//PARAM_ARRAY( CacheAssoc, int, m_NumberOfCaches, 4, false ); // returns the cache associativity, indexed by CacheID
-//PARAM_ARRAY( NumberOfCacheSets, int, m_NumberOfCaches, 256, false ); // returns the number of cache sets, indexed by CacheID
-//PARAM_ARRAY( NumberOfCacheSetBits, int, m_NumberOfCaches, log_int(256), false ); // returns the number of cache set bits, indexed by CacheID
-//PARAM_ARRAY( CacheReplacementPolicy, string, m_NumberOfCaches, "PSEUDO_LRU", false ); // other option is "LRU"
-
-//PARAM( DataBlockBytes, 64, false );
-//PARAM( DataBlockBits, log_int(m_DataBlockBytes), false);
-
-// ********************************************
-// MEMORY PARAMETERS
-// ********************************************
-
-//PARAM_ARRAY( NumberOfControllersPerType, int, m_NumberOfCacheLevels+2, m_NumberOfCores, false);
-//PARAM_ARRAY2D( NumberOfControllersPerTypePerChip, int, m_NumberOfCacheLevels+2, m_NumberOfChips, m_NumberOfCores, false);
-
-// ********************************************
-// DMA CONTROLLER PARAMETERS
-// ********************************************
-
-//PARAM( NumberOfDMA, 1, false );
-//PARAM_ARRAY( NumberOfDMAPerChip, int, m_NumberOfChips, 1, false);
-//PARAM_ARRAY( ChipNumFromDMAVersion, int, m_NumberOfDMA, 0, false );
-
-//PARAM_ULONG( MemorySizeBytes, 4294967296, false );
-//PARAM_ULONG( MemorySizeBits, 32, false);
-
-//PARAM( NUM_PROCESSORS, 0, false );
-//PARAM( NUM_L2_BANKS, 0, false );
-//PARAM( NUM_MEMORIES, 0, false );
-//PARAM( ProcsPerChip, 1, false );
-
-// The following group of parameters are calculated. They must
-// _always_ be left at zero.
-//PARAM( NUM_CHIPS, 0, false );
-//PARAM( NUM_CHIP_BITS, 0, false );
-//PARAM( MEMORY_SIZE_BITS, 0, false );
-//PARAM( DATA_BLOCK_BITS, 0, false );
-//PARAM( PAGE_SIZE_BITS, 0, false );
-//PARAM( NUM_PROCESSORS_BITS, 0, false );
-//PARAM( PROCS_PER_CHIP_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_PER_CHIP_BITS, 0, false );
-//PARAM( NUM_L2_BANKS_PER_CHIP, 0, false );
-//PARAM( NUM_MEMORIES_BITS, 0, false );
-//PARAM( NUM_MEMORIES_PER_CHIP, 0, false );
-//PARAM( MEMORY_MODULE_BITS, 0, false );
-//PARAM_ULONG( MEMORY_MODULE_BLOCKS, 0, false );
-
-// TIMING PARAMETERS
-//PARAM( DIRECTORY_CACHE_LATENCY, 6, false );
-
-//PARAM( NULL_LATENCY, 1, false );
-//PARAM( ISSUE_LATENCY, 2, false );
-//PARAM( CACHE_RESPONSE_LATENCY, 12, false );
-//PARAM( L2_RESPONSE_LATENCY, 6, false );
-//PARAM( L2_TAG_LATENCY, 6, false );
-//PARAM( L1_RESPONSE_LATENCY, 3, false );
-
-//PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2, 158, false );
-//PARAM( DirectoryLatency, 6, false );
-
-//PARAM( NetworkLinkLatency, 1, false );
-//PARAM( COPY_HEAD_LATENCY, 4, false );
-//PARAM( OnChipLinkLatency, 1, false );
-//PARAM( RecycleLatency, 10, false );
-//PARAM( L2_RECYCLE_LATENCY, 5, false );
-//PARAM( TIMER_LATENCY, 10000, false );
-//PARAM( TBE_RESPONSE_LATENCY, 1, false );
-//PARAM_BOOL( PERIODIC_TIMER_WAKEUPS, true, false );
-
-// constants used by CMP protocols
-//PARAM( L1_REQUEST_LATENCY, 2, false );
-//PARAM( L2_REQUEST_LATENCY, 4, false );
-//PARAM_BOOL( SINGLE_ACCESS_L2_BANKS, true, false ); // hack to simulate multi-cycle L2 bank accesses
-
-// Ruby cycles between when a sequencer issues a miss it arrives at
-// the L1 cache controller
-//PARAM( SequencerToControllerLatency, 4, false );
-
-// Number of transitions each controller state machines can complete per cycle
-//PARAM( L1CacheTransitionsPerCycle, 32, false );
-//PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE, 32, false );
-//PARAM( DirectoryTransitionsPerCycle, 32, false );
-//PARAM( DMATransitionsPerCycle, 1, false );
-
-// Number of TBEs available for demand misses, prefetches, and replacements
-//PARAM( NumberOfTBEs, 128, false );
-//PARAM( NumberOfL1TBEs, 32, false );
-//PARAM( NumberOfL2TBEs, 32, false );
-
-// NOTE: Finite buffering allows us to simulate a wormhole routed network
-// with idealized flow control. All message buffers within the network (i.e.
-// the switch's input and output buffers) are set to the size specified below
-// by the PROTOCOL_BUFFER_SIZE
-//PARAM_BOOL( FiniteBuffering, false, false );
-//PARAM( FiniteBufferSize, 3, false ); // Zero is unbounded buffers
-// Number of requests buffered between the sequencer and the L1 conroller
-// This can be more accurately simulated in Opal, therefore it's set to an
-// infinite number
-// Only effects the simualtion when FINITE_BUFFERING is enabled
-//PARAM( ProcessorBufferSize, 10, false );
-// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
-// Controllers. Controlls the number of request issued by the L2 HW Prefetcher
-//PARAM( ProtocolBufferSize, 32, false );
-
-// NETWORK PARAMETERS
-
-// Network Topology: See TopologyType in external.sm for valid values
-//PARAM_STRING( NetworkTopology, "PT_TO_PT", false );
-
-// Cache Design specifies file prefix for topology
-//PARAM_STRING( CacheDesign, "NUCA", false );
-
-//PARAM( EndpointBandwidth, 10000, false );
-//PARAM_BOOL( AdaptiveRouting, true, false );
-//PARAM( NumberOfVirtualNetworks, 6, false );
-//PARAM( FanOutDegree, 4, false );
-//PARAM_BOOL( PrintTopology, true, false );
-
-// Princeton Network (Garnet)
-//PARAM_BOOL( UsingGarnetNetwork, true, false );
-//PARAM_BOOL( UsingDetailNetwork, false, false );
-//PARAM_BOOL( UsingNetworkTesting, false, false );
-//PARAM( FlitSize, 16, false );
-//PARAM( NumberOfPipeStages, 4, false );
-//PARAM( VCSPerClass, 4, false );
-//PARAM( BufferSize, 4, false );
-
-// MemoryControl:
-//PARAM( MEM_BUS_CYCLE_MULTIPLIER, 10, false );
-//PARAM( BANKS_PER_RANK, 8, false );
-//PARAM( RANKS_PER_DIMM, 2, false );
-//PARAM( DIMMS_PER_CHANNEL, 2, false );
-//PARAM( BANK_BIT_0, 8, false );
-//PARAM( RANK_BIT_0, 11, false );
-//PARAM( DIMM_BIT_0, 12, false );
-//PARAM( BANK_QUEUE_SIZE, 12, false );
-//PARAM( BankBusyTime, 11, false );
-//PARAM( RANK_RANK_DELAY, 1, false );
-//PARAM( READ_WRITE_DELAY, 2, false );
-//PARAM( BASIC_BUS_BUSY_TIME, 2, false );
-//PARAM( MEM_CTL_LATENCY, 12, false );
-//PARAM( REFRESH_PERIOD, 1560, false );
-//PARAM( TFAW, 0, false );
-//PARAM( MEM_RANDOM_ARBITRATE, 0, false );
-//PARAM( MEM_FIXED_DELAY, 0, false );
-
diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb
index f338f4e3f..224bf1eeb 100644
--- a/src/mem/ruby/config/defaults.rb
+++ b/src/mem/ruby/config/defaults.rb
@@ -1,7 +1,5 @@
#!/usr/bin/ruby
-
-
class NetPort < LibRubyObject
# number of transitions a SLICC state machine can transition per
# cycle
@@ -9,9 +7,8 @@ class NetPort < LibRubyObject
# buffer_size limits the size of all other buffers connecting to
# SLICC Controllers. When 0, infinite buffering is used.
- default_param :buffer_size, Integer, 0
+ default_param :buffer_size, Integer, 32
- # added by SS for TBE
default_param :number_of_TBEs, Integer, 256
default_param :recycle_latency, Integer, 10
@@ -38,16 +35,36 @@ class Debug < LibRubyObject
# 3. set start_time = 1
default_param :protocol_trace, Boolean, false
- # a string for filtering debugging output (for all g_debug vars see Debug.h)
+ # a string for filtering debugging output. Valid options (also see Debug.cc):
+ # {"System", 's' },
+ # {"Node", 'N' },
+ # {"Queue", 'q' },
+ # {"Event Queue", 'e' },
+ # {"Network", 'n' },
+ # {"Sequencer", 'S' },
+ # {"Tester", 't' },
+ # {"Generated", 'g' },
+ # {"SLICC", 'l' },
+ # {"Network Queues", 'Q' },
+ # {"Time", 'T' },
+ # {"Network Internals", 'i' },
+ # {"Store Buffer", 'b' },
+ # {"Cache", 'c' },
+ # {"Predictor", 'p' },
+ # {"Allocator", 'a' }
+ #
+ # e.g., "sq" will print system and queue debugging messages
+ # Set to "none" for no debugging output
default_param :filter_string, String, "none"
- # filters debugging messages based on priority (low, med, high)
+ # filters debugging messages based on priority (none, low, med, high)
default_param :verbosity_string, String, "none"
# filters debugging messages based on a ruby time
default_param :start_time, Integer, 1
# sends debugging messages to a output filename
+ # set to "none" to print to stdout
default_param :output_filename, String, "none"
end
@@ -65,23 +82,23 @@ class Topology < LibRubyObject
# indicates whether the topology config will be displayed in the
# stats file
- default_param :print_config, Boolean, true
+ default_param :print_config, Boolean, false
end
class Network < LibRubyObject
default_param :endpoint_bandwidth, Integer, 10000
default_param :adaptive_routing, Boolean, true
- default_param :number_of_virtual_networks, Integer, 10
- default_param :fan_out_degree, Integer, 4
+ default_param :number_of_virtual_networks, Integer, 5
+ # default_param :fan_out_degree, Integer, 4
# default buffer size. Setting to 0 indicates infinite buffering
- default_param :buffer_size, Integer, 0
+ # default_param :buffer_size, Integer, 0
# local memory latency ?? NetworkLinkLatency
default_param :link_latency, Integer, 1
# on chip latency
- default_param :on_chip_latency, Integer, 1
+ # default_param :on_chip_latency, Integer, 1
default_param :control_msg_size, Integer, 8
end
@@ -94,20 +111,15 @@ class GarnetNetwork < Network
default_param :using_network_testing, Boolean, false
end
-
-
-#added by SS
class Tracer < LibRubyObject
default_param :warmup_length, Integer, 1000000
end
-#added by SS
class Profiler < LibRubyObject
default_param :hot_lines, Boolean, false
default_param :all_instructions, Boolean, false
end
-#added by SS
class MemoryControl < LibRubyObject
default_param :mem_bus_cycle_multiplier, Integer, 10
@@ -125,7 +137,7 @@ class MemoryControl < LibRubyObject
default_param :mem_ctl_latency, Integer, 12
default_param :refresh_period, Integer, 1560
default_param :tFaw, Integer, 0
- default_param :mem_random_arbitrate, Integer, 0
+ default_param :mem_random_arbitrate, Integer, 11
default_param :mem_fixed_delay, Integer, 0
end
@@ -163,49 +175,33 @@ class MOESI_CMP_directory_DirectoryController < DirectoryController
end
class MOESI_CMP_directory_DMAController < DMAController
- default_param :request_latency, Integer, 6
- default_param :response_latency, Integer, 6
+ default_param :request_latency, Integer, 14
+ default_param :response_latency, Integer, 14
end
-## MOESI_CMP_token protocol
+class MESI_CMP_directory_L2CacheController < CacheController
+ default_param :request_latency, Integer, 2
+ default_param :response_latency, Integer, 2
+ default_param :to_L1_latency, Integer, 1
+
+#if 0 then automatically calculated
+ default_param :lowest_bit, Integer, 0
+ default_param :highest_bit, Integer, 0
+end
-class MOESI_CMP_token_L1CacheController < L1CacheController
+class MESI_CMP_directory_L1CacheController < L1CacheController
default_param :l1_request_latency, Integer, 2
default_param :l1_response_latency, Integer, 2
- default_param :retry_threshold, Integer, 1
- default_param :fixed_timeout_latency, Integer, 300
- default_param :dynamic_timeout_enabled, Boolean, true
+ default_param :to_L2_latency, Integer, 1
end
-class MOESI_CMP_token_L2CacheController < CacheController
- default_param :l2_request_latency, Integer, 2
- default_param :l2_response_latency, Integer, 2
- default_param :filtering_enabled, Boolean, true
-end
-class MOESI_CMP_token_DirectoryController < DirectoryController
+class MESI_CMP_directory_DirectoryController < DirectoryController
+ default_param :to_mem_ctrl_latency, Integer, 1
default_param :directory_latency, Integer, 6
- default_param :distributed_persistent, Boolean, true
- default_param :fixed_timeout_latency, Integer, 300
-end
-
-class MOESI_CMP_token_DMAController < DMAController
- default_param :request_latency, Integer, 6
- default_param :response_latency, Integer, 6
-end
-
-## MOESI_hammer protocol
-
-class MOESI_hammer_CacheController < L1CacheController
- default_param :issue_latency, Integer, 2
- default_param :cache_response_latency, Integer, 12
-end
-
-class MOESI_hammer_DirectoryController < DirectoryController
- default_param :memory_controller_latency, Integer, 12
end
-class MOESI_hammer_DMAController < DMAController
+class MESI_CMP_directory_DMAController < DMAController
default_param :request_latency, Integer, 6
end
@@ -219,8 +215,9 @@ class RubySystem
# When set to true, the simulation will insert random delays on
# message enqueue times. Note that even if this is set to false,
# you can still have a non-deterministic simulation if random seed
- # is set to "rand". This is because the Ruby swtiches use random
- # link priority elevation
+ # is set to "rand". This is used mainly to debug protocols by forcing
+ # really strange interleavings and should not be used for
+ # performance runs.
default_param :randomization, Boolean, false
# tech_nm is the device size used to calculate latency and area
@@ -246,31 +243,6 @@ class RubySystem
default_param :profiler, Profiler, Profiler.new("profiler0")
end
-#added by SS
-
-class MESI_CMP_directory_L2CacheController < CacheController
- default_param :l2_request_latency, Integer, 2
- default_param :l2_response_latency, Integer, 2
- default_param :to_L1_latency, Integer, 1
-
-#if 0 then automatically calculated
- default_param :lowest_bit, Integer, 0
- default_param :highest_bit, Integer, 0
-end
-
-class MESI_CMP_directory_L1CacheController < L1CacheController
- default_param :l1_request_latency, Integer, 2
- default_param :l1_response_latency, Integer, 2
- default_param :to_L2_latency, Integer, 1
-end
-class MESI_CMP_directory_DirectoryController < DirectoryController
- default_param :to_mem_ctrl_latency, Integer, 1
- default_param :directory_latency, Integer, 6
-end
-
-class MESI_CMP_directory_DMAController < DMAController
- default_param :request_latency, Integer, 6
-end
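Note: the MESI_CMP_directory latencies above are ordinary default_param declarations, so a configuration script can override them per instance after construction, as TwoLevel_SplitL1UnifiedL2.rb now does for its L2 controllers. A minimal sketch (l2_cache is a hypothetical SetAssociativeCache):

    l2 = MESI_CMP_directory_L2CacheController.new("L2CacheController_0", "L2Cache", l2_cache)
    l2.request_latency  = 14    # overrides the default_param value of 2
    l2.response_latency = 14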
diff --git a/src/mem/ruby/config/rubyconfig.defaults b/src/mem/ruby/config/rubyconfig.defaults
deleted file mode 100644
index 936a2f091..000000000
--- a/src/mem/ruby/config/rubyconfig.defaults
+++ /dev/null
@@ -1,405 +0,0 @@
-//
-// This file has been modified by Kevin Moore and Dan Nussbaum of the
-// Scalable Systems Research Group at Sun Microsystems Laboratories
-// (http://research.sun.com/scalable/) to support the Adaptive
-// Transactional Memory Test Platform (ATMTP). For information about
-// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
-//
-// Please send email to atmtp-interest@sun.com with feedback, questions, or
-// to request future announcements about ATMTP.
-//
-// ----------------------------------------------------------------------
-//
-// File modification date: 2008-02-23
-//
-// ----------------------------------------------------------------------
-//
-// ATMTP is distributed as part of the GEMS software toolset and is
-// available for use and modification under the terms of version 2 of the
-// GNU General Public License. The GNU General Public License is contained
-// in the file $GEMS/LICENSE.
-//
-// Multifacet GEMS is free software; you can redistribute it and/or modify
-// it under the terms of version 2 of the GNU General Public License as
-// published by the Free Software Foundation.
-//
-// Multifacet GEMS is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with the Multifacet GEMS; if not, write to the Free Software Foundation,
-// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-//
-// ----------------------------------------------------------------------
-//
-
-g_RANDOM_SEED: 1
-
-g_DEADLOCK_THRESHOLD: 500000
-
-// determines how many Simics cycles advance for every Ruby cycle
-// (does not apply when running Opal)
-SIMICS_RUBY_MULTIPLIER: 4
-
-// Ruby cycles between when a sequencer issues a request and it arrives at
-// the L1 cache controller
-//
-// ** important ** this parameter determines the L2 hit latency when
-// using the SMP protocols with a combined L1/L2 controller (-cache.sm)
-//
-SEQUENCER_TO_CONTROLLER_LATENCY: 4
-
-
-// When set to false, the L1 cache structures are probed for a hit in Sequencer.C
-// If a request hits, it is *not* issued to the cache controller
-// When set to true, all processor data requests issue to cache controller
-//
-// ** important ** this parameter must be set to false for proper L1/L2 hit timing
-// for the SMP protocols with combined L1/L2 controllers (-cache.sm)
-//
-REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false
-
-
-// When running with Opal in SMT configurations, this indicates the number of threads per physical processor
-g_NUM_SMT_THREADS: 1
-
-
-// Maximum number of requests (including SW prefetches) outstanding from
-// the sequencer (Note: this also include items buffered in the store
-// buffer)
-g_SEQUENCER_OUTSTANDING_REQUESTS: 16
-
-
-PROTOCOL_DEBUG_TRACE: true
-DEBUG_FILTER_STRING: none
-DEBUG_VERBOSITY_STRING: none
-DEBUG_START_TIME: 0
-DEBUG_OUTPUT_FILENAME: none
-
-
-TRANSACTION_TRACE_ENABLED: false
-USER_MODE_DATA_ONLY: false
-PROFILE_HOT_LINES: false
-
-PROFILE_ALL_INSTRUCTIONS: false
-PRINT_INSTRUCTION_TRACE: false
-g_DEBUG_CYCLE: 0
-BLOCK_STC: false
-PERFECT_MEMORY_SYSTEM: false
-PERFECT_MEMORY_SYSTEM_LATENCY: 0
-DATA_BLOCK: false
-
-
-// *********************************************
-// CACHE & MEMORY PARAMETERS
-// *********************************************
-
-
-L1_CACHE_ASSOC: 4
-L1_CACHE_NUM_SETS_BITS: 8
-L2_CACHE_ASSOC: 4
-L2_CACHE_NUM_SETS_BITS: 16
-
-// 32 bits = 4 GB address space
-g_MEMORY_SIZE_BYTES: 1073741824 //4294967296
-g_DATA_BLOCK_BYTES: 64
-g_PAGE_SIZE_BYTES: 4096
-g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU
-
-g_PROCS_PER_CHIP: 1
-
-
-// set automatically
-g_NUM_PROCESSORS: 0
-g_NUM_L2_BANKS: 0
-g_NUM_MEMORIES: 0
-
-// The following group of parameters are calculated. They must
-// _always_ be left at zero.
-g_NUM_CHIPS: 0
-g_NUM_CHIP_BITS: 0
-g_MEMORY_SIZE_BITS: 0
-g_DATA_BLOCK_BITS: 0
-g_PAGE_SIZE_BITS: 0
-g_NUM_PROCESSORS_BITS: 0
-g_PROCS_PER_CHIP_BITS: 0
-g_NUM_L2_BANKS_BITS: 0
-g_NUM_L2_BANKS_PER_CHIP: 0
-g_NUM_L2_BANKS_PER_CHIP_BITS: 0
-g_NUM_MEMORIES_BITS: 0
-g_NUM_MEMORIES_PER_CHIP: 0
-g_MEMORY_MODULE_BITS: 0
-g_MEMORY_MODULE_BLOCKS: 0
-
-
-// For certain CMP protocols, determines whether the lowest bits of a block address
-// are used to index to a L2 cache bank or into the sets of a
-// single bank
-// lowest highest
-// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
-// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
-MAP_L2BANKS_TO_LOWEST_BITS: false
-
-
-
-// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files
-// to determine where they apply
-
-MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency
-DIRECTORY_CACHE_LATENCY: 6
-NULL_LATENCY: 1
-ISSUE_LATENCY: 2
-CACHE_RESPONSE_LATENCY: 12
-L1_RESPONSE_LATENCY: 3
-L2_RESPONSE_LATENCY: 6
-L2_TAG_LATENCY: 6
-DIRECTORY_LATENCY: 80
-NETWORK_LINK_LATENCY: 1
-COPY_HEAD_LATENCY: 4
-ON_CHIP_LINK_LATENCY: 1
-RECYCLE_LATENCY: 10
-L2_RECYCLE_LATENCY: 5
-TIMER_LATENCY: 10000
-TBE_RESPONSE_LATENCY: 1
-PERIODIC_TIMER_WAKEUPS: true
-
-
-// constants used by CMP protocols
-// cache bank access times
-L1_REQUEST_LATENCY: 2
-L2_REQUEST_LATENCY: 4
-
-
-// Number of transitions each controller state machines can complete per cycle
-// i.e. the number of ports to each controller
-// L1cache is the sum of the L1I and L1D cache ports
-L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
-// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
-// much greater constraint on the concurrency of a L2 cache bank
-L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
-DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32
-DMA_TRANSITIONS_PER_RUBY_CYCLE: 1
-
-
-// Number of TBEs available for demand misses, ALL prefetches, and replacements
-// used by one-level protocols
-NUMBER_OF_TBES: 128
-// two-level protocols
-NUMBER_OF_L1_TBES: 32
-NUMBER_OF_L2_TBES: 32
-
-// ** INTERCONECT PARAMETERS **
-//
-g_PRINT_TOPOLOGY: true
-g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
-g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology
-FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology
-
-g_adaptive_routing: true
-NUMBER_OF_VIRTUAL_NETWORKS: 6
-
-// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by
-// topology specific link weights
-g_endpoint_bandwidth: 10000
-
-
-// ** finite buffering parameters
-//
-// note: Finite buffering allows us to simulate a realistic virtual cut-through
-// routed network with idealized flow control. this feature is NOT heavily tested
-FINITE_BUFFERING: false
-// All message buffers within the network (i.e. the switch's input and
-// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
-FINITE_BUFFER_SIZE: 3
-// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controlls the number of demand requests
-// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controlls the
-// number of requests in the mandatory queue
-// Only effects the simualtion when FINITE_BUFFERING is enabled
-PROCESSOR_BUFFER_SIZE: 10
-// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
-// Controllers. Controlls the number of request issued by the L2 HW Prefetcher
-PROTOCOL_BUFFER_SIZE: 32
-// ** end finite buffering parameters
-
-
-// (deprecated)
-// Allows on a single accesses to a multi-cycle L2 bank.
-// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
-// number of cycles. However the TBE table can be accessed in parallel.
-SINGLE_ACCESS_L2_BANKS: true
-
-
-// MOESI_CMP_token parameters (some might be deprecated)
-g_FILTERING_ENABLED: false
-g_DISTRIBUTED_PERSISTENT_ENABLED: true
-g_RETRY_THRESHOLD: 1
-g_DYNAMIC_TIMEOUT_ENABLED: true
-g_FIXED_TIMEOUT_LATENCY: 300
-
-
-// tester parameters (overridden by testerconfig.defaults)
-//
-// injects random message delays to excite protocol races
-RANDOMIZATION: false
-g_SYNTHETIC_DRIVER: false
-g_DETERMINISTIC_DRIVER: false
-g_trace_warmup_length: 1000000
-g_bash_bandwidth_adaptive_threshold: 0.75
-
-g_tester_length: 0
-// # of synthetic locks == 16 * 128
-g_synthetic_locks: 2048
-g_deterministic_addrs: 1
-g_SpecifiedGenerator: DetermInvGenerator
-g_callback_counter: 0
-g_NUM_COMPLETIONS_BEFORE_PASS: 0
-// parameters used by locking synthetic tester
-g_think_time: 5
-g_hold_time: 5
-g_wait_time: 5
-
-// Princeton Network (Garnet)
-g_GARNET_NETWORK: true
-g_DETAIL_NETWORK: false
-g_NETWORK_TESTING: false
-g_FLIT_SIZE: 16
-g_NUM_PIPE_STAGES: 4
-g_VCS_PER_CLASS: 4
-g_BUFFER_SIZE: 4
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// MemoryControl:
-
-// Basic cycle time of the memory controller. This defines the period which is
-// used as the memory channel clock period, the address bus bit time, and the
-// memory controller cycle time.
-// Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data),
-// and a 2 GHz Ruby clock:
-MEM_BUS_CYCLE_MULTIPLIER: 10
-
-// How many internal banks in each DRAM chip:
-BANKS_PER_RANK: 8
-
-// How many sets of DRAM chips per DIMM.
-RANKS_PER_DIMM: 2
-
-// How many DIMMs per channel. (Currently the only thing that
-// matters is the number of ranks per channel, i.e. the product
-// of this parameter and RANKS_PER_DIMM. But if and when this is
-// expanded to do FB-DIMMs, the distinction between the two
-// will matter.)
-DIMMS_PER_CHANNEL: 2
-
-// Which bits to use to find the bank, rank, and DIMM numbers.
-// You could choose to have the bank bits, rank bits, and DIMM bits
-// in any order; here they are in that order.
-// For these defaults, we assume this format for addresses:
-// Offset within line: [5:0]
-// Memory controller #: [7:6]
-// Bank: [10:8]
-// Rank: [11]
-// DIMM: [12]
-// Row addr / Col addr: [top:13]
-// If you get these bits wrong, then some banks won't see any
-// requests; you need to check for this in the .stats output.
-BANK_BIT_0: 8
-RANK_BIT_0: 11
-DIMM_BIT_0: 12
-
-// Number of entries max in each bank queues; set to whatever you want.
-// If it is too small, you will see in the .stats file a lot of delay
-// time spent in the common input queue.
-BANK_QUEUE_SIZE: 12
-
-// Bank cycle time (tRC) measured in memory cycles:
-BANK_BUSY_TIME: 11
-
-// This is how many memory address cycles to delay between reads to
-// different ranks of DRAMs to allow for clock skew:
-RANK_RANK_DELAY: 1
-
-// This is how many memory address cycles to delay between a read
-// and a write. This is based on two things: (1) the data bus is
-// used one cycle earlier in the operation; (2) a round-trip wire
-// delay from the controller to the DIMM that did the reading.
-READ_WRITE_DELAY: 2
-
-// Basic address and data bus occupancy. If you are assuming a
-// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
-// the data bus occupancy matches the address bus occupancy at
-// two cycles. But if the channel is only 8 bytes wide, you
-// need to increase this bus occupancy time to 4 cycles.
-BASIC_BUS_BUSY_TIME: 2
-
-// Latency to returning read request or writeback acknowledgement.
-// Measured in memory address cycles.
-// This equals tRCD + CL + AL + (four bit times)
-// + (round trip on channel)
-// + (memory control internal delays)
-// It's going to be an approximation, so pick what you like.
-// Note: The fact that latency is a constant, and does not depend on two
-// low-order address bits, implies that our memory controller either:
-// (a) tells the DRAM to read the critical word first, and sends the
-// critical word first back to the CPU, or (b) waits until it has
-// seen all four bit times on the data wires before sending anything
-// back. Either is plausible. If (a), remove the "four bit times"
-// term from the calculation above.
-MEM_CTL_LATENCY: 12
-
-// refresh_period is the number of memory cycles between refresh
-// of row x in bank n and refresh of row x+1 in bank n. For DDR-400,
-// this is typically 7.8 usec for commercial systems; after 8192 such
-// refreshes, this will have refreshed the whole chip in 64 msec. If
-// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory
-// controller will divide this by the total number of banks, and kick
-// off a refresh to *somebody* every time that amount is counted
-// down to zero. (There will be some rounding error there, but it
-// should have minimal effect.)
-REFRESH_PERIOD: 1560
-
-// tFAW is a DRAM chip parameter which restricts the number of
-// activates that can be done within a certain window of time.
-// The window is specified here in terms of number of memory
-// controller cycles. At most four activates may be done during
-// any such sliding window. If this number is set to be no more
-// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
-// It is typical in real systems for tFAW to have no effect, but
-// it may be useful in throttling power. Set to zero to ignore.
-TFAW: 0
-
-// By default, the memory controller uses round-robin to arbitrate
-// between ready bank queues for use of the address bus. If you
-// wish to add randomness to the system, set this parameter to
-// one instead, and it will restart the round-robin pointer at a
-// random bank number each cycle. If you want additional
-// nondeterminism, set the parameter to some integer n >= 2, and
-// it will in addition add an n% chance each cycle that a ready bank
-// will be delayed an additional cycle. Note that if you are
-// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
-// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
-MEM_RANDOM_ARBITRATE: 0
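The two degrees of randomness described above are a random restart point for the round-robin scan (value 1) and an extra n% per-cycle delay of a ready bank (value n >= 2). A minimal sketch under those assumptions, with illustrative helper names:

    #include <cstdlib>

    // MEM_RANDOM_ARBITRATE == 0: plain round-robin (continue from the last bank).
    // MEM_RANDOM_ARBITRATE == 1: restart the scan at a random bank each cycle.
    // MEM_RANDOM_ARBITRATE >= 2: additionally delay a ready bank n% of the time.
    inline int pickStartBank(int mem_random_arbitrate, int last_bank, int num_banks)
    {
        if (mem_random_arbitrate >= 1)
            return std::rand() % num_banks;
        return (last_bank + 1) % num_banks;
    }

    inline bool delayReadyBank(int mem_random_arbitrate)
    {
        return mem_random_arbitrate >= 2 &&
               (std::rand() % 100) < mem_random_arbitrate;
    }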
-
-// The following parameter, if nonzero, will disable the memory
-// controller and instead give every request a fixed latency. The
-// nonzero value specified here is measured in memory cycles and is
-// just added to MEM_CTL_LATENCY. It will also show up in the stats
-// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
-MEM_FIXED_DELAY: 0
-
-// If instead of DDR-400, you wanted DDR-800, the channel gets faster
-// but the basic operation of the DRAM core is unchanged.
-// Busy times appear to double just because they are measured
-// in smaller clock cycles. The performance advantage comes because
-// the bus busy times don't actually quite double.
-// You would use something like these values:
-//
-// MEM_BUS_CYCLE_MULTIPLIER: 5
-// BANK_BUSY_TIME: 22
-// RANK_RANK_DELAY: 2
-// READ_WRITE_DELAY: 3
-// BASIC_BUS_BUSY_TIME: 3
-// MEM_CTL_LATENCY: 20
-// REFRESH_PERIOD: 3120
diff --git a/src/mem/ruby/config/tester.defaults b/src/mem/ruby/config/tester.defaults
deleted file mode 100644
index b30d1ba99..000000000
--- a/src/mem/ruby/config/tester.defaults
+++ /dev/null
@@ -1,50 +0,0 @@
-
-//
-// This file contains tester specific changes to the rubyconfig.defaults
-// parameter values.
-//
-// Please: - Add new variables only to rubyconfig.defaults file.
-// - Change them here only when necessary.
-
-g_SIMICS: false
-DATA_BLOCK: true
-RANDOMIZATION: true
-g_SYNTHETIC_DRIVER: false
-g_DETERMINISTIC_DRIVER: true
-g_DEADLOCK_THRESHOLD: 500000
-g_SpecifiedGenerator: DetermGETXGenerator
-
-PROTOCOL_DEBUG_TRACE: true
-
-//
-// Generic cache parameters
-//
-
-// Cache sizes are smaller for the random tester to increase the amount
-// of false sharing.
-L1_CACHE_ASSOC: 2
-L1_CACHE_NUM_SETS_BITS: 2
-L2_CACHE_ASSOC: 2
-L2_CACHE_NUM_SETS_BITS: 5
-
-g_MEMORY_SIZE_BYTES: 1048576
-
-//g_NETWORK_TOPOLOGY: FILE_SPECIFIED
-RECYCLE_LATENCY: 1
-//NUMBER_OF_VIRTUAL_NETWORKS: 5
-//g_NUM_MEMORIES: 16
-L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000
-DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000
-//g_PROCS_PER_CHIP: 2
-//g_NUM_L2_BANKS: 16
-//g_endpoint_bandwidth: 10000
-//g_NUM_PROCESSORS: 16
-//g_NUM_SMT_THREADS: 1
-//g_GARNET_NETWORK: true
-//g_DETAIL_NETWORK: true
-//g_NETWORK_TESTING: false
-//g_FLIT_SIZE: 32
-//g_NUM_PIPE_STAGES: 5
-//g_VCS_PER_CLASS: 2
-//g_BUFFER_SIZE: 4
-
diff --git a/src/mem/ruby/libruby.cc b/src/mem/ruby/libruby.cc
index b9a72d071..57dd13c87 100644
--- a/src/mem/ruby/libruby.cc
+++ b/src/mem/ruby/libruby.cc
@@ -58,11 +58,8 @@ RubyRequestType string_to_RubyRequestType(std::string str)
ostream& operator<<(ostream& out, const RubyRequestType& obj)
{
- cerr << "in op" << endl;
out << RubyRequestType_to_string(obj);
- cerr << "flushing" << endl;
out << flush;
- cerr << "done" << endl;
return out;
}
diff --git a/src/mem/ruby/libruby.hh b/src/mem/ruby/libruby.hh
index 29aac232a..4c50611c1 100644
--- a/src/mem/ruby/libruby.hh
+++ b/src/mem/ruby/libruby.hh
@@ -34,7 +34,7 @@ struct RubyRequest {
unsigned proc_id;
RubyRequest() {}
- RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 0)
+ RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 100)
: paddr(_paddr), data(_data), len(_len), pc(_pc), type(_type), access_mode(_access_mode), proc_id(_proc_id)
{}
};
@@ -71,6 +71,12 @@ RubyPortHandle libruby_get_port(const char* name, void (*hit_callback)(int64_t a
RubyPortHandle libruby_get_port_by_name(const char* name);
+/**
+ * libruby_issue_request error return codes
+ */
+#define LIBRUBY_BUFFER_FULL -2
+#define LIBRUBY_ALIASED_REQUEST -3
+
/**
* issue_request returns a unique access_id to identify the ruby
* transaction. This access_id is later returned to the caller via
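These codes let a caller tell back-pressure apart from line aliasing; Sequencer::makeRequest (see the Sequencer.cc hunk further down) now returns them directly instead of a bare -1. A small helper illustrating the intended interpretation, using the include path shown in this diff:

    #include <stdint.h>
    #include "mem/ruby/libruby.hh"  // LIBRUBY_BUFFER_FULL, LIBRUBY_ALIASED_REQUEST

    // Interpret the value returned by an issue call: negative values are the
    // error codes above, positive values are the unique access id.
    inline bool mustRetry(int64_t ret)
    {
        return ret == LIBRUBY_BUFFER_FULL       // too many outstanding requests
            || ret == LIBRUBY_ALIASED_REQUEST;  // same cache line already outstanding
    }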
diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc
index 02fc8db2a..467e1bf87 100644
--- a/src/mem/ruby/network/simple/PerfectSwitch.cc
+++ b/src/mem/ruby/network/simple/PerfectSwitch.cc
@@ -184,7 +184,7 @@ void PerfectSwitch::wakeup()
assert(m_link_order.size() == m_routing_table.size());
assert(m_link_order.size() == m_out.size());
-//changed by SS
+
if (m_network_ptr->getAdaptiveRouting()) {
if (m_network_ptr->isVNetOrdered(vnet)) {
// Don't adaptively route
diff --git a/src/mem/ruby/network/simple/Topology.cc b/src/mem/ruby/network/simple/Topology.cc
index dedf79d58..563a1b01c 100644
--- a/src/mem/ruby/network/simple/Topology.cc
+++ b/src/mem/ruby/network/simple/Topology.cc
@@ -79,7 +79,6 @@ void Topology::init(const vector<string> & argv)
m_connections = argv[i+1];
else if (argv[i] == "print_config") {
m_print_config = string_to_bool(argv[i+1]);
- cerr << "print config: " << m_print_config << endl;
}
}
assert(m_network_ptr != NULL);
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 7da3d317a..c7062262a 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -21,9 +21,8 @@ public:
virtual const string toString() const = 0; // returns text version of controller type
virtual const string getName() const = 0; // return instance name
virtual const MachineType getMachineType() const = 0;
- virtual void set_atomic(Address addr) = 0;
- virtual void started_writes() = 0;
- virtual void clear_atomic() = 0;
+ virtual void blockOnQueue(Address, MessageBuffer*) = 0;
+ virtual void unblock(Address) = 0;
virtual void print(ostream & out) const = 0;
virtual void printStats(ostream & out) const = 0;
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
index 222ff86f8..69424c414 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
@@ -94,6 +94,17 @@ MachineID map_Address_to_DMA(const Address & addr)
return dma;
}
+inline
+NetDest broadcast(MachineType type)
+{
+ NetDest dest;
+ for (int i=0; i<MachineType_base_count(type); i++) {
+ MachineID mach = {type, i};
+ dest.add(mach);
+ }
+ return dest;
+}
+
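The broadcast() helper added above builds a NetDest naming every machine of the given type by iterating over MachineType_base_count(type). A minimal illustration of how a controller might use it; MachineType_L1Cache is a protocol-generated name and serves here only as an example:

    // Address a message to every L1 cache controller in the system.
    NetDest all_l1s = broadcast(MachineType_L1Cache);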
inline
MachineID mapAddressToRange(const Address & addr, MachineType type, int low_bit, int num_bits)
{
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
index 630b94542..cf3e094ad 100644
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -83,10 +83,8 @@ void CacheMemory::init(const vector<string> & argv)
}
}
- assert(cache_size != -1);
-
- m_cache_num_sets = (cache_size / m_cache_assoc) / RubySystem::getBlockSizeBytes();
- assert(m_cache_num_sets > 1);
+ int num_lines = cache_size/RubySystem::getBlockSizeBytes();
+ m_cache_num_sets = num_lines / m_cache_assoc;
m_cache_num_set_bits = log_int(m_cache_num_sets);
assert(m_cache_num_set_bits > 0);
@@ -122,7 +120,7 @@ CacheMemory::~CacheMemory()
}
int
-CacheMemory::numberOfLastLevelCaches()
+CacheMemory::numberOfLastLevelCaches()
{
return m_num_last_level_caches;
}
@@ -165,13 +163,10 @@ int CacheMemory::findTagInSet(Index cacheSet, const Address& tag) const
{
assert(tag == line_address(tag));
// search the set for the tags
- for (int i=0; i < m_cache_assoc; i++) {
- if ((m_cache[cacheSet][i] != NULL) &&
- (m_cache[cacheSet][i]->m_Address == tag) &&
- (m_cache[cacheSet][i]->m_Permission != AccessPermission_NotPresent)) {
- return i;
- }
- }
+ m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+ if (it != m_tag_index.end())
+ if (m_cache[cacheSet][it->second]->m_Permission != AccessPermission_NotPresent)
+ return it->second;
return -1; // Not found
}
@@ -181,10 +176,9 @@ int CacheMemory::findTagInSetIgnorePermissions(Index cacheSet, const Address& ta
{
assert(tag == line_address(tag));
// search the set for the tags
- for (int i=0; i < m_cache_assoc; i++) {
- if (m_cache[cacheSet][i] != NULL && m_cache[cacheSet][i]->m_Address == tag)
- return i;
- }
+ m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
+ if (it != m_tag_index.end())
+ return it->second;
return -1; // Not found
}
@@ -291,6 +285,7 @@ void CacheMemory::allocate(const Address& address, AbstractCacheEntry* entry)
m_cache[cacheSet][i]->m_Address = address;
m_cache[cacheSet][i]->m_Permission = AccessPermission_Invalid;
m_locked[cacheSet][i] = -1;
+ m_tag_index[address] = i;
m_replacementPolicy_ptr->touch(cacheSet, i, g_eventQueue_ptr->getTime());
@@ -311,6 +306,7 @@ void CacheMemory::deallocate(const Address& address)
delete m_cache[cacheSet][location];
m_cache[cacheSet][location] = NULL;
m_locked[cacheSet][location] = -1;
+ m_tag_index.erase(address);
}
}
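The m_tag_index hash map maintained above gives findTagInSet() a constant-time way lookup in place of the old scan over m_cache_assoc entries; allocate() records the (address, way) pair and deallocate() erases it. The same pattern with standard containers, as a self-contained sketch with illustrative names:

    #include <unordered_map>
    #include <stdint.h>

    typedef uint64_t LineAddr;

    // Map each resident line address to the way holding it; maintained at
    // allocate()/deallocate() time so lookups need no per-way scan.
    inline int findWay(const std::unordered_map<LineAddr, int> &tag_index,
                       LineAddr tag)
    {
        std::unordered_map<LineAddr, int>::const_iterator it = tag_index.find(tag);
        return (it == tag_index.end()) ? -1 : it->second;  // -1 == not found
    }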
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
index 856b7bcac..8b84f33ec 100644
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -54,6 +54,7 @@
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/protocol/CacheMsg.hh"
+#include "base/hashmap.hh"
#include <vector>
class CacheMemory {
@@ -70,8 +71,6 @@ public:
// static CacheMemory* createCache(int level, int num, char split_type, AbstractCacheEntry* (*entry_factory)());
// static CacheMemory* getCache(int cache_id);
- static int numberOfLastLevelCaches();
-
// Public Methods
void printConfig(ostream& out);
@@ -106,6 +105,8 @@ public:
AccessPermission getPermission(const Address& address) const;
void changePermission(const Address& address, AccessPermission new_perm);
+ static int numberOfLastLevelCaches();
+
int getLatency() const { return m_latency; }
// Hook for checkpointing the contents of the cache
@@ -158,6 +159,7 @@ private:
// The first index is the # of cache lines.
// The second index is the amount of associativity.
+ m5::hash_map<Address, int> m_tag_index;
Vector<Vector<AbstractCacheEntry*> > m_cache;
Vector<Vector<int> > m_locked;
@@ -169,9 +171,11 @@ private:
int m_cache_num_set_bits;
int m_cache_assoc;
+ static Vector< CacheMemory* > m_all_caches;
+
static int m_num_last_level_caches;
static MachineType m_last_level_machine_type;
- static Vector< CacheMemory* > m_all_caches;
+
};
#endif //CACHEMEMORY_H
diff --git a/src/mem/ruby/system/DMASequencer.hh b/src/mem/ruby/system/DMASequencer.hh
index 1f60b95ec..77c0a2258 100644
--- a/src/mem/ruby/system/DMASequencer.hh
+++ b/src/mem/ruby/system/DMASequencer.hh
@@ -25,6 +25,7 @@ public:
void init(const vector<string> & argv);
/* external interface */
int64_t makeRequest(const RubyRequest & request);
+ bool isReady(const RubyRequest & request, bool dont_set = false) { assert(0); return false;};
// void issueRequest(uint64_t paddr, uint8* data, int len, bool rw);
bool busy() { return m_is_busy;}
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index e230059ad..9b2a3873c 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -44,7 +44,7 @@
int DirectoryMemory::m_num_directories = 0;
int DirectoryMemory::m_num_directories_bits = 0;
-int DirectoryMemory::m_total_size_bytes = 0;
+uint64_t DirectoryMemory::m_total_size_bytes = 0;
DirectoryMemory::DirectoryMemory(const string & name)
: m_name(name)
diff --git a/src/mem/ruby/system/DirectoryMemory.hh b/src/mem/ruby/system/DirectoryMemory.hh
index 39de679ed..09211fd83 100644
--- a/src/mem/ruby/system/DirectoryMemory.hh
+++ b/src/mem/ruby/system/DirectoryMemory.hh
@@ -91,7 +91,7 @@ private:
static int m_num_directories;
static int m_num_directories_bits;
- static int m_total_size_bytes;
+ static uint64_t m_total_size_bytes;
MemoryVector* m_ram;
};
diff --git a/src/mem/ruby/system/MemoryVector.hh b/src/mem/ruby/system/MemoryVector.hh
index c5f3cea7f..775244840 100644
--- a/src/mem/ruby/system/MemoryVector.hh
+++ b/src/mem/ruby/system/MemoryVector.hh
@@ -21,61 +21,105 @@ class MemoryVector {
void write(const Address & paddr, uint8* data, int len);
uint8* read(const Address & paddr, uint8* data, int len);
- private:
- uint8* getBlockPtr(const Address & paddr);
+private:
+ uint8* getBlockPtr(const PhysAddress & addr);
uint32 m_size;
- uint8* m_vec;
+ uint8** m_pages;
+ uint32 m_num_pages;
+ const uint32 m_page_offset_mask;
};
inline
MemoryVector::MemoryVector()
+ : m_page_offset_mask(4095)
{
m_size = 0;
- m_vec = NULL;
+ m_num_pages = 0;
+ m_pages = NULL;
}
inline
MemoryVector::MemoryVector(uint32 size)
+ : m_page_offset_mask(4095)
{
- m_size = size;
- m_vec = new uint8[size];
+ setSize(size);
}
inline
MemoryVector::~MemoryVector()
{
- delete [] m_vec;
+ for (int i=0; i<m_num_pages; i++) {
+ if (m_pages[i] != 0) {
+ delete [] m_pages[i];
+ }
+ }
+ delete [] m_pages;
}
inline
void MemoryVector::setSize(uint32 size)
{
+ if (m_pages != NULL){
+ for (int i=0; i<m_num_pages; i++) {
+ if (m_pages[i] != 0) {
+ delete [] m_pages[i];
+ }
+ }
+ delete [] m_pages;
+ }
m_size = size;
- if (m_vec != NULL)
- delete [] m_vec;
- m_vec = new uint8[size];
+ assert(size%4096 == 0);
+ m_num_pages = size >> 12;
+ m_pages = new uint8*[m_num_pages];
+ memset(m_pages, 0, m_num_pages * sizeof(uint8*));
}
inline
void MemoryVector::write(const Address & paddr, uint8* data, int len)
{
assert(paddr.getAddress() + len <= m_size);
- memcpy(m_vec + paddr.getAddress(), data, len);
+ uint32 page_num = paddr.getAddress() >> 12;
+ if (m_pages[page_num] == 0) {
+ bool all_zeros = true;
+ for (int i=0;i<len;i++) {
+ if (data[i] != 0) {
+ all_zeros = false;
+ break;
+ }
+ }
+ if (all_zeros) return;
+ m_pages[page_num] = new uint8[4096];
+ memset(m_pages[page_num], 0, 4096);
+ uint32 offset = paddr.getAddress() & m_page_offset_mask;
+ memcpy(&m_pages[page_num][offset], data, len);
+ } else {
+ memcpy(&m_pages[page_num][paddr.getAddress()&m_page_offset_mask], data, len);
+ }
}
inline
uint8* MemoryVector::read(const Address & paddr, uint8* data, int len)
{
assert(paddr.getAddress() + len <= m_size);
- memcpy(data, m_vec + paddr.getAddress(), len);
+ uint32 page_num = paddr.getAddress() >> 12;
+ if (m_pages[page_num] == 0) {
+ memset(data, 0, len);
+ } else {
+ memcpy(data, &m_pages[page_num][paddr.getAddress()&m_page_offset_mask], len);
+ }
return data;
}
inline
-uint8* MemoryVector::getBlockPtr(const Address & paddr)
+uint8* MemoryVector::getBlockPtr(const PhysAddress & paddr)
{
- return m_vec + paddr.getAddress();
+ uint32 page_num = paddr.getAddress() >> 12;
+ if (m_pages[page_num] == 0) {
+ m_pages[page_num] = new uint8[4096];
+ memset(m_pages[page_num], 0, 4096);
+ }
+ return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
}
#endif // MEMORYVECTOR_H
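The rewritten MemoryVector above replaces the single flat array with lazily allocated 4 KB pages: a write whose data is all zeros leaves the page unallocated, and a read from an unallocated page simply returns zeros, so untouched memory costs nothing. A simplified, self-contained sketch of the same idea (it allocates on any write rather than only on non-zero data):

    #include <cstring>
    #include <stdint.h>
    #include <vector>

    // Sparse backing store: one slot per 4 KB page, allocated on first write.
    class SparseMemory {
      public:
        explicit SparseMemory(uint32_t size)              // size must be a multiple of 4096
            : m_pages(size >> 12, (uint8_t *)0) {}

        ~SparseMemory() {
            for (size_t i = 0; i < m_pages.size(); i++)
                delete [] m_pages[i];
        }

        void write(uint32_t addr, const uint8_t *data, int len) {
            uint8_t *page = m_pages[addr >> 12];
            if (page == 0) {
                page = m_pages[addr >> 12] = new uint8_t[4096];
                std::memset(page, 0, 4096);
            }
            std::memcpy(page + (addr & 4095), data, len);
        }

        void read(uint32_t addr, uint8_t *data, int len) const {
            const uint8_t *page = m_pages[addr >> 12];
            if (page == 0)
                std::memset(data, 0, len);      // untouched pages read as zero
            else
                std::memcpy(data, page + (addr & 4095), len);
        }

      private:
        std::vector<uint8_t *> m_pages;
    };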
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index c693e0f37..b4716c346 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -27,6 +27,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "mem/ruby/libruby.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/System.hh"
@@ -44,14 +45,14 @@
//Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q)
#define LLSC_FAIL -2
-ostream& operator<<(std::ostream& out, const SequencerRequest& obj) {
- out << obj.ruby_request << flush;
- return out;
-}
-
+long int already = 0;
Sequencer::Sequencer(const string & name)
:RubyPort(name)
{
+ m_store_waiting_on_load_cycles = 0;
+ m_store_waiting_on_store_cycles = 0;
+ m_load_waiting_on_store_cycles = 0;
+ m_load_waiting_on_load_cycles = 0;
}
void Sequencer::init(const vector<string> & argv)
@@ -65,8 +66,6 @@ void Sequencer::init(const vector<string> & argv)
m_instCache_ptr = NULL;
m_dataCache_ptr = NULL;
m_controller = NULL;
- m_servicing_atomic = -1;
- m_atomics_counter = 0;
for (size_t i=0; i<argv.size(); i+=2) {
if ( argv[i] == "controller") {
m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@@ -110,8 +109,9 @@ void Sequencer::wakeup() {
SequencerRequest* request = m_readRequestTable.lookup(keys[i]);
if (current_time - request->issue_time >= m_deadlock_threshold) {
WARN_MSG("Possible Deadlock detected");
- WARN_EXPR(request->ruby_request);
+ WARN_EXPR(request);
WARN_EXPR(m_version);
+ WARN_EXPR(request->ruby_request.paddr);
WARN_EXPR(keys.size());
WARN_EXPR(current_time);
WARN_EXPR(request->issue_time);
@@ -125,7 +125,7 @@ void Sequencer::wakeup() {
SequencerRequest* request = m_writeRequestTable.lookup(keys[i]);
if (current_time - request->issue_time >= m_deadlock_threshold) {
WARN_MSG("Possible Deadlock detected");
- WARN_EXPR(request->ruby_request);
+ WARN_EXPR(request);
WARN_EXPR(m_version);
WARN_EXPR(current_time);
WARN_EXPR(request->issue_time);
@@ -145,6 +145,14 @@ void Sequencer::wakeup() {
}
}
+void Sequencer::printStats(ostream & out) const {
+ out << "Sequencer: " << m_name << endl;
+ out << " store_waiting_on_load_cycles: " << m_store_waiting_on_load_cycles << endl;
+ out << " store_waiting_on_store_cycles: " << m_store_waiting_on_store_cycles << endl;
+ out << " load_waiting_on_load_cycles: " << m_load_waiting_on_load_cycles << endl;
+ out << " load_waiting_on_store_cycles: " << m_load_waiting_on_store_cycles << endl;
+}
+
void Sequencer::printProgress(ostream& out) const{
/*
int total_demand = 0;
@@ -267,6 +275,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
assert(m_writeRequestTable.exist(line_address(address)));
SequencerRequest* request = m_writeRequestTable.lookup(address);
+
removeRequest(request);
assert((request->ruby_request.type == RubyRequestType_ST) ||
@@ -274,15 +283,15 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
(request->ruby_request.type == RubyRequestType_RMW_Write) ||
(request->ruby_request.type == RubyRequestType_Locked_Read) ||
(request->ruby_request.type == RubyRequestType_Locked_Write));
- // POLINA: the assumption is that atomics are only on data cache and not instruction cache
+
if (request->ruby_request.type == RubyRequestType_Locked_Read) {
m_dataCache_ptr->setLocked(address, m_version);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
- m_controller->set_atomic(address);
+ m_controller->blockOnQueue(address, m_mandatory_q_ptr);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
- m_controller->clear_atomic();
+ m_controller->unblock(address);
}
hitCallback(request, data);
@@ -354,47 +363,33 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) {
}
// Returns true if the sequencer already has a load or store outstanding
-bool Sequencer::isReady(const RubyRequest& request) {
- // POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready
- // to simulate stalling of the front-end
- // Do we stall all the sequencers? If it is atomic instruction - yes!
- if (m_outstanding_count >= m_max_outstanding_requests) {
- return false;
- }
-
- if( m_writeRequestTable.exist(line_address(Address(request.paddr))) ||
- m_readRequestTable.exist(line_address(Address(request.paddr))) ){
- //cout << "OUTSTANDING REQUEST EXISTS " << p << " VER " << m_version << endl;
- //printProgress(cout);
- return false;
- }
-
- if (m_servicing_atomic != -1 && m_servicing_atomic != (int)request.proc_id) {
- assert(m_atomics_counter > 0);
- return false;
- }
- else {
- if (request.type == RubyRequestType_RMW_Read) {
- if (m_servicing_atomic == -1) {
- assert(m_atomics_counter == 0);
- m_servicing_atomic = (int)request.proc_id;
- }
- else {
- assert(m_servicing_atomic == (int)request.proc_id);
- }
- m_atomics_counter++;
+int Sequencer::isReady(const RubyRequest& request) {
+ bool is_outstanding_store = m_writeRequestTable.exist(line_address(Address(request.paddr)));
+ bool is_outstanding_load = m_readRequestTable.exist(line_address(Address(request.paddr)));
+ if ( is_outstanding_store ) {
+ if ((request.type == RubyRequestType_LD) ||
+ (request.type == RubyRequestType_IFETCH) ||
+ (request.type == RubyRequestType_RMW_Read)) {
+ m_store_waiting_on_load_cycles++;
+ } else {
+ m_store_waiting_on_store_cycles++;
}
- else if (request.type == RubyRequestType_RMW_Write) {
- assert(m_servicing_atomic == (int)request.proc_id);
- assert(m_atomics_counter > 0);
- m_atomics_counter--;
- if (m_atomics_counter == 0) {
- m_servicing_atomic = -1;
- }
+ return LIBRUBY_ALIASED_REQUEST;
+ } else if ( is_outstanding_load ) {
+ if ((request.type == RubyRequestType_ST) ||
+ (request.type == RubyRequestType_RMW_Write) ) {
+ m_load_waiting_on_store_cycles++;
+ } else {
+ m_load_waiting_on_load_cycles++;
}
+ return LIBRUBY_ALIASED_REQUEST;
}
- return true;
+ if (m_outstanding_count >= m_max_outstanding_requests) {
+ return LIBRUBY_BUFFER_FULL;
+ }
+
+ return 1;
}
bool Sequencer::empty() const {
@@ -405,11 +400,12 @@ bool Sequencer::empty() const {
int64_t Sequencer::makeRequest(const RubyRequest & request)
{
assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes());
- if (isReady(request)) {
+ int ready = isReady(request);
+ if (ready > 0) {
int64_t id = makeUniqueRequestID();
SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime());
bool found = insertRequest(srequest);
- if (!found)
+ if (!found) {
if (request.type == RubyRequestType_Locked_Write) {
// NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first
// ensuring that nothing comes between checking the flag and servicing the store
@@ -420,16 +416,17 @@ int64_t Sequencer::makeRequest(const RubyRequest & request)
m_dataCache_ptr->clearLocked(line_address(Address(request.paddr)));
}
}
- if (request.type == RubyRequestType_RMW_Write) {
- m_controller->started_writes();
- }
issueRequest(request);
- // TODO: issue hardware prefetches here
- return id;
- }
- else {
- return -1;
+ // TODO: issue hardware prefetches here
+ return id;
+ }
+ else {
+ assert(0);
+ return 0;
+ }
+ } else {
+ return ready;
}
}
@@ -448,10 +445,8 @@ void Sequencer::issueRequest(const RubyRequest& request) {
ctype = CacheRequestType_ST;
break;
case RubyRequestType_Locked_Read:
- ctype = CacheRequestType_ST;
- break;
case RubyRequestType_Locked_Write:
- ctype = CacheRequestType_ST;
+ ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Read:
ctype = CacheRequestType_ATOMIC;
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index cf12c2a0b..1621bbbdc 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -86,10 +86,11 @@ public:
// called by Tester or Simics
int64_t makeRequest(const RubyRequest & request);
- bool isReady(const RubyRequest& request);
+ int isReady(const RubyRequest& request);
bool empty() const;
void print(ostream& out) const;
+ void printStats(ostream & out) const;
void checkCoherence(const Address& address);
// bool getRubyMemoryValue(const Address& addr, char* value, unsigned int size_in_bytes);
@@ -127,8 +128,11 @@ private:
// Global outstanding request count, across all request tables
int m_outstanding_count;
bool m_deadlock_check_scheduled;
- int m_servicing_atomic;
- int m_atomics_counter;
+
+ int m_store_waiting_on_load_cycles;
+ int m_store_waiting_on_store_cycles;
+ int m_load_waiting_on_store_cycles;
+ int m_load_waiting_on_load_cycles;
};
// Output operator declaration
diff --git a/src/mem/ruby/system/System.cc b/src/mem/ruby/system/System.cc
index ad67cdc80..4ce919618 100644
--- a/src/mem/ruby/system/System.cc
+++ b/src/mem/ruby/system/System.cc
@@ -335,6 +335,10 @@ void RubySystem::printStats(ostream& out)
m_profiler_ptr->printStats(out);
m_network_ptr->printStats(out);
+ for (map<string, Sequencer*>::const_iterator it = m_sequencers.begin();
+ it != m_sequencers.end(); it++) {
+ (*it).second->printStats(out);
+ }
for (map<string, CacheMemory*>::const_iterator it = m_caches.begin();
it != m_caches.end(); it++) {
(*it).second->printStats(out);