diff options
-rw-r--r-- | src/Doxyfile | 2 | ||||
-rw-r--r-- | src/doc/memory_system.doxygen | 278 | ||||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig1.PNG | bin | 0 -> 4004 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig2.PNG | bin | 0 -> 7281 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig3.PNG | bin | 0 -> 30751 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig4.PNG | bin | 0 -> 5351 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig5.PNG | bin | 0 -> 7324 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig6.PNG | bin | 0 -> 16516 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig7.PNG | bin | 0 -> 8626 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig8.PNG | bin | 0 -> 17209 bytes | |||
-rwxr-xr-x | src/doxygen/images/gem5_MS_Fig9.PNG | bin | 0 -> 16453 bytes | |||
-rw-r--r-- | src/mem/cache/cache.hh | 2 | ||||
-rw-r--r-- | src/mem/cache/mshr.hh | 1 | ||||
-rw-r--r-- | src/mem/cache/tags/lru.hh | 1 | ||||
-rw-r--r-- | src/mem/coherent_bus.hh | 1 | ||||
-rw-r--r-- | src/mem/simple_mem.hh | 1 |
16 files changed, 284 insertions, 2 deletions
diff --git a/src/Doxyfile b/src/Doxyfile index ed4245dd2..2dc1f4112 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -459,7 +459,7 @@ EXAMPLE_RECURSIVE = NO # directories that contain image that are included in the documentation (see # the \image command). -IMAGE_PATH = +IMAGE_PATH = doxygen/images # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program diff --git a/src/doc/memory_system.doxygen b/src/doc/memory_system.doxygen new file mode 100644 index 000000000..061a289ee --- /dev/null +++ b/src/doc/memory_system.doxygen @@ -0,0 +1,278 @@ +# Copyright (c) 2012 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Author: Djordje Kovacevic + +/*! \page gem5MemorySystem Memory System in gem5 + + \tableofcontents + + The document describes memory subsystem in gem5 with focus on program flow + during CPU’s simple memory transactions (read or write). + + + \section gem5_MS_MH MODEL HIERARCHY + + Model that is used in this document consists of two out-of-order (O3) + ARM v7 CPUs with corresponding L1 data caches and Simple Memory. It is + created by running gem5 with the following parameters: + + configs/example/fs.py --caches --cpu-type=arm_detailed --num-cpus=2 + + Gem5 uses Memory Objects (MemObject) derived objects as basic blocks for + building memory system. They are connected via ports with established + master/slave hierarchy. Data flow is initiated on master port while the + response messages and snoop queries appear on the slave port. The following + figure shows the hierarchy of Memory Objects used in this document: + + \image html "gem5_MS_Fig1.PNG" "Memory Object hierarchy of the model" width=3cm + + \section gem5_CPU CPU + + It is not in the scope of this document to describe O3 CPU model in details, so + here are only a few relevant notes about the model: + + <b>Read access </b>is initiated by sending message to the port towards DCache + object. If DCache rejects the message (for being blocked or busy) CPU will + flush the pipeline and the access will be re-attempted later on. The access + is completed upon receiving reply message (ReadRep) from DCache. + + <b>Write access</b> is initiated by storing the request into store buffer whose + context is emptied and sent to DCache on every tick. DCache may also reject + the request. Write access is completed when write reply (WriteRep) message is + received from DCache. + + Load & store buffers (for read and write access) don’t impose any + restriction on the number of active memory accesses. Therefore, the maximum + number of outstanding CPU’s memory access requests is not limited by CPU + Memory Object but by underlying memory system model. + + <b>Split memory access</b> is implemented. + + The message that is sent by CPU contains memory type (Normal, Device, Strongly + Ordered and cachebility) of the accessed region. However, this is not being used + by the rest of the model that takes more simplified approach towards memory types. + + \section gem5_DCache DATA CACHE OBJECT + + Data Cache object implements a standard cache structure: + + \image html "gem5_MS_Fig2.PNG" "DCache Memory Object" width=3cm + + <b>Cached memory reads</b> that match particular cache tag (with Valid & Read + flags) will be completed (by sending ReadResp to CPU) after a configurable time. + Otherwise, the request is forwarded to Miss Status and Handling Register + (MSHR) block. + + <b>Cached memory writes</b> that match particular cache tag (with Valid, Read + & Write flags) will be completed (by sending WriteResp CPU) after the same + configurable time. Otherwise, the request is forwarded to Miss Status and + Handling Register(MSHR) block. + + <b>Uncached memory reads</b> are forwarded to MSHR block. + + <b>Uncached memory writes</b> are forwarded to WriteBuffer block. + + <b>Evicted (& dirty) cache lines</b> are forwarded to WriteBuffer block. + + CPU’s access to Data Cache is blocked if any of the following is true: + + - MSHR block is full. (The size of MSHR’s buffer is configurable.) + + - Writeback block is full. (The size of the block’s buffer is + configurable.) + + - The number of outstanding memory accesses against the same memory cache line + has reached configurable threshold value – see MSHR and Write Buffer for details. + + Data Cache in block state will reject any request from slave port (from CPU) + regardless of whether it would result in cache hit or miss. Note that + incoming messages on master port (response messages and snoop requests) + are never rejected. + + Cache hit on uncachable memory region (unpredicted behaviour according to + ARM ARM) will invalidate cache line and fetch data from memory. + + \subsection gem5_MS_TAndDBlock Tags & Data Block + + Cache lines (referred as blocks in source code) are organised into sets with + configurable associativity and size. They have the following status flags: + - <b>Valid.</b> It holds data. Address tag is valid + - <b>Read.</b> No read request will be accepted without this flag being set. + For example, cache line is valid and unreadable when it waits for write flag + to complete write access. + - <b>Write.</b> It may accept writes. Cache line with Write flags + identifies Unique state – no other cache memory holds the copy. + - <b>Dirty.</b> It needs Writeback when evicted. + + Read access will hit cache line if address tags match and Valid and Read + flags are set. Write access will hit cache line if address tags match and + Valid, Read and Write flags are set. + + \subsection gem5_MS_Queues MSHR and Write Buffer Queues + + Miss Status and Handling Register (MSHR) queue holds the list of CPU’s + outstanding memory requests that require read access to lower memory + level. They are: + - Cached Read misses. + - Cached Write misses. + - Uncached reads. + + WriteBuffer queue holds the following memory requests: + - Uncached writes. + - Writeback from evicted (& dirty) cache lines. + + \image html "gem5_MS_Fig3.PNG" "MSHR and Write Buffer Blocks" width=6cm + + Each memory request is assigned to corresponding MSHR object (READ or WRITE + on diagram above) that represents particular block (cache line) of memory + that has to be read or written in order to complete the command(s). As shown + on gigure above, cached read/writes against the same cache line have a common + MSHR object and will be completed with a single memory access. + + The size of the block (and therefore the size of read/write access to lower + memory) is: + - The size of cache line for cached access & writeback; + - As specified in CPU instruction for uncached access. + + In general, Data Cache model distinguishes between just two memory types: + - Normal Cached memory. It is always treated as write back, read and write + allocate. + - Normal uncached, Device and Strongly Ordered types are treated equally + (as uncached memory) + + \subsection gem5_MS_Ordering Memory Access Ordering + + An unique order number is assigned to each CPU read/write request(as they appear on + slave port). Order numbers of MSHR objects are copied from the first + assigned read/write. + + Memory read/writes from each of these two queues are executed in order (according + to the assigned order number). When both queues are not empty the model will + execute memory read from MSHR block unless WriteBuffer is full. It will, + however, always preserve the order of read/writes on the same + (or overlapping) memory cache line (block). + + In summary: + - Order of accesses to cached memory is not preserved unless they target + the same cache line. For example, the accesses #1, #5 & #10 will + complete simultaneously in the same tick (still in order). The access + #5 will complete before #3. + - Order of all uncached memory writes is preserved. Write#6 always + completes before Write#13. + - Order to all uncached memory reads is preserved. Read#2 always completes + before Read#8. + - The order of a read and a write uncached access is not necessarily + preserved - unless their access regions overlap. Therefore, Write#6 + always completes before Read#8 (they target the same memory block). + However, Write#13 may complete before Read#8. + + + \section gem5_MS_Bus COHERENT BUS OBJECT + + \image html "gem5_MS_Fig4.PNG" "Coherent Bus Object" width=3cm + + Coherent Bus object provides basic support for snoop protocol: + + <b>All requests on the slave port</b> are forwarded to the appropriate master port. Requests + for cached memory regions are also forwarded to other slave ports (as snoop + requests). + + <b>Master port replies</b> are forwarded to the appropriate slave port. + + <b>Master port snoop requests</b> are forwarded to all slave ports. + + <b>Slave port snoop replies</b> are forwarded to the port that was the source of the + request. (Note that the source of snoop request can be either slave or + master port.) + + The bus declares itself blocked for a configurable period of time after + any of the following events: + - A packet is sent (or failed to be sent) to a slave port. + - A reply message is sent to a master port. + - Snoop response from one slave port is sent to another slave port. + + The bus in blocked state rejects the following incoming messages: + - Slave port requests. + - Master port replies. + - Master port snoop requests. + + \section gem5_MS_SimpleMemory SIMPLE MEMORY OBJECT + + It never blocks the access on slave port. + + Memory read/write takes immediate effect. (Read or write is performed when + the request is received). + + Reply message is sent after a configurable period of time . + + \section gem5_MS_MessageFlow MESSAGE FLOW + + \subsection gem5_MS_Ordering Read Access + + The following diagram shows read access that hits Data Cache line with Valid + and Read flags: + + \image html "gem5_MS_Fig5.PNG" "Read Hit (Read flag must be set in cache line)" width=3cm + + Cache miss read access will generate the following sequence of messages: + + \image html "gem5_MS_Fig6.PNG" "Read Miss with snoop reply" width=3cm + + Note that bus object never gets response from both DCache2 and Memory object. + It sends the very same ReadReq package (message) object to memory and data + cache. When Data Cache wants to reply on snoop request it marks the message + with MEM_INHIBIT flag that tells Memory object not to process the message. + + \subsection gem5_MS_Ordering Write Access + + The following diagram shows write access that hits DCache1 cache line with + Valid & Write flags: + + \image html "gem5_MS_Fig7.PNG" "Write Hit (with Write flag set in cache line)" width=3cm + + Next figure shows write access that hits DCache1 cache line with Valid but no + Write flags – which qualifies as write miss. DCache1 issues UpgradeReq to + obtain write permission. DCache2::snoopTiming will invalidate cache line that + has been hit. Note that UpgradeResp message doesn’t carry data. + + \image html "gem5_MS_Fig8.PNG" "Write Miss – matching tag with no Write flag" width=3cm + + The next diagram shows write miss in DCache. ReadExReq invalidates cache line + in DCache2. ReadExResp carries the content of memory cache line. + + \image html "gem5_MS_Fig9.PNG" "Miss - no matching tag" width=3cm + +*/ diff --git a/src/doxygen/images/gem5_MS_Fig1.PNG b/src/doxygen/images/gem5_MS_Fig1.PNG Binary files differnew file mode 100755 index 000000000..ed0f11fb3 --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig1.PNG diff --git a/src/doxygen/images/gem5_MS_Fig2.PNG b/src/doxygen/images/gem5_MS_Fig2.PNG Binary files differnew file mode 100755 index 000000000..a6dac40c3 --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig2.PNG diff --git a/src/doxygen/images/gem5_MS_Fig3.PNG b/src/doxygen/images/gem5_MS_Fig3.PNG Binary files differnew file mode 100755 index 000000000..68c72fc74 --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig3.PNG diff --git a/src/doxygen/images/gem5_MS_Fig4.PNG b/src/doxygen/images/gem5_MS_Fig4.PNG Binary files differnew file mode 100755 index 000000000..2e535d03f --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig4.PNG diff --git a/src/doxygen/images/gem5_MS_Fig5.PNG b/src/doxygen/images/gem5_MS_Fig5.PNG Binary files differnew file mode 100755 index 000000000..b8d4041ba --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig5.PNG diff --git a/src/doxygen/images/gem5_MS_Fig6.PNG b/src/doxygen/images/gem5_MS_Fig6.PNG Binary files differnew file mode 100755 index 000000000..05c16fb0c --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig6.PNG diff --git a/src/doxygen/images/gem5_MS_Fig7.PNG b/src/doxygen/images/gem5_MS_Fig7.PNG Binary files differnew file mode 100755 index 000000000..fc4ec8df3 --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig7.PNG diff --git a/src/doxygen/images/gem5_MS_Fig8.PNG b/src/doxygen/images/gem5_MS_Fig8.PNG Binary files differnew file mode 100755 index 000000000..ec0351259 --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig8.PNG diff --git a/src/doxygen/images/gem5_MS_Fig9.PNG b/src/doxygen/images/gem5_MS_Fig9.PNG Binary files differnew file mode 100755 index 000000000..0a4f16e0e --- /dev/null +++ b/src/doxygen/images/gem5_MS_Fig9.PNG diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 9bdbd3456..3c34c10f7 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -64,7 +64,7 @@ class BasePrefetcher; /** * A template-policy based cache. The behavior of the cache can be altered by * supplying different template policies. TagStore handles all tag and data - * storage @sa TagStore. + * storage @sa TagStore, \ref gem5MemorySystem "gem5 Memory System" */ template <class TagStore> class Cache : public BaseCache diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh index 7920ad717..87e561ac7 100644 --- a/src/mem/cache/mshr.hh +++ b/src/mem/cache/mshr.hh @@ -47,6 +47,7 @@ class MSHRQueue; /** * Miss Status and handling Register. This class keeps all the information * needed to handle a cache miss including a list of target requests. + * @sa \ref gem5MemorySystem "gem5 Memory System" */ class MSHR : public Packet::SenderState, public Printable { diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh index bb1420f71..7938fcc3c 100644 --- a/src/mem/cache/tags/lru.hh +++ b/src/mem/cache/tags/lru.hh @@ -50,6 +50,7 @@ class CacheSet; /** * A LRU cache tag store. + * @sa \ref gem5MemorySystem "gem5 Memory System" */ class LRU : public BaseTags { diff --git a/src/mem/coherent_bus.hh b/src/mem/coherent_bus.hh index a28b388d5..8941d0271 100644 --- a/src/mem/coherent_bus.hh +++ b/src/mem/coherent_bus.hh @@ -63,6 +63,7 @@ * The coherent bus can be used as a template for modelling QPI, * HyperTransport, ACE and coherent OCP buses, and is typically used * for the L1-to-L2 buses and as the main system interconnect. + * @sa \ref gem5MemorySystem "gem5 Memory System" */ class CoherentBus : public BaseBus { diff --git a/src/mem/simple_mem.hh b/src/mem/simple_mem.hh index 4f7af864b..f201709c2 100644 --- a/src/mem/simple_mem.hh +++ b/src/mem/simple_mem.hh @@ -58,6 +58,7 @@ * an configurable throughput and latency, potentially with a variance * added to the latter. It uses a QueueSlavePort to avoid dealing with * the flow control of sending responses. + * @sa \ref gem5MemorySystem "gem5 Memory System" */ class SimpleMemory : public AbstractMemory { |