Diffstat (limited to 'src')
-rw-r--r--  src/arch/x86/insts/microregop.cc | 2
-rw-r--r--  src/arch/x86/isa/decoder/locked_opcodes.isa | 3
-rw-r--r--  src/arch/x86/isa/decoder/one_byte_opcodes.isa | 12
-rw-r--r--  src/arch/x86/isa/decoder/two_byte_opcodes.isa | 11
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py | 176
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/data_conversion/endian_conversion.py | 20
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/data_transfer/conditional_move.py | 52
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py | 25
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py | 20
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py | 72
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/rotate_and_shift/shift.py | 93
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/semaphores.py | 94
-rw-r--r--  src/arch/x86/isa/insts/general_purpose/string/scan_string.py | 6
-rw-r--r--  src/arch/x86/isa/microasm.isa | 4
-rw-r--r--  src/arch/x86/isa/microops/regop.isa | 209
-rw-r--r--  src/arch/x86/isa/operands.isa | 1
-rw-r--r--  src/arch/x86/x86_traits.hh | 3
-rw-r--r--  src/mem/gems_common/Map.hh | 2
-rw-r--r--  src/mem/protocol/MI_example-cache.sm | 15
-rw-r--r--  src/mem/protocol/MI_example-dir.sm | 99
-rw-r--r--  src/mem/protocol/MI_example-dma.sm | 24
-rw-r--r--  src/mem/protocol/MI_example-msg.sm | 1
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory-L1cache.sm | 137
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory-L2cache.sm | 86
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory-dir.sm | 316
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory-dma.sm | 268
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory-msg.sm | 35
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory.slicc | 1
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory_m-dir.sm | 652
-rw-r--r--  src/mem/protocol/MOESI_CMP_directory_m.slicc | 5
-rw-r--r--  src/mem/protocol/RubySlicc_ComponentMapping.sm | 7
-rw-r--r--  src/mem/protocol/RubySlicc_Exports.sm | 33
-rw-r--r--  src/mem/protocol/RubySlicc_Profiler.sm | 2
-rw-r--r--  src/mem/protocol/RubySlicc_Types.sm | 5
-rw-r--r--  src/mem/protocol/RubySlicc_Util.sm | 2
-rw-r--r--  src/mem/protocol/SConscript | 2
-rw-r--r--  src/mem/ruby/config/MI_example-homogeneous.rb | 29
-rw-r--r--  src/mem/ruby/config/MI_example.rb | 39
-rw-r--r--  src/mem/ruby/config/MOESI_CMP_directory.rb | 69
-rw-r--r--  src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb | 100
-rw-r--r--  src/mem/ruby/config/cfg.rb | 96
-rw-r--r--  src/mem/ruby/config/defaults.rb | 50
-rw-r--r--  src/mem/ruby/config/util.rb | 10
-rw-r--r--  src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh | 10
-rw-r--r--  src/mem/ruby/slicc_interface/RubySlicc_Util.hh | 5
-rw-r--r--  src/mem/ruby/system/DMASequencer.cc | 34
-rw-r--r--  src/mem/ruby/system/DirectoryMemory.cc | 6
-rw-r--r--  src/mem/ruby/system/DirectoryMemory.hh | 6
-rw-r--r--  src/mem/ruby/system/PerfectCacheMemory.hh | 14
-rw-r--r--  src/mem/ruby/system/System.hh | 3
-rw-r--r--  src/mem/ruby/system/TimerTable.cc | 4
-rw-r--r--  src/mem/ruby/system/TimerTable.hh | 4
-rw-r--r--  src/mem/slicc/ast/AST.hh | 14
-rw-r--r--  src/mem/slicc/ast/ActionDeclAST.cc | 2
-rw-r--r--  src/mem/slicc/ast/ActionDeclAST.hh | 3
-rw-r--r--  src/mem/slicc/ast/EnqueueStatementAST.cc | 9
-rw-r--r--  src/mem/slicc/ast/FormalParamAST.cc | 11
-rw-r--r--  src/mem/slicc/ast/FormalParamAST.hh | 6
-rw-r--r--  src/mem/slicc/ast/FuncDeclAST.cc | 1
-rw-r--r--  src/mem/slicc/ast/FuncDeclAST.hh | 3
-rw-r--r--  src/mem/slicc/ast/MachineAST.cc | 9
-rw-r--r--  src/mem/slicc/ast/MachineAST.hh | 8
-rw-r--r--  src/mem/slicc/parser/parser.py | 18
-rw-r--r--  src/mem/slicc/parser/parser.yy | 10
-rw-r--r--  src/mem/slicc/symbols/StateMachine.cc | 75
-rw-r--r--  src/mem/slicc/symbols/StateMachine.hh | 5
66 files changed, 1773 insertions, 1375 deletions
diff --git a/src/arch/x86/insts/microregop.cc b/src/arch/x86/insts/microregop.cc
index 2edd3ba87..5982dff7a 100644
--- a/src/arch/x86/insts/microregop.cc
+++ b/src/arch/x86/insts/microregop.cc
@@ -78,7 +78,7 @@ namespace X86ISA
if(subtract)
flags ^= (flagMask & (ECFBit | CFBit));
}
- if(flagMask & PFBit && findParity(dataSize*8, _dest))
+ if(flagMask & PFBit && !findParity(8, _dest))
flags |= PFBit;
if(flagMask & AFBit)
{
diff --git a/src/arch/x86/isa/decoder/locked_opcodes.isa b/src/arch/x86/isa/decoder/locked_opcodes.isa
index f38f2abb8..14d5e58a3 100644
--- a/src/arch/x86/isa/decoder/locked_opcodes.isa
+++ b/src/arch/x86/isa/decoder/locked_opcodes.isa
@@ -160,7 +160,8 @@
0x1: XADD_LOCKED(Mv,Gv);
//0x7: group9();
0x7: decode MODRM_REG {
- 0x1: WarnUnimpl::cmpxchg_Mq_LOCKED();
+ //Also CMPXCHG16B
+ 0x1: CMPXCHG8B_LOCKED(Mdp);
}
}
}
diff --git a/src/arch/x86/isa/decoder/one_byte_opcodes.isa b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
index 84d18441d..d6cfdc593 100644
--- a/src/arch/x86/isa/decoder/one_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/one_byte_opcodes.isa
@@ -330,14 +330,8 @@
//The 64 bit versions of both of these should be illegal only
//if CPUID says it isn't supported. For now, we'll just assume
//that it's supported.
- 0x6: decode MODE_SUBMODE {
- 0x0: SAHF_64();
- default: SAHF();
- }
- 0x7: decode MODE_SUBMODE {
- 0x0: LAHF_64();
- default: LAHF();
- }
+ 0x6: SAHF();
+ 0x7: LAHF();
}
0x14: decode OPCODE_OP_BOTTOM3 {
0x0: MOV(rAb, Ob);
@@ -550,7 +544,7 @@
0x5: IMUL_B(Eb);
//This should be Eb, but it access the entire word value ax.
0x6: DIV_B(Ew);
- 0x7: IDIV(Eb);
+ 0x7: IDIV_B(Eb);
}
//0x7: group3_Ev();
0x7: decode MODRM_REG {
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index c344ee550..55056da81 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -820,8 +820,8 @@
Rdx = result.rdx;
}});
0x3: Inst::BT(Ev,Gv);
- 0x4: shld_Ev_Gv_Ib();
- 0x5: shld_Ev_Gv_rCl();
+ 0x4: Inst::SHLD(Ev,Gv,Ib);
+ 0x5: Inst::SHLD(Ev,Gv);
0x6: xbts_and_cmpxchg();
0x7: ibts_and_cmpxchg();
}
@@ -831,7 +831,7 @@
0x2: rsm_smm();
0x3: Inst::BTS(Ev,Gv);
0x4: Inst::SHRD(Ev,Gv,Ib);
- 0x5: shrd_Ev_Gv_rCl();
+ 0x5: Inst::SHRD(Ev,Gv);
//0x6: group16();
0x6: decode MODRM_REG {
0x0: fxsave();
@@ -898,7 +898,8 @@
0x1: Inst::XADD(Ev,Gv);
//0x7: group9();
0x7: decode MODRM_REG {
- 0x1: cmpxchg_Mq();
+ //Also CMPXCHG16B
+ 0x1: Inst::CMPXCHG8B(Mdp);
0x6: decode LEGACY_OP {
0x1: vmclear_Mq();
default: decode LEGACY_REP {
@@ -1067,7 +1068,7 @@
}
default: Inst::UD2();
}
- 0x1E: decode OPCODE_OP_BOTTOM3 {
+ 0x1E: decode LEGACY_DECODEVAL {
// no prefix
0x0: decode OPCODE_OP_BOTTOM3 {
0x1: psllw_Pq_Qq();
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
index 19d1c7789..47ad1d53c 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/multiply_and_divide.py
@@ -61,29 +61,26 @@ microcode = '''
def macroop MUL_B_R
{
- mul1u rax, reg
+ mul1u rax, reg, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
def macroop MUL_B_M
{
ld t1, seg, sib, disp
- mul1u rax, t1
+ mul1u rax, t1, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
def macroop MUL_B_P
{
rdip t7
ld t1, seg, riprel, disp
- mul1u rax, t1
+ mul1u rax, t1, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
#
@@ -92,26 +89,26 @@ def macroop MUL_B_P
def macroop MUL_R
{
- mul1u rax, reg
+ mul1u rax, reg, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
def macroop MUL_M
{
ld t1, seg, sib, disp
- mul1u rax, t1
+ mul1u rax, t1, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
def macroop MUL_P
{
rdip t7
ld t1, seg, riprel, disp
- mul1u rax, t1
+ mul1u rax, t1, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
#
@@ -120,29 +117,26 @@ def macroop MUL_P
def macroop IMUL_B_R
{
- mul1s rax, reg
+ mul1s rax, reg, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
def macroop IMUL_B_M
{
ld t1, seg, sib, disp
- mul1s rax, t1
+ mul1s rax, t1, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
def macroop IMUL_B_P
{
rdip t7
ld t1, seg, riprel, disp
- mul1s rax, t1
+ mul1s rax, t1, flags=(OF,CF)
mulel rax
- # Really ah
- muleh rsi, flags=(OF,CF)
+ muleh ah
};
#
@@ -151,50 +145,50 @@ def macroop IMUL_B_P
def macroop IMUL_R
{
- mul1s rax, reg
+ mul1s rax, reg, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
def macroop IMUL_M
{
ld t1, seg, sib, disp
- mul1s rax, t1
+ mul1s rax, t1, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
def macroop IMUL_P
{
rdip t7
ld t1, seg, riprel, disp
- mul1s rax, t1
+ mul1s rax, t1, flags=(OF,CF)
mulel rax
- muleh rdx, flags=(OF,CF)
+ muleh rdx
};
def macroop IMUL_R_R
{
- mul1s reg, regm
+ mul1s reg, regm, flags=(OF,CF)
mulel reg
- muleh t0, flags=(CF,OF)
+ muleh t0
};
def macroop IMUL_R_M
{
ld t1, seg, sib, disp
- mul1s reg, t1
+ mul1s reg, t1, flags=(CF,OF)
mulel reg
- muleh t0, flags=(CF,OF)
+ muleh t0
};
def macroop IMUL_R_P
{
rdip t7
ld t1, seg, riprel, disp
- mul1s reg, t1
+ mul1s reg, t1, flags=(CF,OF)
mulel reg
- muleh t0, flags=(CF,OF)
+ muleh t0
};
#
@@ -204,18 +198,18 @@ def macroop IMUL_R_P
def macroop IMUL_R_R_I
{
limm t1, imm
- mul1s regm, t1
+ mul1s regm, t1, flags=(OF,CF)
mulel reg
- muleh t0, flags=(OF,CF)
+ muleh t0
};
def macroop IMUL_R_M_I
{
limm t1, imm
ld t2, seg, sib, disp
- mul1s t2, t1
+ mul1s t2, t1, flags=(OF,CF)
mulel reg
- muleh t0, flags=(OF,CF)
+ muleh t0
};
def macroop IMUL_R_P_I
@@ -223,9 +217,9 @@ def macroop IMUL_R_P_I
rdip t7
limm t1, imm
ld t2, seg, riprel
- mul1s t2, t1
+ mul1s t2, t1, flags=(OF,CF)
mulel reg
- muleh t0, flags=(OF,CF)
+ muleh t0
};
#
@@ -235,7 +229,7 @@ def macroop IMUL_R_P_I
def macroop DIV_B_R
{
# Do the initial part of the division
- div1 rsi, reg, dataSize=1
+ div1 ah, reg, dataSize=1
#These are split out so we can initialize the number of bits in the
#second register
@@ -250,7 +244,7 @@ divLoopTop:
#Unload the answer
divq rax, dataSize=1
- divr rsi, dataSize=1
+ divr ah, dataSize=1
};
def macroop DIV_B_M
@@ -258,7 +252,7 @@ def macroop DIV_B_M
ld t2, seg, sib, disp
# Do the initial part of the division
- div1 rsi, t2, dataSize=1
+ div1 ah, t2, dataSize=1
#These are split out so we can initialize the number of bits in the
#second register
@@ -273,7 +267,7 @@ divLoopTop:
#Unload the answer
divq rax, dataSize=1
- divr rsi, dataSize=1
+ divr ah, dataSize=1
};
def macroop DIV_B_P
@@ -282,7 +276,7 @@ def macroop DIV_B_P
ld t2, seg, riprel, disp
# Do the initial part of the division
- div1 rsi, t2, dataSize=1
+ div1 ah, t2, dataSize=1
#These are split out so we can initialize the number of bits in the
#second register
@@ -297,7 +291,7 @@ divLoopTop:
#Unload the answer
divq rax, dataSize=1
- divr rsi, dataSize=1
+ divr ah, dataSize=1
};
#
@@ -390,11 +384,10 @@ def macroop IDIV_B_R
# Negate dividend
sub t1, t0, rax, flags=(ECF,), dataSize=1
ruflag t4, 3
- sub t2, t0, rsi, dataSize=1
+ sub t2, t0, ah, dataSize=1
sub t2, t2, t4
#Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, reg, 1, flags=(ECF,), dataSize=1
# Negate divisor
@@ -403,12 +396,11 @@ def macroop IDIV_B_R
mov t3, t3, reg, flags=(nCECF,), dataSize=1
#Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, rsi, 1, flags=(ECF,), dataSize=1
+ slli t0, ah, 1, flags=(ECF,), dataSize=1
# Put the dividend's absolute value into t1 and t2
mov t1, t1, rax, flags=(nCECF,), dataSize=1
- mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+ mov t2, t2, ah, flags=(nCECF,), dataSize=1
# Do the initial part of the division
div1 t2, t3, dataSize=1
@@ -435,10 +427,10 @@ divLoopTop:
# Negate the remainder
sub t4, t0, t6, dataSize=1
- # If the dividend was negitive, put the negated remainder in rsi.
- mov rsi, rsi, t4, (CECF,), dataSize=1
- # Otherwise put the regular remainder in rsi.
- mov rsi, rsi, t6, (nCECF,), dataSize=1
+ # If the dividend was negitive, put the negated remainder in ah.
+ mov ah, ah, t4, (CECF,), dataSize=1
+ # Otherwise put the regular remainder in ah.
+ mov ah, ah, t6, (nCECF,), dataSize=1
# Negate the quotient.
sub t4, t0, t5, dataSize=1
@@ -446,7 +438,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,), dataSize=1
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,), dataSize=1
+ slli t0, reg, 1, flags=(ECF,), dataSize=1
# Negate the (possibly already negated) quotient
sub t4, t0, t5, dataSize=1
@@ -461,27 +453,25 @@ def macroop IDIV_B_M
# Negate dividend
sub t1, t0, rax, flags=(ECF,), dataSize=1
ruflag t4, 3
- sub t2, t0, rsi, dataSize=1
+ sub t2, t0, ah, dataSize=1
sub t2, t2, t4
- ld t3, seg, sib, disp
+ ld t8, seg, sib, disp
#Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, t3, 1, flags=(ECF,), dataSize=1
# Negate divisor
- sub t4, t0, t3, dataSize=1
+ sub t3, t0, t8, dataSize=1
# Put the divisor's absolute value into t3
- mov t3, t3, t4, flags=(CECF,), dataSize=1
+ mov t3, t3, t8, flags=(nCECF,), dataSize=1
#Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, rsi, 1, flags=(ECF,), dataSize=1
+ slli t0, ah, 1, flags=(ECF,), dataSize=1
# Put the dividend's absolute value into t1 and t2
mov t1, t1, rax, flags=(nCECF,), dataSize=1
- mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+ mov t2, t2, ah, flags=(nCECF,), dataSize=1
# Do the initial part of the division
div1 t2, t3, dataSize=1
@@ -508,10 +498,10 @@ divLoopTop:
# Negate the remainder
sub t4, t0, t6, dataSize=1
- # If the dividend was negitive, put the negated remainder in rsi.
- mov rsi, rsi, t4, (CECF,), dataSize=1
- # Otherwise put the regular remainder in rsi.
- mov rsi, rsi, t6, (nCECF,), dataSize=1
+ # If the dividend was negitive, put the negated remainder in ah.
+ mov ah, ah, t4, (CECF,), dataSize=1
+ # Otherwise put the regular remainder in ah.
+ mov ah, ah, t6, (nCECF,), dataSize=1
# Negate the quotient.
sub t4, t0, t5, dataSize=1
@@ -519,7 +509,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,), dataSize=1
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,), dataSize=1
+ slli t0, t8, 1, flags=(ECF,), dataSize=1
# Negate the (possibly already negated) quotient
sub t4, t0, t5, dataSize=1
@@ -534,28 +524,26 @@ def macroop IDIV_B_P
# Negate dividend
sub t1, t0, rax, flags=(ECF,), dataSize=1
ruflag t4, 3
- sub t2, t0, rsi, dataSize=1
+ sub t2, t0, ah, dataSize=1
sub t2, t2, t4
rdip t7
- ld t3, seg, riprel, disp
+ ld t8, seg, riprel, disp
#Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, t3, 1, flags=(ECF,), dataSize=1
# Negate divisor
- sub t4, t0, t3, dataSize=1
+ sub t3, t0, t8, dataSize=1
# Put the divisor's absolute value into t3
- mov t3, t3, t4, flags=(CECF,), dataSize=1
+ mov t3, t3, t8, flags=(nCECF,), dataSize=1
#Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
- slli t0, rsi, 1, flags=(ECF,), dataSize=1
+ slli t0, ah, 1, flags=(ECF,), dataSize=1
# Put the dividend's absolute value into t1 and t2
mov t1, t1, rax, flags=(nCECF,), dataSize=1
- mov t2, t2, rsi, flags=(nCECF,), dataSize=1
+ mov t2, t2, ah, flags=(nCECF,), dataSize=1
# Do the initial part of the division
div1 t2, t3, dataSize=1
@@ -582,10 +570,10 @@ divLoopTop:
# Negate the remainder
sub t4, t0, t6, dataSize=1
- # If the dividend was negitive, put the negated remainder in rsi.
- mov rsi, rsi, t4, (CECF,), dataSize=1
- # Otherwise put the regular remainder in rsi.
- mov rsi, rsi, t6, (nCECF,), dataSize=1
+ # If the dividend was negitive, put the negated remainder in ah.
+ mov ah, ah, t4, (CECF,), dataSize=1
+ # Otherwise put the regular remainder in ah.
+ mov ah, ah, t6, (nCECF,), dataSize=1
# Negate the quotient.
sub t4, t0, t5, dataSize=1
@@ -593,7 +581,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,), dataSize=1
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,), dataSize=1
+ slli t0, t8, 1, flags=(ECF,), dataSize=1
# Negate the (possibly already negated) quotient
sub t4, t0, t5, dataSize=1
@@ -616,7 +604,6 @@ def macroop IDIV_R
sub t2, t2, t4
#Find the sign of the divisor
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, reg, 1, flags=(ECF,)
# Negate divisor
@@ -625,7 +612,6 @@ def macroop IDIV_R
mov t3, t3, reg, flags=(nCECF,)
#Find the sign of the dividend
- #FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, rdx, 1, flags=(ECF,)
# Put the dividend's absolute value into t1 and t2
@@ -670,7 +656,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,)
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,)
+ slli t0, reg, 1, flags=(ECF,)
# Negate the (possibly already negated) quotient
sub t4, t0, t5
@@ -688,16 +674,16 @@ def macroop IDIV_M
sub t2, t0, rdx
sub t2, t2, t4
- ld t3, seg, sib, disp
+ ld t8, seg, sib, disp
#Find the sign of the divisor
#FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, t3, 1, flags=(ECF,)
# Negate divisor
- sub t4, t0, t3
+ sub t3, t0, t8
# Put the divisor's absolute value into t3
- mov t3, t3, t4, flags=(CECF,)
+ mov t3, t3, t8, flags=(nCECF,)
#Find the sign of the dividend
#FIXME!!! This depends on shifts setting the carry flag correctly.
@@ -745,7 +731,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,)
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,)
+ slli t0, t8, 1, flags=(ECF,)
# Negate the (possibly already negated) quotient
sub t4, t0, t5
@@ -764,16 +750,16 @@ def macroop IDIV_P
sub t2, t2, t4
rdip t7
- ld t3, seg, riprel, disp
+ ld t8, seg, riprel, disp
#Find the sign of the divisor
#FIXME!!! This depends on shifts setting the carry flag correctly.
slli t0, t3, 1, flags=(ECF,)
# Negate divisor
- sub t4, t0, t3
+ sub t3, t0, t8
# Put the divisor's absolute value into t3
- mov t3, t3, t4, flags=(CECF,)
+ mov t3, t3, t4, flags=(nCECF,)
#Find the sign of the dividend
#FIXME!!! This depends on shifts setting the carry flag correctly.
@@ -821,7 +807,7 @@ divLoopTop:
mov t5, t5, t4, (CECF,)
# Check the sign of the divisor
- slli t0, t3, 1, flags=(ECF,)
+ slli t0, t8, 1, flags=(ECF,)
# Negate the (possibly already negated) quotient
sub t4, t0, t5
diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/endian_conversion.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/endian_conversion.py
index ac2343462..f6aac1761 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_conversion/endian_conversion.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/endian_conversion.py
@@ -64,15 +64,15 @@ def macroop BSWAP_D_R
def macroop BSWAP_Q_R
{
roli reg, reg, 8, dataSize=2
- roli reg, reg, 16, dataSize=4
- roli reg, reg, 8, dataSize=2
- roli reg, reg, 32, dataSize=8
- roli reg, reg, 8, dataSize=2
- roli reg, reg, 16, dataSize=4
- roli reg, reg, 8, dataSize=2
+ roli t1, reg, 16, dataSize=4
+ # Top 4 bytes of t1 are now zero
+ roli t1, t1, 8, dataSize=2
+ roli t1, t1, 32, dataSize=8
+ srli t2, reg, 32, dataSize=8
+ roli t2, t2, 8, dataSize=2
+ roli t2, t2, 16, dataSize=4
+ # Top 4 bytes of t2 are now zero
+ roli t2, t2, 8, dataSize=2
+ or reg, t1, t2, dataSize=8
};
'''
-#let {{
-# class BSWAP(Inst):
-# "GenFault ${new UnimpInstFault}"
-#}};
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/conditional_move.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/conditional_move.py
index 1a60c5b61..264bbe370 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/conditional_move.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/conditional_move.py
@@ -56,12 +56,14 @@
microcode = '''
def macroop CMOVZ_R_R
{
+ mov reg, reg, reg, flags=(nCZF,)
mov reg, reg, regm, flags=(CZF,)
};
def macroop CMOVZ_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCZF,)
mov reg, reg, t1, flags=(CZF,)
};
@@ -69,17 +71,20 @@ def macroop CMOVZ_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCZF,)
mov reg, reg, t1, flags=(CZF,)
};
def macroop CMOVNZ_R_R
{
+ mov reg, reg, reg, flags=(CZF,)
mov reg, reg, regm, flags=(nCZF,)
};
def macroop CMOVNZ_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CZF,)
mov reg, reg, t1, flags=(nCZF,)
};
@@ -87,17 +92,20 @@ def macroop CMOVNZ_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CZF,)
mov reg, reg, t1, flags=(nCZF,)
};
def macroop CMOVB_R_R
{
+ mov reg, reg, reg, flags=(nCCF,)
mov reg, reg, regm, flags=(CCF,)
};
def macroop CMOVB_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCCF,)
mov reg, reg, t1, flags=(CCF,)
};
@@ -105,17 +113,20 @@ def macroop CMOVB_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCCF,)
mov reg, reg, t1, flags=(CCF,)
};
def macroop CMOVNB_R_R
{
+ mov reg, reg, reg, flags=(CCF,)
mov reg, reg, regm, flags=(nCCF,)
};
def macroop CMOVNB_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CCF,)
mov reg, reg, t1, flags=(nCCF,)
};
@@ -123,17 +134,20 @@ def macroop CMOVNB_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CCF,)
mov reg, reg, t1, flags=(nCCF,)
};
def macroop CMOVBE_R_R
{
+ mov reg, reg, reg, flags=(nCCvZF,)
mov reg, reg, regm, flags=(CCvZF,)
};
def macroop CMOVBE_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCCvZF,)
mov reg, reg, t1, flags=(CCvZF,)
};
@@ -141,17 +155,20 @@ def macroop CMOVBE_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCCvZF,)
mov reg, reg, t1, flags=(CCvZF,)
};
def macroop CMOVNBE_R_R
{
+ mov reg, reg, reg, flags=(CCvZF,)
mov reg, reg, regm, flags=(nCCvZF,)
};
def macroop CMOVNBE_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CCvZF,)
mov reg, reg, t1, flags=(nCCvZF,)
};
@@ -159,17 +176,20 @@ def macroop CMOVNBE_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CCvZF,)
mov reg, reg, t1, flags=(nCCvZF,)
};
def macroop CMOVS_R_R
{
+ mov reg, reg, reg, flags=(nCSF,)
mov reg, reg, regm, flags=(CSF,)
};
def macroop CMOVS_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCSF,)
mov reg, reg, t1, flags=(CSF,)
};
@@ -177,17 +197,20 @@ def macroop CMOVS_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCSF,)
mov reg, reg, t1, flags=(CSF,)
};
def macroop CMOVNS_R_R
{
+ mov reg, reg, reg, flags=(CSF,)
mov reg, reg, regm, flags=(nCSF,)
};
def macroop CMOVNS_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CSF,)
mov reg, reg, t1, flags=(nCSF,)
};
@@ -195,17 +218,20 @@ def macroop CMOVNS_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CSF,)
mov reg, reg, t1, flags=(nCSF,)
};
def macroop CMOVP_R_R
{
+ mov reg, reg, reg, flags=(nCPF,)
mov reg, reg, regm, flags=(CPF,)
};
def macroop CMOVP_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCPF,)
mov reg, reg, t1, flags=(CPF,)
};
@@ -213,35 +239,41 @@ def macroop CMOVP_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCPF,)
mov reg, reg, t1, flags=(CPF,)
};
def macroop CMOVNP_R_R
{
+ mov reg, reg, reg, flags=(CPF,)
mov reg, reg, regm, flags=(nCPF,)
};
def macroop CMOVNP_R_M
{
ld t1, seg, sib, disp
- mov reg, reg, regm, flags=(nCPF,)
+ mov reg, reg, reg, flags=(CPF,)
+ mov reg, reg, t1, flags=(nCPF,)
};
def macroop CMOVNP_R_P
{
rdip t7
ld t1, seg, riprel, disp
- mov reg, reg, regm, flags=(nCPF,)
+ mov reg, reg, reg, flags=(CPF,)
+ mov reg, reg, t1, flags=(nCPF,)
};
def macroop CMOVL_R_R
{
+ mov reg, reg, reg, flags=(nCSxOF,)
mov reg, reg, regm, flags=(CSxOF,)
};
def macroop CMOVL_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCSxOF,)
mov reg, reg, t1, flags=(CSxOF,)
};
@@ -249,17 +281,20 @@ def macroop CMOVL_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCSxOF,)
mov reg, reg, t1, flags=(CSxOF,)
};
def macroop CMOVNL_R_R
{
+ mov reg, reg, reg, flags=(CSxOF,)
mov reg, reg, regm, flags=(nCSxOF,)
};
def macroop CMOVNL_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CSxOF,)
mov reg, reg, t1, flags=(nCSxOF,)
};
@@ -267,17 +302,20 @@ def macroop CMOVNL_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CSxOF,)
mov reg, reg, t1, flags=(nCSxOF,)
};
def macroop CMOVLE_R_R
{
+ mov reg, reg, reg, flags=(nCSxOvZF,)
mov reg, reg, regm, flags=(CSxOvZF,)
};
def macroop CMOVLE_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCSxOvZF,)
mov reg, reg, t1, flags=(CSxOvZF,)
};
@@ -285,17 +323,20 @@ def macroop CMOVLE_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCSxOvZF,)
mov reg, reg, t1, flags=(CSxOvZF,)
};
def macroop CMOVNLE_R_R
{
+ mov reg, reg, reg, flags=(CSxOvZF,)
mov reg, reg, regm, flags=(nCSxOvZF,)
};
def macroop CMOVNLE_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(CSxOvZF,)
mov reg, reg, t1, flags=(nCSxOvZF,)
};
@@ -303,17 +344,20 @@ def macroop CMOVNLE_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(CSxOvZF,)
mov reg, reg, t1, flags=(nCSxOvZF,)
};
def macroop CMOVO_R_R
{
+ mov reg, reg, reg, flags=(nCOF,)
mov reg, reg, regm, flags=(COF,)
};
def macroop CMOVO_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(nCOF,)
mov reg, reg, t1, flags=(COF,)
};
@@ -321,17 +365,20 @@ def macroop CMOVO_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(nCOF,)
mov reg, reg, t1, flags=(COF,)
};
def macroop CMOVNO_R_R
{
+ mov reg, reg, reg, flags=(COF,)
mov reg, reg, regm, flags=(nCOF,)
};
def macroop CMOVNO_R_M
{
ld t1, seg, sib, disp
+ mov reg, reg, reg, flags=(COF,)
mov reg, reg, t1, flags=(nCOF,)
};
@@ -339,6 +386,7 @@ def macroop CMOVNO_R_P
{
rdip t7
ld t1, seg, riprel, disp
+ mov reg, reg, reg, flags=(COF,)
mov reg, reg, t1, flags=(nCOF,)
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
index f4c8a4663..6b18caef0 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/stack_operations.py
@@ -150,28 +150,29 @@ def macroop LEAVE {
# Make the default data size of pops 64 bits in 64 bit mode
.adjust_env oszIn64Override
- mov t1, t1, rbp, dataSize=asz
+ mov t1, t1, rbp, dataSize=ssz
ld rbp, ss, [1, t0, t1], dataSize=ssz
- mov rsp, rsp, t1, dataSize=asz
- addi rsp, rsp, ssz, dataSize=asz
+ mov rsp, rsp, t1, dataSize=ssz
+ addi rsp, rsp, ssz, dataSize=ssz
};
def macroop ENTER_I_I {
+ .adjust_env oszIn64Override
# This needs to check all the addresses it writes to before it actually
# writes any values.
# Pull the different components out of the immediate
- limm t1, imm
+ limm t1, imm, dataSize=8
zexti t2, t1, 15, dataSize=8
- srli t1, t1, 16
+ srli t1, t1, 16, dataSize=8
zexti t1, t1, 5, dataSize=8
# t1 is now the masked nesting level, and t2 is the amount of storage.
# Push rbp.
- stupd rbp, ss, [1, t0, rsp], "-env.stackSize", dataSize=ssz
+ stupd rbp, ss, [1, t0, rsp], "-env.dataSize"
# Save the stack pointer for later
- mov t6, t6, rsp, dataSize=asz
+ mov t6, t6, rsp
# If the nesting level is zero, skip all this stuff.
sub t0, t1, t0, flags=(EZF,), dataSize=2
@@ -183,8 +184,8 @@ def macroop ENTER_I_I {
limm t4, "ULL(-1)", dataSize=8
topOfLoop:
- ld t5, ss, [ssz, t4, rbp], dataSize=ssz
- stupd t5, ss, [1, t0, rsp], "-env.stackSize"
+ ld t5, ss, [dsz, t4, rbp]
+ stupd t5, ss, [1, t0, rsp], "-env.dataSize"
# If we're not done yet, loop
subi t4, t4, 1, dataSize=8
@@ -193,10 +194,10 @@ topOfLoop:
bottomOfLoop:
# Push the old rbp onto the stack
- stupd t6, ss, [1, t0, rsp], "-env.stackSize"
+ stupd t6, ss, [1, t0, rsp], "-env.dataSize"
skipLoop:
- sub rsp, rsp, t2, dataSize=asz
- mov rbp, rbp, t6, dataSize=asz
+ sub rsp, rsp, t2, dataSize=ssz
+ mov rbp, rbp, t6
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py b/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
index 0915bf819..01908ca7b 100644
--- a/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
+++ b/src/arch/x86/isa/insts/general_purpose/flags/load_and_store.py
@@ -55,26 +55,10 @@
microcode = '''
def macroop SAHF {
- # This will fold to ah since this never executes in 64 bit mode.
- ruflags rsp, dataSize=1
-};
-
-# This is allows the instruction to write to ah in 64 bit mode.
-def macroop SAHF_64 {
- ruflags t1
- slli t1, t1, 8
- mov t1, t1, rax, dataSize=1
- mov rax, rax, t1, dataSize=2
+ ruflags ah, dataSize=1
};
def macroop LAHF {
- # This will fold to ah since this never executes in 64 bit mode.
- wruflags rsp, t0, dataSize=1
-};
-
-# This is allows the instruction to read from ah in 64 bit mode.
-def macroop LAHF_64 {
- srli t1, rax, 8, dataSize=2
- wruflags t1, t0, dataSize=1
+ wruflags ah, t0, dataSize=1
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
index b5ae9560e..3be954768 100644
--- a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
+++ b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/rotate.py
@@ -56,13 +56,13 @@
microcode = '''
def macroop ROL_R_I
{
- roli reg, reg, imm
+ roli reg, reg, imm, flags=(OF,CF)
};
def macroop ROL_M_I
{
ldst t1, seg, sib, disp
- roli t1, t1, imm
+ roli t1, t1, imm, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -70,19 +70,19 @@ def macroop ROL_P_I
{
rdip t7
ldst t1, seg, riprel, disp
- roli t1, t1, imm
+ roli t1, t1, imm, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop ROL_1_R
{
- roli reg, reg, 1
+ roli reg, reg, 1, flags=(OF,CF)
};
def macroop ROL_1_M
{
ldst t1, seg, sib, disp
- roli t1, t1, 1
+ roli t1, t1, 1, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -90,19 +90,19 @@ def macroop ROL_1_P
{
rdip t7
ldst t1, seg, riprel, disp
- roli t1, t1, 1
+ roli t1, t1, 1, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop ROL_R_R
{
- rol reg, reg, regm
+ rol reg, reg, regm, flags=(OF,CF)
};
def macroop ROL_M_R
{
ldst t1, seg, sib, disp
- rol t1, t1, reg
+ rol t1, t1, reg, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -110,19 +110,19 @@ def macroop ROL_P_R
{
rdip t7
ldst t1, seg, riprel, disp
- rol t1, t1, reg
+ rol t1, t1, reg, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop ROR_R_I
{
- rori reg, reg, imm
+ rori reg, reg, imm, flags=(OF,CF)
};
def macroop ROR_M_I
{
ldst t1, seg, sib, disp
- rori t1, t1, imm
+ rori t1, t1, imm, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -130,19 +130,19 @@ def macroop ROR_P_I
{
rdip t7
ldst t1, seg, riprel, disp
- rori t1, t1, imm
+ rori t1, t1, imm, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop ROR_1_R
{
- rori reg, reg, 1
+ rori reg, reg, 1, flags=(OF,CF)
};
def macroop ROR_1_M
{
ldst t1, seg, sib, disp
- rori t1, t1, 1
+ rori t1, t1, 1, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -150,19 +150,19 @@ def macroop ROR_1_P
{
rdip t7
ldst t1, seg, riprel, disp
- rori t1, t1, 1
+ rori t1, t1, 1, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop ROR_R_R
{
- ror reg, reg, regm
+ ror reg, reg, regm, flags=(OF,CF)
};
def macroop ROR_M_R
{
ldst t1, seg, sib, disp
- ror t1, t1, reg
+ ror t1, t1, reg, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -170,19 +170,19 @@ def macroop ROR_P_R
{
rdip t7
ldst t1, seg, riprel, disp
- ror t1, t1, reg
+ ror t1, t1, reg, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCL_R_I
{
- rcli reg, reg, imm
+ rcli reg, reg, imm, flags=(OF,CF)
};
def macroop RCL_M_I
{
ldst t1, seg, sib, disp
- rcli t1, t1, imm
+ rcli t1, t1, imm, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -190,19 +190,19 @@ def macroop RCL_P_I
{
rdip t7
ldst t1, seg, riprel, disp
- rcli t1, t1, imm
+ rcli t1, t1, imm, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCL_1_R
{
- rcli reg, reg, 1
+ rcli reg, reg, 1, flags=(OF,CF)
};
def macroop RCL_1_M
{
ldst t1, seg, sib, disp
- rcli t1, t1, 1
+ rcli t1, t1, 1, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -210,19 +210,19 @@ def macroop RCL_1_P
{
rdip t7
ldst t1, seg, riprel, disp
- rcli t1, t1, 1
+ rcli t1, t1, 1, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCL_R_R
{
- rcl reg, reg, regm
+ rcl reg, reg, regm, flags=(OF,CF)
};
def macroop RCL_M_R
{
ldst t1, seg, sib, disp
- rcl t1, t1, reg
+ rcl t1, t1, reg, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -230,19 +230,19 @@ def macroop RCL_P_R
{
rdip t7
ldst t1, seg, riprel, disp
- rcl t1, t1, reg
+ rcl t1, t1, reg, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCR_R_I
{
- rcri reg, reg, imm
+ rcri reg, reg, imm, flags=(OF,CF)
};
def macroop RCR_M_I
{
ldst t1, seg, sib, disp
- rcri t1, t1, imm
+ rcri t1, t1, imm, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -250,19 +250,19 @@ def macroop RCR_P_I
{
rdip t7
ldst t1, seg, riprel, disp
- rcri t1, t1, imm
+ rcri t1, t1, imm, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCR_1_R
{
- rcri reg, reg, 1
+ rcri reg, reg, 1, flags=(OF,CF)
};
def macroop RCR_1_M
{
ldst t1, seg, sib, disp
- rcri t1, t1, 1
+ rcri t1, t1, 1, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -270,19 +270,19 @@ def macroop RCR_1_P
{
rdip t7
ldst t1, seg, riprel, disp
- rcri t1, t1, 1
+ rcri t1, t1, 1, flags=(OF,CF)
st t1, seg, riprel, disp
};
def macroop RCR_R_R
{
- rcr reg, reg, regm
+ rcr reg, reg, regm, flags=(OF,CF)
};
def macroop RCR_M_R
{
ldst t1, seg, sib, disp
- rcr t1, t1, reg
+ rcr t1, t1, reg, flags=(OF,CF)
st t1, seg, sib, disp
};
@@ -290,7 +290,7 @@ def macroop RCR_P_R
{
rdip t7
ldst t1, seg, riprel, disp
- rcr t1, t1, reg
+ rcr t1, t1, reg, flags=(OF,CF)
st t1, seg, riprel, disp
};
'''
diff --git a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/shift.py b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/shift.py
index caaeca974..092fb4213 100644
--- a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/shift.py
+++ b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/shift.py
@@ -114,6 +114,52 @@ def macroop SAL_P_R
st t1, seg, riprel, disp
};
+def macroop SHLD_R_R
+{
+ mdbi regm, 0
+ sld reg, reg, rcx, flags=(CF,OF,SF,ZF,PF)
+};
+
+def macroop SHLD_M_R
+{
+ ldst t1, seg, sib, disp
+ mdbi reg, 0
+ sld t1, t1, rcx, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, sib, disp
+};
+
+def macroop SHLD_P_R
+{
+ rdip t7
+ ldst t1, seg, riprel, disp
+ mdbi reg, 0
+ sld t1, t1, rcx, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, riprel, disp
+};
+
+def macroop SHLD_R_R_I
+{
+ mdbi regm, 0
+ sldi reg, reg, imm, flags=(CF,OF,SF,ZF,PF)
+};
+
+def macroop SHLD_M_R_I
+{
+ ldst t1, seg, sib, disp
+ mdbi reg, 0
+ sldi t1, t1, imm, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, sib, disp
+};
+
+def macroop SHLD_P_R_I
+{
+ rdip t7
+ ldst t1, seg, riprel, disp
+ mdbi reg, 0
+ sldi t1, t1, imm, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, riprel, disp
+};
+
def macroop SHR_R_I
{
srli reg, reg, imm, flags=(CF,OF,SF,ZF,PF)
@@ -174,38 +220,49 @@ def macroop SHR_P_R
st t1, seg, riprel, disp
};
-# SHRD will not set OF correctly when the shift count is 1.
+def macroop SHRD_R_R
+{
+ mdbi regm, 0
+ srd reg, reg, rcx, flags=(CF,OF,SF,ZF,PF)
+};
+
+def macroop SHRD_M_R
+{
+ ldst t1, seg, sib, disp
+ mdbi reg, 0
+ srd t1, t1, rcx, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, sib, disp
+};
+
+def macroop SHRD_P_R
+{
+ rdip t7
+ ldst t1, seg, riprel, disp
+ mdbi reg, 0
+ srd t1, t1, rcx, flags=(CF,OF,SF,ZF,PF)
+ st t1, seg, riprel, disp
+};
+
def macroop SHRD_R_R_I
{
- srli t1, reg, imm, flags=(CF,)
- rori t2, regm, imm
- srli t3, regm, imm
- xor t2, t2, t3
- or reg, t1, t2
+ mdbi regm, 0
+ srdi reg, reg, imm, flags=(CF,OF,SF,ZF,PF)
};
-# SHRD will not set OF correctly when the shift count is 1.
def macroop SHRD_M_R_I
{
ldst t1, seg, sib, disp
- srli t1, t1, imm, flags=(CF,)
- rori t2, reg, imm
- srli t3, reg, imm
- xor t2, t2, t3
- or t1, t1, t2
+ mdbi reg, 0
+ srdi t1, t1, imm, flags=(CF,OF,SF,ZF,PF)
st t1, seg, sib, disp
};
-# SHRD will not set OF correctly when the shift count is 1.
def macroop SHRD_P_R_I
{
rdip t7
ldst t1, seg, riprel, disp
- srli t1, t1, imm, flags=(CF,)
- rori t2, reg, imm
- srli t3, reg, imm
- xor t2, t2, t3
- or t1, t1, t2
+ mdbi reg, 0
+ srdi t1, t1, imm, flags=(CF,OF,SF,ZF,PF)
st t1, seg, riprel, disp
};
diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py
index a7da0720e..2bdbd0ada 100644
--- a/src/arch/x86/isa/insts/general_purpose/semaphores.py
+++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py
@@ -98,6 +98,100 @@ def macroop CMPXCHG_LOCKED_P_R {
mov rax, rax, t1, flags=(nCZF,)
};
+def macroop CMPXCHG8B_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldst t2, seg, [1, t0, t1], 0
+ ldst t3, seg, [1, t0, t1], dsz
+
+ sub t0, rax, t2, flags=(ZF,)
+ br label("doneComparing"), flags=(nCZF,)
+ sub t0, rdx, t3, flags=(ZF,)
+doneComparing:
+
+ # If they're equal, set t3:t2 to rbx:rcx to write to memory
+ mov t2, t2, rbx, flags=(CZF,)
+ mov t3, t3, rcx, flags=(CZF,)
+
+ # If they're not equal, set rdx:rax to the value from memory.
+ mov rax, rax, t2, flags=(nCZF,)
+ mov rdx, rdx, t3, flags=(nCZF,)
+
+ # Write to memory
+ st t3, seg, [1, t0, t1], dsz
+ st t2, seg, [1, t0, t1], 0
+};
+
+def macroop CMPXCHG8B_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldst t2, seg, [1, t0, t1], 0
+ ldst t3, seg, [1, t0, t1], dsz
+
+ sub t0, rax, t2, flags=(ZF,)
+ br label("doneComparing"), flags=(nCZF,)
+ sub t0, rdx, t3, flags=(ZF,)
+doneComparing:
+
+ # If they're equal, set t3:t2 to rbx:rcx to write to memory
+ mov t2, t2, rbx, flags=(CZF,)
+ mov t3, t3, rcx, flags=(CZF,)
+
+ # If they're not equal, set rdx:rax to the value from memory.
+ mov rax, rax, t2, flags=(nCZF,)
+ mov rdx, rdx, t3, flags=(nCZF,)
+
+ # Write to memory
+ st t3, seg, [1, t0, t1], dsz
+ st t2, seg, [1, t0, t1], 0
+};
+
+def macroop CMPXCHG8B_LOCKED_M {
+ lea t1, seg, sib, disp, dataSize=asz
+ ldstl t2, seg, [1, t0, t1], 0
+ ldstl t3, seg, [1, t0, t1], dsz
+
+ sub t0, rax, t2, flags=(ZF,)
+ br label("doneComparing"), flags=(nCZF,)
+ sub t0, rdx, t3, flags=(ZF,)
+doneComparing:
+
+ # If they're equal, set t3:t2 to rbx:rcx to write to memory
+ mov t2, t2, rbx, flags=(CZF,)
+ mov t3, t3, rcx, flags=(CZF,)
+
+ # If they're not equal, set rdx:rax to the value from memory.
+ mov rax, rax, t2, flags=(nCZF,)
+ mov rdx, rdx, t3, flags=(nCZF,)
+
+ # Write to memory
+ stul t3, seg, [1, t0, t1], dsz
+ stul t2, seg, [1, t0, t1], 0
+};
+
+def macroop CMPXCHG8B_LOCKED_P {
+ rdip t7
+ lea t1, seg, riprel, disp, dataSize=asz
+ ldstl t2, seg, [1, t0, t1], 0
+ ldstl t3, seg, [1, t0, t1], dsz
+
+ sub t0, rax, t2, flags=(ZF,)
+ br label("doneComparing"), flags=(nCZF,)
+ sub t0, rdx, t3, flags=(ZF,)
+doneComparing:
+
+ # If they're equal, set t3:t2 to rbx:rcx to write to memory
+ mov t2, t2, rbx, flags=(CZF,)
+ mov t3, t3, rcx, flags=(CZF,)
+
+ # If they're not equal, set rdx:rax to the value from memory.
+ mov rax, rax, t2, flags=(nCZF,)
+ mov rdx, rdx, t3, flags=(nCZF,)
+
+ # Write to memory
+ stul t3, seg, [1, t0, t1], dsz
+ stul t2, seg, [1, t0, t1], 0
+};
+
def macroop XADD_M_R {
ldst t1, seg, sib, disp
add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF)
diff --git a/src/arch/x86/isa/insts/general_purpose/string/scan_string.py b/src/arch/x86/isa/insts/general_purpose/string/scan_string.py
index 5b0e74aad..5115fe8a2 100644
--- a/src/arch/x86/isa/insts/general_purpose/string/scan_string.py
+++ b/src/arch/x86/isa/insts/general_purpose/string/scan_string.py
@@ -62,7 +62,7 @@ def macroop SCAS_M {
mov t2, t2, t3, flags=(nCEZF,), dataSize=asz
ld t1, es, [1, t0, rdi]
- sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF)
+ sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF)
add rdi, rdi, t2, dataSize=asz
};
@@ -84,7 +84,7 @@ def macroop SCAS_E_M {
topOfLoop:
ld t1, es, [1, t0, rdi]
- sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF)
+ sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF)
subi rcx, rcx, 1, flags=(EZF,), dataSize=asz
add rdi, rdi, t2, dataSize=asz
@@ -105,7 +105,7 @@ def macroop SCAS_N_M {
topOfLoop:
ld t1, es, [1, t0, rdi]
- sub t0, t1, rax, flags=(OF, SF, ZF, AF, PF, CF)
+ sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF)
subi rcx, rcx, 1, flags=(EZF,), dataSize=asz
add rdi, rdi, t2, dataSize=asz
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 0cc72bf7b..c6f5e9cdd 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -151,6 +151,10 @@ let {{
assembler.symbols["r%s" % reg] = \
regIdx("INTREG_R%s" % reg.upper())
+ for reg in ('ah', 'bh', 'ch', 'dh'):
+ assembler.symbols[reg] = \
+ regIdx("INTREG_FOLDED(INTREG_%s, IntFoldBit)" % reg.upper())
+
for reg in range(16):
assembler.symbols["cr%d" % reg] = regIdx("MISCREG_CR%d" % reg)
diff --git a/src/arch/x86/isa/microops/regop.isa b/src/arch/x86/isa/microops/regop.isa
index 698216139..a4cb6f4cc 100644
--- a/src/arch/x86/isa/microops/regop.isa
+++ b/src/arch/x86/isa/microops/regop.isa
@@ -525,18 +525,25 @@ let {{
uint64_t hiResult;
uint64_t psrc1_h = psrc1 / shifter;
uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = op2 / shifter;
+ uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
uint64_t psrc2_l = op2 & mask(halfSize);
hiResult = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
((psrc1_l * psrc2_l) / shifter)) /shifter) +
psrc1_h * psrc2_h;
- if (spsrc1 < 0)
+ if (bits(psrc1, dataSize * 8 - 1))
hiResult -= op2;
- int64_t bigSop2 = sop2;
- if (bigSop2 < 0)
+ if (bits(op2, dataSize * 8 - 1))
hiResult -= psrc1;
ProdHi = hiResult;
'''
+ flag_code = '''
+ if ((-ProdHi & mask(dataSize * 8)) !=
+ bits(ProdLow, dataSize * 8 - 1)) {
+ ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
+ } else {
+ ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
+ }
+ '''
class Mul1u(WrRegOp):
code = '''
@@ -545,12 +552,19 @@ let {{
uint64_t shifter = (1ULL << halfSize);
uint64_t psrc1_h = psrc1 / shifter;
uint64_t psrc1_l = psrc1 & mask(halfSize);
- uint64_t psrc2_h = op2 / shifter;
+ uint64_t psrc2_h = (op2 / shifter) & mask(halfSize);
uint64_t psrc2_l = op2 & mask(halfSize);
ProdHi = ((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
((psrc1_l * psrc2_l) / shifter)) / shifter) +
psrc1_h * psrc2_h;
'''
+ flag_code = '''
+ if (ProdHi) {
+ ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
+ } else {
+ ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
+ }
+ '''
class Mulel(RdRegOp):
code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
@@ -562,12 +576,6 @@ let {{
super(RdRegOp, self).__init__(dest, src1, \
"InstRegIndex(NUM_INTREGS)", flags, dataSize)
code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
- flag_code = '''
- if (ProdHi)
- ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
- else
- ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
- '''
# One or two bit divide
class Div1(WrRegOp):
@@ -631,7 +639,7 @@ let {{
class Mov(CondRegOp):
code = 'DestReg = merge(SrcReg1, op2, dataSize)'
- else_code = 'DestReg=DestReg;'
+ else_code = 'DestReg = DestReg;'
# Shift instructions
@@ -648,8 +656,10 @@ let {{
ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
int CFBits = 0;
//Figure out if we -would- set the CF bits if requested.
- if (bits(SrcReg1, dataSize * 8 - shiftAmt))
+ if (shiftAmt <= dataSize * 8 &&
+ bits(SrcReg1, dataSize * 8 - shiftAmt)) {
CFBits = 1;
+ }
//If some combination of the CF bits need to be set, set them.
if ((ext & (CFBit | ECFBit)) && CFBits)
ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
@@ -678,8 +688,11 @@ let {{
//worry about setting them.
ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
//If some combination of the CF bits need to be set, set them.
- if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+ if ((ext & (CFBit | ECFBit)) &&
+ shiftAmt <= dataSize * 8 &&
+ bits(SrcReg1, shiftAmt - 1)) {
ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+ }
//Figure out what the OF bit should be.
if ((ext & OFBit) && bits(SrcReg1, dataSize * 8 - 1))
ccFlagBits = ccFlagBits | OFBit;
@@ -695,7 +708,7 @@ let {{
// Because what happens to the bits shift -in- on a right shift
// is not defined in the C/C++ standard, we have to sign extend
// them manually to be sure.
- uint64_t arithMask =
+ uint64_t arithMask = (shiftAmt == 0) ? 0 :
-bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize);
'''
@@ -706,8 +719,12 @@ let {{
//worry about setting them.
ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
//If some combination of the CF bits need to be set, set them.
- if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+ uint8_t effectiveShift =
+ (shiftAmt <= dataSize * 8) ? shiftAmt : (dataSize * 8);
+ if ((ext & (CFBit | ECFBit)) &&
+ bits(SrcReg1, effectiveShift - 1)) {
ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+ }
//Use the regular mechanisms to calculate the other flags.
ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
DestReg, psrc1, op2);
@@ -718,14 +735,15 @@ let {{
code = '''
uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5)));
- if(shiftAmt)
+ uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
+ if(realShiftAmt)
{
- uint64_t top = psrc1 << (dataSize * 8 - shiftAmt);
- uint64_t bottom = bits(psrc1, dataSize * 8, shiftAmt);
+ uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt);
+ uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize);
}
else
- DestReg = DestReg;
+ DestReg = merge(DestReg, DestReg, dataSize);
'''
flag_code = '''
// If the shift amount is zero, no flags should be modified.
@@ -752,31 +770,37 @@ let {{
code = '''
uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5)));
- if(shiftAmt)
+ uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
+ if(realShiftAmt)
{
CCFlagBits flags = ccFlagBits;
- uint64_t top = flags.cf << (dataSize * 8 - shiftAmt);
- if(shiftAmt > 1)
- top |= psrc1 << (dataSize * 8 - shiftAmt - 1);
- uint64_t bottom = bits(psrc1, dataSize * 8, shiftAmt);
+ uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt);
+ if (realShiftAmt > 1)
+ top |= psrc1 << (dataSize * 8 - realShiftAmt + 1);
+ uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize);
}
else
- DestReg = DestReg;
+ DestReg = merge(DestReg, DestReg, dataSize);
'''
flag_code = '''
// If the shift amount is zero, no flags should be modified.
if (shiftAmt) {
+ int origCFBit = (ccFlagBits & CFBit) ? 1 : 0;
//Zero out any flags we might modify. This way we only have to
//worry about setting them.
ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
//Figure out what the OF bit should be.
- if ((ext & OFBit) && ((ccFlagBits & CFBit) ^
- bits(SrcReg1, dataSize * 8 - 1)))
+ if ((ext & OFBit) && (origCFBit ^
+ bits(SrcReg1, dataSize * 8 - 1))) {
ccFlagBits = ccFlagBits | OFBit;
+ }
//If some combination of the CF bits need to be set, set them.
- if ((ext & (CFBit | ECFBit)) && bits(SrcReg1, shiftAmt - 1))
+ if ((ext & (CFBit | ECFBit)) &&
+ (realShiftAmt == 0) ? origCFBit :
+ bits(SrcReg1, realShiftAmt - 1)) {
ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+ }
//Use the regular mechanisms to calculate the other flags.
ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
DestReg, psrc1, op2);
@@ -787,15 +811,16 @@ let {{
code = '''
uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5)));
- if(shiftAmt)
+ uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
+ if(realShiftAmt)
{
- uint64_t top = psrc1 << shiftAmt;
+ uint64_t top = psrc1 << realShiftAmt;
uint64_t bottom =
- bits(psrc1, dataSize * 8 - 1, dataSize * 8 - shiftAmt);
+ bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize);
}
else
- DestReg = DestReg;
+ DestReg = merge(DestReg, DestReg, dataSize);
'''
flag_code = '''
// If the shift amount is zero, no flags should be modified.
@@ -822,30 +847,33 @@ let {{
code = '''
uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5)));
- if(shiftAmt)
+ uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
+ if(realShiftAmt)
{
CCFlagBits flags = ccFlagBits;
- uint64_t top = psrc1 << shiftAmt;
- uint64_t bottom = flags.cf << (shiftAmt - 1);
+ uint64_t top = psrc1 << realShiftAmt;
+ uint64_t bottom = flags.cf << (realShiftAmt - 1);
if(shiftAmt > 1)
bottom |=
bits(psrc1, dataSize * 8 - 1,
- dataSize * 8 - shiftAmt + 1);
+ dataSize * 8 - realShiftAmt + 1);
DestReg = merge(DestReg, top | bottom, dataSize);
}
else
- DestReg = DestReg;
+ DestReg = merge(DestReg, DestReg, dataSize);
'''
flag_code = '''
// If the shift amount is zero, no flags should be modified.
if (shiftAmt) {
+ int origCFBit = (ccFlagBits & CFBit) ? 1 : 0;
//Zero out any flags we might modify. This way we only have to
//worry about setting them.
ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
int msb = bits(DestReg, dataSize * 8 - 1);
- int CFBits = bits(SrcReg1, dataSize * 8 - shiftAmt);
+ int CFBits = bits(SrcReg1, dataSize * 8 - realShiftAmt);
//If some combination of the CF bits need to be set, set them.
- if ((ext & (CFBit | ECFBit)) && CFBits)
+ if ((ext & (CFBit | ECFBit)) &&
+ (realShiftAmt == 0) ? origCFBit : CFBits)
ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
//Figure out what the OF bit should be.
if ((ext & OFBit) && (msb ^ CFBits))
@@ -856,6 +884,107 @@ let {{
}
'''
+ class Sld(RegOp):
+ code = '''
+ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
+ uint8_t dataBits = dataSize * 8;
+ uint8_t realShiftAmt = shiftAmt % (2 * dataBits);
+ uint64_t result;
+ if (realShiftAmt == 0) {
+ result = psrc1;
+ } else if (realShiftAmt < dataBits) {
+ result = (psrc1 << realShiftAmt) |
+ (DoubleBits >> (dataBits - realShiftAmt));
+ } else {
+ result = (DoubleBits << (realShiftAmt - dataBits)) |
+ (psrc1 >> (2 * dataBits - realShiftAmt));
+ }
+ DestReg = merge(DestReg, result, dataSize);
+ '''
+ flag_code = '''
+ // If the shift amount is zero, no flags should be modified.
+ if (shiftAmt) {
+ //Zero out any flags we might modify. This way we only have to
+ //worry about setting them.
+ ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
+ int CFBits = 0;
+ //Figure out if we -would- set the CF bits if requested.
+ if ((realShiftAmt == 0 &&
+ bits(DoubleBits, 0)) ||
+ (realShiftAmt <= dataBits &&
+ bits(SrcReg1, dataBits - realShiftAmt)) ||
+ (realShiftAmt > dataBits &&
+ bits(DoubleBits, 2 * dataBits - realShiftAmt))) {
+ CFBits = 1;
+ }
+ //If some combination of the CF bits need to be set, set them.
+ if ((ext & (CFBit | ECFBit)) && CFBits)
+ ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+ //Figure out what the OF bit should be.
+ if ((ext & OFBit) && (bits(SrcReg1, dataBits - 1) ^
+ bits(result, dataBits - 1)))
+ ccFlagBits = ccFlagBits | OFBit;
+ //Use the regular mechanisms to calculate the other flags.
+ ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
+ DestReg, psrc1, op2);
+ }
+ '''
+
+ class Srd(RegOp):
+ code = '''
+ uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
+ uint8_t dataBits = dataSize * 8;
+ uint8_t realShiftAmt = shiftAmt % (2 * dataBits);
+ uint64_t result;
+ if (realShiftAmt == 0) {
+ result = psrc1;
+ } else if (realShiftAmt < dataBits) {
+ // Because what happens to the bits shift -in- on a right
+ // shift is not defined in the C/C++ standard, we have to
+ // mask them out to be sure they're zero.
+ uint64_t logicalMask = mask(dataBits - realShiftAmt);
+ result = ((psrc1 >> realShiftAmt) & logicalMask) |
+ (DoubleBits << (dataBits - realShiftAmt));
+ } else {
+ uint64_t logicalMask = mask(2 * dataBits - realShiftAmt);
+ result = ((DoubleBits >> (realShiftAmt - dataBits)) &
+ logicalMask) |
+ (psrc1 << (2 * dataBits - realShiftAmt));
+ }
+ DestReg = merge(DestReg, result, dataSize);
+ '''
+ flag_code = '''
+ // If the shift amount is zero, no flags should be modified.
+ if (shiftAmt) {
+ //Zero out any flags we might modify. This way we only have to
+ //worry about setting them.
+ ccFlagBits = ccFlagBits & ~(ext & (CFBit | ECFBit | OFBit));
+ int CFBits = 0;
+ //If some combination of the CF bits need to be set, set them.
+ if ((realShiftAmt == 0 &&
+ bits(DoubleBits, dataBits - 1)) ||
+ (realShiftAmt <= dataBits &&
+ bits(SrcReg1, realShiftAmt - 1)) ||
+ (realShiftAmt > dataBits &&
+ bits(DoubleBits, realShiftAmt - dataBits - 1))) {
+ CFBits = 1;
+ }
+ //If some combination of the CF bits need to be set, set them.
+ if ((ext & (CFBit | ECFBit)) && CFBits)
+ ccFlagBits = ccFlagBits | (ext & (CFBit | ECFBit));
+ //Figure out what the OF bit should be.
+ if ((ext & OFBit) && (bits(SrcReg1, dataBits - 1) ^
+ bits(result, dataBits - 1)))
+ ccFlagBits = ccFlagBits | OFBit;
+ //Use the regular mechanisms to calculate the other flags.
+ ccFlagBits = genFlags(ccFlagBits, ext & ~(CFBit | ECFBit | OFBit),
+ DestReg, psrc1, op2);
+ }
+ '''
+
+ class Mdb(WrRegOp):
+ code = 'DoubleBits = psrc1 ^ op2;'
+
class Wrip(WrRegOp, CondRegOp):
code = 'RIP = psrc1 + sop2 + CSBase'
else_code="RIP = RIP;"
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index 5ea803bfc..135fc10df 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -127,6 +127,7 @@ def operands {{
'Quotient': impIntReg(2, 9),
'Remainder': impIntReg(3, 10),
'Divisor': impIntReg(4, 11),
+ 'DoubleBits': impIntReg(5, 11),
'Rax': intReg('(INTREG_RAX)', 12),
'Rbx': intReg('(INTREG_RBX)', 13),
'Rcx': intReg('(INTREG_RCX)', 14),
diff --git a/src/arch/x86/x86_traits.hh b/src/arch/x86/x86_traits.hh
index 8b50bdf9b..a73aaef19 100644
--- a/src/arch/x86/x86_traits.hh
+++ b/src/arch/x86/x86_traits.hh
@@ -68,12 +68,13 @@ namespace X86ISA
const int NumPseudoIntRegs = 1;
//1. The condition code bits of the rflags register.
- const int NumImplicitIntRegs = 5;
+ const int NumImplicitIntRegs = 6;
//1. The lower part of the result of multiplication.
//2. The upper part of the result of multiplication.
//3. The quotient from division
//4. The remainder from division
//5. The divisor for division
+ //6. The register to use for shift doubles
const int NumMMXRegs = 8;
const int NumXMMRegs = 16;
diff --git a/src/mem/gems_common/Map.hh b/src/mem/gems_common/Map.hh
index 5128a0fee..6e581d375 100644
--- a/src/mem/gems_common/Map.hh
+++ b/src/mem/gems_common/Map.hh
@@ -93,6 +93,8 @@ bool Map<KEY_TYPE, VALUE_TYPE>::exist(const KEY_TYPE& key) const
template <class KEY_TYPE, class VALUE_TYPE>
VALUE_TYPE& Map<KEY_TYPE, VALUE_TYPE>::lookup(const KEY_TYPE& key) const
{
+ if (!exist(key))
+ cerr << *this << " is looking for " << key << endl;
assert(exist(key));
return m_map[key];
}
diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm
index 16a158f0d..915a0eb99 100644
--- a/src/mem/protocol/MI_example-cache.sm
+++ b/src/mem/protocol/MI_example-cache.sm
@@ -1,5 +1,8 @@
-machine(L1Cache, "MI Example L1 Cache"): LATENCY_CACHE_RESPONSE_LATENCY LATENCY_ISSUE_LATENCY {
+machine(L1Cache, "MI Example L1 Cache")
+: int cache_response_latency,
+ int issue_latency
+{
// NETWORK BUFFERS
MessageBuffer requestFromCache, network="To", virtual_network="0", ordered="true";
@@ -188,8 +191,8 @@ machine(L1Cache, "MI Example L1 Cache"): LATENCY_CACHE_RESPONSE_LATENCY LATENCY
// ACTIONS
action(a_issueRequest, "a", desc="Issue a request") {
- enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") {
- out_msg.Address := address;
+ enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
+ out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := machineID;
out_msg.Destination.add(map_Address_to_Directory(address));
@@ -198,7 +201,7 @@ machine(L1Cache, "MI Example L1 Cache"): LATENCY_CACHE_RESPONSE_LATENCY LATENCY
}
action(b_issuePUT, "b", desc="Issue a PUT request") {
- enqueue(requestNetwork_out, RequestMsg, latency="ISSUE_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTX;
out_msg.Requestor := machineID;
@@ -211,7 +214,7 @@ machine(L1Cache, "MI Example L1 Cache"): LATENCY_CACHE_RESPONSE_LATENCY LATENCY
action(e_sendData, "e", desc="Send data from cache to requestor") {
peek(forwardRequestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -224,7 +227,7 @@ machine(L1Cache, "MI Example L1 Cache"): LATENCY_CACHE_RESPONSE_LATENCY LATENCY
action(ee_sendDataFromTBE, "\e", desc="Send data from TBE to requestor") {
peek(forwardRequestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="CACHE_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
diff --git a/src/mem/protocol/MI_example-dir.sm b/src/mem/protocol/MI_example-dir.sm
index fa8903d47..9af1940f7 100644
--- a/src/mem/protocol/MI_example-dir.sm
+++ b/src/mem/protocol/MI_example-dir.sm
@@ -1,5 +1,9 @@
-machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_DIRECTORY_LATENCY LATENCY_MEMORY_LATENCY {
+machine(Directory, "Directory protocol")
+: int directory_latency,
+ int dma_select_low_bit,
+ int dma_select_num_bits
+{
MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false";
MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false";
@@ -65,9 +69,9 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
// TBE entries for DMA requests
structure(TBE, desc="TBE entries for outstanding DMA requests") {
+ Address PhysicalAddress, desc="physical address";
State TBEState, desc="Transient State";
DataBlock DataBlk, desc="Data to be written (DMA write only)";
- int Offset, desc="...";
int Len, desc="...";
}
@@ -180,7 +184,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:WB_ACK;
out_msg.Requestor := in_msg.Requestor;
@@ -192,7 +196,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(l_sendWriteBackAck, "la", desc="Send writeback ack to requestor") {
peek(memQueue_in, MemoryMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:WB_ACK;
out_msg.Requestor := in_msg.OriginalRequestorMachId;
@@ -204,7 +208,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:WB_NACK;
out_msg.Requestor := in_msg.Requestor;
@@ -218,29 +222,9 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
directory[address].Owner.clear();
}
-// action(d_sendData, "d", desc="Send data to requestor") {
-// peek(requestQueue_in, RequestMsg) {
-// enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") {
-// out_msg.Address := address;
-//
-// if (in_msg.Type == CoherenceRequestType:GETS && directory[address].Sharers.count() == 0) {
-// // out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE_CLEAN;
-// out_msg.Type := CoherenceResponseType:DATA;
-// } else {
-// out_msg.Type := CoherenceResponseType:DATA;
-// }
-//
-// out_msg.Sender := machineID;
-// out_msg.Destination.add(in_msg.Requestor);
-// out_msg.DataBlk := directory[in_msg.Address].DataBlk;
-// out_msg.MessageSize := MessageSizeType:Response_Data;
-// }
-// }
-// }
-
action(d_sendData, "d", desc="Send data to requestor") {
peek(memQueue_in, MemoryMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -251,26 +235,15 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
}
}
-// action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") {
-// peek(dmaRequestQueue_in, DMARequestMsg) {
-// enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
-// out_msg.PhysicalAddress := address;
-// out_msg.Type := DMAResponseType:DATA;
-// out_msg.DataBlk := directory[in_msg.PhysicalAddress].DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be
-// out_msg.Destination.add(map_Address_to_DMA(address));
-// out_msg.MessageSize := MessageSizeType:Response_Data;
-// }
-// }
-// }
-
action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") {
peek(memQueue_in, MemoryMsg) {
- enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
out_msg.PhysicalAddress := address;
out_msg.LineAddress := address;
out_msg.Type := DMAResponseType:DATA;
out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be
- out_msg.Destination.add(map_Address_to_DMA(address));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA,
+ dma_select_low_bit, dma_select_num_bits));
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
@@ -280,23 +253,25 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") {
peek(requestQueue_in, RequestMsg) {
- enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
out_msg.PhysicalAddress := address;
out_msg.LineAddress := address;
out_msg.Type := DMAResponseType:DATA;
out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be
- out_msg.Destination.add(map_Address_to_DMA(address));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA,
+ dma_select_low_bit, dma_select_num_bits));
out_msg.MessageSize := MessageSizeType:Response_Data;
}
}
}
action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") {
- enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="MEMORY_LATENCY") {
+ enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
out_msg.PhysicalAddress := address;
out_msg.LineAddress := address;
out_msg.Type := DMAResponseType:ACK;
- out_msg.Destination.add(map_Address_to_DMA(address));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA,
+ dma_select_low_bit, dma_select_num_bits));
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
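
In the DMA response actions above, the destination is no longer computed by map_Address_to_DMA; instead mapAddressToRange(address, MachineType:DMA, dma_select_low_bit, dma_select_num_bits) picks the target controller. Assuming the helper simply extracts dma_select_num_bits address bits starting at dma_select_low_bit, the selection looks roughly like this C++ sketch:

#include <cstdint>
#include <cstdio>

// Sketch of the address-interleaving idea behind mapAddressToRange(): take
// 'num_bits' bits of the address starting at 'low_bit' and use them as the
// index of the target machine (here: which DMA engine).  This is an
// assumption about the helper's behaviour, not its actual implementation.
static int selectMachine(uint64_t addr, int low_bit, int num_bits) {
    if (num_bits == 0)
        return 0;                        // only one machine of this type
    uint64_t mask = (1ULL << num_bits) - 1;
    return static_cast<int>((addr >> low_bit) & mask);
}

int main() {
    // With low_bit = 6 (past the block-offset bits) and num_bits = 1,
    // consecutive cache lines alternate between two DMA engines.
    std::printf("0x0040 -> DMA %d\n", selectMachine(0x0040, 6, 1));  // 1
    std::printf("0x0080 -> DMA %d\n", selectMachine(0x0080, 6, 1));  // 0
    return 0;
}
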
@@ -318,7 +293,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
APPEND_TRANSITION_COMMENT(directory[in_msg.Address].Owner);
APPEND_TRANSITION_COMMENT("Req: ");
APPEND_TRANSITION_COMMENT(in_msg.Requestor);
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Requestor := in_msg.Requestor;
@@ -330,7 +305,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") {
peek(dmaRequestQueue_in, DMARequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := machineID;
@@ -359,14 +334,14 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
}
action(dwt_writeDMADataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
- directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, TBEs[address].Offset, TBEs[address].Len);
+ directory[address].DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
}
action(v_allocateTBE, "v", desc="Allocate TBE") {
peek(dmaRequestQueue_in, DMARequestMsg) {
TBEs.allocate(address);
TBEs[address].DataBlk := in_msg.DataBlk;
- TBEs[address].Offset := in_msg.Offset;
+ TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
TBEs[address].Len := in_msg.Len;
}
}
@@ -389,7 +364,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
peek(requestQueue_in, RequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_READ;
out_msg.Sender := machineID;
@@ -403,7 +378,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
peek(dmaRequestQueue_in, DMARequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_READ;
out_msg.Sender := machineID;
@@ -414,29 +389,15 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
}
}
}
-// action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") {
-// peek(dmaRequestQueue_in, DMARequestMsg) {
-// enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
-// out_msg.Address := address;
-// out_msg.Type := MemoryRequestType:MEMORY_WB;
-// out_msg.OriginalRequestorMachId := machineID;
-// out_msg.DataBlk := in_msg.DataBlk;
-// out_msg.MessageSize := in_msg.MessageSize;
-
-// DEBUG_EXPR(out_msg);
-// }
-// }
-// }
-
action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") {
peek(dmaRequestQueue_in, DMARequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
//out_msg.OriginalRequestorMachId := machineID;
//out_msg.DataBlk := in_msg.DataBlk;
- out_msg.DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len);
+ out_msg.DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len);
out_msg.MessageSize := in_msg.MessageSize;
//out_msg.Prefetch := in_msg.Prefetch;
@@ -447,12 +408,12 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(qw_queueMemoryWBRequest_partialTBE, "qwt", desc="Queue off-chip writeback request") {
peek(requestQueue_in, RequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
out_msg.OriginalRequestorMachId := in_msg.Requestor;
//out_msg.DataBlk := in_msg.DataBlk;
- out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, TBEs[address].Offset, TBEs[address].Len);
+ out_msg.DataBlk.copyPartial(TBEs[address].DataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
out_msg.MessageSize := in_msg.MessageSize;
//out_msg.Prefetch := in_msg.Prefetch;
@@ -465,7 +426,7 @@ machine(Directory, "Directory protocol") : LATENCY_TO_MEM_CTRL_LATENCY LATENCY_D
action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") {
peek(requestQueue_in, RequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="TO_MEM_CTRL_LATENCY") {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
out_msg.OriginalRequestorMachId := in_msg.Requestor;
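
With the Offset field removed from the TBE and from DMARequestMsg, the byte offset is recomputed from the stored PhysicalAddress via addressOffset() before copyPartial() splices the DMA data into the line. A sketch of that arithmetic, assuming a 64-byte block, that addressOffset() returns the low block-offset bits, and that copyPartial() copies len bytes at the given offset:

#include <cstdint>
#include <cstdio>
#include <cstring>

constexpr int kBlockBytes = 64;   // assumed cache line size

// Assumed behaviour of addressOffset(): byte offset within the cache line.
static int addressOffset(uint64_t physAddr) {
    return static_cast<int>(physAddr & (kBlockBytes - 1));
}

struct DataBlock {
    uint8_t bytes[kBlockBytes] = {};
    // Assumed copyPartial semantics: splice bytes [offset, offset+len) of
    // 'src' into this block at the same offset.
    void copyPartial(const DataBlock& src, int offset, int len) {
        std::memcpy(bytes + offset, src.bytes + offset, len);
    }
};

int main() {
    DataBlock memoryLine, dmaData;
    for (int i = 0; i < kBlockBytes; ++i) dmaData.bytes[i] = 0xAB;

    uint64_t physAddr = 0x1010;   // DMA write starting 16 bytes into the line
    int len = 8;
    memoryLine.copyPartial(dmaData, addressOffset(physAddr), len);

    std::printf("offset=%d patched=0x%02X untouched=0x%02X\n",
                addressOffset(physAddr),
                static_cast<unsigned>(memoryLine.bytes[16]),
                static_cast<unsigned>(memoryLine.bytes[0]));
    return 0;
}
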
diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm
index d5de18552..e883288df 100644
--- a/src/mem/protocol/MI_example-dma.sm
+++ b/src/mem/protocol/MI_example-dma.sm
@@ -1,5 +1,7 @@
-machine(DMA, "DMA Controller") {
+machine(DMA, "DMA Controller")
+: int request_latency
+{
MessageBuffer responseFromDir, network="From", virtual_network="4", ordered="true", no_vector="true";
MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true";
@@ -35,12 +37,12 @@ machine(DMA, "DMA Controller") {
out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="...");
- in_port(dmaRequestQueue_in, DMARequestMsg, mandatoryQueue, desc="...") {
+ in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
if (dmaRequestQueue_in.isReady()) {
- peek(dmaRequestQueue_in, DMARequestMsg) {
- if (in_msg.Type == DMARequestType:READ ) {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ if (in_msg.Type == SequencerRequestType:LD ) {
trigger(Event:ReadRequest, in_msg.LineAddress);
- } else if (in_msg.Type == DMARequestType:WRITE) {
+ } else if (in_msg.Type == SequencerRequestType:ST) {
trigger(Event:WriteRequest, in_msg.LineAddress);
} else {
error("Invalid request type");
@@ -64,9 +66,9 @@ machine(DMA, "DMA Controller") {
}
action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
- peek(dmaRequestQueue_in, DMARequestMsg) {
- enqueue(reqToDirectory_out, DMARequestMsg) {
- out_msg.PhysicalAddress := address;
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
+ out_msg.PhysicalAddress := in_msg.PhysicalAddress;
out_msg.LineAddress := in_msg.LineAddress;
out_msg.Type := DMARequestType:READ;
out_msg.DataBlk := in_msg.DataBlk;
@@ -78,9 +80,9 @@ machine(DMA, "DMA Controller") {
}
action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
- peek(dmaRequestQueue_in, DMARequestMsg) {
- enqueue(reqToDirectory_out, DMARequestMsg) {
- out_msg.PhysicalAddress := address;
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
+ out_msg.PhysicalAddress := in_msg.PhysicalAddress;
out_msg.LineAddress := in_msg.LineAddress;
out_msg.Type := DMARequestType:WRITE;
out_msg.DataBlk := in_msg.DataBlk;
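
The DMA controller's mandatory queue now carries generic SequencerMsg entries, so an LD becomes a DMA READ, an ST becomes a DMA WRITE, and the outgoing request keeps both the unaligned PhysicalAddress and the LineAddress. A small C++ sketch of that translation (the type and field names mirror the protocol; the surrounding plumbing is hypothetical):

#include <cstdint>
#include <iostream>
#include <stdexcept>

// Hypothetical mirror of the DMA controller's mandatory-queue handling:
// generic sequencer requests are translated into DMA protocol requests
// before being sent to the directory.
enum class SequencerRequestType { LD, ST };
enum class DMARequestType { READ, WRITE };

struct SequencerMsg {
    SequencerRequestType type;
    uint64_t physicalAddress;
    uint64_t lineAddress;
};

struct DMARequestMsg {
    DMARequestType type;
    uint64_t physicalAddress;
    uint64_t lineAddress;
};

DMARequestMsg translate(const SequencerMsg& in) {
    DMARequestMsg out;
    out.physicalAddress = in.physicalAddress;
    out.lineAddress = in.lineAddress;
    switch (in.type) {
      case SequencerRequestType::LD: out.type = DMARequestType::READ;  break;
      case SequencerRequestType::ST: out.type = DMARequestType::WRITE; break;
      default: throw std::runtime_error("Invalid request type");
    }
    return out;
}

int main() {
    SequencerMsg ld{SequencerRequestType::LD, 0x2008, 0x2000};
    DMARequestMsg req = translate(ld);
    std::cout << "DMA "
              << (req.type == DMARequestType::READ ? "READ" : "WRITE")
              << " line 0x" << std::hex << req.lineAddress << std::endl;
    return 0;
}
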
diff --git a/src/mem/protocol/MI_example-msg.sm b/src/mem/protocol/MI_example-msg.sm
index 8c0afed2e..d4d557200 100644
--- a/src/mem/protocol/MI_example-msg.sm
+++ b/src/mem/protocol/MI_example-msg.sm
@@ -107,7 +107,6 @@ structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
Address LineAddress, desc="Line address for this request";
NetDest Destination, desc="Destination";
DataBlock DataBlk, desc="DataBlk attached to this request";
- int Offset, desc="The offset into the datablock";
int Len, desc="The length of the request";
MessageSizeType MessageSize, desc="size category of the message";
}
diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
index a65ade10f..28800b2bd 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm
@@ -32,7 +32,11 @@
*
*/
-machine(L1Cache, "Directory protocol") {
+machine(L1Cache, "Directory protocol")
+ : int request_latency,
+ int l2_select_low_bit,
+ int l2_select_num_bits
+{
// NODE L1 CACHE
// From this node's L1 cache TO the network
@@ -125,7 +129,7 @@ machine(L1Cache, "Directory protocol") {
external_type(CacheMemory) {
bool cacheAvail(Address);
Address cacheProbe(Address);
- void allocate(Address);
+ void allocate(Address, Entry);
void deallocate(Address);
Entry lookup(Address);
void changePermission(Address, AccessPermission);
@@ -141,11 +145,11 @@ machine(L1Cache, "Directory protocol") {
MessageBuffer mandatoryQueue, ordered="false", abstract_chip_ptr="true";
- Sequencer sequencer, abstract_chip_ptr="true", constructor_hack="i";
+ Sequencer sequencer, factory='RubySystem::getSequencer(m_cfg["sequencer"])';
TBETable TBEs, template_hack="<L1Cache_TBE>";
- CacheMemory L1IcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1I"', abstract_chip_ptr="true";
- CacheMemory L1DcacheMemory, template_hack="<L1Cache_Entry>", constructor_hack='L1_CACHE_NUM_SETS_BITS,L1_CACHE_ASSOC,MachineType_L1Cache,int_to_string(i)+"_L1D"', abstract_chip_ptr="true";
+ CacheMemory L1IcacheMemory, factory='RubySystem::getCache(m_cfg["icache"])';
+ CacheMemory L1DcacheMemory, factory='RubySystem::getCache(m_cfg["dcache"])';
TimerTable useTimerTable;
Entry getCacheEntry(Address addr), return_by_ref="yes" {
@@ -305,7 +309,7 @@ machine(L1Cache, "Directory protocol") {
assert(in_msg.Destination.isElement(machineID));
DEBUG_EXPR("MRM_DEBUG: L1 received");
DEBUG_EXPR(in_msg.Type);
- if (in_msg.Type == CoherenceRequestType:GETX) {
+ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_READ || in_msg.Type == CoherenceRequestType:DMA_WRITE) {
if (in_msg.Requestor == machineID && in_msg.RequestorMachine == MachineType:L1Cache) {
trigger(Event:Own_GETX, in_msg.Address);
} else {
@@ -357,40 +361,40 @@ machine(L1Cache, "Directory protocol") {
// ** INSTRUCTION ACCESS ***
// Check to see if it is in the OTHER L1
- if (L1DcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The block is in the wrong L1, put the request on the queue to the shared L2
- trigger(Event:L1_Replacement, in_msg.Address);
+ trigger(Event:L1_Replacement, in_msg.LineAddress);
}
- if (L1IcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The tag matches for the L1, so the L1 asks the L2 for it.
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
- if (L1IcacheMemory.cacheAvail(in_msg.Address)) {
+ if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
// L1 doesn't have the line, but we have space for it in the L1, so let's see if the L2 has it
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
// No room in the L1, so we need to make room in the L1
- trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.Address));
+ trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress));
}
}
} else {
// *** DATA ACCESS ***
// Check to see if it is in the OTHER L1
- if (L1IcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1IcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The block is in the wrong L1, put the request on the queue to the shared L2
- trigger(Event:L1_Replacement, in_msg.Address);
+ trigger(Event:L1_Replacement, in_msg.LineAddress);
}
- if (L1DcacheMemory.isTagPresent(in_msg.Address)) {
+ if (L1DcacheMemory.isTagPresent(in_msg.LineAddress)) {
// The tag matches for the L1, so the L1 asks the L2 for it
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
- if (L1DcacheMemory.cacheAvail(in_msg.Address)) {
+ if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
// L1 doesn't have the line, but we have space for it in the L1, so let's see if the L2 has it
- trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.Address);
+ trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress);
} else {
// No room in the L1, so we need to make room in the L1
- trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.Address));
+ trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress));
}
}
}
@@ -403,11 +407,12 @@ machine(L1Cache, "Directory protocol") {
action(a_issueGETS, "a", desc="Issue GETS") {
peek(mandatoryQueue_in, CacheMsg) {
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Request_Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
@@ -417,11 +422,12 @@ machine(L1Cache, "Directory protocol") {
action(b_issueGETX, "b", desc="Issue GETX") {
peek(mandatoryQueue_in, CacheMsg) {
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(requestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Request_Control;
out_msg.AccessMode := in_msg.AccessMode;
out_msg.Prefetch := in_msg.Prefetch;
@@ -430,34 +436,37 @@ machine(L1Cache, "Directory protocol") {
}
action(d_issuePUTX, "d", desc="Issue PUTX") {
- // enqueue(writebackNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ // enqueue(writebackNetwork_out, RequestMsg, latency=request_latency) {
+ enqueue(requestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTX;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
action(dd_issuePUTO, "\d", desc="Issue PUTO") {
- // enqueue(writebackNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ // enqueue(writebackNetwork_out, RequestMsg, latency=request_latency) {
+ enqueue(requestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTO;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
action(dd_issuePUTS, "\ds", desc="Issue PUTS") {
- // enqueue(writebackNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
- enqueue(requestNetwork_out, RequestMsg, latency="L1_REQUEST_LATENCY") {
+ // enqueue(writebackNetwork_out, RequestMsg, latency=request_latency) {
+ enqueue(requestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTS;
out_msg.Requestor := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
}
@@ -465,11 +474,12 @@ machine(L1Cache, "Directory protocol") {
action(e_sendData, "e", desc="Send data from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
if (in_msg.RequestorMachine == MachineType:L2Cache) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.DataBlk := getCacheEntry(address).DataBlk;
// out_msg.Dirty := getCacheEntry(address).Dirty;
out_msg.Dirty := false;
@@ -480,7 +490,7 @@ machine(L1Cache, "Directory protocol") {
DEBUG_EXPR(in_msg.Address);
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -497,11 +507,12 @@ machine(L1Cache, "Directory protocol") {
}
action(e_sendDataToL2, "ee", desc="Send data from cache to requestor") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.DataBlk := getCacheEntry(address).DataBlk;
out_msg.Dirty := getCacheEntry(address).Dirty;
out_msg.Acks := 0; // irrelevant
@@ -513,12 +524,13 @@ machine(L1Cache, "Directory protocol") {
action(ee_sendDataExclusive, "\e", desc="Send data from cache to requestor, don't keep a shared copy") {
peek(requestNetwork_in, RequestMsg) {
if (in_msg.RequestorMachine == MachineType:L2Cache) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.DataBlk := getCacheEntry(address).DataBlk;
out_msg.Dirty := getCacheEntry(address).Dirty;
out_msg.Acks := in_msg.Acks;
@@ -527,7 +539,7 @@ machine(L1Cache, "Directory protocol") {
DEBUG_EXPR("Sending exclusive data to L2");
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -546,7 +558,7 @@ machine(L1Cache, "Directory protocol") {
action(f_sendAck, "f", desc="Send ack from cache to requestor") {
peek(requestNetwork_in, RequestMsg) {
if (in_msg.RequestorMachine == MachineType:L1Cache) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -557,12 +569,13 @@ machine(L1Cache, "Directory protocol") {
}
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(in_msg.Address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.Acks := 0 - 1; // -1
out_msg.MessageSize := MessageSizeType:Response_Control;
}
@@ -571,21 +584,23 @@ machine(L1Cache, "Directory protocol") {
}
action(g_sendUnblock, "g", desc="Send unblock to memory") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:UNBLOCK;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Unblock_Control;
}
}
action(gg_sendUnblockExclusive, "\g", desc="Send unblock exclusive to memory") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.MessageSize := MessageSizeType:Unblock_Control;
}
}
@@ -627,7 +642,6 @@ machine(L1Cache, "Directory protocol") {
action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") {
peek(responseToL1Cache_in, ResponseMsg) {
DEBUG_EXPR("MRM_DEBUG: L1 decrementNumberOfMessages");
- DEBUG_EXPR(id);
DEBUG_EXPR(in_msg.Acks);
TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - in_msg.Acks;
}
@@ -660,7 +674,7 @@ machine(L1Cache, "Directory protocol") {
action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
peek(requestNetwork_in, RequestMsg) {
if (in_msg.RequestorMachine == MachineType:L1Cache) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -673,11 +687,12 @@ machine(L1Cache, "Directory protocol") {
}
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.DataBlk := TBEs[address].DataBlk;
// out_msg.Dirty := TBEs[address].Dirty;
out_msg.Dirty := false;
@@ -691,7 +706,7 @@ machine(L1Cache, "Directory protocol") {
action(q_sendExclusiveDataFromTBEToCache, "qq", desc="Send data from TBE to cache") {
peek(requestNetwork_in, RequestMsg) {
if (in_msg.RequestorMachine == MachineType:L1Cache) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -703,11 +718,12 @@ machine(L1Cache, "Directory protocol") {
}
}
else {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address,machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.DataBlk := TBEs[address].DataBlk;
out_msg.Dirty := TBEs[address].Dirty;
out_msg.Acks := in_msg.Acks;
@@ -720,11 +736,12 @@ machine(L1Cache, "Directory protocol") {
// L2 will usually request data for a writeback
action(qq_sendWBDataFromTBEToL2, "\q", desc="Send data from TBE to L2") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L1_REQUEST_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L1Cache;
- out_msg.Destination.add(map_L1CacheMachId_to_L2Cache(address, machineID));
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits));
out_msg.Dirty := TBEs[address].Dirty;
if (TBEs[address].Dirty) {
out_msg.Type := CoherenceResponseType:WRITEBACK_DIRTY_DATA;
@@ -770,13 +787,13 @@ machine(L1Cache, "Directory protocol") {
action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") {
if (L1DcacheMemory.isTagPresent(address) == false) {
- L1DcacheMemory.allocate(address);
+ L1DcacheMemory.allocate(address, new Entry);
}
}
action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") {
if (L1IcacheMemory.isTagPresent(address) == false) {
- L1IcacheMemory.allocate(address);
+ L1IcacheMemory.allocate(address, new Entry);
}
}
@@ -784,7 +801,7 @@ machine(L1Cache, "Directory protocol") {
action(uu_profileMiss, "\u", desc="Profile the demand miss") {
peek(mandatoryQueue_in, CacheMsg) {
- profile_miss(in_msg, id);
+ // profile_miss(in_msg);
}
}
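
The mandatory-queue logic above keys every cache check on in_msg.LineAddress and follows the usual placement policy: hit in the right L1, issue the request; miss with a free way, issue the request; otherwise first trigger a replacement of the victim returned by cacheProbe(). A toy C++ sketch of that decision, with a deliberately simplified direct-mapped cache standing in for CacheMemory:

#include <cstdint>
#include <iostream>
#include <unordered_map>

// Toy direct-mapped cache, only to illustrate the placement policy:
//   tag hit            -> handle the request
//   miss, set has room -> handle the request (fill on the response)
//   miss, set is full  -> trigger a replacement of the victim line first
struct ToyCache {
    static constexpr int kSets = 4;
    std::unordered_map<int, uint64_t> line;   // set index -> resident line addr

    int setOf(uint64_t lineAddr) const { return (lineAddr >> 6) % kSets; }
    bool isTagPresent(uint64_t a) const {
        auto it = line.find(setOf(a));
        return it != line.end() && it->second == a;
    }
    bool cacheAvail(uint64_t a) const { return line.count(setOf(a)) == 0; }
    uint64_t cacheProbe(uint64_t a) const { return line.at(setOf(a)); }  // victim
};

enum class Trigger { Request, Replacement };
struct Decision { Trigger event; uint64_t addr; };

Decision onMandatoryRequest(const ToyCache& c, uint64_t lineAddress) {
    if (c.isTagPresent(lineAddress) || c.cacheAvail(lineAddress))
        return {Trigger::Request, lineAddress};
    return {Trigger::Replacement, c.cacheProbe(lineAddress)};  // make room first
}

int main() {
    ToyCache dcache;
    dcache.line[dcache.setOf(0x1000)] = 0x1000;      // set already occupied
    Decision d = onMandatoryRequest(dcache, 0x1100); // conflicting line
    std::cout << (d.event == Trigger::Replacement ? "evict 0x" : "request 0x")
              << std::hex << d.addr << std::endl;
    return 0;
}
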
diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
index 50af743c2..68d3a2cd3 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
@@ -32,7 +32,10 @@
*
*/
-machine(L2Cache, "Token protocol") {
+machine(L2Cache, "Token protocol")
+: int response_latency,
+ int request_latency
+{
// L2 BANK QUEUES
// From local bank of L2 cache TO the network
@@ -208,7 +211,7 @@ machine(L2Cache, "Token protocol") {
external_type(CacheMemory) {
bool cacheAvail(Address);
Address cacheProbe(Address);
- void allocate(Address);
+ void allocate(Address, Entry);
void deallocate(Address);
Entry lookup(Address);
void changePermission(Address, AccessPermission);
@@ -225,13 +228,15 @@ machine(L2Cache, "Token protocol") {
TBETable L2_TBEs, template_hack="<L2Cache_TBE>";
- CacheMemory L2cacheMemory, template_hack="<L2Cache_Entry>", constructor_hack='L2_CACHE_NUM_SETS_BITS,L2_CACHE_ASSOC,MachineType_L2Cache,int_to_string(i)+"_L2"';
+ CacheMemory L2cacheMemory, factory='RubySystem::getCache(m_cfg["cache"])';
PerfectCacheMemory localDirectory, template_hack="<L2Cache_DirEntry>";
Entry getL2CacheEntry(Address addr), return_by_ref="yes" {
if (L2cacheMemory.isTagPresent(addr)) {
return L2cacheMemory[addr];
+ } else {
+ return L2cacheMemory[addr];
}
}
@@ -579,7 +584,7 @@ machine(L2Cache, "Token protocol") {
in_port(requestNetwork_in, RequestMsg, GlobalRequestToL2Cache) {
if (requestNetwork_in.isReady()) {
peek(requestNetwork_in, RequestMsg) {
- if (in_msg.Type == CoherenceRequestType:GETX) {
+ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_READ || in_msg.Type == CoherenceRequestType:DMA_WRITE) {
if (in_msg.Requestor == machineID) {
trigger(Event:Own_GETX, in_msg.Address);
} else {
@@ -675,7 +680,7 @@ machine(L2Cache, "Token protocol") {
action(a_issueGETS, "a", desc="issue local request globally") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.RequestorMachine := MachineType:L2Cache;
@@ -688,7 +693,7 @@ machine(L2Cache, "Token protocol") {
action(a_issueGETX, "\a", desc="issue local request globally") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.RequestorMachine := MachineType:L2Cache;
@@ -700,7 +705,7 @@ machine(L2Cache, "Token protocol") {
}
action(b_issuePUTX, "b", desc="Issue PUTX") {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTX;
out_msg.RequestorMachine := MachineType:L2Cache;
@@ -711,7 +716,7 @@ machine(L2Cache, "Token protocol") {
}
action(b_issuePUTO, "\b", desc="Issue PUTO") {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTO;
out_msg.Requestor := machineID;
@@ -723,7 +728,7 @@ machine(L2Cache, "Token protocol") {
/* PUTO, but local sharers exist */
action(b_issuePUTO_ls, "\bb", desc="Issue PUTO") {
- enqueue(globalRequestNetwork_out, RequestMsg, latency="L2_REQUEST_LATENCY") {
+ enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:PUTO_SHARERS;
out_msg.Requestor := machineID;
@@ -734,7 +739,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendDataFromTBEToL1GETS, "c", desc="Send data from TBE to L1 requestors in TBE") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -750,7 +755,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendDataFromTBEToL1GETX, "\c", desc="Send data from TBE to L1 requestors in TBE") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -766,7 +771,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendExclusiveDataFromTBEToL1GETS, "\cc", desc="Send data from TBE to L1 requestors in TBE") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -779,7 +784,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendDataFromTBEToFwdGETX, "cc", desc="Send data from TBE to external GETX") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -793,7 +798,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendDataFromTBEToFwdGETS, "ccc", desc="Send data from TBE to external GETS") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -810,7 +815,7 @@ machine(L2Cache, "Token protocol") {
}
action(c_sendExclusiveDataFromTBEToFwdGETS, "\ccc", desc="Send data from TBE to external GETS") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -827,7 +832,7 @@ machine(L2Cache, "Token protocol") {
action(d_sendDataToL1GETS, "d", desc="Send data directly to L1 requestor") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -845,7 +850,7 @@ machine(L2Cache, "Token protocol") {
action(d_sendDataToL1GETX, "\d", desc="Send data and a token from TBE to L1 requestor") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -863,7 +868,7 @@ machine(L2Cache, "Token protocol") {
action(dd_sendDataToFwdGETX, "dd", desc="send data") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -882,7 +887,7 @@ machine(L2Cache, "Token protocol") {
action(dd_sendDataToFwdGETS, "\dd", desc="send data") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.Sender := machineID;
@@ -900,7 +905,7 @@ machine(L2Cache, "Token protocol") {
action(dd_sendExclusiveDataToFwdGETS, "\d\d", desc="send data") {
peek(requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
out_msg.Sender := machineID;
@@ -913,7 +918,7 @@ machine(L2Cache, "Token protocol") {
}
action(e_sendAck, "e", desc="Send ack with the tokens we've collected thus far.") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -927,7 +932,7 @@ machine(L2Cache, "Token protocol") {
action(e_sendAckToL1Requestor, "\e", desc="Send ack with the tokens we've collected thus far.") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -940,7 +945,7 @@ machine(L2Cache, "Token protocol") {
}
action(e_sendAckToL1RequestorFromTBE, "eee", desc="Send ack with the tokens we've collected thus far.") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Sender := machineID;
@@ -955,14 +960,13 @@ machine(L2Cache, "Token protocol") {
L2_TBEs[address].NumIntPendingAcks := countLocalSharers(address);
DEBUG_EXPR(address);
DEBUG_EXPR(getLocalSharers(address));
- DEBUG_EXPR(id);
DEBUG_EXPR(L2_TBEs[address].NumIntPendingAcks);
if (isLocalOwnerValid(address)) {
L2_TBEs[address].NumIntPendingAcks := L2_TBEs[address].NumIntPendingAcks + 1;
DEBUG_EXPR(getLocalOwner(address));
}
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := machineID;
@@ -982,7 +986,7 @@ machine(L2Cache, "Token protocol") {
L2_TBEs[address].NumIntPendingAcks := countLocalSharers(address);
if (countLocalSharers(address) > 0) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := machineID;
@@ -1013,7 +1017,7 @@ machine(L2Cache, "Token protocol") {
L2_TBEs[address].NumIntPendingAcks := countLocalSharers(address);
}
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := in_msg.Requestor;
@@ -1038,7 +1042,7 @@ machine(L2Cache, "Token protocol") {
L2_TBEs[address].NumIntPendingAcks := countLocalSharers(address);
}
}
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := L2_TBEs[address].L1_GetX_ID;
@@ -1051,7 +1055,7 @@ machine(L2Cache, "Token protocol") {
action(f_sendUnblock, "f", desc="Send unblock to global directory") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:UNBLOCK;
out_msg.Destination.add(map_Address_to_Directory(address));
@@ -1063,7 +1067,7 @@ machine(L2Cache, "Token protocol") {
action(f_sendExclusiveUnblock, "\f", desc="Send unblock to global directory") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE;
out_msg.Destination.add(map_Address_to_Directory(address));
@@ -1140,7 +1144,7 @@ machine(L2Cache, "Token protocol") {
action(j_forwardGlobalRequestToLocalOwner, "j", desc="Forward external request to local owner") {
peek(requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Type := in_msg.Type;
out_msg.Requestor := machineID;
@@ -1156,7 +1160,7 @@ machine(L2Cache, "Token protocol") {
action(k_forwardLocalGETSToLocalSharer, "k", desc="Forward local request to local sharer/owner") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.Requestor := in_msg.Requestor;
@@ -1169,7 +1173,7 @@ machine(L2Cache, "Token protocol") {
}
action(k_forwardLocalGETXToLocalOwner, "\k", desc="Forward local request to local owner") {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := L2_TBEs[address].L1_GetX_ID;
@@ -1183,7 +1187,7 @@ machine(L2Cache, "Token protocol") {
// same as previous except that it assumes the TBE is present to get the number of acks
action(kk_forwardLocalGETXToLocalExclusive, "kk", desc="Forward local request to local owner") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Type := CoherenceRequestType:GETX;
out_msg.Requestor := in_msg.Requestor;
@@ -1197,7 +1201,7 @@ machine(L2Cache, "Token protocol") {
action(kk_forwardLocalGETSToLocalOwner, "\kk", desc="Forward local request to local owner") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Type := CoherenceRequestType:GETS;
out_msg.Requestor := in_msg.Requestor;
@@ -1211,7 +1215,7 @@ machine(L2Cache, "Token protocol") {
action(l_writebackAckNeedData, "l", desc="Send writeback ack to L1 requesting data") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
// out_msg.Type := CoherenceResponseType:WRITEBACK_SEND_DATA;
out_msg.Type := CoherenceRequestType:WB_ACK_DATA;
@@ -1225,7 +1229,7 @@ machine(L2Cache, "Token protocol") {
action(l_writebackAckDropData, "\l", desc="Send writeback ack to L1 indicating to drop data") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
// out_msg.Type := CoherenceResponseType:WRITEBACK_ACK;
out_msg.Type := CoherenceRequestType:WB_ACK;
@@ -1239,7 +1243,7 @@ machine(L2Cache, "Token protocol") {
action(ll_writebackNack, "\ll", desc="Send writeback nack to L1") {
peek(L1requestNetwork_in, RequestMsg) {
- enqueue( localRequestNetwork_out, RequestMsg, latency="L2_RESPONSE_LATENCY" ) {
+ enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) {
out_msg.Address := in_msg.Address;
out_msg.Type := CoherenceRequestType:WB_NACK;
out_msg.Requestor := machineID;
@@ -1305,7 +1309,7 @@ machine(L2Cache, "Token protocol") {
action( qq_sendDataFromTBEToMemory, "qq", desc="Send data from TBE to directory") {
- enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) {
out_msg.Address := address;
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:L2Cache;
@@ -1372,7 +1376,7 @@ machine(L2Cache, "Token protocol") {
}
action(vv_allocateL2CacheBlock, "\v", desc="Set L2 cache tag equal to tag of block B.") {
- L2cacheMemory.allocate(address);
+ L2cacheMemory.allocate(address, new Entry);
}
action(rr_deallocateL2CacheBlock, "\r", desc="Deallocate L2 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
@@ -1389,7 +1393,7 @@ machine(L2Cache, "Token protocol") {
action(uu_profileMiss, "\u", desc="Profile the demand miss") {
peek(L1requestNetwork_in, RequestMsg) {
// AccessModeType not implemented
- //profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor));
+ // profile_L2Cache_miss(convertToGenericType(in_msg.Type), in_msg.AccessMode, MessageSizeTypeToInt(in_msg.MessageSize), in_msg.Prefetch, machineIDToNodeID(in_msg.Requestor));
}
}
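
Several of the invalidation actions above size L2_TBEs[address].NumIntPendingAcks from countLocalSharers(), add one when a local owner is valid, and the matching L1 action decrements its NumPendingMsgs by in_msg.Acks as responses arrive. A compact C++ sketch of that pending-ack bookkeeping, under the assumption that each ack message carries a signed count to subtract:

#include <cassert>
#include <iostream>

// Sketch of the TBE ack bookkeeping: expect one ack per local sharer (plus
// one if a local owner holds the line), count them down as ack messages
// arrive, and only proceed once all of them have been received.
class AckTracker {
  public:
    void expectAcks(int localSharers, bool localOwnerValid) {
        m_pending = localSharers + (localOwnerValid ? 1 : 0);
    }
    // Mirrors NumPendingMsgs := NumPendingMsgs - in_msg.Acks: the count in
    // the message is subtracted, so a negative Acks raises the expectation.
    void onAck(int acks) { m_pending -= acks; }
    bool allAcksReceived() const { return m_pending == 0; }

  private:
    int m_pending = 0;
};

int main() {
    AckTracker tbe;
    tbe.expectAcks(/*localSharers=*/2, /*localOwnerValid=*/true);  // expect 3
    tbe.onAck(1);
    tbe.onAck(1);
    assert(!tbe.allAcksReceived());
    tbe.onAck(1);
    std::cout << std::boolalpha << tbe.allAcksReceived() << std::endl;  // true
    return 0;
}
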
diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm
index a016836c2..8d8ee7f8a 100644
--- a/src/mem/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm
@@ -31,13 +31,15 @@
* $Id$
*/
-machine(Directory, "Directory protocol") {
+machine(Directory, "Directory protocol")
+: int directory_latency
+{
// ** IN QUEUES **
MessageBuffer foo1, network="From", virtual_network="0", ordered="false"; // a mod-L2 bank -> this Dir
MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; // a mod-L2 bank -> this Dir
MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; // a mod-L2 bank -> this Dir
-
+
MessageBuffer goo1, network="To", virtual_network="0", ordered="false";
MessageBuffer forwardFromDir, network="To", virtual_network="1", ordered="false";
MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; // Dir -> mod-L2 bank
@@ -56,11 +58,16 @@ machine(Directory, "Directory protocol") {
OO, desc="Blocked, was in owned";
MO, desc="Blocked, going to owner or maybe modified";
MM, desc="Blocked, going to modified";
+ MM_DMA, desc="Blocked, going to I";
MI, desc="Blocked on a writeback";
MIS, desc="Blocked on a writeback, but don't remove from sharers when received";
OS, desc="Blocked on a writeback";
OSS, desc="Blocked on a writeback, but don't remove from sharers when received";
+
+ XI_M, desc="In a stable state, going to I, waiting for the memory controller";
+ XI_U, desc="In a stable state, going to I, waiting for an unblock";
+ OI_D, desc="In O, going to I, waiting for data";
}
// Events
@@ -75,6 +82,11 @@ machine(Directory, "Directory protocol") {
Exclusive_Unblock, desc="The processor become the exclusive owner (E or M) of the line";
Clean_Writeback, desc="The final message as part of a PutX/PutS, no data";
Dirty_Writeback, desc="The final message as part of a PutX/PutS, contains data";
+ Memory_Data, desc="Fetched data from memory arrives";
+ Memory_Ack, desc="Writeback Ack from memory arrives";
+ DMA_READ, desc="DMA Read";
+ DMA_WRITE, desc="DMA Write";
+ Data, desc="Data to directory";
}
// TYPES
@@ -88,15 +100,36 @@ machine(Directory, "Directory protocol") {
int WaitingUnblocks, desc="Number of acks we're waiting for";
}
+ structure(TBE, desc="...") {
+ Address PhysicalAddress, desc="Physical address for this entry";
+ int Len, desc="Length of request";
+ DataBlock DataBlk, desc="DataBlk";
+ MachineID Requestor, desc="original requestor";
+ }
+
external_type(DirectoryMemory) {
Entry lookup(Address);
bool isPresent(Address);
}
+ external_type(TBETable) {
+ TBE lookup(Address);
+ void allocate(Address);
+ void deallocate(Address);
+ bool isPresent(Address);
+ }
+
+ // to simulate detailed DRAM
+ external_type(MemoryControl, inport="yes", outport="yes") {
+
+ }
+
// ** OBJECTS **
- DirectoryMemory directory, constructor_hack="i";
+ DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
+ MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
+ TBETable TBEs, template_hack="<Directory_TBE>";
State getState(Address addr) {
return directory[addr].DirectoryState;
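
The new TBETable external type gives the directory per-line transient state (physical address, length, requestor) for outstanding DMA requests, with the usual allocate/deallocate/isPresent/lookup interface. A minimal C++ stand-in, assuming entries are keyed by line address:

#include <cassert>
#include <cstdint>
#include <iostream>
#include <unordered_map>

// Minimal stand-in for the SLICC TBETable: transient request state keyed by
// line address, matching the interface the directory declares.
struct TBE {
    uint64_t physicalAddress = 0;  // original (unaligned) request address
    int len = 0;                   // request length in bytes
    uint64_t requestor = 0;        // who to answer when memory responds
};

class TBETable {
  public:
    void allocate(uint64_t lineAddr) { m_table.emplace(lineAddr, TBE{}); }
    void deallocate(uint64_t lineAddr) { m_table.erase(lineAddr); }
    bool isPresent(uint64_t lineAddr) const { return m_table.count(lineAddr) != 0; }
    TBE& lookup(uint64_t lineAddr) {
        assert(isPresent(lineAddr));
        return m_table.find(lineAddr)->second;
    }

  private:
    std::unordered_map<uint64_t, TBE> m_table;
};

int main() {
    TBETable tbes;
    tbes.allocate(0x2000);
    tbes.lookup(0x2000).physicalAddress = 0x2010;
    tbes.lookup(0x2000).len = 8;
    std::cout << "pending DMA bytes: " << tbes.lookup(0x2000).len << std::endl;
    tbes.deallocate(0x2000);
    return 0;
}
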
@@ -164,6 +197,7 @@ machine(Directory, "Directory protocol") {
out_port(responseNetwork_out, ResponseMsg, responseFromDir);
// out_port(requestQueue_out, ResponseMsg, requestFromDir); // For recycling requests
out_port(goo1_out, ResponseMsg, goo1);
+ out_port(memQueue_out, MemoryMsg, memBuffer);
// ** IN_PORTS **
@@ -188,6 +222,8 @@ machine(Directory, "Directory protocol") {
trigger(Event:Dirty_Writeback, in_msg.Address);
} else if (in_msg.Type == CoherenceResponseType:WRITEBACK_CLEAN_ACK) {
trigger(Event:Clean_Writeback, in_msg.Address);
+ } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
+ trigger(Event:Data, in_msg.Address);
} else {
error("Invalid message");
}
@@ -208,7 +244,27 @@ machine(Directory, "Directory protocol") {
trigger(Event:PUTO, in_msg.Address);
} else if (in_msg.Type == CoherenceRequestType:PUTO_SHARERS) {
trigger(Event:PUTO_SHARERS, in_msg.Address);
+ } else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
+ trigger(Event:DMA_READ, makeLineAddress(in_msg.Address));
+ } else if (in_msg.Type == CoherenceRequestType:DMA_WRITE) {
+ trigger(Event:DMA_WRITE, makeLineAddress(in_msg.Address));
+ } else {
+ error("Invalid message");
+ }
+ }
+ }
+ }
+
+ // off-chip memory request/response is done
+ in_port(memQueue_in, MemoryMsg, memBuffer) {
+ if (memQueue_in.isReady()) {
+ peek(memQueue_in, MemoryMsg) {
+ if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
+ trigger(Event:Memory_Data, in_msg.Address);
+ } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
+ trigger(Event:Memory_Ack, in_msg.Address);
} else {
+ DEBUG_EXPR(in_msg.Type);
error("Invalid message");
}
}
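
The directory now forwards fetches and writebacks to a MemoryControl model through memQueue_out and reacts to its replies on memQueue_in: a MEMORY_READ reply triggers Memory_Data, a MEMORY_WB reply triggers Memory_Ack. A small C++ sketch of that dispatch, with hypothetical queue plumbing in place of the in_port/out_port machinery:

#include <cstdint>
#include <iostream>
#include <queue>
#include <string>

// Sketch of the directory's memory-side ports: requests go out tagged as
// MEMORY_READ or MEMORY_WB, and the reply type decides whether the state
// machine sees Memory_Data or Memory_Ack.
enum class MemoryRequestType { MEMORY_READ, MEMORY_WB };
enum class Event { Memory_Data, Memory_Ack };

struct MemoryMsg {
    uint64_t address;
    MemoryRequestType type;
};

std::string trigger(const MemoryMsg& in_msg) {
    Event e = (in_msg.type == MemoryRequestType::MEMORY_READ)
                  ? Event::Memory_Data   // fetched data arrives
                  : Event::Memory_Ack;   // writeback acknowledged
    return e == Event::Memory_Data ? "Memory_Data" : "Memory_Ack";
}

int main() {
    std::queue<MemoryMsg> memQueue_in;   // responses from the DRAM model
    memQueue_in.push({0x3000, MemoryRequestType::MEMORY_READ});
    memQueue_in.push({0x3040, MemoryRequestType::MEMORY_WB});

    while (!memQueue_in.empty()) {
        const MemoryMsg& m = memQueue_in.front();
        std::cout << std::hex << "0x" << m.address << " -> "
                  << trigger(m) << std::endl;
        memQueue_in.pop();               // q_popMemQueue
    }
    return 0;
}
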
@@ -219,7 +275,7 @@ machine(Directory, "Directory protocol") {
action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:WB_ACK;
out_msg.Requestor := in_msg.Requestor;
@@ -231,7 +287,7 @@ machine(Directory, "Directory protocol") {
action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:WB_NACK;
out_msg.Requestor := in_msg.Requestor;
@@ -254,26 +310,21 @@ machine(Directory, "Directory protocol") {
directory[address].Sharers.clear();
}
- action(d_sendData, "d", desc="Send data to requestor") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="MEMORY_LATENCY") {
- // enqueue(responseNetwork_out, ResponseMsg, latency="L2_RESPONSE_LATENCY") {
+ action(d_sendDataMsg, "d", desc="Send data to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
out_msg.Address := address;
-
- if (in_msg.Type == CoherenceRequestType:GETS && directory[address].Sharers.count() == 0) {
- out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
- } else {
- out_msg.Type := CoherenceResponseType:DATA;
- }
-
out_msg.Sender := machineID;
out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.DataBlk := directory[in_msg.Address].DataBlk;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ //out_msg.DataBlk := directory[in_msg.Address].DataBlk;
+ out_msg.DataBlk := in_msg.DataBlk;
out_msg.Dirty := false; // By definition, the block is now clean
- out_msg.Acks := directory[address].Sharers.count();
- if (directory[address].Sharers.isElement(in_msg.Requestor)) {
- out_msg.Acks := out_msg.Acks - 1;
+ out_msg.Acks := in_msg.Acks;
+ if (in_msg.ReadX) {
+ out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+ } else {
+ out_msg.Type := CoherenceResponseType:DATA;
}
out_msg.MessageSize := MessageSizeType:Response_Data;
}
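
d_sendDataMsg above builds the coherence response purely from the MemoryMsg: the fetch request (queued later in this file by qf_queueMemoryFetchRequest) stashes ReadX and the precomputed ack count, memory passes them back with the read data, and ReadX alone selects DATA_EXCLUSIVE versus DATA. A C++ sketch of that round trip, under the assumption that the memory model echoes these fields untouched:

#include <cstdint>
#include <iostream>

// Sketch of the fields carried through memory: ReadX marks a GETS with no
// sharers (exclusive data is safe), and Acks is the sharer count minus one
// if the requestor is itself a sharer.
enum class CoherenceResponseType { DATA, DATA_EXCLUSIVE };

struct MemoryMsg {       // fields echoed back by the memory model
    uint64_t address;
    bool readX;
    int acks;
};

struct ResponseMsg {
    uint64_t address;
    CoherenceResponseType type;
    int acks;
};

MemoryMsg queueMemoryFetch(uint64_t addr, bool isGETS, int sharerCount,
                           bool requestorIsSharer) {
    return MemoryMsg{addr,
                     /*readX=*/isGETS && sharerCount == 0,
                     /*acks=*/sharerCount - (requestorIsSharer ? 1 : 0)};
}

ResponseMsg sendDataMsg(const MemoryMsg& in_msg) {
    return ResponseMsg{in_msg.address,
                       in_msg.readX ? CoherenceResponseType::DATA_EXCLUSIVE
                                    : CoherenceResponseType::DATA,
                       in_msg.acks};
}

int main() {
    // GETS to a line with no sharers: data comes back exclusive, zero acks.
    ResponseMsg r = sendDataMsg(queueMemoryFetch(0x4000, true, 0, false));
    std::cout << (r.type == CoherenceResponseType::DATA_EXCLUSIVE
                      ? "DATA_EXCLUSIVE" : "DATA")
              << ", acks=" << r.acks << std::endl;
    return 0;
}
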
@@ -289,7 +340,7 @@ machine(Directory, "Directory protocol") {
action(f_forwardRequest, "f", desc="Forward request to owner") {
peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := in_msg.Type;
out_msg.Requestor := in_msg.Requestor;
@@ -303,11 +354,27 @@ machine(Directory, "Directory protocol") {
}
}
+ action(f_forwardRequestDirIsRequestor, "\f", desc="Forward request to owner, with the directory as requestor") {
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
+ out_msg.Address := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.addNetDest(directory[in_msg.Address].Owner);
+ out_msg.Acks := directory[address].Sharers.count();
+ if (directory[address].Sharers.isElement(in_msg.Requestor)) {
+ out_msg.Acks := out_msg.Acks - 1;
+ }
+ out_msg.MessageSize := MessageSizeType:Forwarded_Control;
+ }
+ }
+ }
+
action(g_sendInvalidations, "g", desc="Send invalidations to sharers, not including the requester") {
peek(requestQueue_in, RequestMsg) {
if ((directory[in_msg.Address].Sharers.count() > 1) ||
((directory[in_msg.Address].Sharers.count() > 0) && (directory[in_msg.Address].Sharers.isElement(in_msg.Requestor) == false))) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
+ enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceRequestType:INV;
out_msg.Requestor := in_msg.Requestor;
@@ -338,7 +405,7 @@ machine(Directory, "Directory protocol") {
}
}
- action(ll_checkDataInMemory, "\l", desc="Check PUTX/PUTO data is same as in the memory") {
+ action(ll_checkDataInMemory, "\ld", desc="Check PUTX/PUTO data is same as in the memory") {
peek(unblockNetwork_in, ResponseMsg) {
assert(in_msg.Dirty == false);
assert(in_msg.MessageSize == MessageSizeType:Writeback_Control);
@@ -366,6 +433,70 @@ machine(Directory, "Directory protocol") {
assert(directory[address].WaitingUnblocks >= 0);
}
+ action(q_popMemQueue, "q", desc="Pop off-chip request queue") {
+ memQueue_in.dequeue();
+ }
+
+ action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_READ;
+ out_msg.Sender := machineID;
+ out_msg.OriginalRequestorMachId := in_msg.Requestor;
+ out_msg.DataBlk := directory[in_msg.Address].DataBlk;
+ out_msg.MessageSize := in_msg.MessageSize;
+ //out_msg.Prefetch := false;
+ // These are not used by memory but are passed back here with the read data:
+ out_msg.ReadX := (in_msg.Type == CoherenceRequestType:GETS && directory[address].Sharers.count() == 0);
+ out_msg.Acks := directory[address].Sharers.count();
+ if (directory[address].Sharers.isElement(in_msg.Requestor)) {
+ out_msg.Acks := out_msg.Acks - 1;
+ }
+ DEBUG_EXPR(out_msg);
+ }
+ }
+ }
+
+ action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") {
+ peek(unblockNetwork_in, ResponseMsg) {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := machineID;
+ if (TBEs.isPresent(address)) {
+ out_msg.OriginalRequestorMachId := TBEs[address].Requestor;
+ }
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := in_msg.MessageSize;
+ //out_msg.Prefetch := false;
+ // Not used:
+ out_msg.ReadX := false;
+ out_msg.Acks := directory[address].Sharers.count(); // for dma requests
+ DEBUG_EXPR(out_msg);
+ }
+ }
+ }
+
+ action(qw_queueMemoryWBRequest2, "/qw", desc="Queue off-chip writeback request for a DMA write") {
+ peek(requestQueue_in, RequestMsg) {
+ enqueue(memQueue_out, MemoryMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Type := MemoryRequestType:MEMORY_WB;
+ out_msg.Sender := machineID;
+ out_msg.OriginalRequestorMachId := in_msg.Requestor;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := in_msg.MessageSize;
+ //out_msg.Prefetch := false;
+ // Not used:
+ out_msg.ReadX := false;
+ out_msg.Acks := directory[address].Sharers.count(); // for dma requests
+ DEBUG_EXPR(out_msg);
+ }
+ }
+ }
+
+
// action(z_stall, "z", desc="Cannot be handled right now.") {
// Special name recognized as do nothing case
// }
@@ -374,26 +505,109 @@ machine(Directory, "Directory protocol") {
requestQueue_in.recycle();
}
+ action(a_sendDMAAck, "\a", desc="Send DMA Ack that write completed, along with Inv Ack count") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, latency="1") {
+ out_msg.Address := address;
+ out_msg.Sender := machineID;
+ out_msg.SenderMachine := MachineType:Directory;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Acks := in_msg.Acks;
+ out_msg.Type := CoherenceResponseType:DMA_ACK;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(l_writeDMADataToMemory, "\l", desc="Write data from a DMA_WRITE to memory") {
+ peek(requestQueue_in, RequestMsg) {
+ directory[address].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.Address), in_msg.Len);
+ }
+ }
+
+ action(l_writeDMADataToMemoryFromTBE, "\ll", desc="Write data from a DMA_WRITE to memory, using the copy in the TBE") {
+ directory[address].DataBlk.copyPartial(TBEs[address].DataBlk,
+ addressOffset(TBEs[address].PhysicalAddress),
+ TBEs[address].Len);
+ }
+
+ action(v_allocateTBE, "v", desc="Allocate TBE entry") {
+ peek (requestQueue_in, RequestMsg) {
+ TBEs.allocate(address);
+ TBEs[address].PhysicalAddress := in_msg.Address;
+ TBEs[address].Len := in_msg.Len;
+ TBEs[address].DataBlk := in_msg.DataBlk;
+ TBEs[address].Requestor := in_msg.Requestor;
+ }
+ }
+
+ action(w_deallocateTBE, "w", desc="Deallocate TBE entry") {
+ TBEs.deallocate(address);
+ }
+
+
+
// TRANSITIONS
transition(I, GETX, MM) {
- d_sendData;
+ qf_queueMemoryFetchRequest;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(I, DMA_READ, XI_M) {
+ qf_queueMemoryFetchRequest;
i_popIncomingRequestQueue;
}
+ transition(I, DMA_WRITE, XI_M) {
+ qw_queueMemoryWBRequest2;
+ l_writeDMADataToMemory;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(XI_M, Memory_Data, XI_U) {
+ d_sendDataMsg; // ack count may be zero
+ q_popMemQueue;
+ }
+
+ transition(XI_M, Memory_Ack, XI_U) {
+ a_sendDMAAck; // ack count may be zero
+ q_popMemQueue;
+ }
+
+ transition(XI_U, Exclusive_Unblock, I) {
+ cc_clearSharers;
+ c_clearOwner;
+ j_popIncomingUnblockQueue;
+ }
+
transition(S, GETX, MM) {
- d_sendData;
+ qf_queueMemoryFetchRequest;
g_sendInvalidations;
i_popIncomingRequestQueue;
}
+ transition(S, DMA_READ, XI_M) {
+ qf_queueMemoryFetchRequest;
+ g_sendInvalidations; // the DMA will collect the invalidations then send an Unblock Exclusive
+ i_popIncomingRequestQueue;
+ }
+
+ transition(S, DMA_WRITE, XI_M) {
+ qw_queueMemoryWBRequest2;
+ l_writeDMADataToMemory;
+ g_sendInvalidations; // the DMA will collect invalidations
+ i_popIncomingRequestQueue;
+ }
+
transition(I, GETS, IS) {
- d_sendData;
+ qf_queueMemoryFetchRequest;
i_popIncomingRequestQueue;
}
transition({S, SS}, GETS, SS) {
- d_sendData;
+ qf_queueMemoryFetchRequest;
n_incrementOutstanding;
i_popIncomingRequestQueue;
}
@@ -414,6 +628,27 @@ machine(Directory, "Directory protocol") {
i_popIncomingRequestQueue;
}
+ transition(O, DMA_READ, XI_U) {
+ f_forwardRequest; // this will cause the data to go to DMA directly
+ g_sendInvalidations; // this will cause acks to be sent to the DMA
+ i_popIncomingRequestQueue;
+ }
+
+ transition({O,M}, DMA_WRITE, OI_D) {
+ f_forwardRequestDirIsRequestor; // need the modified data before we can proceed
+ g_sendInvalidations; // these go to the DMA Controller
+ v_allocateTBE;
+ i_popIncomingRequestQueue;
+ }
+
+ transition(OI_D, Data, XI_M) {
+ qw_queueMemoryWBRequest;
+ l_writeDataToMemory;
+ l_writeDMADataToMemoryFromTBE;
+ w_deallocateTBE;
+ j_popIncomingUnblockQueue;
+ }
+
transition({O, OO}, GETS, OO) {
f_forwardRequest;
n_incrementOutstanding;
@@ -425,6 +660,12 @@ machine(Directory, "Directory protocol") {
i_popIncomingRequestQueue;
}
+ // no exclusive unblock will show up at the directory
+ transition(M, DMA_READ, XI_U) {
+ f_forwardRequest; // this will cause the data to go to DMA directly
+ i_popIncomingRequestQueue;
+ }
+
transition(M, GETS, MO) {
f_forwardRequest;
i_popIncomingRequestQueue;
@@ -457,7 +698,7 @@ machine(Directory, "Directory protocol") {
}
- transition({MM, MO, MI, MIS, OS, OSS}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX}) {
+ transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_U, OI_D}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
zz_recycleRequest;
}
@@ -472,7 +713,7 @@ machine(Directory, "Directory protocol") {
j_popIncomingUnblockQueue;
}
- transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX}) {
+ transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) {
zz_recycleRequest;
}
@@ -519,12 +760,14 @@ machine(Directory, "Directory protocol") {
c_clearOwner;
cc_clearSharers;
l_writeDataToMemory;
+ qw_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
transition(MIS, Dirty_Writeback, S) {
c_moveOwnerToSharer;
l_writeDataToMemory;
+ qw_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
@@ -536,12 +779,14 @@ machine(Directory, "Directory protocol") {
transition(OS, Dirty_Writeback, S) {
c_clearOwner;
l_writeDataToMemory;
+ qw_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
transition(OSS, Dirty_Writeback, S) {
c_moveOwnerToSharer;
l_writeDataToMemory;
+ qw_queueMemoryWBRequest;
j_popIncomingUnblockQueue;
}
@@ -570,4 +815,15 @@ machine(Directory, "Directory protocol") {
transition({OS, OSS}, Unblock, O) {
j_popIncomingUnblockQueue;
}
+
+ transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data) {
+ d_sendDataMsg;
+ q_popMemQueue;
+ }
+
+ transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Ack) {
+ //a_sendAck;
+ q_popMemQueue;
+ }
+
}
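Note on the directory changes above: the directory no longer reads the data block synchronously in d_sendData; instead qf_queueMemoryFetchRequest piggybacks the original requestor, an exclusive-read hint (ReadX), and the expected invalidation-ack count on the MemoryMsg, and d_sendDataMsg copies them into the response once memory replies. A minimal C++ sketch of that bookkeeping, with hypothetical names, purely for illustration:

    struct FetchHints {
        bool readX;  // respond with DATA_EXCLUSIVE when set
        int acks;    // invalidation acks the requestor should expect
    };

    // Mirrors qf_queueMemoryFetchRequest: memory ignores these fields and
    // returns them unchanged with the read data for d_sendDataMsg to use.
    FetchHints computeFetchHints(bool isGets, int sharerCount, bool requestorIsSharer)
    {
        FetchHints hints;
        hints.readX = isGets && sharerCount == 0;
        hints.acks = sharerCount - (requestorIsSharer ? 1 : 0);
        return hints;
    }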
diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm
new file mode 100644
index 000000000..ae86e24da
--- /dev/null
+++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm
@@ -0,0 +1,268 @@
+
+machine(DMA, "DMA Controller")
+: int request_latency,
+ int response_latency
+{
+
+ MessageBuffer goo1, network="From", virtual_network="0", ordered="false";
+ MessageBuffer goo2, network="From", virtual_network="1", ordered="false";
+ MessageBuffer responseFromDir, network="From", virtual_network="2", ordered="false";
+
+ MessageBuffer foo1, network="To", virtual_network="0", ordered="false";
+ MessageBuffer reqToDir, network="To", virtual_network="1", ordered="false";
+ MessageBuffer respToDir, network="To", virtual_network="2", ordered="false";
+
+ enumeration(State, desc="DMA states", default="DMA_State_READY") {
+ READY, desc="Ready to accept a new request";
+ BUSY_RD, desc="Busy: currently processing a read request";
+ BUSY_WR, desc="Busy: currently processing a write request";
+ }
+
+ enumeration(Event, desc="DMA events") {
+ ReadRequest, desc="A new read request";
+ WriteRequest, desc="A new write request";
+ Data, desc="Data from a DMA memory read";
+ DMA_Ack, desc="DMA write to memory completed";
+ Inv_Ack, desc="Invalidation Ack from a sharer";
+ All_Acks, desc="All acks received";
+ }
+
+ structure(TBE, desc="...") {
+ Address address, desc="Physical address";
+ int NumAcks, default="0", desc="Number of Acks pending";
+ DataBlock DataBlk, desc="Data";
+ }
+
+ external_type(DMASequencer) {
+ void ackCallback();
+ void dataCallback(DataBlock);
+ }
+
+ external_type(TBETable) {
+ TBE lookup(Address);
+ void allocate(Address);
+ void deallocate(Address);
+ bool isPresent(Address);
+ }
+
+ MessageBuffer mandatoryQueue, ordered="false";
+ MessageBuffer triggerQueue, ordered="true";
+ DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])';
+ TBETable TBEs, template_hack="<DMA_TBE>";
+ State cur_state;
+
+ State getState(Address addr) {
+ return cur_state;
+ }
+ void setState(Address addr, State state) {
+ cur_state := state;
+ }
+
+ out_port(reqToDirectory_out, RequestMsg, reqToDir, desc="...");
+ out_port(respToDirectory_out, ResponseMsg, respToDir, desc="...");
+ out_port(foo1_out, ResponseMsg, foo1, desc="...");
+ out_port(triggerQueue_out, TriggerMsg, triggerQueue, desc="...");
+
+ in_port(goo1_in, RequestMsg, goo1) {
+ if (goo1_in.isReady()) {
+ peek(goo1_in, RequestMsg) {
+ assert(false);
+ }
+ }
+ }
+
+ in_port(goo2_in, RequestMsg, goo2) {
+ if (goo2_in.isReady()) {
+ peek(goo2_in, RequestMsg) {
+ assert(false);
+ }
+ }
+ }
+
+ in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
+ if (dmaRequestQueue_in.isReady()) {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ if (in_msg.Type == SequencerRequestType:LD ) {
+ trigger(Event:ReadRequest, in_msg.LineAddress);
+ } else if (in_msg.Type == SequencerRequestType:ST) {
+ trigger(Event:WriteRequest, in_msg.LineAddress);
+ } else {
+ error("Invalid request type");
+ }
+ }
+ }
+ }
+
+ in_port(dmaResponseQueue_in, ResponseMsg, responseFromDir, desc="...") {
+ if (dmaResponseQueue_in.isReady()) {
+ peek(dmaResponseQueue_in, ResponseMsg) {
+ if (in_msg.Type == CoherenceResponseType:DMA_ACK) {
+ trigger(Event:DMA_Ack, makeLineAddress(in_msg.Address));
+ } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE ||
+ in_msg.Type == CoherenceResponseType:DATA) {
+ trigger(Event:Data, makeLineAddress(in_msg.Address));
+ } else if (in_msg.Type == CoherenceResponseType:ACK) {
+ trigger(Event:Inv_Ack, makeLineAddress(in_msg.Address));
+ } else {
+ error("Invalid response type");
+ }
+ }
+ }
+ }
+
+ // Trigger Queue
+ in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
+ if (triggerQueue_in.isReady()) {
+ peek(triggerQueue_in, TriggerMsg) {
+ if (in_msg.Type == TriggerType:ALL_ACKS) {
+ trigger(Event:All_Acks, in_msg.Address);
+ } else {
+ error("Unexpected message");
+ }
+ }
+ }
+ }
+
+ action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
+ out_msg.Address := in_msg.PhysicalAddress;
+ out_msg.Type := CoherenceRequestType:DMA_READ;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Len := in_msg.Len;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
+ peek(dmaRequestQueue_in, SequencerMsg) {
+ enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
+ out_msg.Address := in_msg.PhysicalAddress;
+ out_msg.Type := CoherenceRequestType:DMA_WRITE;
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Len := in_msg.Len;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.Requestor := machineID;
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+ }
+
+ action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
+ dma_sequencer.ackCallback();
+ }
+
+ action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") {
+ if (TBEs[address].NumAcks == 0) {
+ enqueue(triggerQueue_out, TriggerMsg) {
+ out_msg.Address := address;
+ out_msg.Type := TriggerType:ALL_ACKS;
+ }
+ }
+ }
+
+ action(u_updateAckCount, "u", desc="Update ack count") {
+ peek(dmaResponseQueue_in, ResponseMsg) {
+ TBEs[address].NumAcks := TBEs[address].NumAcks - in_msg.Acks;
+ }
+ }
+
+ action(u_sendExclusiveUnblockToDir, "\u", desc="Send exclusive unblock to directory") {
+ enqueue(respToDirectory_out, ResponseMsg, latency=response_latency) {
+ out_msg.Address := address;
+ out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE;
+ out_msg.Destination.add(map_Address_to_Directory(address));
+ out_msg.MessageSize := MessageSizeType:Writeback_Control;
+ }
+ }
+
+ action(p_popRequestQueue, "p", desc="Pop request queue") {
+ dmaRequestQueue_in.dequeue();
+ }
+
+ action(p_popResponseQueue, "\p", desc="Pop response queue") {
+ dmaResponseQueue_in.dequeue();
+ }
+
+ action(p_popTriggerQueue, "pp", desc="Pop trigger queue") {
+ triggerQueue_in.dequeue();
+ }
+
+ action(t_updateTBEData, "t", desc="Update TBE Data") {
+ peek(dmaResponseQueue_in, ResponseMsg) {
+ TBEs[address].DataBlk := in_msg.DataBlk;
+ }
+ }
+
+ action(d_dataCallbackFromTBE, "/d", desc="data callback with data from TBE") {
+ dma_sequencer.dataCallback(TBEs[address].DataBlk);
+ }
+
+ action(v_allocateTBE, "v", desc="Allocate TBE entry") {
+ TBEs.allocate(address);
+ }
+
+ action(w_deallocateTBE, "w", desc="Deallocate TBE entry") {
+ TBEs.deallocate(address);
+ }
+
+ action(z_stall, "z", desc="DMA is busy; stall") {
+ // do nothing
+ }
+
+
+
+ transition(READY, ReadRequest, BUSY_RD) {
+ s_sendReadRequest;
+ v_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(BUSY_RD, Inv_Ack) {
+ u_updateAckCount;
+ o_checkForCompletion;
+ p_popResponseQueue;
+ }
+
+ transition(BUSY_RD, Data) {
+ t_updateTBEData;
+ u_updateAckCount;
+ o_checkForCompletion;
+ p_popResponseQueue;
+ }
+
+ transition(BUSY_RD, All_Acks, READY) {
+ d_dataCallbackFromTBE;
+ u_sendExclusiveUnblockToDir;
+ w_deallocateTBE;
+ p_popTriggerQueue;
+ }
+
+ transition(READY, WriteRequest, BUSY_WR) {
+ s_sendWriteRequest;
+ v_allocateTBE;
+ p_popRequestQueue;
+ }
+
+ transition(BUSY_WR, Inv_Ack) {
+ u_updateAckCount;
+ o_checkForCompletion;
+ p_popResponseQueue;
+ }
+
+ transition(BUSY_WR, DMA_Ack) {
+ u_updateAckCount; // actually increases
+ o_checkForCompletion;
+ p_popResponseQueue;
+ }
+
+ transition(BUSY_WR, All_Acks, READY) {
+ a_ackCallback;
+ u_sendExclusiveUnblockToDir;
+ w_deallocateTBE;
+ p_popTriggerQueue;
+ }
+}
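The DMA controller above counts completion with a single per-line balance: u_updateAckCount subtracts each incoming message's Acks field from the TBE, and o_checkForCompletion enqueues the internal ALL_ACKS trigger once the balance returns to zero. A hedged C++ sketch of that logic (the field and function names here are illustrative, not the generated controller's API):

    struct DmaTbe {
        int numAcks = 0;  // running balance of expected acknowledgements
    };

    // Mirrors u_updateAckCount followed by o_checkForCompletion: every
    // response adjusts the balance; a zero balance means all acks arrived.
    bool updateAcksAndCheckDone(DmaTbe &tbe, int acksInMessage)
    {
        tbe.numAcks -= acksInMessage;
        return tbe.numAcks == 0;  // true => enqueue the ALL_ACKS trigger
    }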
diff --git a/src/mem/protocol/MOESI_CMP_directory-msg.sm b/src/mem/protocol/MOESI_CMP_directory-msg.sm
index 08b4abec3..edbff0c96 100644
--- a/src/mem/protocol/MOESI_CMP_directory-msg.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-msg.sm
@@ -44,6 +44,9 @@ enumeration(CoherenceRequestType, desc="...") {
WB_ACK_DATA, desc="Writeback ack";
WB_NACK, desc="Writeback neg. ack";
INV, desc="Invalidation";
+
+ DMA_READ, desc="DMA Read";
+ DMA_WRITE, desc="DMA Write";
}
// CoherenceResponseType
@@ -56,6 +59,8 @@ enumeration(CoherenceResponseType, desc="...") {
WRITEBACK_CLEAN_DATA, desc="Clean writeback (contains data)";
WRITEBACK_CLEAN_ACK, desc="Clean writeback (contains no data)";
WRITEBACK_DIRTY_DATA, desc="Dirty writeback (contains data)";
+
+ DMA_ACK, desc="Ack that a DMA write completed";
}
// TriggerType
@@ -72,10 +77,12 @@ structure(TriggerMsg, desc="...", interface="Message") {
// RequestMsg (and also forwarded requests)
structure(RequestMsg, desc="...", interface="NetworkMessage") {
Address Address, desc="Physical address for this request";
+ int Len, desc="Length of Request";
CoherenceRequestType Type, desc="Type of request (GetS, GetX, PutX, etc)";
MachineID Requestor, desc="Node who initiated the request";
MachineType RequestorMachine, desc="type of component";
NetDest Destination, desc="Multicast destination mask";
+ DataBlock DataBlk, desc="data for the cache line (DMA WRITE request)";
int Acks, desc="How many acks to expect";
MessageSizeType MessageSize, desc="size category of the message";
AccessModeType AccessMode, desc="user/supervisor access type";
@@ -95,32 +102,4 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
MessageSizeType MessageSize, desc="size category of the message";
}
-GenericRequestType convertToGenericType(CoherenceRequestType type) {
- if(type == CoherenceRequestType:PUTX) {
- return GenericRequestType:PUTX;
- } else if(type == CoherenceRequestType:GETS) {
- return GenericRequestType:GETS;
- } else if(type == CoherenceRequestType:GETX) {
- return GenericRequestType:GETX;
- } else if(type == CoherenceRequestType:PUTS) {
- return GenericRequestType:PUTS;
- } else if(type == CoherenceRequestType:PUTX) {
- return GenericRequestType:PUTS;
- } else if(type == CoherenceRequestType:PUTO) {
- return GenericRequestType:PUTO;
- } else if(type == CoherenceRequestType:PUTO_SHARERS) {
- return GenericRequestType:PUTO;
- } else if(type == CoherenceRequestType:INV) {
- return GenericRequestType:INV;
- } else if(type == CoherenceRequestType:WB_ACK) {
- return GenericRequestType:WB_ACK;
- } else if(type == CoherenceRequestType:WB_ACK_DATA) {
- return GenericRequestType:WB_ACK;
- } else if(type == CoherenceRequestType:WB_NACK) {
- return GenericRequestType:NACK;
- } else {
- DEBUG_EXPR(type);
- error("invalid CoherenceRequestType");
- }
-}
diff --git a/src/mem/protocol/MOESI_CMP_directory.slicc b/src/mem/protocol/MOESI_CMP_directory.slicc
index c552d7157..f288aa4b0 100644
--- a/src/mem/protocol/MOESI_CMP_directory.slicc
+++ b/src/mem/protocol/MOESI_CMP_directory.slicc
@@ -1,5 +1,6 @@
MOESI_CMP_directory-msg.sm
MOESI_CMP_directory-L2cache.sm
MOESI_CMP_directory-L1cache.sm
+MOESI_CMP_directory-dma.sm
MOESI_CMP_directory-dir.sm
standard_CMP-protocol.sm
diff --git a/src/mem/protocol/MOESI_CMP_directory_m-dir.sm b/src/mem/protocol/MOESI_CMP_directory_m-dir.sm
deleted file mode 100644
index 3a4d875c1..000000000
--- a/src/mem/protocol/MOESI_CMP_directory_m-dir.sm
+++ /dev/null
@@ -1,652 +0,0 @@
-
-/*
- * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * $Id$
- */
-
-machine(Directory, "Directory protocol") {
-
- // ** IN QUEUES **
- MessageBuffer foo1, network="From", virtual_network="0", ordered="false"; // a mod-L2 bank -> this Dir
- MessageBuffer requestToDir, network="From", virtual_network="1", ordered="false"; // a mod-L2 bank -> this Dir
- MessageBuffer responseToDir, network="From", virtual_network="2", ordered="false"; // a mod-L2 bank -> this Dir
-
- MessageBuffer goo1, network="To", virtual_network="0", ordered="false";
- MessageBuffer forwardFromDir, network="To", virtual_network="1", ordered="false";
- MessageBuffer responseFromDir, network="To", virtual_network="2", ordered="false"; // Dir -> mod-L2 bank
-
-
- // STATES
- enumeration(State, desc="Directory states", default="Directory_State_I") {
- // Base states
- I, desc="Invalid";
- S, desc="Shared";
- O, desc="Owner";
- M, desc="Modified";
-
- IS, desc="Blocked, was in idle";
- SS, desc="Blocked, was in shared";
- OO, desc="Blocked, was in owned";
- MO, desc="Blocked, going to owner or maybe modified";
- MM, desc="Blocked, going to modified";
-
- MI, desc="Blocked on a writeback";
- MIS, desc="Blocked on a writeback, but don't remove from sharers when received";
- OS, desc="Blocked on a writeback";
- OSS, desc="Blocked on a writeback, but don't remove from sharers when received";
- }
-
- // Events
- enumeration(Event, desc="Directory events") {
- GETX, desc="A GETX arrives";
- GETS, desc="A GETS arrives";
- PUTX, desc="A PUTX arrives";
- PUTO, desc="A PUTO arrives";
- PUTO_SHARERS, desc="A PUTO arrives, but don't remove from sharers list";
- Unblock, desc="An unblock message arrives";
- Last_Unblock, desc="An unblock message arrives, we're not waiting for any additional unblocks";
- Exclusive_Unblock, desc="The processor become the exclusive owner (E or M) of the line";
- Clean_Writeback, desc="The final message as part of a PutX/PutS, no data";
- Dirty_Writeback, desc="The final message as part of a PutX/PutS, contains data";
- Memory_Data, desc="Fetched data from memory arrives";
- Memory_Ack, desc="Writeback Ack from memory arrives";
- }
-
- // TYPES
-
- // DirectoryEntry
- structure(Entry, desc="...") {
- State DirectoryState, desc="Directory state";
- DataBlock DataBlk, desc="data for the block";
- NetDest Sharers, desc="Sharers for this block";
- NetDest Owner, desc="Owner of this block";
- int WaitingUnblocks, desc="Number of acks we're waiting for";
- }
-
- external_type(DirectoryMemory) {
- Entry lookup(Address);
- bool isPresent(Address);
- }
-
- // to simulate detailed DRAM
- external_type(MemoryControl, inport="yes", outport="yes") {
-
- }
-
-
- // ** OBJECTS **
-
- DirectoryMemory directory, constructor_hack="i";
- MemoryControl memBuffer, constructor_hack="i";
-
- State getState(Address addr) {
- return directory[addr].DirectoryState;
- }
-
- void setState(Address addr, State state) {
- if (directory.isPresent(addr)) {
-
- if (state == State:I) {
- assert(directory[addr].Owner.count() == 0);
- assert(directory[addr].Sharers.count() == 0);
- }
-
- if (state == State:S) {
- assert(directory[addr].Owner.count() == 0);
- }
-
- if (state == State:O) {
- assert(directory[addr].Owner.count() == 1);
- assert(directory[addr].Sharers.isSuperset(directory[addr].Owner) == false);
- }
-
- if (state == State:M) {
- assert(directory[addr].Owner.count() == 1);
- assert(directory[addr].Sharers.count() == 0);
- }
-
- if ((state != State:SS) && (state != State:OO)) {
- assert(directory[addr].WaitingUnblocks == 0);
- }
-
- if ( (directory[addr].DirectoryState != State:I) && (state == State:I) ) {
- directory[addr].DirectoryState := state;
- // disable coherence checker
- // sequencer.checkCoherence(addr);
- }
- else {
- directory[addr].DirectoryState := state;
- }
- }
- }
-
- // if no sharers, then directory can be considered both a sharer and exclusive w.r.t. coherence checking
- bool isBlockShared(Address addr) {
- if (directory.isPresent(addr)) {
- if (directory[addr].DirectoryState == State:I) {
- return true;
- }
- }
- return false;
- }
-
- bool isBlockExclusive(Address addr) {
- if (directory.isPresent(addr)) {
- if (directory[addr].DirectoryState == State:I) {
- return true;
- }
- }
- return false;
- }
-
-
- // ** OUT_PORTS **
- out_port(forwardNetwork_out, RequestMsg, forwardFromDir);
- out_port(responseNetwork_out, ResponseMsg, responseFromDir);
-// out_port(requestQueue_out, ResponseMsg, requestFromDir); // For recycling requests
- out_port(goo1_out, ResponseMsg, goo1);
- out_port(memQueue_out, MemoryMsg, memBuffer);
-
- // ** IN_PORTS **
-
- in_port(foo1_in, ResponseMsg, foo1) {
-
- }
-
- // in_port(unblockNetwork_in, ResponseMsg, unblockToDir) {
- // if (unblockNetwork_in.isReady()) {
- in_port(unblockNetwork_in, ResponseMsg, responseToDir) {
- if (unblockNetwork_in.isReady()) {
- peek(unblockNetwork_in, ResponseMsg) {
- if (in_msg.Type == CoherenceResponseType:UNBLOCK) {
- if (directory[in_msg.Address].WaitingUnblocks == 1) {
- trigger(Event:Last_Unblock, in_msg.Address);
- } else {
- trigger(Event:Unblock, in_msg.Address);
- }
- } else if (in_msg.Type == CoherenceResponseType:UNBLOCK_EXCLUSIVE) {
- trigger(Event:Exclusive_Unblock, in_msg.Address);
- } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_DIRTY_DATA) {
- trigger(Event:Dirty_Writeback, in_msg.Address);
- } else if (in_msg.Type == CoherenceResponseType:WRITEBACK_CLEAN_ACK) {
- trigger(Event:Clean_Writeback, in_msg.Address);
- } else {
- error("Invalid message");
- }
- }
- }
- }
-
- in_port(requestQueue_in, RequestMsg, requestToDir) {
- if (requestQueue_in.isReady()) {
- peek(requestQueue_in, RequestMsg) {
- if (in_msg.Type == CoherenceRequestType:GETS) {
- trigger(Event:GETS, in_msg.Address);
- } else if (in_msg.Type == CoherenceRequestType:GETX) {
- trigger(Event:GETX, in_msg.Address);
- } else if (in_msg.Type == CoherenceRequestType:PUTX) {
- trigger(Event:PUTX, in_msg.Address);
- } else if (in_msg.Type == CoherenceRequestType:PUTO) {
- trigger(Event:PUTO, in_msg.Address);
- } else if (in_msg.Type == CoherenceRequestType:PUTO_SHARERS) {
- trigger(Event:PUTO_SHARERS, in_msg.Address);
- } else {
- error("Invalid message");
- }
- }
- }
- }
-
- // off-chip memory request/response is done
- in_port(memQueue_in, MemoryMsg, memBuffer) {
- if (memQueue_in.isReady()) {
- peek(memQueue_in, MemoryMsg) {
- if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
- trigger(Event:Memory_Data, in_msg.Address);
- } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
- trigger(Event:Memory_Ack, in_msg.Address);
- } else {
- DEBUG_EXPR(in_msg.Type);
- error("Invalid message");
- }
- }
- }
- }
-
- // Actions
-
- action(a_sendWriteBackAck, "a", desc="Send writeback ack to requestor") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
- out_msg.Address := address;
- out_msg.Type := CoherenceRequestType:WB_ACK;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
- }
- }
-
- action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
- out_msg.Address := address;
- out_msg.Type := CoherenceRequestType:WB_NACK;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.add(in_msg.Requestor);
- out_msg.MessageSize := MessageSizeType:Writeback_Control;
- }
- }
- }
-
- action(c_clearOwner, "c", desc="Clear the owner field") {
- directory[address].Owner.clear();
- }
-
- action(c_moveOwnerToSharer, "cc", desc="Move owner to sharers") {
- directory[address].Sharers.addNetDest(directory[address].Owner);
- directory[address].Owner.clear();
- }
-
- action(cc_clearSharers, "\c", desc="Clear the sharers field") {
- directory[address].Sharers.clear();
- }
-
- action(d_sendDataMsg, "d", desc="Send data to requestor") {
- peek(memQueue_in, MemoryMsg) {
- enqueue(responseNetwork_out, ResponseMsg, latency="1") {
- out_msg.Address := address;
- out_msg.Sender := machineID;
- out_msg.SenderMachine := MachineType:Directory;
- out_msg.Destination.add(in_msg.OriginalRequestorMachId);
- //out_msg.DataBlk := directory[in_msg.Address].DataBlk;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.Dirty := false; // By definition, the block is now clean
- out_msg.Acks := in_msg.Acks;
- if (in_msg.ReadX) {
- out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
- } else {
- out_msg.Type := CoherenceResponseType:DATA;
- }
- out_msg.MessageSize := MessageSizeType:Response_Data;
- }
- }
- }
-
- action(e_ownerIsUnblocker, "e", desc="The owner is now the unblocker") {
- peek(unblockNetwork_in, ResponseMsg) {
- directory[address].Owner.clear();
- directory[address].Owner.add(in_msg.Sender);
- }
- }
-
- action(f_forwardRequest, "f", desc="Forward request to owner") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
- out_msg.Address := address;
- out_msg.Type := in_msg.Type;
- out_msg.Requestor := in_msg.Requestor;
- out_msg.Destination.addNetDest(directory[in_msg.Address].Owner);
- out_msg.Acks := directory[address].Sharers.count();
- if (directory[address].Sharers.isElement(in_msg.Requestor)) {
- out_msg.Acks := out_msg.Acks - 1;
- }
- out_msg.MessageSize := MessageSizeType:Forwarded_Control;
- }
- }
- }
-
- action(g_sendInvalidations, "g", desc="Send invalidations to sharers, not including the requester") {
- peek(requestQueue_in, RequestMsg) {
- if ((directory[in_msg.Address].Sharers.count() > 1) ||
- ((directory[in_msg.Address].Sharers.count() > 0) && (directory[in_msg.Address].Sharers.isElement(in_msg.Requestor) == false))) {
- enqueue(forwardNetwork_out, RequestMsg, latency="DIRECTORY_LATENCY") {
- out_msg.Address := address;
- out_msg.Type := CoherenceRequestType:INV;
- out_msg.Requestor := in_msg.Requestor;
- // out_msg.Destination := directory[in_msg.Address].Sharers;
- out_msg.Destination.addNetDest(directory[in_msg.Address].Sharers);
- out_msg.Destination.remove(in_msg.Requestor);
- out_msg.MessageSize := MessageSizeType:Invalidate_Control;
- }
- }
- }
- }
-
- action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
- requestQueue_in.dequeue();
- }
-
- action(j_popIncomingUnblockQueue, "j", desc="Pop incoming unblock queue") {
- unblockNetwork_in.dequeue();
- }
-
- action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") {
- peek(unblockNetwork_in, ResponseMsg) {
- assert(in_msg.Dirty);
- assert(in_msg.MessageSize == MessageSizeType:Writeback_Data);
- directory[in_msg.Address].DataBlk := in_msg.DataBlk;
- DEBUG_EXPR(in_msg.Address);
- DEBUG_EXPR(in_msg.DataBlk);
- }
- }
-
- action(ll_checkDataInMemory, "\l", desc="Check PUTX/PUTO data is same as in the memory") {
- peek(unblockNetwork_in, ResponseMsg) {
- assert(in_msg.Dirty == false);
- assert(in_msg.MessageSize == MessageSizeType:Writeback_Control);
-
- // NOTE: The following check would not be valid in a real
- // implementation. We include the data in the "dataless"
- // message so we can assert the clean data matches the datablock
- // in memory
- assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk);
- }
- }
-
- action(m_addUnlockerToSharers, "m", desc="Add the unlocker to the sharer list") {
- peek(unblockNetwork_in, ResponseMsg) {
- directory[address].Sharers.add(in_msg.Sender);
- }
- }
-
- action(n_incrementOutstanding, "n", desc="Increment outstanding requests") {
- directory[address].WaitingUnblocks := directory[address].WaitingUnblocks + 1;
- }
-
- action(o_decrementOutstanding, "o", desc="Decrement outstanding requests") {
- directory[address].WaitingUnblocks := directory[address].WaitingUnblocks - 1;
- assert(directory[address].WaitingUnblocks >= 0);
- }
-
- action(q_popMemQueue, "q", desc="Pop off-chip request queue") {
- memQueue_in.dequeue();
- }
-
- action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
- peek(requestQueue_in, RequestMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="1") {
- out_msg.Address := address;
- out_msg.Type := MemoryRequestType:MEMORY_READ;
- out_msg.Sender := machineID;
- out_msg.OriginalRequestorMachId := in_msg.Requestor;
- out_msg.DataBlk := directory[in_msg.Address].DataBlk;
- out_msg.MessageSize := in_msg.MessageSize;
- //out_msg.Prefetch := false;
- // These are not used by memory but are passed back here with the read data:
- out_msg.ReadX := (in_msg.Type == CoherenceRequestType:GETS && directory[address].Sharers.count() == 0);
- out_msg.Acks := directory[address].Sharers.count();
- if (directory[address].Sharers.isElement(in_msg.Requestor)) {
- out_msg.Acks := out_msg.Acks - 1;
- }
- DEBUG_EXPR(out_msg);
- }
- }
- }
-
- action(qw_queueMemoryWBRequest, "qw", desc="Queue off-chip writeback request") {
- peek(unblockNetwork_in, ResponseMsg) {
- enqueue(memQueue_out, MemoryMsg, latency="1") {
- out_msg.Address := address;
- out_msg.Type := MemoryRequestType:MEMORY_WB;
- out_msg.Sender := machineID;
- //out_msg.OriginalRequestorMachId := in_msg.Requestor;
- out_msg.DataBlk := in_msg.DataBlk;
- out_msg.MessageSize := in_msg.MessageSize;
- //out_msg.Prefetch := false;
- // Not used:
- out_msg.ReadX := false;
- out_msg.Acks := 0;
- DEBUG_EXPR(out_msg);
- }
- }
- }
-
-
- // action(z_stall, "z", desc="Cannot be handled right now.") {
- // Special name recognized as do nothing case
- // }
-
- action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
- requestQueue_in.recycle();
- }
-
- // TRANSITIONS
-
- transition(I, GETX, MM) {
- qf_queueMemoryFetchRequest;
- i_popIncomingRequestQueue;
- }
-
- transition(S, GETX, MM) {
- qf_queueMemoryFetchRequest;
- g_sendInvalidations;
- i_popIncomingRequestQueue;
- }
-
- transition(I, GETS, IS) {
- qf_queueMemoryFetchRequest;
- i_popIncomingRequestQueue;
- }
-
- transition({S, SS}, GETS, SS) {
- qf_queueMemoryFetchRequest;
- n_incrementOutstanding;
- i_popIncomingRequestQueue;
- }
-
- transition({I, S}, PUTO) {
- b_sendWriteBackNack;
- i_popIncomingRequestQueue;
- }
-
- transition({I, S, O}, PUTX) {
- b_sendWriteBackNack;
- i_popIncomingRequestQueue;
- }
-
- transition(O, GETX, MM) {
- f_forwardRequest;
- g_sendInvalidations;
- i_popIncomingRequestQueue;
- }
-
- transition({O, OO}, GETS, OO) {
- f_forwardRequest;
- n_incrementOutstanding;
- i_popIncomingRequestQueue;
- }
-
- transition(M, GETX, MM) {
- f_forwardRequest;
- i_popIncomingRequestQueue;
- }
-
- transition(M, GETS, MO) {
- f_forwardRequest;
- i_popIncomingRequestQueue;
- }
-
- transition(M, PUTX, MI) {
- a_sendWriteBackAck;
- i_popIncomingRequestQueue;
- }
-
- // happens if M->O transition happens on-chip
- transition(M, PUTO, MI) {
- a_sendWriteBackAck;
- i_popIncomingRequestQueue;
- }
-
- transition(M, PUTO_SHARERS, MIS) {
- a_sendWriteBackAck;
- i_popIncomingRequestQueue;
- }
-
- transition(O, PUTO, OS) {
- a_sendWriteBackAck;
- i_popIncomingRequestQueue;
- }
-
- transition(O, PUTO_SHARERS, OSS) {
- a_sendWriteBackAck;
- i_popIncomingRequestQueue;
- }
-
-
- transition({MM, MO, MI, MIS, OS, OSS}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX}) {
- zz_recycleRequest;
- }
-
- transition({MM, MO}, Exclusive_Unblock, M) {
- cc_clearSharers;
- e_ownerIsUnblocker;
- j_popIncomingUnblockQueue;
- }
-
- transition(MO, Unblock, O) {
- m_addUnlockerToSharers;
- j_popIncomingUnblockQueue;
- }
-
- transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX}) {
- zz_recycleRequest;
- }
-
- transition(IS, GETS) {
- zz_recycleRequest;
- }
-
- transition(IS, Unblock, S) {
- m_addUnlockerToSharers;
- j_popIncomingUnblockQueue;
- }
-
- transition(IS, Exclusive_Unblock, M) {
- cc_clearSharers;
- e_ownerIsUnblocker;
- j_popIncomingUnblockQueue;
- }
-
- transition(SS, Unblock) {
- m_addUnlockerToSharers;
- o_decrementOutstanding;
- j_popIncomingUnblockQueue;
- }
-
- transition(SS, Last_Unblock, S) {
- m_addUnlockerToSharers;
- o_decrementOutstanding;
- j_popIncomingUnblockQueue;
- }
-
- transition(OO, Unblock) {
- m_addUnlockerToSharers;
- o_decrementOutstanding;
- j_popIncomingUnblockQueue;
- }
-
- transition(OO, Last_Unblock, O) {
- m_addUnlockerToSharers;
- o_decrementOutstanding;
- j_popIncomingUnblockQueue;
- }
-
- transition(MI, Dirty_Writeback, I) {
- c_clearOwner;
- cc_clearSharers;
- l_writeDataToMemory;
- qw_queueMemoryWBRequest;
- j_popIncomingUnblockQueue;
- }
-
- transition(MIS, Dirty_Writeback, S) {
- c_moveOwnerToSharer;
- l_writeDataToMemory;
- qw_queueMemoryWBRequest;
- j_popIncomingUnblockQueue;
- }
-
- transition(MIS, Clean_Writeback, S) {
- c_moveOwnerToSharer;
- j_popIncomingUnblockQueue;
- }
-
- transition(OS, Dirty_Writeback, S) {
- c_clearOwner;
- l_writeDataToMemory;
- qw_queueMemoryWBRequest;
- j_popIncomingUnblockQueue;
- }
-
- transition(OSS, Dirty_Writeback, S) {
- c_moveOwnerToSharer;
- l_writeDataToMemory;
- qw_queueMemoryWBRequest;
- j_popIncomingUnblockQueue;
- }
-
- transition(OSS, Clean_Writeback, S) {
- c_moveOwnerToSharer;
- j_popIncomingUnblockQueue;
- }
-
- transition(MI, Clean_Writeback, I) {
- c_clearOwner;
- cc_clearSharers;
- ll_checkDataInMemory;
- j_popIncomingUnblockQueue;
- }
-
- transition(OS, Clean_Writeback, S) {
- c_clearOwner;
- ll_checkDataInMemory;
- j_popIncomingUnblockQueue;
- }
-
- transition({MI, MIS}, Unblock, M) {
- j_popIncomingUnblockQueue;
- }
-
- transition({OS, OSS}, Unblock, O) {
- j_popIncomingUnblockQueue;
- }
-
- transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data) {
- d_sendDataMsg;
- q_popMemQueue;
- }
-
- transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Ack) {
- //a_sendAck;
- q_popMemQueue;
- }
-
-}
diff --git a/src/mem/protocol/MOESI_CMP_directory_m.slicc b/src/mem/protocol/MOESI_CMP_directory_m.slicc
deleted file mode 100644
index 3abe8603a..000000000
--- a/src/mem/protocol/MOESI_CMP_directory_m.slicc
+++ /dev/null
@@ -1,5 +0,0 @@
-MOESI_CMP_directory-msg.sm
-MOESI_CMP_directory-L2cache.sm
-MOESI_CMP_directory-L1cache.sm
-MOESI_CMP_directory_m-dir.sm
-standard_CMP-protocol.sm
diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm
index 022bb6862..559e54a8c 100644
--- a/src/mem/protocol/RubySlicc_ComponentMapping.sm
+++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm
@@ -30,14 +30,11 @@
// Mapping functions
// NodeID map_address_to_node(Address addr);
+MachineID mapAddressToRange(Address addr, MachineType type, int low, int high);
MachineID map_Address_to_DMA(Address addr);
MachineID map_Address_to_Directory(Address addr);
NodeID map_Address_to_DirectoryNode(Address addr);
-MachineID map_Address_to_CentralArbiterNode(Address addr);
-NodeID oldmap_L1RubyNode_to_L2Cache(Address addr, NodeID L1RubyNode);
-MachineID map_L1CacheMachId_to_L2Cache(Address addr, MachineID L1CacheMachId);
-MachineID map_L2ChipId_to_L2Cache(Address addr, NodeID L2ChipId);
-// MachineID map_L1RubyNode_to_Arb(NodeID L1RubyNode);
+
MachineID getL1MachineID(NodeID L1RubyNode);
NodeID getChipID(MachineID L2machID);
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index a8b58b96c..412fd0de0 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -39,7 +39,10 @@ external_type(string, primitive="yes");
external_type(uint64, primitive="yes");
external_type(Time, primitive="yes", default="0");
external_type(Address);
-
+external_type(DataBlock, desc="..."){
+ void clear();
+ void copyPartial(DataBlock, int, int);
+}
// Declarations of external types that are common to all protocols
@@ -131,12 +134,12 @@ enumeration(CacheRequestType, desc="...", default="CacheRequestType_NULL") {
IO, desc="I/O";
REPLACEMENT, desc="Replacement";
COMMIT, desc="Commit version";
- LD_XACT, desc="Transactional Load";
- LDX_XACT, desc="Transactional Load-Intend-To-Modify";
- ST_XACT, desc="Transactional Store";
- BEGIN_XACT, desc="Begin Transaction";
- COMMIT_XACT, desc="Commit Transaction";
- ABORT_XACT, desc="Abort Transaction";
+ NULL, desc="Invalid request type";
+}
+
+enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
+ LD, desc="Load";
+ ST, desc="Store";
NULL, desc="Invalid request type";
}
@@ -167,7 +170,9 @@ enumeration(GenericRequestType, desc="...", default="GenericRequestType_NULL") {
ST_XACT, desc="Transactional Store";
BEGIN_XACT, desc="Begin Transaction";
COMMIT_XACT, desc="Commit Transaction";
- ABORT_XACT, desc="Abort Transaction";
+ ABORT_XACT, desc="Abort Transaction";
+ DMA_READ, desc="DMA READ";
+ DMA_WRITE, desc="DMA WRITE";
NULL, desc="null request type";
}
@@ -232,6 +237,18 @@ structure(CacheMsg, desc="...", interface="Message") {
PrefetchBit Prefetch, desc="Is this a prefetch request";
}
+// SequencerMsg
+structure(SequencerMsg, desc="...", interface="Message") {
+ Address LineAddress, desc="Line address for this request";
+ Address PhysicalAddress, desc="Physical address for this request";
+ SequencerRequestType Type, desc="Type of request (LD, ST, etc)";
+ Address ProgramCounter, desc="Program counter of the instruction that caused the miss";
+ AccessModeType AccessMode, desc="user/supervisor access type";
+ DataBlock DataBlk, desc="Data";
+ int Len, desc="size in bytes of access";
+ PrefetchBit Prefetch, desc="Is this a prefetch request";
+}
+
// MaskPredictorType
enumeration(MaskPredictorType, "MaskPredictorType_Undefined", desc="...") {
Undefined, desc="Undefined";
diff --git a/src/mem/protocol/RubySlicc_Profiler.sm b/src/mem/protocol/RubySlicc_Profiler.sm
index 7a7fbdae1..d360af160 100644
--- a/src/mem/protocol/RubySlicc_Profiler.sm
+++ b/src/mem/protocol/RubySlicc_Profiler.sm
@@ -34,7 +34,7 @@ void profileCacheCLBsize(int size, int numStaleI);
void profileMemoryCLBsize(int size, int numStaleI);
// used by 2level exclusive cache protocols
-void profile_miss(CacheMsg msg, NodeID id);
+void profile_miss(CacheMsg msg);
// used by non-fast path protocols
void profile_L1Cache_miss(CacheMsg msg, NodeID l1cacheID);
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index aa5648a9e..9679b7b6f 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -29,11 +29,6 @@
// External Types
-external_type(DataBlock, desc="..."){
- void clear();
- void copyPartial(DataBlock, int, int);
-}
-
external_type(MessageBuffer, buffer="yes", inport="yes", outport="yes");
external_type(OutPort, primitive="yes");
diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm
index 2aa494fff..312682bd7 100644
--- a/src/mem/protocol/RubySlicc_Util.sm
+++ b/src/mem/protocol/RubySlicc_Util.sm
@@ -56,5 +56,5 @@ int N_tokens();
bool distributedPersistentEnabled();
Address setOffset(Address addr, int offset);
Address makeLineAddress(Address addr);
-
+int addressOffset(Address addr);
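The new addressOffset() helper, together with the copyPartial() method exported on DataBlock in RubySlicc_Exports.sm above, is what lets the directory splice a sub-line DMA write into the backing block (see l_writeDMADataToMemory in MOESI_CMP_directory-dir.sm). A rough C++ sketch of the intended semantics, assuming a 64-byte line and a same-offset copy; the real Address and DataBlock classes live elsewhere in Ruby:

    #include <cstdint>
    #include <cstring>

    static const int kLineBytes = 64;  // assumed block size

    struct DataBlock { uint8_t bytes[kLineBytes]; };

    // addressOffset(addr): byte offset of an address within its cache line.
    int addressOffset(uint64_t paddr) { return paddr & (kLineBytes - 1); }

    // DataBlock::copyPartial(src, offset, len): overwrite len bytes at offset.
    void copyPartial(DataBlock &dst, const DataBlock &src, int offset, int len)
    {
        std::memcpy(&dst.bytes[offset], &src.bytes[offset], len);
    }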
diff --git a/src/mem/protocol/SConscript b/src/mem/protocol/SConscript
index 9630c685a..293346f13 100644
--- a/src/mem/protocol/SConscript
+++ b/src/mem/protocol/SConscript
@@ -61,7 +61,7 @@ def slicc_generator(target, source, env, for_signature):
if not isdir(hdir):
os.mkdir(hdir)
- do_html = "no_html"
+ do_html = "html"
cmdline = [ slicc_bin, pdir, hdir, protocol, do_html ]
cmdline += [ str(s) for s in source[2:] ]
cmdline = ' '.join(cmdline)
diff --git a/src/mem/ruby/config/MI_example-homogeneous.rb b/src/mem/ruby/config/MI_example-homogeneous.rb
index d43e384e5..2b416e647 100644
--- a/src/mem/ruby/config/MI_example-homogeneous.rb
+++ b/src/mem/ruby/config/MI_example-homogeneous.rb
@@ -8,20 +8,27 @@
require "cfg.rb"
+RubySystem.reset
+
# default values
num_cores = 2
-L1_CACHE_SIZE_KB = 32
-L1_CACHE_ASSOC = 8
-L1_CACHE_LATENCY = 1
+l1_cache_size_kb = 32
+l1_cache_assoc = 8
+l1_cache_latency = 1
num_memories = 2
memory_size_mb = 1024
-NUM_DMA = 1
+num_dma = 1
+protocol = "MI_example"
# check for overrides
+
for i in 0..$*.size-1 do
- if $*[i] == "-p"
+ if $*[i] == "-c"
+ protocol = $*[i+1]
+ i = i+1
+ elsif $*[i] == "-p"
num_cores = $*[i+1].to_i
i = i+1
elsif $*[i] == "-m"
@@ -36,13 +43,17 @@ end
net_ports = Array.new
iface_ports = Array.new
+assert(protocol == "MI_example", __FILE__ + " cannot be used with protocol " + protocol)
+
+require protocol+".rb"
+
num_cores.times { |n|
- cache = SetAssociativeCache.new("l1u_"+n.to_s, L1_CACHE_SIZE_KB, L1_CACHE_LATENCY, L1_CACHE_ASSOC, "PSEUDO_LRU")
+ cache = SetAssociativeCache.new("l1u_"+n.to_s, l1_cache_size_kb, l1_cache_latency, l1_cache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, cache, cache)
iface_ports << sequencer
net_ports << MI_example_CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
- [cache],
+ cache,
sequencer)
}
num_memories.times { |n|
@@ -52,10 +63,10 @@ num_memories.times { |n|
"Directory",
directory, memory_control)
}
-NUM_DMA.times { |n|
+num_dma.times { |n|
dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
iface_ports << dma_sequencer
- net_ports << DMAController.new("DMAController_"+n.to_s, "DMA", dma_sequencer)
+ net_ports << MI_example_DMAController.new("DMAController_"+n.to_s, "DMA", dma_sequencer)
}
topology = CrossbarTopology.new("theTopology", net_ports)
diff --git a/src/mem/ruby/config/MI_example.rb b/src/mem/ruby/config/MI_example.rb
new file mode 100644
index 000000000..187dc7a68
--- /dev/null
+++ b/src/mem/ruby/config/MI_example.rb
@@ -0,0 +1,39 @@
+
+require "util.rb"
+
+class MI_example_CacheController < L1CacheController
+ attr :cache
+ def initialize(obj_name, mach_type, cache, sequencer)
+ super(obj_name, mach_type, [cache], sequencer)
+ @cache = cache
+ end
+ def argv()
+ vec = super()
+ vec += " cache " + @cache.obj_name
+ vec += " issue_latency "+issue_latency.to_s
+ vec += " cache_response_latency "+cache_response_latency.to_s
+ end
+
+end
+
+class MI_example_DirectoryController < DirectoryController
+ def initialize(obj_name, mach_type, directory, memory_control)
+ super(obj_name, mach_type, directory, memory_control)
+ end
+ def argv()
+ vec = super()
+ vec += " directory_latency "+directory_latency.to_s
+ vec += " dma_select_low_bit "+log_int(RubySystem.block_size_bytes).to_s
+ vec += " dma_select_num_bits "+log_int(NetPort.totalOfType("DMA")).to_s
+ end
+end
+
+class MI_example_DMAController < DMAController
+ def initialize(obj_name, mach_type, dma_sequencer)
+ super(obj_name, mach_type, dma_sequencer)
+ end
+ def argv()
+ vec = super
+ vec += " request_latency "+request_latency.to_s
+ end
+end
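MI_example_DirectoryController derives dma_select_low_bit and dma_select_num_bits from the block size and the number of DMA ports, presumably so an address range can be mapped onto a DMA controller (mapAddressToRange is declared in RubySlicc_ComponentMapping.sm above). A small C++ sketch of how such a bit-field selection could work, stated as an assumption rather than the mapping function's actual code:

    #include <cstdint>

    // Select a controller by extracting num_bits address bits starting at
    // low_bit, where low_bit = log2(block size) and num_bits = log2(#ports).
    int selectByAddressBits(uint64_t paddr, int low_bit, int num_bits)
    {
        return static_cast<int>((paddr >> low_bit) & ((1ull << num_bits) - 1));
    }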
diff --git a/src/mem/ruby/config/MOESI_CMP_directory.rb b/src/mem/ruby/config/MOESI_CMP_directory.rb
new file mode 100644
index 000000000..1e8a82fab
--- /dev/null
+++ b/src/mem/ruby/config/MOESI_CMP_directory.rb
@@ -0,0 +1,69 @@
+
+require "cfg.rb"
+require "util.rb"
+
+
+class MOESI_CMP_directory_L1CacheController < L1CacheController
+ attr :icache, :dcache
+ attr :num_l2_controllers
+ def initialize(obj_name, mach_type, icache, dcache, sequencer, num_l2_controllers)
+ super(obj_name, mach_type, [icache, dcache], sequencer)
+ @icache = icache
+ @dcache = dcache
+ @num_l2_controllers = num_l2_controllers
+ end
+ def argv()
+ num_select_bits = log_int(num_l2_controllers)
+ num_block_bits = log_int(RubySystem.block_size_bytes)
+
+ l2_select_low_bit = num_block_bits
+
+ vec = super()
+ vec += " icache " + @icache.obj_name
+ vec += " dcache " + @dcache.obj_name
+ vec += " request_latency "+request_latency().to_s
+ vec += " l2_select_low_bit " + l2_select_low_bit.to_s
+ vec += " l2_select_num_bits " + num_select_bits.to_s
+ return vec
+ end
+end
+
+class MOESI_CMP_directory_L2CacheController < CacheController
+ attr :cache
+ def initialize(obj_name, mach_type, cache)
+ super(obj_name, mach_type, [cache])
+ @cache = cache
+ end
+ def argv()
+ vec = super()
+ vec += " cache " + @cache.obj_name
+ vec += " request_latency "+request_latency().to_s
+ vec += " response_latency "+response_latency().to_s
+ return vec
+ end
+end
+
+
+class MOESI_CMP_directory_DirectoryController < DirectoryController
+ def initialize(obj_name, mach_type, directory, memory_control)
+ super(obj_name, mach_type, directory, memory_control)
+ end
+ def argv()
+ vec = super()
+ vec += " directory_latency "+directory_latency.to_s
+ return vec
+ end
+
+end
+
+class MOESI_CMP_directory_DMAController < DMAController
+ def initialize(obj_name, mach_type, dma_sequencer)
+ super(obj_name, mach_type, dma_sequencer)
+ end
+ def argv()
+ vec = super
+ vec += " request_latency "+request_latency.to_s
+ vec += " response_latency "+response_latency.to_s
+ return vec
+ end
+end
diff --git a/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
new file mode 100644
index 000000000..8a202d450
--- /dev/null
+++ b/src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb
@@ -0,0 +1,100 @@
+#!/usr/bin/ruby
+#
+# Creates a homogeneous CMP system with split L1 instruction/data caches
+# per core, a banked unified L2, and a crossbar network. Uses the default
+# parameters listed below, which can be overridden using command line args.
+#
+
+require "cfg.rb"
+
+RubySystem.reset
+
+# default values
+
+num_cores = 2
+l1_icache_size_kb = 32
+l1_icache_assoc = 8
+l1_icache_latency = 1
+l1_dcache_size_kb = 32
+l1_dcache_assoc = 8
+l1_dcache_latency = 1
+l2_cache_size_kb = 2048 # total size (sum of all banks)
+l2_cache_assoc = 16
+l2_cache_latency = 12
+num_l2_banks = num_cores
+num_memories = 1
+memory_size_mb = 1024
+num_dma = 1
+
+protocol = "MOESI_CMP_directory"
+
+# check for overrides
+
+for i in 0..$*.size-1 do
+ if $*[i] == "-c" or $*[i] == "--protocol"
+ i += 1
+ protocol = $*[i]
+ elsif $*[i] == "-m"
+ num_memories = $*[i+1].to_i
+ i = i+1
+ elsif $*[i] == "-p"
+ num_cores = $*[i+1].to_i
+ i = i+1
+ elsif $*[i] == "-s"
+ memory_size_mb = $*[i+1].to_i
+ i = i + 1
+ end
+end
+
+net_ports = Array.new
+iface_ports = Array.new
+
+assert(protocol == "MOESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol);
+
+require protocol+".rb"
+
+num_cores.times { |n|
+ icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
+ dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
+ sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
+ iface_ports << sequencer
+ if protocol == "MOESI_CMP_directory"
+ net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
+ "L1Cache",
+ icache, dcache,
+ sequencer,
+ num_l2_banks)
+ end
+}
+num_l2_banks.times { |n|
+ cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_kb/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
+ if protocol == "MOESI_CMP_directory"
+ net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
+ "L2Cache",
+ cache)
+ end
+}
+num_memories.times { |n|
+ directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories)
+ memory_control = MemoryControl.new("MemoryControl_"+n.to_s)
+ if protocol == "MOESI_CMP_directory"
+ net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
+ "Directory",
+ directory,
+ memory_control)
+ end
+}
+num_dma.times { |n|
+ dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
+ iface_ports << dma_sequencer
+ if protocol == "MOESI_CMP_directory"
+ net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
+ "DMA",
+ dma_sequencer)
+ end
+}
+
+topology = CrossbarTopology.new("theTopology", net_ports)
+on_chip_net = Network.new("theNetwork", topology)
+
+RubySystem.init(iface_ports, on_chip_net)
diff --git a/src/mem/ruby/config/cfg.rb b/src/mem/ruby/config/cfg.rb
index 6b12ad22a..8ed7de474 100644
--- a/src/mem/ruby/config/cfg.rb
+++ b/src/mem/ruby/config/cfg.rb
@@ -11,7 +11,7 @@ end
def assert(condition,message)
unless condition
- raise AssertionFailure, "Assertion failed: #{message}"
+ raise AssertionFailure, "\n\nAssertion failed: \n\n #{message}\n\n"
end
end
@@ -150,6 +150,9 @@ class NetPort < LibRubyObject
def cppClassName
"NetPort"
end
+ def self.totalOfType(mach_type)
+ return @@type_cnt[mach_type]
+ end
end
class MemoryVector < LibRubyObject
@@ -190,6 +193,7 @@ end
class RubySystem
@@params = Hash.new
+ @@defaults = Hash.new
@@network = nil
def self.init(iface_ports, network)
@@ -197,6 +201,14 @@ class RubySystem
@@network = network
end
+ def self.reset()
+ @@iface_ports = nil
+ @@network = nil
+ @@params.each_key { |param_name|
+ @@params[param_name] = @@defaults[param_name]
+ }
+ end
+
def self.default_param(param_name, type, default)
if default.is_a?(FalseClass) || default.is_a?(TrueClass)
assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
@@ -204,6 +216,7 @@ class RubySystem
assert default.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
end
@@params[param_name] = default
+ @@defaults[param_name] = default
method_name = (param_name.to_s).to_sym
instance_eval <<-EOS
def #{method_name.to_s}
@@ -233,6 +246,7 @@ class RubySystem
end
}
str += LibRubyObject.printConstructors
+ #puts str.gsub('%',' ').gsub('#','\n')
return str
end
@@ -287,35 +301,33 @@ end
class CacheController < NetPort
- @@total_cache_controllers = 0
- attr :caches
- attr :sequencer
- def initialize(obj_name, mach_type, caches, sequencer)
+ @@total_cache_controllers = Hash.new
+
+ def initialize(obj_name, mach_type, caches)
super(obj_name, mach_type)
- @caches = caches
- @caches.each { |cache|
+ caches.each { |cache|
cache.controller = self
}
- @sequencer = sequencer
- @sequencer.controller = self
-
- @version = @@total_cache_controllers
- @@total_cache_controllers += 1
- @sequencer.version = @version
- buffer_size()
+ if !@@total_cache_controllers.has_key?(mach_type)
+ @@total_cache_controllers[mach_type] = 0
+ end
+ @version = @@total_cache_controllers[mach_type]
+ @@total_cache_controllers[mach_type] += 1
+
+ # call inherited parameters
+ transitions_per_cycle
+ buffer_size
+ number_of_TBEs
+ recycle_latency
end
def argv()
vec = "version "+@version.to_s
- @caches.each { |cache|
- vec += " cache " + cache.obj_name
- }
- vec += " sequencer "+@sequencer.obj_name
vec += " transitions_per_cycle "+@params[:transitions_per_cycle].to_s
vec += " buffer_size "+@params[:buffer_size].to_s
vec += " number_of_TBEs "+@params[:number_of_TBEs].to_s
-
+ vec += " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
@@ -323,6 +335,23 @@ class CacheController < NetPort
end
end
+class L1CacheController < CacheController
+ attr :sequencer
+
+ def initialize(obj_name, mach_type, caches, sequencer)
+ super(obj_name, mach_type, caches)
+
+ @sequencer = sequencer
+ @sequencer.controller = self
+ @sequencer.version = @version
+ end
+
+ def argv()
+ vec = super()
+ vec += " sequencer "+@sequencer.obj_name
+ end
+end
+
class DirectoryController < NetPort
@@total_directory_controllers = 0
attr :directory
@@ -364,7 +393,7 @@ class DMAController < NetPort
end
def argv()
- "version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s
+ "version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
@@ -606,7 +635,7 @@ class Network < LibRubyObject
end
def printTopology()
- topology.printFile
+ topology().printFile
end
def cppClassName()
"SimpleNetwork"
@@ -686,31 +715,6 @@ class Profiler < LibRubyObject
end
-class MI_example_CacheController < CacheController
- def initialize(obj_name, mach_type, caches, sequencer)
- super(obj_name, mach_type, caches, sequencer)
- end
- def argv()
- vec = super()
- vec += " issue_latency "+issue_latency.to_s
- vec += " cache_response_latency "+cache_response_latency.to_s
- end
-
-end
-
-class MI_example_DirectoryController < DirectoryController
- def initialize(obj_name, mach_type, directory, memory_control)
- super(obj_name, mach_type, directory, memory_control)
- end
- def argv()
- vec = super()
- vec += " to_mem_ctrl_latency "+to_mem_ctrl_latency.to_s
- vec += " directory_latency "+directory_latency.to_s
- vec += " memory_latency "+memory_latency.to_s
- end
-
-end
-
#added by SS
class GarnetNetwork < Network
def initialize(name, topo)
diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb
index 7d5d91ae2..5451c577e 100644
--- a/src/mem/ruby/config/defaults.rb
+++ b/src/mem/ruby/config/defaults.rb
@@ -108,19 +108,6 @@ class Profiler < LibRubyObject
end
#added by SS
-class MI_example_CacheController < CacheController
- default_param :issue_latency, Integer, 2
- default_param :cache_response_latency, Integer, 12
-end
-
-class MI_example_DirectoryController < DirectoryController
- default_param :to_mem_ctrl_latency, Integer, 1
- default_param :directory_latency, Integer, 6
- default_param :memory_latency, Integer, 158
-end
-
-
-#added by SS
class MemoryControl < LibRubyObject
default_param :mem_bus_cycle_multiplier, Integer, 10
@@ -143,6 +130,43 @@ class MemoryControl < LibRubyObject
end
+###### Protocols #######
+
+## MI_example protocol
+
+class MI_example_CacheController < L1CacheController
+ default_param :issue_latency, Integer, 2
+ default_param :cache_response_latency, Integer, 12
+end
+
+class MI_example_DirectoryController < DirectoryController
+ default_param :directory_latency, Integer, 6
+end
+
+class MI_example_DMAController < DMAController
+ default_param :request_latency, Integer, 6
+end
+
+## MOESI_CMP_directory protocol
+
+class MOESI_CMP_directory_L1CacheController < L1CacheController
+ default_param :request_latency, Integer, 2
+end
+
+class MOESI_CMP_directory_L2CacheController < CacheController
+ default_param :request_latency, Integer, 2
+ default_param :response_latency, Integer, 2
+end
+
+class MOESI_CMP_directory_DirectoryController < DirectoryController
+ default_param :directory_latency, Integer, 6
+end
+
+class MOESI_CMP_directory_DMAController < DMAController
+ default_param :request_latency, Integer, 6
+ default_param :response_latency, Integer, 6
+end
+
class RubySystem
# Random seed used by the simulation. If set to "rand", the seed
diff --git a/src/mem/ruby/config/util.rb b/src/mem/ruby/config/util.rb
new file mode 100644
index 000000000..a6aa8f6ab
--- /dev/null
+++ b/src/mem/ruby/config/util.rb
@@ -0,0 +1,10 @@
+
+def log_int(n)
+ assert(n.is_a?(Fixnum), "log_int takes a number for an argument")
+ counter = 0
+ while n >= 2 do
+ counter += 1
+ n = n >> 1
+ end
+ return counter
+end
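
The new log_int helper computes floor(log2 n) by shifting the argument right until it drops below 2; DirectoryMemory.cc further down relies on the C++ counterpart of the same idea (log_int(m_size_bytes)) to turn the directory size into an address-bit count. A minimal standalone C++ sketch of that computation (the gems_common C++ version is not part of this patch):

    // Sketch: floor(log2(n)) by repeated right shift, mirroring the Ruby helper.
    #include <cassert>
    #include <cstdint>
    #include <iostream>

    static int log_int_sketch(uint64_t n)
    {
        assert(n > 0);
        int counter = 0;
        while (n >= 2) {   // same loop as the Ruby version above
            counter++;
            n >>= 1;
        }
        return counter;
    }

    int main()
    {
        // e.g. a 1024 MB directory: 1024 * 2^20 bytes -> 30 address bits
        std::cout << log_int_sketch(1024ULL << 20) << std::endl;  // prints 30
        return 0;
    }
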
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
index cd3cdbe48..96405c8dd 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh
@@ -85,6 +85,16 @@ MachineID map_Address_to_DMA(const Address & addr)
return dma;
}
+inline
+MachineID mapAddressToRange(const Address & addr, MachineType type, int low_bit, int num_bits)
+{
+ MachineID mach = {type, 0};
+ if (num_bits == 0)
+ return mach;
+ mach.num = addr.bitSelect(low_bit, low_bit+num_bits-1);
+ return mach;
+}
+
extern inline NodeID machineIDToNodeID(MachineID machID)
{
return machID.num;
diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh
index a7b8a13fd..3d4fa3e5c 100644
--- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh
+++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh
@@ -168,4 +168,9 @@ extern inline Address makeLineAddress(Address addr)
return result;
}
+extern inline int addressOffset(Address addr)
+{
+ return addr.getOffset();
+}
+
#endif //SLICC_UTIL_H
diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc
index d29dba602..8af892007 100644
--- a/src/mem/ruby/system/DMASequencer.cc
+++ b/src/mem/ruby/system/DMASequencer.cc
@@ -4,9 +4,8 @@
#include "mem/ruby/slicc_interface/AbstractController.hh"
/* SLICC generated types */
-#include "mem/protocol/DMARequestMsg.hh"
-#include "mem/protocol/DMARequestType.hh"
-#include "mem/protocol/DMAResponseMsg.hh"
+#include "mem/protocol/SequencerMsg.hh"
+#include "mem/protocol/SequencerRequestType.hh"
#include "mem/ruby/system/System.hh"
DMASequencer::DMASequencer(const string & name)
@@ -66,20 +65,16 @@ int64_t DMASequencer::makeRequest(const RubyRequest & request)
active_request.bytes_issued = 0;
active_request.id = makeUniqueRequestID();
- DMARequestMsg msg;
+ SequencerMsg msg;
msg.getPhysicalAddress() = Address(paddr);
msg.getLineAddress() = line_address(msg.getPhysicalAddress());
- msg.getType() = write ? DMARequestType_WRITE : DMARequestType_READ;
- msg.getOffset() = paddr & m_data_block_mask;
- msg.getLen() = (msg.getOffset() + len) <= RubySystem::getBlockSizeBytes() ?
+ msg.getType() = write ? SequencerRequestType_ST : SequencerRequestType_LD;
+ int offset = paddr & m_data_block_mask;
+ msg.getLen() = (offset + len) <= RubySystem::getBlockSizeBytes() ?
len :
- RubySystem::getBlockSizeBytes() - msg.getOffset();
- if (write) {
- msg.getType() = DMARequestType_WRITE;
- msg.getDataBlk().setData(data, msg.getOffset(), msg.getLen());
- } else {
- msg.getType() = DMARequestType_READ;
- }
+ RubySystem::getBlockSizeBytes() - offset;
+ if (write)
+ msg.getDataBlk().setData(data, offset, msg.getLen());
m_mandatory_q_ptr->enqueue(msg);
active_request.bytes_issued += msg.getLen();
@@ -96,14 +91,13 @@ void DMASequencer::issueNext()
return;
}
- DMARequestMsg msg;
+ SequencerMsg msg;
msg.getPhysicalAddress() = Address(active_request.start_paddr +
active_request.bytes_completed);
assert((msg.getPhysicalAddress().getAddress() & m_data_block_mask) == 0);
msg.getLineAddress() = line_address(msg.getPhysicalAddress());
- msg.getOffset() = 0;
- msg.getType() = (active_request.write ? DMARequestType_WRITE :
- DMARequestType_READ);
+ msg.getType() = (active_request.write ? SequencerRequestType_ST :
+ SequencerRequestType_LD);
msg.getLen() = (active_request.len -
active_request.bytes_completed < RubySystem::getBlockSizeBytes() ?
active_request.len - active_request.bytes_completed :
@@ -111,9 +105,9 @@ void DMASequencer::issueNext()
if (active_request.write) {
msg.getDataBlk().setData(&active_request.data[active_request.bytes_completed],
0, msg.getLen());
- msg.getType() = DMARequestType_WRITE;
+ msg.getType() = SequencerRequestType_ST;
} else {
- msg.getType() = DMARequestType_READ;
+ msg.getType() = SequencerRequestType_LD;
}
m_mandatory_q_ptr->enqueue(msg);
active_request.bytes_issued += msg.getLen();
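
With the switch to SequencerMsg the DMA request is still split into line-sized pieces: the first message covers at most the rest of the current cache line (block size minus the offset), and issueNext() then sends full lines until the remaining length runs out. A standalone sketch of that chunking arithmetic, using a hypothetical 200-byte request at 0x1030 and 64-byte lines:

    // Sketch of the chunk sizes produced by makeRequest()/issueNext().
    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        const uint64_t block_bytes = 64;              // RubySystem::getBlockSizeBytes()
        uint64_t paddr = 0x1030, len = 200;           // hypothetical DMA request

        uint64_t offset = paddr & (block_bytes - 1);  // paddr & m_data_block_mask
        uint64_t issued = 0;
        uint64_t chunk = std::min(len, block_bytes - offset);  // first, clipped chunk
        while (true) {
            std::cout << "issue " << chunk << " bytes at 0x" << std::hex
                      << paddr + issued << std::dec << "\n";
            issued += chunk;
            if (issued >= len)
                break;
            chunk = std::min(len - issued, block_bytes);  // later chunks start line-aligned
        }
        return 0;  // prints chunks of 16, 64, 64 and 56 bytes
    }
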
diff --git a/src/mem/ruby/system/DirectoryMemory.cc b/src/mem/ruby/system/DirectoryMemory.cc
index b279d21af..c87be94a2 100644
--- a/src/mem/ruby/system/DirectoryMemory.cc
+++ b/src/mem/ruby/system/DirectoryMemory.cc
@@ -58,12 +58,14 @@ void DirectoryMemory::init(const vector<string> & argv)
if ( (*it) == "version" )
m_version = atoi( (*(++it)).c_str() );
else if ( (*it) == "size_mb" ) {
- m_size_bytes = atoi((*(++it)).c_str()) * (1<<20);
+ m_size_bytes = atoi((*(++it)).c_str()) * static_cast<uint64>(1<<20);
m_size_bits = log_int(m_size_bytes);
} else if ( (*it) == "controller" ) {
m_controller = RubySystem::getController((*(++it)));
- } else
+ } else {
+      cerr << "DirectoryMemory: Unknown config parameter: " << (*it) << endl;
assert(0);
+ }
}
assert(m_controller != NULL);
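
The static_cast<uint64>(1<<20) matters because the old expression multiplied two 32-bit ints, which stops fitting once size_mb reaches 2048; promoting one operand to 64 bits keeps the byte count exact. A small sketch of the difference, using a well-defined unsigned stand-in for the old 32-bit product:

    // Why the cast is needed: a 32-bit product of size_mb * 1 MB wraps at 2 GB.
    #include <cstdint>
    #include <iostream>

    int main()
    {
        int size_mb = 4096;  // hypothetical "size_mb" config value
        uint32_t truncated = static_cast<uint32_t>(size_mb) * (1u << 20);  // wraps to 0
        uint64_t exact     = static_cast<uint64_t>(size_mb) * (1u << 20);  // 4294967296
        std::cout << truncated << " vs " << exact << std::endl;
        return 0;
    }
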
diff --git a/src/mem/ruby/system/DirectoryMemory.hh b/src/mem/ruby/system/DirectoryMemory.hh
index 6445ecc62..39de679ed 100644
--- a/src/mem/ruby/system/DirectoryMemory.hh
+++ b/src/mem/ruby/system/DirectoryMemory.hh
@@ -59,7 +59,7 @@ public:
int mapAddressToLocalIdx(PhysAddress address);
static int mapAddressToDirectoryVersion(PhysAddress address);
- int getSize() { return m_size_bytes; }
+ uint64 getSize() { return m_size_bytes; }
// Public Methods
void printConfig(ostream& out) const;
@@ -84,8 +84,8 @@ private:
// Data Members (m_ prefix)
Directory_Entry **m_entries;
// int m_size; // # of memory module blocks this directory is responsible for
- uint32 m_size_bytes;
- uint32 m_size_bits;
+ uint64 m_size_bytes;
+ uint64 m_size_bits;
int m_num_entries;
int m_version;
diff --git a/src/mem/ruby/system/PerfectCacheMemory.hh b/src/mem/ruby/system/PerfectCacheMemory.hh
index 90c9273e5..6561d028b 100644
--- a/src/mem/ruby/system/PerfectCacheMemory.hh
+++ b/src/mem/ruby/system/PerfectCacheMemory.hh
@@ -43,7 +43,6 @@
#include "mem/gems_common/Map.hh"
#include "mem/protocol/AccessPermission.hh"
#include "mem/ruby/common/Address.hh"
-#include "mem/ruby/slicc_interface/AbstractChip.hh"
template<class ENTRY>
class PerfectCacheLineState {
@@ -54,11 +53,18 @@ public:
};
template<class ENTRY>
+extern inline
+ostream& operator<<(ostream& out, const PerfectCacheLineState<ENTRY>& obj)
+{
+ return out;
+}
+
+template<class ENTRY>
class PerfectCacheMemory {
public:
// Constructors
- PerfectCacheMemory(AbstractChip* chip_ptr);
+ PerfectCacheMemory();
// Destructor
//~PerfectCacheMemory();
@@ -106,7 +112,6 @@ private:
// Data Members (m_prefix)
Map<Address, PerfectCacheLineState<ENTRY> > m_map;
- AbstractChip* m_chip_ptr;
};
// Output operator declaration
@@ -129,9 +134,8 @@ ostream& operator<<(ostream& out, const PerfectCacheMemory<ENTRY>& obj)
template<class ENTRY>
extern inline
-PerfectCacheMemory<ENTRY>::PerfectCacheMemory(AbstractChip* chip_ptr)
+PerfectCacheMemory<ENTRY>::PerfectCacheMemory()
{
- m_chip_ptr = chip_ptr;
}
// STATIC METHODS
diff --git a/src/mem/ruby/system/System.hh b/src/mem/ruby/system/System.hh
index dbf4dbc78..38ef09177 100644
--- a/src/mem/ruby/system/System.hh
+++ b/src/mem/ruby/system/System.hh
@@ -104,6 +104,9 @@ public:
static RubyPort* getPortOnly(const string & name) {
assert(m_ports.count(name) == 1); return m_ports[name]; }
static RubyPort* getPort(const string & name, void (*hit_callback)(int64_t)) {
+ if (m_ports.count(name) != 1){
+ cerr << "Port " << name << " has " << m_ports.count(name) << " instances" << endl;
+ }
assert(m_ports.count(name) == 1); m_ports[name]->registerHitCallback(hit_callback); return m_ports[name]; }
static Network* getNetwork() { assert(m_network_ptr != NULL); return m_network_ptr; }
static Topology* getTopology(const string & name) { assert(m_topologies.count(name) == 1); return m_topologies[name]; }
diff --git a/src/mem/ruby/system/TimerTable.cc b/src/mem/ruby/system/TimerTable.cc
index edc2de230..5d496da04 100644
--- a/src/mem/ruby/system/TimerTable.cc
+++ b/src/mem/ruby/system/TimerTable.cc
@@ -35,11 +35,9 @@
#include "mem/ruby/system/TimerTable.hh"
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
-TimerTable::TimerTable(Chip* chip_ptr)
+TimerTable::TimerTable()
{
- assert(chip_ptr != NULL);
m_consumer_ptr = NULL;
- m_chip_ptr = chip_ptr;
m_next_valid = false;
m_next_address = Address(0);
m_next_time = 0;
diff --git a/src/mem/ruby/system/TimerTable.hh b/src/mem/ruby/system/TimerTable.hh
index 9912036f3..eda84069d 100644
--- a/src/mem/ruby/system/TimerTable.hh
+++ b/src/mem/ruby/system/TimerTable.hh
@@ -43,13 +43,12 @@
#include "mem/gems_common/Map.hh"
#include "mem/ruby/common/Address.hh"
class Consumer;
-class Chip;
class TimerTable {
public:
// Constructors
- TimerTable(Chip* chip_ptr);
+ TimerTable();
// Destructor
//~TimerTable();
@@ -77,7 +76,6 @@ private:
// Data Members (m_prefix)
Map<Address, Time> m_map;
- Chip* m_chip_ptr;
mutable bool m_next_valid;
mutable Time m_next_time; // Only valid if m_next_valid is true
mutable Address m_next_address; // Only valid if m_next_valid is true
diff --git a/src/mem/slicc/ast/AST.hh b/src/mem/slicc/ast/AST.hh
index 53f9a6c33..33c9b84ed 100644
--- a/src/mem/slicc/ast/AST.hh
+++ b/src/mem/slicc/ast/AST.hh
@@ -50,28 +50,28 @@ public:
// Constructors
AST(Map<string, string> pairs) { m_pairs = pairs; };
AST() {};
-
+
// Destructor
virtual ~AST() {};
-
+
// Public Methods
virtual void print(ostream& out) const = 0;
void error(string err_msg) const { m_location.error(err_msg); };
string embedError(string err_msg) const { return m_location.embedError(err_msg); };
void warning(string err_msg) const { m_location.warning(err_msg); };
-
+
const Location& getLocation() const { return m_location; };
-
+
const Map<string, string>& getPairs() const { return m_pairs; };
Map<string, string>& getPairs() { return m_pairs; };
-
+
private:
// Private Methods
-
+
// Private copy constructor and assignment operator
// AST(const AST& obj);
// AST& operator=(const AST& obj);
-
+
// Data Members (m_ prefix)
Location m_location;
Map<string, string> m_pairs;
diff --git a/src/mem/slicc/ast/ActionDeclAST.cc b/src/mem/slicc/ast/ActionDeclAST.cc
index 2734722d1..e46412ff7 100644
--- a/src/mem/slicc/ast/ActionDeclAST.cc
+++ b/src/mem/slicc/ast/ActionDeclAST.cc
@@ -36,8 +36,10 @@
*
*/
+
#include "mem/slicc/ast/ActionDeclAST.hh"
#include "mem/slicc/symbols/Action.hh"
+#include "mem/slicc/ast/StatementListAST.hh"
ActionDeclAST::ActionDeclAST(string* ident_ptr,
PairListAST* pairs_ptr,
diff --git a/src/mem/slicc/ast/ActionDeclAST.hh b/src/mem/slicc/ast/ActionDeclAST.hh
index 53d938ca8..4970ee254 100644
--- a/src/mem/slicc/ast/ActionDeclAST.hh
+++ b/src/mem/slicc/ast/ActionDeclAST.hh
@@ -41,7 +41,8 @@
#include "mem/slicc/slicc_global.hh"
#include "mem/slicc/ast/DeclAST.hh"
-#include "mem/slicc/ast/StatementListAST.hh"
+
+class StatementListAST;
class ActionDeclAST : public DeclAST {
public:
diff --git a/src/mem/slicc/ast/EnqueueStatementAST.cc b/src/mem/slicc/ast/EnqueueStatementAST.cc
index 8be0378c9..a422d8a28 100644
--- a/src/mem/slicc/ast/EnqueueStatementAST.cc
+++ b/src/mem/slicc/ast/EnqueueStatementAST.cc
@@ -77,7 +77,14 @@ void EnqueueStatementAST::generate(string& code, Type* return_type_ptr) const
code += ".enqueue(out_msg";
if (getPairs().exist("latency")) {
- code += ", m_LATENCY_" + getPairs().lookup("latency");
+ bool is_number = true;
+ string val = getPairs().lookup("latency");
+ for (int i=0; i<val.size(); i++)
+ if (!isdigit(val[i])) is_number = false;
+ if (is_number)
+ code += ", " + getPairs().lookup("latency");
+ else
+ code += ", m_" + getPairs().lookup("latency");
}
code += ");\n";
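
The enqueue latency pair can now be either a plain number or the name of a machine parameter: the all-digits test decides whether the generator emits the literal itself or an m_<name> member reference. A standalone sketch of that check (latencyArg is a made-up helper name; the generator inlines the test):

    // Numeric latencies are emitted verbatim, names become "m_" + name.
    #include <cctype>
    #include <iostream>
    #include <string>

    static std::string latencyArg(const std::string& val)
    {
        bool is_number = !val.empty();
        for (char c : val)
            if (!isdigit(static_cast<unsigned char>(c)))
                is_number = false;
        return is_number ? val : "m_" + val;
    }

    int main()
    {
        std::cout << latencyArg("12") << "\n";                // 12
        std::cout << latencyArg("response_latency") << "\n";  // m_response_latency
        return 0;
    }
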
diff --git a/src/mem/slicc/ast/FormalParamAST.cc b/src/mem/slicc/ast/FormalParamAST.cc
index 4ca2c8978..529811f25 100644
--- a/src/mem/slicc/ast/FormalParamAST.cc
+++ b/src/mem/slicc/ast/FormalParamAST.cc
@@ -38,6 +38,7 @@
#include "mem/slicc/ast/FormalParamAST.hh"
#include "mem/slicc/ast/StatementAST.hh"
+#include "mem/slicc/ast/TypeAST.hh"
#include "mem/slicc/symbols/SymbolTable.hh"
FormalParamAST::~FormalParamAST()
@@ -46,6 +47,16 @@ FormalParamAST::~FormalParamAST()
delete m_type_ast_ptr;
}
+string FormalParamAST::getTypeName() const
+{
+ return m_type_ast_ptr->toString();
+}
+
+Type* FormalParamAST::getType() const
+{
+ return m_type_ast_ptr->lookupType();
+}
+
Type* FormalParamAST::generate(string& code) const
{
string param = "param_" + *m_ident_ptr;
diff --git a/src/mem/slicc/ast/FormalParamAST.hh b/src/mem/slicc/ast/FormalParamAST.hh
index 63d66cc03..ca27948b7 100644
--- a/src/mem/slicc/ast/FormalParamAST.hh
+++ b/src/mem/slicc/ast/FormalParamAST.hh
@@ -40,7 +40,9 @@
#define FORMALPARAMAST_H
#include "mem/slicc/slicc_global.hh"
-#include "mem/slicc/ast/TypeAST.hh"
+#include "mem/slicc/ast/AST.hh"
+
+class TypeAST;
class FormalParamAST : public AST {
@@ -55,6 +57,8 @@ public:
Type* generate(string& code) const;
void print(ostream& out) const { out << "[FormalParamAST: " << *m_ident_ptr << "]"; }
string getName() const { return *m_ident_ptr; }
+ string getTypeName() const;
+ Type* getType() const;
private:
// Private Methods
diff --git a/src/mem/slicc/ast/FuncDeclAST.cc b/src/mem/slicc/ast/FuncDeclAST.cc
index 7fb0e6346..2a0905f06 100644
--- a/src/mem/slicc/ast/FuncDeclAST.cc
+++ b/src/mem/slicc/ast/FuncDeclAST.cc
@@ -37,6 +37,7 @@
*/
#include "mem/slicc/ast/FuncDeclAST.hh"
+#include "mem/slicc/ast/FormalParamAST.hh"
#include "mem/slicc/symbols/SymbolTable.hh"
#include "mem/slicc/main.hh"
diff --git a/src/mem/slicc/ast/FuncDeclAST.hh b/src/mem/slicc/ast/FuncDeclAST.hh
index d60694303..205e71a85 100644
--- a/src/mem/slicc/ast/FuncDeclAST.hh
+++ b/src/mem/slicc/ast/FuncDeclAST.hh
@@ -43,7 +43,8 @@
#include "mem/slicc/ast/DeclAST.hh"
#include "mem/slicc/ast/TypeFieldAST.hh"
#include "mem/slicc/ast/TypeAST.hh"
-#include "mem/slicc/ast/FormalParamAST.hh"
+
+class FormalParamAST;
class FuncDeclAST : public DeclAST {
public:
diff --git a/src/mem/slicc/ast/MachineAST.cc b/src/mem/slicc/ast/MachineAST.cc
index 2096db591..ae8026458 100644
--- a/src/mem/slicc/ast/MachineAST.cc
+++ b/src/mem/slicc/ast/MachineAST.cc
@@ -37,21 +37,20 @@
*/
#include "mem/slicc/ast/MachineAST.hh"
+#include "mem/slicc/ast/FormalParamAST.hh"
#include "mem/slicc/symbols/SymbolTable.hh"
MachineAST::MachineAST(string* ident_ptr,
PairListAST* pairs_ptr,
- Vector<TypeFieldAST*>* config_params_ptr,
- std::vector<std::string*>* latency_vector,
+ Vector<FormalParamAST*>* config_parameters,
DeclListAST* decl_list_ptr)
: DeclAST(pairs_ptr)
{
m_ident_ptr = ident_ptr;
m_pairs_ptr = pairs_ptr;
- m_config_params_ptr = config_params_ptr;
+ m_config_parameters = config_parameters;
m_decl_list_ptr = decl_list_ptr;
- m_latency_vector = latency_vector;
}
MachineAST::~MachineAST()
@@ -69,7 +68,7 @@ void MachineAST::generate()
g_sym_table.pushFrame();
// Create a new machine
- machine_ptr = new StateMachine(*m_ident_ptr, getLocation(), getPairs(), m_latency_vector);
+ machine_ptr = new StateMachine(*m_ident_ptr, getLocation(), getPairs(), m_config_parameters);
g_sym_table.newCurrentMachine(machine_ptr);
// Generate code for all the internal decls
diff --git a/src/mem/slicc/ast/MachineAST.hh b/src/mem/slicc/ast/MachineAST.hh
index 8f83e4cfe..5d1bc2a1c 100644
--- a/src/mem/slicc/ast/MachineAST.hh
+++ b/src/mem/slicc/ast/MachineAST.hh
@@ -45,13 +45,14 @@
#include "mem/slicc/ast/TypeFieldAST.hh"
#include "mem/slicc/symbols/StateMachine.hh"
+class FormalParamAST;
+
class MachineAST : public DeclAST {
public:
// Constructors
MachineAST(string* ident_ptr,
PairListAST* pairs_ptr,
- Vector<TypeFieldAST*>* config_params_ptr,
- std::vector<std::string*>* latency_vector,
+ Vector<FormalParamAST*>* config_parameters,
DeclListAST* decl_list_ptr);
// Destructor
@@ -69,10 +70,9 @@ private:
MachineAST& operator=(const MachineAST& obj);
// Data Members (m_ prefix)
- std::vector<std::string*>* m_latency_vector;
+ Vector<FormalParamAST*>* m_config_parameters;
string* m_ident_ptr;
DeclListAST* m_decl_list_ptr;
- Vector<TypeFieldAST*>* m_config_params_ptr;
PairListAST* m_pairs_ptr;
};
diff --git a/src/mem/slicc/parser/parser.py b/src/mem/slicc/parser/parser.py
index c042ba2c1..7fecfd273 100644
--- a/src/mem/slicc/parser/parser.py
+++ b/src/mem/slicc/parser/parser.py
@@ -76,7 +76,7 @@ tokens = [ 'EQ', 'NE', 'LT', 'GT', 'LE', 'GE',
'NOT', 'AND', 'OR',
'PLUS', 'DASH', 'STAR', 'SLASH',
'DOUBLE_COLON', 'SEMICOLON',
- 'ASSIGN', 'DOT', 'LATENCY',
+ 'ASSIGN', 'DOT',
'IDENT', 'LIT_BOOL', 'FLOATNUMBER', 'NUMBER', 'STRING' ]
tokens += reserved.values()
@@ -197,19 +197,8 @@ def p_decl(p):
| d_func_def"""
p[0] = p[1]
-def p_latency(p):
- """latency : LATENCY"""
- pass
-
-def p_latencies(p):
- """latencies : latency latencies
- | empty"""
- return []
-
def p_d_machine(p):
- """d_machine : MACHINE '(' ident pair_l ')' '{' decl_l '}'
- | MACHINE '(' ident pair_l ')' ':' type_members '{' decl_l '}'
- | MACHINE '(' ident pair_l ')' ':' latencies '{' decl_l '}'"""
+ """d_machine : MACHINE '(' ident pair_l ')' ':' param_l '{' decl_l '}'"""
if len(p) == 9:
decl_l = p[7]
@@ -549,10 +538,11 @@ def scan(filenames):
for filename in filenames:
lex.lexer.lineno = 1
try:
+ print "parsing ",filename
results = yacc.parse(file(filename, 'r').read())
except (TokenError, ParseError), e:
sys.exit("%s: %s:%d" % (e, filename, e.token.lineno))
-
+
for result in results:
result.add(hh, cc)
diff --git a/src/mem/slicc/parser/parser.yy b/src/mem/slicc/parser/parser.yy
index fa5a3b355..c8cef3b21 100644
--- a/src/mem/slicc/parser/parser.yy
+++ b/src/mem/slicc/parser/parser.yy
@@ -111,8 +111,6 @@ extern "C" int yylex();
%type <expr_ptr> expr literal enumeration
%type <expr_vector_ptr> expr_list
-%type <stdstring_vector_ptr> myrule
-
%type <pair_ptr> pair
%type <pair_list_ptr> pair_list pairs
@@ -148,9 +146,7 @@ decls: decl decls { $2->insertAtTop($1); $$ = $2; }
| { $$ = new Vector<DeclAST*>; }
;
-decl: MACHINE_DECL '(' ident pair_list ')' ':' myrule '{' decl_list '}' { $$ = new MachineAST($3, $4, NULL, $7, $9); }
-// | MACHINE_DECL '(' ident pair_list ')' ':' type_members '{' decl_list '}' { $$ = new MachineAST($3, $4, $7, string_vector, $9); }
- | MACHINE_DECL '(' ident pair_list ')' '{' decl_list '}' { $$ = new MachineAST($3, $4, NULL, new vector<string*>(), $7); }
+decl: MACHINE_DECL '(' ident pair_list ')' ':' formal_param_list '{' decl_list '}' { $$ = new MachineAST($3, $4, $7, $9); }
| ACTION_DECL '(' ident pair_list ')' statement_list { $$ = new ActionDeclAST($3, $4, $6); }
| IN_PORT_DECL '(' ident ',' type ',' var pair_list ')' statement_list { $$ = new InPortDeclAST($3, $5, $7, $8, $10); }
| OUT_PORT_DECL '(' ident ',' type ',' var pair_list ')' SEMICOLON { $$ = new OutPortDeclAST($3, $5, $7, $8); }
@@ -336,10 +332,6 @@ var: ident { $$ = new VarExprAST($1); }
field: ident { $$ = $1; }
;
-myrule: myrule IDENT { $1->push_back($2); }
- | IDENT { $$ = new vector<string*>(1, $1); }
- ;
-
%%
extern FILE *yyin;
diff --git a/src/mem/slicc/symbols/StateMachine.cc b/src/mem/slicc/symbols/StateMachine.cc
index 4a9ee3714..7bc84ffe0 100644
--- a/src/mem/slicc/symbols/StateMachine.cc
+++ b/src/mem/slicc/symbols/StateMachine.cc
@@ -43,14 +43,25 @@
#include "mem/slicc/symbols/SymbolTable.hh"
#include "mem/gems_common/util.hh"
#include "mem/gems_common/Vector.hh"
+#include "mem/slicc/ast/FormalParamAST.hh"
#include <set>
-StateMachine::StateMachine(string ident, const Location& location, const Map<string, string>& pairs, std::vector<std::string*>* latency_vector)
+StateMachine::StateMachine(string ident, const Location& location, const Map<string, string>& pairs, Vector<FormalParamAST*>* config_parameters)
: Symbol(ident, location, pairs)
{
m_table_built = false;
- m_latency_vector = *latency_vector;
+ m_config_parameters = config_parameters;
+
+ for (int i=0; i< m_config_parameters->size(); i++) {
+ Var* var = new Var(m_config_parameters->ref(i)->getName(),
+ location,
+ m_config_parameters->ref(i)->getType(),
+ "m_"+m_config_parameters->ref(i)->getName(),
+ Map<string, string>(),
+ this);
+ g_sym_table.registerSym(m_config_parameters->ref(i)->getName(), var);
+ }
}
StateMachine::~StateMachine()
@@ -284,9 +295,8 @@ void StateMachine::printControllerH(ostream& out, string component)
out << "private:" << endl;
//added by SS
// found_to_mem = 0;
- std::vector<std::string*>::const_iterator it;
- for(it=m_latency_vector.begin();it!=m_latency_vector.end();it++){
- out << " int m_" << (*it)->c_str() << ";" << endl;
+ for(int i=0;i<m_config_parameters->size();i++){
+ out << " int m_" << m_config_parameters->ref(i)->getName() << ";" << endl;
}
if (strncmp(component.c_str(), "L1Cache", 7) == 0) {
out << " bool servicing_atomic;" << endl;
@@ -429,41 +439,22 @@ void StateMachine::printControllerC(ostream& out, string component)
out << " else if (argv[i] == \"number_of_TBEs\") " << endl;
out << " m_number_of_TBEs = atoi(argv[i+1].c_str());" << endl;
- if (m_latency_vector.size()) {
- out << " else { " << endl;
- std::vector<std::string*>::const_iterator it;
- for(it=m_latency_vector.begin();it!=m_latency_vector.end();it++) {
- string str = (*it)->c_str();
- str.erase(0,8);
-//convert to lowercase
- size_t i;
- char* strc = (char*) malloc (str.length()+1);
- strc[str.length()]=0;
- for(i=0; i < str.length(); i++) {
- strc[i] = str.at(i);
- strc[i] = tolower(strc[i]);
- }
- str = strc;
- delete strc;
- out << " if (argv[i] == \"" << str << "\"){" << endl;
- if (str == "to_mem_ctrl_latency")
- out << " m_" << (*it)->c_str() << "=" << "atoi(argv[i+1].c_str())+(random() % 5);" << endl;
+ if (m_config_parameters->size()) {
+ for(int i= 0 ; i < m_config_parameters->size(); i++) {
+ out << " else if (argv[i] == \"" << m_config_parameters->ref(i)->getName() << "\")" << endl;
+ if (m_config_parameters->ref(i)->getTypeName() == "int")
+ out << " m_" << m_config_parameters->ref(i)->getName() << "=" << "atoi(argv[i+1].c_str());" << endl;
else
- out << " m_" << (*it)->c_str() << "=" << "atoi(argv[i+1].c_str());" << endl;
-// out << " printf (\"SET m_" << it->c_str() << "= %i \\n \", m_" << it->c_str() << ");" << endl;
- out << " }" << endl;
+ assert(0); // only int parameters are supported right now
+ // if (str == "to_mem_ctrl_latency")
+ // out << " m_" << (*it)->c_str() << "=" << "atoi(argv[i+1].c_str())+(random() % 5);" << endl;
}
- out << " }" << endl;
}
out << " }" << endl;
-
out << " m_net_ptr = net_ptr;" << endl;
out << " m_machineID.type = MachineType_" << component << ";" << endl;
out << " m_machineID.num = m_version;" << endl;
-// out << " printf (\"I set m_LATENCY_ISSUE_LATENCY to %i \\n \", m_LATENCY_ISSUE_LATENCY);" << endl;
-// out << " printf (\"I set m_LATENCY_CACHE_RESPONSE_LATENCY to %i \\n \", m_LATENCY_CACHE_RESPONSE_LATENCY);" << endl;
-
// make configuration array
out << " for (size_t i=0; i < argv.size(); i+=2) {" << endl;
out << " if (argv[i] != \"version\") " << endl;
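
With the per-machine latencies folded into the generic config-parameter list, the generated controller constructor now walks argv as "<name> <value>" pairs and assigns the matching m_<name> field with atoi. A rough sketch of what the emitted parsing amounts to for a hypothetical int parameter named request_latency:

    // Shape of the generated argv parsing (parameter name is hypothetical).
    #include <cstdlib>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        std::vector<std::string> argv_vec = {"version", "0", "request_latency", "2"};
        int m_version = 0, m_request_latency = 0;
        for (size_t i = 0; i + 1 < argv_vec.size(); i += 2) {
            if (argv_vec[i] == "version")
                m_version = atoi(argv_vec[i + 1].c_str());
            else if (argv_vec[i] == "request_latency")
                m_request_latency = atoi(argv_vec[i + 1].c_str());
        }
        std::cout << m_version << " " << m_request_latency << std::endl;  // 0 2
        return 0;
    }
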
@@ -724,25 +715,7 @@ void StateMachine::printControllerC(ostream& out, string component)
string c_code_string = action.lookupPair("c_code");
-/*
- size_t found = c_code_string.find("RubyConfig::get");
-
- if (found!=string::npos){ //found --> replace it with local access
- //if it is related to latency --> replace it
- std::vector<std::string*>::const_iterator it;
- for(it=m_latency_vector.begin();it!=m_latency_vector.end();it++){
- string str = (*it)->c_str();
- str.erase(0,8);
- size_t fd = c_code_string.find(str, found);
- if (fd!=string::npos && (fd == found+15)){
- string rstr = "m_";
- rstr += (*it)->c_str();
- c_code_string.replace(found,15+str.size()+2,rstr);
- break;
- }
- }
- }
-*/
+
// add here:
if (strncmp(component.c_str(), "L1Cache", 7) == 0) {
if (c_code_string.find("writeCallback") != string::npos) {
diff --git a/src/mem/slicc/symbols/StateMachine.hh b/src/mem/slicc/symbols/StateMachine.hh
index 101e38547..f5f3ab073 100644
--- a/src/mem/slicc/symbols/StateMachine.hh
+++ b/src/mem/slicc/symbols/StateMachine.hh
@@ -49,11 +49,12 @@ class State;
class Action;
class Var;
class Func;
+class FormalParamAST;
class StateMachine : public Symbol {
public:
// Constructors
- StateMachine(string ident, const Location& location, const Map<string, string>& pairs, std::vector<std::string*>* latency_vector);
+ StateMachine(string ident, const Location& location, const Map<string, string>& pairs, Vector<FormalParamAST*>* config_parameters);
// Destructor
~StateMachine();
@@ -94,7 +95,7 @@ public:
void print(ostream& out) const { out << "[StateMachine: " << toString() << "]" << endl; }
private:
- std::vector<std::string*> m_latency_vector;
+ Vector<FormalParamAST*>* m_config_parameters;
// Private Methods
void checkForDuplicate(const Symbol& sym) const;