summaryrefslogtreecommitdiff
path: root/src/vendorcode/amd/agesa/Proc/Mem/Main/mu.asm
blob: 63c85b94aa0c137fef54241b0152c75d7f579773 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
;*****************************************************************************
; AMD Generic Encapsulated Software Architecture
;
;  $Workfile:: mu.asm   $ $Revision:: 274#$  $Date: 2010-03-04 06:16:56 +0800 (Thu, 04 Mar 2010) $
; Description: Main memory controller system configuration for AGESA
;
;
;*****************************************************************************
; 
;  Copyright (c) 2011, Advanced Micro Devices, Inc.
;  All rights reserved.
;  
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions are met:
;      * Redistributions of source code must retain the above copyright
;        notice, this list of conditions and the following disclaimer.
;      * Redistributions in binary form must reproduce the above copyright
;        notice, this list of conditions and the following disclaimer in the
;        documentation and/or other materials provided with the distribution.
;      * Neither the name of Advanced Micro Devices, Inc. nor the names of 
;        its contributors may be used to endorse or promote products derived 
;        from this software without specific prior written permission.
;  
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
;  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;  DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. BE LIABLE FOR ANY
;  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
;  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
;  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;  
;*****************************************************************************
;============================================================================


    .XLIST                      ; listing off ...
    .LIST                       ; ... and back on; nothing is included between the two here

    .686p                       ; enable the 686 instruction set, privileged instructions included
    .MODEL FLAT                 ; 32-bit flat memory model
    .CODE
    ASSUME FS: NOTHING          ; drop the assembler's default FS assumption so the explicit
                                ; fs: overrides used by the routines below are accepted

; Define the calling convention used for the C library modules.
; Every procedure in this file is declared "PROC CALLCONV PUBLIC", which
; expands to NEAR distance with the C language type.
;@attention - This should be in a central include file
CALLCONV    EQU     NEAR C


;===============================================================================
;MemUOutPort:
;
; Write one byte to an I/O port.  Both arguments are passed as DWORDs, but the
; instruction issued is "out dx,al": only Port[15:0] selects the port and only
; Value[7:0] is written.
; NOTE: This function will be obsolete in the future.
;
;             In: Port  - port number (low 16 bits are used)
;                 Value - value to be written (low 8 bits are used)
;
;            Out:
;
; All registers preserved.
;===============================================================================
MemUOutPort PROC CALLCONV PUBLIC Port:DWORD, Value:DWORD
    push eax                    ; only EAX and EDX are touched, so save just those
    push edx
    mov eax,Value               ; AL = byte to send
    mov edx,Port                ; DX = destination port
    out dx,al
    pop edx
    pop eax
    ret
MemUOutPort ENDP


;----------------------------------------------------------------------------
; _SFENCE();
;
; Emits the raw opcode bytes 0F AE F8, which is the encoding of the SFENCE
; (store fence) instruction.  The bytes are written with "db" rather than the
; mnemonic, presumably because the mnemonic is not available under the
; processor directive this file is assembled with.
;
_SFENCE macro
    db  0Fh,0AEh,0F8h
    endm

;----------------------------------------------------------------------------
; _MFENCE();
;
; Emits the raw opcode bytes 0F AE F0, which is the encoding of the MFENCE
; (load + store memory fence) instruction.  The bytes are written with "db"
; rather than the mnemonic, presumably because the mnemonic is not available
; under the processor directive this file is assembled with.
;
_MFENCE macro
    db  0Fh,0AEh,0F0h
    endm

;----------------------------------------------------------------------------
; _EXECFENCE();
;
; Writes whatever is currently in AL to I/O port 0EDh.  The routines in this
; file issue it ahead of their FS-relative memory accesses as an execution
; barrier.  No register is modified.
; NOTE(review): port 0EDh is presumably a port with no device side effects on
; the target platform - confirm.
;
_EXECFENCE macro
    out 0EDh,al             ;prevent speculative execution of following instructions
    endm

;===============================================================================
;MemUWriteCachelines:
;   Write a test pattern to DRAM.  The pattern is copied 16 bytes at a time
;   with MOVDQA (load) / MOVNTDQ (non-temporal store), four stores per
;   64-byte cacheline.
;
;             In: Address   - Physical address to be written (accessed
;                             through FS)
;                 Pattern   - pointer to the write pattern.  It is read
;                             through SS with MOVDQA, so it must be 16-byte
;                             aligned and hold ClCount*64 bytes.
;                 ClCount   - number of cachelines to be written; 0 writes
;                             nothing
;            Out:
;
;All general purpose registers and DS are preserved.  XMM0 is overwritten.
;===============================================================================
MemUWriteCachelines PROC CALLCONV PUBLIC Address:DWORD, Pattern:NEAR PTR DWORD, ClCount:WORD
        pushad
        push ds

        push ss                         ;DS = SS, so the default-segment load
        pop ds                          ;from [esi] below reads the pattern via SS
        mov esi,DWORD PTR Pattern       ;esi = source (pattern buffer)
        mov eax,Address                 ;eax = destination offset within FS
        mov edx,16                      ;bytes moved per iteration
        _EXECFENCE
        movzx ecx,ClCount
        shl ecx,2                       ;4 x 16-byte stores per cacheline
        .if (ecx != 0)                  ;LOOP entered with ecx = 0 would iterate 2^32 times
        @@:
            db 66h, 0Fh,6Fh,06           ;MOVDQA xmm0,[esi]
            db 64h, 66h, 0Fh,0E7h,00      ;MOVNTDQ fs:[eax],xmm0  (xmm0 is 128 bits)
            add eax,edx
            add esi,edx
            loop @B
        .endif

        pop ds
        popad
        ret
MemUWriteCachelines ENDP

;===============================================================================
;MemUReadCachelines:
;
; Read a pattern of 72 bit times (per DQ), to test dram functionality.  The
;pattern is a stress pattern which exercises both ISI and crosstalk.  The number
;of cache lines to fill is dependent on DCT width mode and burstlength.
;
; The routine works in two phases:
;   1. Dummy reads, one DWORD per cacheline with an MFENCE after each, to pull
;      the lines into the cache:
;        ClCount > 18          - (ClCount / 16) groups of 16 cachelines
;        ClCount = 1,3,6,9,18  - exactly that many cachelines
;        any other ClCount     - no dummy reads are issued
;   2. A byte-by-byte copy of ClCount*64 bytes from fs:[Address] to
;      ss:[Buffer].
;
;             In: Buffer    - pointer to a buffer where read data will be stored
;                             (written through SS; needs ClCount*64 bytes)
;                 Address   - Physical address to be read (accessed through FS)
;                 ClCount   - number of cachelines to be read; 0 copies nothing
;            Out:
;
;All registers preserved.
;===============================================================================
MemUReadCachelines PROC CALLCONV PUBLIC Buffer:NEAR PTR DWORD, Address:DWORD, ClCount:WORD
LOCAL Count:BYTE
        pushad
        ; First, issue continuous dummy reads to fill up the cache
        mov eax,Address
        .if (ClCount > 18)
            mov cx,ClCount
            shr cx,4                            ;number of 16-cacheline groups
            mov Count,cl
            .while (Count != 0)
                push eax                        ;eax is reused as the read target below
                mov edi,eax
                add edi,128                     ;bias value (to account for signed displacement)
                                                ;clflush opcode=0F AE /7
                mov esi,edi
                mov ebx,esi
                mov ecx,esi
                mov edx,esi
                add edi,4*64                    ;TestAddr+4 cache lines
                add ebx,8*64                    ;TestAddr+8 cache lines
                add ecx,12*64                   ;TestAddr+12 cache lines
                add edx,16*64                   ;TestAddr+16 cache lines
                sub edx,128
                _EXECFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
                mov eax,fs:[esi-64]             ;TestAddr+1 cache line
                _MFENCE
                mov eax,fs:[esi]                ;TestAddr+2 cache lines
                _MFENCE
                mov eax,fs:[esi+64]             ;TestAddr+3 cache lines
                _MFENCE
                mov eax,fs:[edi-128]            ;TestAddr+4 cache lines
                _MFENCE
                mov eax,fs:[edi-64]             ;TestAddr+5 cache lines
                _MFENCE
                mov eax,fs:[edi]                ;TestAddr+6 cache lines
                _MFENCE
                mov eax,fs:[edi+64]             ;TestAddr+7 cache lines
                _MFENCE
                mov eax,fs:[ebx-128]            ;TestAddr+8 cache lines
                _MFENCE
                mov eax,fs:[ebx-64]             ;TestAddr+9 cache lines
                _MFENCE
                mov eax,fs:[ebx]                ;TestAddr+10 cache lines
                _MFENCE
                mov eax,fs:[ebx+64]             ;TestAddr+11 cache lines
                _MFENCE
                mov eax,fs:[ecx-128]            ;TestAddr+12 cache lines
                _MFENCE
                mov eax,fs:[ecx-64]             ;TestAddr+13 cache lines
                _MFENCE
                mov eax,fs:[ecx]                ;TestAddr+14 cache lines
                _MFENCE
                mov eax,fs:[ecx+64]             ;TestAddr+15 cache lines
                _MFENCE
                pop eax
                add eax,(16*64)                 ;Next 16CL
                dec Count
            .endw
        .else
            mov edi,eax
            add edi,128                     ;bias value (to account for signed displacement)
                                            ;clflush opcode=0F AE /7
            mov esi,edi
            mov ebx,esi
            mov ecx,esi
            mov edx,esi
            add edi,4*64                    ;TestAddr+4 cache lines
            add ebx,8*64                    ;TestAddr+8 cache lines
            add ecx,12*64                   ;TestAddr+12 cache lines
            add edx,16*64                   ;TestAddr+16 cache lines
            sub edx,128
            .if(ClCount == 1)
                _MFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
            .elseif(ClCount == 3)
                _EXECFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
                mov eax,fs:[esi-64]             ;TestAddr+1 cache line
                _MFENCE
                mov eax,fs:[esi]                ;TestAddr+2 cache lines
                _MFENCE
            .elseif(ClCount == 6)
                _EXECFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
                mov eax,fs:[esi-64]             ;TestAddr+1 cache line
                _MFENCE
                mov eax,fs:[esi]                ;TestAddr+2 cache lines
                _MFENCE
                mov eax,fs:[esi+64]             ;TestAddr+3 cache lines
                _MFENCE
                mov eax,fs:[edi-128]            ;TestAddr+4 cache lines
                _MFENCE
                mov eax,fs:[edi-64]             ;TestAddr+5 cache lines
                _MFENCE
            .elseif(ClCount == 9)
                _EXECFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
                mov eax,fs:[esi-64]             ;TestAddr+1 cache line
                _MFENCE
                mov eax,fs:[esi]                ;TestAddr+2 cache lines
                _MFENCE
                mov eax,fs:[esi+64]             ;TestAddr+3 cache lines
                _MFENCE
                mov eax,fs:[edi-128]            ;TestAddr+4 cache lines
                _MFENCE
                mov eax,fs:[edi-64]             ;TestAddr+5 cache lines
                _MFENCE
                mov eax,fs:[edi]                ;TestAddr+6 cache lines
                _MFENCE
                mov eax,fs:[edi+64]             ;TestAddr+7 cache lines
                _MFENCE
                mov eax,fs:[ebx-128]            ;TestAddr+8 cache lines
                _MFENCE
            .elseif(ClCount == 18)
                _EXECFENCE
                mov eax,fs:[esi-128]            ;TestAddr
                _MFENCE
                mov eax,fs:[esi-64]             ;TestAddr+1 cache line
                _MFENCE
                mov eax,fs:[esi]                ;TestAddr+2 cache lines
                _MFENCE
                mov eax,fs:[esi+64]             ;TestAddr+3 cache lines
                _MFENCE
                mov eax,fs:[edi-128]            ;TestAddr+4 cache lines
                _MFENCE
                mov eax,fs:[edi-64]             ;TestAddr+5 cache lines
                _MFENCE
                mov eax,fs:[edi]                ;TestAddr+6 cache lines
                _MFENCE
                mov eax,fs:[edi+64]             ;TestAddr+7 cache lines
                _MFENCE
                mov eax,fs:[ebx-128]            ;TestAddr+8 cache lines
                _MFENCE
                mov eax,fs:[ebx-64]             ;TestAddr+9 cache lines
                _MFENCE
                mov eax,fs:[ebx]                ;TestAddr+10 cache lines
                _MFENCE
                mov eax,fs:[ebx+64]             ;TestAddr+11 cache lines
                _MFENCE
                mov eax,fs:[ecx-128]            ;TestAddr+12 cache lines
                _MFENCE
                mov eax,fs:[ecx-64]             ;TestAddr+13 cache lines
                _MFENCE
                mov eax,fs:[ecx]                ;TestAddr+14 cache lines
                _MFENCE
                mov eax,fs:[ecx+64]             ;TestAddr+15 cache lines
                _MFENCE
                mov eax,fs:[edx]                ;TestAddr+16 cache lines
                _MFENCE
                mov eax,fs:[edx+64]             ;TestAddr+17 cache lines
                _MFENCE
            .endif
        .endif
        _MFENCE

        ; Then, copy data to buffer
        mov esi,Address                 ;esi = source offset within FS
        mov edi,DWORD PTR Buffer        ;edi = destination offset within SS
        movzx ecx,ClCount
        shl ecx,6                       ;64 bytes per cacheline
        .if (ecx != 0)                  ;LOOP entered with ecx = 0 would iterate 2^32 times
        @@:
            mov al,fs:[esi]
            mov ss:[edi],al
            inc esi
            inc edi
            loop @B
        .endif

        popad
        ret
MemUReadCachelines ENDP

;===============================================================================
;MemUDummyCLRead:
;
;   Perform a single cache line read from a given physical address.  A store
;   fence is issued first, then one byte is read through FS and discarded.
;
;             In: Address   - Physical address to be read
;            Out:
;
;All registers preserved.
;===============================================================================
MemUDummyCLRead PROC CALLCONV PUBLIC Address:DWORD
    _SFENCE
    push eax                    ; EAX is the only register used
    mov eax,Address
    mov al,fs:[eax]             ; the read itself is the point; the value is dropped
    pop eax
    ret
MemUDummyCLRead ENDP

;===============================================================================
;MemUFlushPattern:
;
; Flush a pattern of 72 bit times (per DQ) from cache.  This procedure is used
;to ensure cache miss on the next read training.
;
; One CLFLUSH is issued per 64-byte cacheline, each bracketed by MFENCE.
;
;             In: Address   - Physical address to be flushed (accessed
;                             through FS)
;                 ClCount   - number of cachelines to be flushed; 0 flushes
;                             nothing
;            Out:
;
;All registers preserved.
;===============================================================================
MemUFlushPattern PROC CALLCONV PUBLIC Address:DWORD, ClCount:WORD
        pushad
        mov edi,Address
        movzx ecx,ClCount
        .if (ecx != 0)                  ; LOOP entered with ecx = 0 would iterate 2^32 times
        @@:
            _MFENCE                     ; Force strong ordering of clflush
            db  64h,0Fh,0AEh,3Fh        ; MemUClFlush fs:[edi]
            _MFENCE
            add edi,64                  ; next cacheline
            loop @B
        .endif
        popad
        ret
MemUFlushPattern ENDP


;===============================================================================
;MemUGetWrLvNblErr:
;   Read ClCount number of cachelines then return the bitmap that indicates
;   the write leveling result of each byte lane.
;
;   How the bitmap is built:
;     - Of every 16 bytes read, the first 8 are sampled and the next 8 are
;       skipped, giving 4 samples per cacheline.
;     - Each sampled byte feeds two counters: ZeroCount[2*i] counts how often
;       the low nibble of byte i was zero, ZeroCount[2*i+1] does the same for
;       the high nibble (i = 0..7, 16 counters in total).
;     - Bit n of the result is set when ZeroCount[n] < 2*ClCount, i.e. the
;       nibble was zero in fewer than half of its 4*ClCount samples.
;
;   IN:     ErrBitmap - pointer to the location that receives the WL result.
;                       Only the low 16 bits (one bit per nibble) are written.
;           Address   - Physical address to be sampled (accessed through FS)
;           ClCount   - number of cachelines to be read; with 0 nothing is
;                       read and the result is 0
;
;   OUT:    ErrBitmap - WL result
;
;   NOTE(review): the counters live on the stack but are addressed through the
;   default DS/ES segments, so DS and ES are assumed to address the stack, and
;   the direction flag is assumed clear - confirm against the callers.
;
;All registers preserved
;===============================================================================
MemUGetWrLvNblErr PROC CALLCONV PUBLIC ErrBitmap:NEAR PTR DWORD, Address:DWORD, ClCount:WORD
LOCAL ZeroCount[32]:WORD

        pushad
        mov esi,Address
        _EXECFENCE
    ;Cache fill
        movzx ecx,ClCount
        .if (ecx != 0)                  ;LOOP entered with ecx = 0 would iterate 2^32 times
        @@:
            mov eax,fs:[esi]
            add esi,64
            loop @B
        .endif
        _MFENCE

    ; Then, count the number of 0's
        ;push es
        ;push ss
        ;pop es
        lea edi,ZeroCount
        mov ecx,SIZEOF ZeroCount        ;load all of ECX: REP STOSB counts with the full register
        mov al,0
        rep stosb                       ;clear every counter
        ;pop es

        mov esi,Address
        lea edi,ZeroCount
        mov cx,ClCount
        shl cx,6                        ;cx = bytes remaining (64 per cacheline)
        .while(cx > 0)
            mov al,fs:[esi]
            test al,00Fh        ;check lower nibble
            .if(ZERO?)
                inc WORD PTR [edi]
            .endif
            add edi,2
            test al,0F0h        ;check upper nibble
            .if(ZERO?)
                inc WORD PTR [edi]
            .endif
            add edi,2
            inc esi
            dec cx
            test cx,07h         ;8 bytes sampled?
            .if(ZERO?)
                sub edi,(16*2)  ;rewind to the first counter
                sub cx,8        ;skip the following 8 bytes
                add esi,8
            .endif
        .endw

    ; Then, average and compress data to error bits
        lea esi,ZeroCount
        mov dx,ClCount
        shl dx,1                        ;threshold = half of the 4*ClCount samples
        xor eax,eax                     ;eax = result bitmap
        xor ecx,ecx                     ;ecx = nibble index / bit number
        .while(cl<16)
            .if(WORD PTR [esi] < dx)
                bts eax,ecx
            .endif
            add esi,2
            inc cl
        .endw
        mov edx,DWORD PTR ErrBitmap     ;full 32-bit pointer (a 16-bit load would truncate it)
        mov [edx], ax

        popad
        ret
MemUGetWrLvNblErr ENDP

;===============================================================================
;AlignPointerTo16Byte:
;   Modifies BufferPtr to be 16 byte aligned.  The new value is
;   (BufferPtr + 16) with the low four bits cleared, so the pointer always
;   moves forward: by 16 when it was already aligned, by 1..15 otherwise.
;
;             In: BufferPtrPtr - Pointer to buffer pointer
;            Out: BufferPtrPtr - Pointer to buffer pointer that has been 16 byte aligned
;
;All registers preserved.
;===============================================================================
AlignPointerTo16Byte PROC CALLCONV PUBLIC BufferPtrPtr:NEAR PTR DWORD
        push eax
        push ebx
        mov ebx, BufferPtrPtr           ; ebx -> caller's buffer pointer
        mov eax, [ebx]
        add eax, 16                     ; step past the current 16-byte slot ...
        and eax, 0FFFFFFF0h             ; ... and drop back to its boundary
        mov [ebx], eax
        pop ebx
        pop eax
        ret
AlignPointerTo16Byte ENDP

;===============================================================================
;MemUMFenceInstr:
;   Serialize instruction
;
;   C-callable wrapper that executes a single MFENCE (emitted through the
;   _MFENCE macro) and returns.  It takes no arguments and returns nothing.
;
;             In:
;            Out:
;
;All registers preserved.
;===============================================================================
MemUMFenceInstr PROC CALLCONV PUBLIC
        _MFENCE
        ret
MemUMFenceInstr ENDP

    END