summaryrefslogtreecommitdiff
path: root/ReferenceCode/Haswell/CpuInit/Dxe/x64/Cpu.asm
blob: 9b28b544db27d4f6f3f1131c25e4a447c6443e8b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
;
; This file contains an 'Intel Peripheral Driver' and is      
; licensed for Intel CPUs and chipsets under the terms of your
; license agreement with Intel or your vendor.  This file may 
; be modified by the user, subject to additional terms of the 
; license agreement                                           
;
      TITLE   Cpu.asm: Assembly code for the IA-32 resources

;-------------------------------------------------------------------------------
;
; Copyright (c) 2005 -2012 Intel Corporation. All rights reserved
; This software and associated documentation (if any) is furnished
; under a license and may only be used or copied in accordance
; with the terms of the license. Except as permitted by such
; license, no part of this software or documentation may be
; reproduced, stored in a retrieval system, or transmitted in any
; form or by any means without the express written consent of
; Intel Corporation.
;
;
; Module Name:
;
;   Cpu.asm
; 
; Abstract:
; 
;
;-------------------------------------------------------------------------------

text    SEGMENT

;
; mErrorCodeFlag is defined outside this file. CommonInterruptEntry tests
; bit N of it (for vector N < 32) to decide whether a dummy error code has
; to be pushed for that vector.
;
EXTRN mErrorCodeFlag:DWORD ; Error code flags for exceptions

;
; Written by InitializeExternalVectorTablePtr and indexed by vector number
; (8 bytes per entry) in CommonInterruptEntry.
; NOTE(review): this variable lives inside the 'text' segment and is written
; at run time, so the segment is assumed to be writable -- confirm.
;
ExternalVectorTablePtr QWORD 0 ; point to the external interrupt vector table

;
; Float control word initial value: 
; all exceptions masked, double-extended-precision, round-to-nearest
;
mFpuControlWord       DW      037Fh
;
; Multimedia-extensions control word:
; all exceptions masked, round-to-nearest, flush to zero for masked underflow
;
; NOTE(review): 01F80h sets only bits 7-12 (the six exception masks); bit 15
; (flush-to-zero) is clear in this value. The "flush to zero" wording above
; appears to be copied from the UEFI specification text -- confirm intent.
;
mMmxControlWord       DD      01F80h 


;------------------------------------------------------------------------------
; InitializeExternalVectorTablePtr
;
;   Stores the caller-supplied value in ExternalVectorTablePtr, the pointer
;   that CommonInterruptEntry later indexes by vector number.
;
;   In:   RCX = value to record
;   Out:  nothing; no registers or flags are modified
;------------------------------------------------------------------------------
InitializeExternalVectorTablePtr PROC PUBLIC
        mov     qword ptr [ExternalVectorTablePtr], rcx   ; remember the table address
        ret
InitializeExternalVectorTablePtr ENDP
;
;
;
;------------------------------------------------------------------------------
;  Generic IDT Vector Handlers for the Host. They are all the same so they
;  will compress really well.
;
;  By knowing the return address for Vector 00 you can calculate the
;  vector number by looking at the call CommonInterruptEntry return address.
;  (return address - (AsmIdtVector00 + 5))/8 == IDT index
;
;------------------------------------------------------------------------------

;
; Table of 256 identical entry stubs, one per IDT vector, starting at the
; 8-byte aligned public label AsmIdtVector00.
;
; Each stub is exactly 8 bytes:
;   5 bytes  call CommonInterruptEntry    (pushes the address of the word below)
;   2 bytes  vector number
;   1 byte   nop                          (pads the stub to 8 bytes)
;
; At the 'dw', $ equals AsmIdtVector00 + 8 * N + 5 for stub N, so the
; expression below evaluates to N. CommonInterruptEntry reads this word
; through the return address left on the stack by the call; it never
; returns to the stub itself.
;
ALIGN   8

PUBLIC	AsmIdtVector00
        
AsmIdtVector00 LABEL BYTE
REPEAT  256
        call  CommonInterruptEntry
        dw ( $ - AsmIdtVector00 - 5 ) / 8 ; vector number
        nop
ENDM


;---------------------------------------;
; CommonInterruptEntry                  ;
;---------------------------------------;
; The following stack layout is used by the common interrupt routine.

;
; +---------------------+ <-- 16-byte aligned ensured by processor
; +    Old SS           +
; +---------------------+
; +    Old RSP          +
; +---------------------+
; +    RFlags           +
; +---------------------+
; +    CS               +
; +---------------------+
; +    RIP              +
; +---------------------+
; +    Error Code       +
; +---------------------+
; + RCX / Vector Number +
; +---------------------+
; +    RBP              +
; +---------------------+ <-- RBP, 16-byte aligned
;

;
; CommonInterruptEntry
;
; Common body shared by all 256 AsmIdtVectorNN stubs. It is reached by a
; 'call' from a stub, so on entry [rsp] holds the address of that stub's
; 16-bit vector number.
;
; The routine:
;   1. normalises the interrupt frame (inserts a zero error code when the
;      vector does not supply one),
;   2. builds a context record on the stack, field by field, in the order
;      given by the ';; UINT64 ...' comments below (pushed last-field-first,
;      so the record reads in ascending address order from RSP),
;   3. if ExternalVectorTablePtr[vector] is not NULL, calls it with
;      RCX = vector number and RDX = address of the context record,
;   4. reloads the machine state from the (possibly handler-modified) record
;      and returns with iretq.
;
; Not reloaded from the record on the way out: GS, FS, the saved RBP slot,
; DR6, GDTR, IDTR, LDTR and TR.
;
CommonInterruptEntry PROC  PUBLIC  
  cli
  cld
  ;
  ; All interrupt handlers are invoked through interrupt gates, so
  ; IF flag automatically cleared at the entry point
  ;
  ;
  ; Calculate vector number
  ;
  xchg    rcx, [rsp] ; get the return address of call, actually, it is the address of vector number.
                     ; The interrupted code's RCX is parked in that stack slot by the same instruction.
  movzx   ecx, word ptr [rcx]        ; ecx = vector number stored in the stub
  cmp     ecx, 32         ; Intel reserved vector for exceptions?
  jae     NoErrorCode     ; vectors >= 32 never carry an error code
  bt      mErrorCodeFlag, ecx  ; CF = bit 'vector' of mErrorCodeFlag
  jc      @F              ; bit set: an error code is already on the stack

NoErrorCode:
  ;
  ; Push a dummy error code on the stack
  ; to maintain coherent stack map
  ;
  push    [rsp]                      ; move the saved-RCX slot down by one qword ...
  mov     qword ptr [rsp + 8], 0     ; ... and turn its old location into a zero error code
@@:       
  push    rbp
  mov     rbp, rsp
  ;
  ; From here on the frame is addressed through RBP:
  ;   [rbp +  0] saved RBP        [rbp + 32] CS
  ;   [rbp +  8] RCX / vector     [rbp + 40] RFlags
  ;   [rbp + 16] error code       [rbp + 48] old RSP
  ;   [rbp + 24] RIP              [rbp + 56] old SS
  ;

  ;
  ; Since here the stack pointer is 16-byte aligned, so
  ; EFI_FX_SAVE_STATE_X64 of EFI_SYSTEM_CONTEXT_x64
  ; is 16-byte aligned
  ;       

;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
  push r15
  push r14
  push r13
  push r12
  push r11
  push r10
  push r9
  push r8
  push rax
  push qword ptr [rbp + 8]   ; RCX (interrupted value, parked there by the xchg above)
  push rdx
  push rbx
  push qword ptr [rbp + 48]  ; RSP (interrupted value, from the interrupt frame)
  push qword ptr [rbp]       ; RBP (interrupted value, saved by 'push rbp' above)
  push rsi
  push rdi

;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;  ensure high 16 bits of each is zero
  movzx   rax, word ptr [rbp + 56]
  push    rax                      ; for ss
  movzx   rax, word ptr [rbp + 32]
  push    rax                      ; for cs
  mov     rax, ds
  push    rax
  mov     rax, es
  push    rax
  mov     rax, fs
  push    rax
  mov     rax, gs
  push    rax

  mov     [rbp + 8], rcx               ; save vector number (the original RCX is already in the record)

;; UINT64  Rip;
  push    qword ptr [rbp + 24]

;; UINT64  Gdtr[2], Idtr[2];
  sub     rsp, 16
  sidt    fword ptr [rsp]
  sub     rsp, 16
  sgdt    fword ptr [rsp]

;; UINT64  Ldtr, Tr;
  xor     rax, rax                 ; clear RAX so only the 16-bit selector is non-zero
  str     ax
  push    rax
  sldt    ax
  push    rax

;; UINT64  RFlags;
  push    qword ptr [rbp + 40]

;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
  mov     rax, cr8
  push    rax
  mov     rax, cr4
  or      rax, 208h                ; set CR4 bits 3 and 9 (bit 9 is needed for the fxsave/fxrstor below)
  mov     cr4, rax
  push    rax                      ; the record holds CR4 *after* the OR, so the restore keeps both bits set
  mov     rax, cr3
  push    rax
  mov     rax, cr2
  push    rax
  xor     rax, rax
  push    rax                      ; placeholder for Cr1, always zero
  mov     rax, cr0
  push    rax

;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
  mov     rax, dr7
  push    rax
;; clear Dr7 while executing debugger itself
  xor     rax, rax
  mov     dr7, rax

  mov     rax, dr6
  push    rax
;; ensure all status bits in dr6 are clear...
  xor     rax, rax
  mov     dr6, rax

  mov     rax, dr3
  push    rax
  mov     rax, dr2
  push    rax
  mov     rax, dr1
  push    rax
  mov     rax, dr0
  push    rax

;; FX_SAVE_STATE_X64 FxSaveState;

  sub rsp, 512
  mov rdi, rsp
  db 0fh, 0aeh, 00000111y ;fxsave [rdi]

;; UINT32  ExceptionData;
  push    qword ptr [rbp + 16]     ; error code (real or the dummy zero)

;; call into exception handler
  mov     rcx, [rbp + 8]               ; rcx = vector number = first argument
  mov     rax, ExternalVectorTablePtr  ; get the interrupt vectors base
  mov     rax, [rax + rcx * 8]         ; rax = handler registered for this vector
  or      rax, rax                        ; NULL?

  je    nonNullValue;  taken when the entry IS NULL (no handler): skip the call

;; Prepare parameter and call
;  mov     rcx, [rbp + 8]
  mov     rdx, rsp                 ; rdx = address of the context record = second argument
  ;
  ; Per X64 calling convention, allocate maximum parameter stack space
  ; and make sure RSP is 16-byte aligned
  ;
  ; (856 bytes have been pushed below the 16-byte aligned RBP at this point,
  ;  so the extra 8 on top of the 32 bytes of parameter space restores
  ;  16-byte alignment.)
  ;
  sub     rsp, 4 * 8 + 8
  call    rax
  add     rsp, 4 * 8 + 8

nonNullValue:
  cli                              ; interrupts are disabled again for the restore sequence
;; UINT64  ExceptionData;
  add     rsp, 8

;; FX_SAVE_STATE_X64 FxSaveState;

  mov rsi, rsp
  db 0fh, 0aeh, 00001110y ; fxrstor [rsi]
  add rsp, 512

;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
  pop     rax
  mov     dr0, rax
  pop     rax
  mov     dr1, rax
  pop     rax
  mov     dr2, rax
  pop     rax
  mov     dr3, rax
;; skip restore of dr6.  We cleared dr6 during the context save.
  add     rsp, 8
  pop     rax
  mov     dr7, rax

;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
  pop     rax
  mov     cr0, rax
  add     rsp, 8   ; not for Cr1
  pop     rax
  mov     cr2, rax
  pop     rax
  mov     cr3, rax
  pop     rax
  mov     cr4, rax
  pop     rax
  mov     cr8, rax

;; UINT64  RFlags;
  pop     qword ptr [rbp + 40]     ; written back into the interrupt frame; takes effect at iretq

;; UINT64  Ldtr, Tr;
;; UINT64  Gdtr[2], Idtr[2];
;; Best not let anyone mess with these particular registers...
  add     rsp, 48                  ; 2 * 8 (Ldtr, Tr) + 2 * 16 (Gdtr, Idtr)

;; UINT64  Rip;
  pop     qword ptr [rbp + 24]     ; written back into the interrupt frame; takes effect at iretq

;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;
  pop     rax
  ; mov     gs, rax ; not for gs
  pop     rax
  ; mov     fs, rax ; not for fs
  ; (X64 will not use fs and gs, so we do not restore it)
  pop     rax
  mov     es, rax
  pop     rax
  mov     ds, rax
  pop     qword ptr [rbp + 32]  ; for cs
  pop     qword ptr [rbp + 56]  ; for ss

;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
  pop     rdi
  pop     rsi
  add     rsp, 8               ; not for rbp
  pop     qword ptr [rbp + 48] ; for rsp
  pop     rbx
  pop     rdx
  pop     rcx
  pop     rax
  pop     r8
  pop     r9
  pop     r10
  pop     r11
  pop     r12
  pop     r13
  pop     r14
  pop     r15

  mov     rsp, rbp             ; drop anything left of the record
  pop     rbp                  ; RBP comes from the frame slot, not from the record
  add     rsp, 16              ; discard the vector-number and error-code slots
  iretq

CommonInterruptEntry ENDP


;
; LongMode
;
; As written, this procedure never gets past its first instruction: the
; public label in_long_mode is the target of an unconditional jump to
; itself ("Debug Stop"). Every instruction after that jump is unreachable
; unless something outside this file (for example a debugger) moves RIP
; past the loop.
;
; NOTE(review): the unreachable tail reads its arguments relative to RBP and
; looks like leftover bring-up / assembler-exercise code -- confirm whether
; it is still needed.
;
LongMode PROC  PUBLIC

in_long_mode::
    ;
    ; Debug Stop
    ;
    jmp   in_long_mode        ; spins forever; nothing below executes

    ;
    ; We're in long mode, so marshall the arguments to call the
    ; passed in function pointers
    ; Recall
    ;         [ebp][10h] = HobStart
    ;         [ebp][18h] = Stack
    ;         [ebp][20h] = PpisNeededByDxeIplEntryPoint <--- Call this first (for each call, pass HOB pointer)
    ;         [ebp][28h] = DxeCoreEntryPoint            <--- Call this second
    ;
    mov rbx, [rbp+18h]        ; Setup the stack
    mov rsp, rbx              ; On a new stack now

    mov rcx, [rbp+10h]        ; Pass Hob Start in RCX
    mov rax, [rbp+20h]        ; Get the function pointer for 
                              ; PpisNeededByDxeIplEntryPoint into RAX
    call fword ptr [rax]      ; Make the call into PpisNeededByDxeIplEntryPoint
                              ; (far call through the memory operand at [rax])

    mov ecx, [rbp+10h]        ; Pass Hob Start in RCX
                              ; NOTE(review): loads only the low 32 bits, unlike the
                              ; 64-bit load used for the first call above
    mov eax, [rbp+28h]        ; Get the function pointer for 
                              ; DxeCoreEntryPoint into EAX
                              ; NOTE(review): 32-bit load of a value described as a pointer
    call fword ptr [rax]      ; Make the call into Dxe Core

    ;
    ; NOTE(review): the instructions from here to no_long_mode reuse
    ; CommonInterruptEntry as a call target and as a descriptor-table
    ; operand, and call through whatever RAX holds; their purpose cannot be
    ; determined from this file.
    ;
    call CommonInterruptEntry

    mov rdi, CommonInterruptEntry 

    lgdt  fword ptr [rdi]

    lidt  fword ptr [rdi]

    call near ptr [rax]      ; Make the call into PpisNeededByDxeIplEntryPoint

    call rax

    ;
    ; Should never get here.
    ;
no_long_mode:
    jmp   no_long_mode
    ;
    ; WE SHOULD NEVER GET HERE!!!!!!!!!!!!!
    ;
LongMode endp

;------------------------------------------------------------------------------
; EnableMce
;
;   Read-modify-write of CR4 that turns on bit 6 (mask 40h) on the calling
;   processor and leaves every other CR4 bit as it was.
;
;   In:   nothing
;   Out:  RAX = the CR4 value that was written (incidental)
;------------------------------------------------------------------------------
CR4_MCE_MASK    EQU 40h                 ; CR4 bit 6

EnableMce  proc  public
        mov     rax, cr4
        or      rax, CR4_MCE_MASK
        mov     cr4, rax
        ret
EnableMce  endp

;------------------------------------------------------------------------------
; MpMtrrSynchUpEntry
;
;   First half of the MTRR synchronisation bracket (MpMtrrSynchUpExit is the
;   second half):
;     1. CR0: clear NW (bit 29), set CD (bit 30)  -> no-fill cache mode
;     2. wbinvd                                   -> write back and invalidate caches
;     3. CR4: clear PGE (bit 7)
;     4. reload CR3                               -> flush the TLBs
;
;   In:   nothing
;   Out:  RAX = CR4 as it was before PGE was cleared; the caller hands this
;               value to MpMtrrSynchUpExit
;   Also leaves the same value in RDX.
;------------------------------------------------------------------------------
MTRR_ENTRY_CR0_DROP_NW   EQU 0DFFFFFFFh ; AND mask: clears CR0 bit 29 (NW)
MTRR_ENTRY_CR0_SET_CD    EQU 040000000h ; OR mask:  sets   CR0 bit 30 (CD)
MTRR_ENTRY_CR4_DROP_PGE  EQU 0FFFFFF7Fh ; AND mask: clears CR4 bit 7  (PGE)

MpMtrrSynchUpEntry  PROC        PUBLIC
        ; -- step 1: no-fill cache mode (CD = 1, NW = 0)
        mov     rax, cr0
        and     rax, MTRR_ENTRY_CR0_DROP_NW
        or      rax, MTRR_ENTRY_CR0_SET_CD
        mov     cr0, rax

        ; -- step 2: flush the caches
        wbinvd

        ; -- step 3: remember CR4, then switch global pages off
        mov     rdx, cr4                ; rdx = CR4 on entry (becomes the return value)
        mov     rax, rdx
        and     rax, MTRR_ENTRY_CR4_DROP_PGE
        mov     cr4, rax

        ; -- step 4: rewrite CR3 with itself to flush all TLBs
        mov     rax, cr3
        mov     cr3, rax

        mov     rax, rdx                ; return the CR4 value seen on entry
        ret

MpMtrrSynchUpEntry  ENDP
    
;------------------------------------------------------------------------------
; MpMtrrSynchUpExit
;
;   Second half of the MTRR synchronisation bracket opened by
;   MpMtrrSynchUpEntry:
;     1. reload CR3                       -> flush the TLBs once more
;     2. CR0: clear CD (bit 30) and NW (bit 29) -> normal caching
;     3. CR4 := RCX                       -> puts PGE back if it was set before
;
;   In:   RCX = CR4 value to install (the value returned by
;               MpMtrrSynchUpEntry)
;   Out:  nothing meaningful; RAX is left holding the CR0 value written
;------------------------------------------------------------------------------
MTRR_EXIT_CR0_DROP_CD_NW  EQU 09FFFFFFFh ; AND mask: clears CR0 bits 30 (CD) and 29 (NW)

MpMtrrSynchUpExit  PROC        PUBLIC
        ; -- step 1: second TLB flush
        mov     rax, cr3
        mov     cr3, rax

        ; -- step 2: back to normal caching mode (CD = NW = 0)
        mov     rax, cr0
        and     rax, MTRR_EXIT_CR0_DROP_CD_NW
        mov     cr0, rax

        ; -- step 3: install the caller's CR4 image
        mov     cr4, rcx
        ret

MpMtrrSynchUpExit  ENDP

;
; Initializes floating point units for requirement of UEFI specification.
;
; This function initializes floating-point control word to 0x037F (all exceptions
; masked,double-extended-precision, round-to-nearest) and multimedia-extensions control word
; (if supported) to 0x1F80 (all exceptions masked, round-to-nearest, flush to zero
; for masked underflow).
;
;------------------------------------------------------------------------------
; CpuInitFloatPointUnit
;
;   1. Resets the x87 unit (finit) and loads its control word from
;      mFpuControlWord.
;   2. Sets CR4 bit 9 (OSFXSR).
;   3. Loads MXCSR from mMmxControlWord. This has to come after step 2.
;
;   In:   nothing
;   Out:  nothing meaningful; RAX is left holding the CR4 value written
;------------------------------------------------------------------------------
FPU_INIT_CR4_OSFXSR  EQU 200h           ; CR4 bit 9

CpuInitFloatPointUnit  PROC        PUBLIC
        ;
        ; x87: 'finit' is emitted as raw opcode bytes so that 64-bit
        ; assemblers lacking the mnemonic still accept this file.
        ;
        DB      9Bh, 0DBh, 0E3h
        fldcw   mFpuControlWord

        ;
        ; Turn on OSFXSR in CR4.
        ;
        mov     rax, cr4
        or      rax, FPU_INIT_CR4_OSFXSR
        mov     cr4, rax

        ;
        ; Multimedia-extensions control word.
        ;
        ldmxcsr mMmxControlWord
        ret
CpuInitFloatPointUnit  ENDP

;------------------------------------------------------------------------------
; CpuDisableInterrupt
;
;   Executes 'cli' on the calling processor and returns.
;
;   In:   nothing
;   Out:  nothing; no general-purpose register is modified
;------------------------------------------------------------------------------
CpuDisableInterrupt PROC    PUBLIC
        cli                             ; clear the interrupt flag
        ret
CpuDisableInterrupt  ENDP

;------------------------------------------------------------------------------
; CpuEnableInterrupt
;
;   Executes 'sti' on the calling processor and returns.
;
;   In:   nothing
;   Out:  nothing; no general-purpose register is modified
;------------------------------------------------------------------------------
CpuEnableInterrupt PROC    PUBLIC
        sti                             ; set the interrupt flag
        ret
CpuEnableInterrupt  ENDP

MAX_NR_BUS                    EQU 0FFh
CSR_DESIRED_CORES             EQU 080h  ; CSR D0:F0:R80
GET_CSR_CFG_ADDR_ENABLE       EQU 80000000h ; bit 31 of the dword written to port 0CF8h

;------------------------------------------------------------------------------
; GetCsrDesiredCores
;
;   Reads the 32-bit "desired cores" CSR through the legacy PCI configuration
;   ports (address port 0CF8h, data port 0CFCh).
;
;   The target is bus (MAX_NR_BUS - socket ID), device 0, function 0,
;   register CSR_DESIRED_CORES, where socket ID is taken from
;   CPUID leaf 1, EBX[31:28].
;
;   In:   nothing
;   Out:  EAX = value read from the data port
;   RBX is preserved; RCX and RDX are overwritten (by cpuid and the port
;   number loads).
;------------------------------------------------------------------------------
GetCsrDesiredCores PROC    PUBLIC

        push    rbx                     ; cpuid overwrites RBX

        ;
        ; Socket ID = CPUID.1:EBX[31:28]
        ;
        mov     eax, 1                  ; CPUID leaf 1
        cpuid
        shr     ebx, 28                 ; ebx = EBX[31:28]
        mov     eax, ebx

        ;
        ; Bus number = MAX_NR_BUS - socket ID. Socket ID is at most 0Fh, so
        ; XOR with 0FFh yields the same result as the subtraction.
        ;
        xor     eax, MAX_NR_BUS

        ;
        ; Configuration address = enable bit | bus << 16 | register
        ; (device and function fields stay zero).
        ;
        shl     eax, 16
        or      eax, GET_CSR_CFG_ADDR_ENABLE OR CSR_DESIRED_CORES

        mov     dx, 0CF8h               ; address port
        out     dx, eax
        mov     dx, 0CFCh               ; data port
        in      eax, dx

        pop     rbx

        ret

GetCsrDesiredCores  ENDP

;------------------------------------------------------------------------------
; SetLockCsrDesiredCores
;
;   Read-modify-write of the "desired cores" CSR through the legacy PCI
;   configuration ports (address port 0CF8h, data port 0CFCh): the current
;   value is read, bit 16 (Lock) is set, and the result is written back.
;
;   The target is bus (MAX_NR_BUS - socket ID), device 0, function 0,
;   register CSR_DESIRED_CORES, where socket ID is taken from
;   CPUID leaf 1, EBX[31:28].
;
;   In:   nothing
;   Out:  nothing meaningful; EAX is left holding the value written
;   RBX is preserved; RCX and RDX are overwritten (by cpuid and the port
;   number loads).
;------------------------------------------------------------------------------
CSR_DESIRED_CORES_LOCK  EQU 10000h      ; Bit[16] = Lock

SetLockCsrDesiredCores PROC    PUBLIC

        push    rbx                     ; cpuid overwrites RBX

        ;
        ; Socket ID = upper nibble of CPUID.1:EBX[31:24]
        ;
        mov     eax, 1                  ; CPUID leaf 1
        cpuid
        bswap   ebx                     ; bl = EBX[31:24]
        shr     bl, 4                   ; bl = EBX[31:28]
        movzx   eax, bl

        ;
        ; One XOR does two jobs: the low byte becomes
        ; MAX_NR_BUS - socket ID (socket ID is at most 0Fh), and bit 15,
        ; which is zero here, is turned on so that it lands in bit 31
        ; (the enable bit) after the shift.
        ;
        xor     eax, 8000h OR MAX_NR_BUS
        shl     eax, 16
        or      eax, CSR_DESIRED_CORES  ; register offset; device and function stay zero

        mov     dx, 0CF8h               ; address port
        out     dx, eax
        mov     dx, 0CFCh               ; data port
        in      eax, dx
        or      eax, CSR_DESIRED_CORES_LOCK
        out     dx, eax

        pop     rbx
        ret

SetLockCsrDesiredCores  ENDP

;------------------------------------------------------------------------------
;  UINTN
;  CpuFlushTlb (
;    VOID
;    )
;------------------------------------------------------------------------------
;
; Rewrites CR3 with its own value.
;
;   In:   nothing
;   Out:  RAX = current CR3 value
;
CpuFlushTlb PROC        PUBLIC
        mov     rax, cr3                ; rax = current CR3
        mov     cr3, rax                ; write it straight back
        ret
CpuFlushTlb  ENDP

;------------------------------------------------------------------------------
; UINT16
; CpuCodeSegment (
;   VOID
;   );
;------------------------------------------------------------------------------
;
; Returns the current code-segment selector.
;
;   In:   nothing
;   Out:  RAX = CS selector in bits 15:0, all higher bits zero
;
CpuCodeSegment PROC        PUBLIC  
        xor     eax, eax                ; clear all of RAX first
        mov     ax, cs                  ; then drop the 16-bit selector into the low word
        ret
CpuCodeSegment  ENDP

;------------------------------------------------------------------------------
; VOID
; CpuLoadGlobalDescriptorTable (
;   VOID  *Table16ByteAligned
;   );
;------------------------------------------------------------------------------
;
; Loads GDTR from the memory operand addressed by RCX.
;
;   In:   RCX = address of the operand handed to 'lgdt'
;               (presumably a limit/base pseudo-descriptor; verify against caller)
;   Out:  nothing; no general-purpose register is modified
;
CpuLoadGlobalDescriptorTable PROC   PUBLIC
        lgdt    fword ptr [rcx]
        ret
CpuLoadGlobalDescriptorTable  ENDP

;------------------------------------------------------------------------------
; VOID
; CpuLoadInterruptDescriptorTable (
;   VOID  *Table16ByteAligned
;   );
;------------------------------------------------------------------------------
;
; Loads IDTR from the memory operand addressed by RCX.
;
;   In:   RCX = address of the operand handed to 'lidt'
;               (presumably a limit/base pseudo-descriptor; verify against caller)
;   Out:  nothing; no general-purpose register is modified
;
CpuLoadInterruptDescriptorTable PROC   PUBLIC
        lidt    fword ptr [rcx]
        ret
CpuLoadInterruptDescriptorTable  ENDP


text  ENDS
END