From 60b67c31fcad6d0dc9603a034994a12efe3d5202 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Sun, 21 Apr 2019 17:09:24 -0700 Subject: Open doc files, version 1 of everything This copies over files from: http://download.nvidia.com/open-gpu-doc/ with a few overrides from local, as-yet-to-be-published files (generally just a file suffix and licensing tweak). Here's the script that was used to create this commit: wget -r http://download.nvidia.com/open-gpu-doc/ mv download.nvidia.com/open-gpu-doc/* . rm -rf download.nvidia.com/ find . -name index.html | xargs rm -f find . -name Thumbs.db | xargs rm -f find . -type f | grep -v \.git | xargs file | grep CRLF | cut -f1 -d: | xargs dos2unix for f in `find . -name 1 | cut -f2 -d "/"`; do pushd $f; mv 1/* .; popd; done find . -name 1 | xargs rmdir rm -rf Host-Fifo/volta/gv100 mkdir -p Host-Fifo/volta/gv100 cp $sw/docs/Public_Devel_Docs/release/Host-Fifo/volta/gv100/* Host-Fifo/volta/gv100/ rm Display-Ref-Manuals/gv100/dev_display.ref cp $sw/docs/Public_Devel_Docs/release/Display-Ref-Manuals/1/gv100/dev_display.ref \ Display-Ref-Manuals/gv100/dev_display.ref.txt git add . git mv DCB/DCB-4.0-Specification.html DCB/DCB-4.x-Specification.html git reset HEAD Display-Class-Methods/2 git reset HEAD DCB/2 git commit Reviewed-by: Andy Ritger --- Shader-Program-Header/Shader-Program-Header.html | 2550 ++++++++++++++++++++++ 1 file changed, 2550 insertions(+) create mode 100644 Shader-Program-Header/Shader-Program-Header.html (limited to 'Shader-Program-Header') diff --git a/Shader-Program-Header/Shader-Program-Header.html b/Shader-Program-Header/Shader-Program-Header.html new file mode 100644 index 0000000..3ae0dfa --- /dev/null +++ b/Shader-Program-Header/Shader-Program-Header.html @@ -0,0 +1,2550 @@ + + + + + +Shader Program Header Specification + + + + + +
+
+

Purpose

+
+

The first 80 bytes of a GPU program, known as the Shader Program Header (SPH), +contains information about the program, which the GPU uses to determine how to +execute the instructions.

+
+
+
+

SPH Overall Structure

+
+

Some portions of the SPH are interpreted differently depending on which stage +of the pipeline the program is used with (e.g., Vertex or Fragment), whereas +some portions are always interpreted the same way — they are common for all +program types.

+

There are two main types of programs; PS and VTG. PS is used for +pixel/fragment shaders, and VTG is used for everything else. When PS is used, +field SphType in CommonWord0 must be set to 1; similarly, when VTG is used, +SphType in CommonWord0 must be set to 2.

+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1. SPH Type 1 Definition
Field Bit width Type

CommonWord0

32

struct CommonWord0

CommonWord1

32

struct CommonWord1

CommonWord2

32

struct CommonWord2

CommonWord3

32

struct CommonWord3

CommonWord4

32

struct CommonWord4

ImapSystemValuesA

24

struct ImapSystemValuesA

ImapSystemValuesB

8

struct ImapSystemValuesB

ImapGenericVector[32]

128

struct ImapVector

ImapColor

16

struct ImapColor

ImapSystemValuesC

16

struct ImapSystemValuesC

ImapFixedFncTexture[10]

40

struct ImapTexture

ImapReserved

8

-

OmapSystemValuesA

24

struct OmapSystemValuesA

OmapSystemValuesB

8

struct OmapSystemValuesB

OmapGenericVector[32]

128

struct OmapVector

OmapColor

16

struct OmapColor

OmapSystemValuesC

16

struct OmapSystemValuesC

OmapFixedFncTexture[10]

40

struct OmapTexture

OmapReserved

8

-

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2. SPH Type 2 Definition
Field Bit width Type

CommonWord0

32

struct CommonWord0

CommonWord1

32

struct CommonWord1

CommonWord2

32

struct CommonWord2

CommonWord3

32

struct CommonWord3

CommonWord4

32

struct CommonWord4

ImapSystemValuesA

24

struct ImapSystemValuesA

ImapSystemValuesB

8

struct ImapSystemValuesB

ImapGenericVector[32]

256

struct ImapPixelVector

ImapColor

16

struct ImapPixelColor

ImapSystemValuesC

16

struct ImapSystemValuesC

ImapFixedFncTexture[10]

80

struct ImapPixelTexture

ImapReserved

16

-

OmapTarget[8]

32

struct OmapTarget

OmapSampleMask

1

bool

OmapDepth

1

bool

OmapReserved

30

-

+
+
+
+
+

SPH Common Word Definitions

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3. CommonWord0 Definition
Field Bit width Type

SphType

5

enum

Version

5

U05

ShaderType

4

enum

MrtEnable

1

bool

KillsPixels

1

bool

DoesGlobalStore

1

bool

SassVersion

4

U04

Reserved

5

-

DoesLoadOrStore

1

bool

DoesFp64

1

bool

StreamOutMask

4

U04

+
+
    +
  • +

    +The SPH field SphType sets the type of shader, where the type is either + TYPE_01_VTG or TYPE_02_PS. +

    +
  • +
+
+ +++ + + + + + + + + + + + + + + + +
Name Value

VTG

1

PS

2

+
+
    +
  • +

    +The SPH field Version sets is used during development to pick the version. +

    +
  • +
  • +

    +The SPH field ShaderType sets the type (e.g, VERTEX, TESSELLATION, GEOMETRY, or PIXEL) + of shader for the shader program. +

    +
  • +
+
+ +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name Value

VERTEX

1

TESSELLATION_INIT

2

TESSELLATION

3

GEOMETRY

4

PIXEL

5

+
+
    +
  • +

    +The SPH field MrtEnable, when TRUE indicates that the pixel shader outputs + multiple colors (the number being controlled by the SPH Omap). It is always + AND’d with SetCtMrtEnable.V(eff) to allow the driver to dynamically override the + MRT (Multiple Render Target) behavior of the pixel shader. If the result is + TRUE, then the pixel shader outputs will each be sent to its corresponding + enabled target. If the result is FALSE, then pixel shader output 0 + will be sent to each enabled target. This override of MRT is necessary to + support OGL’s DrawBuffer call (which is inherently non-MRT) when an MRT + enabled pixel shader is active. This field has no effect on the blending + enables; that is, whether MrtEnable result is TRUE or FALSE, each color + target still has an independent blend enable (unless + SetSingleRopControl.Enable is TRUE). This SPH field is only used for pixel + shaders. +

    +
  • +
  • +

    +The SPH field KillsPixels, if TRUE, enables pixel shader programs to kill pixels. When + set to FALSE, pixel shaders KIL instructions become no-operations and trigger + a hardware exception. Also, when this field is TRUE, EarlyZ is turned off, + and Zcull’s visible pixel counting acceleration is turned off. This field + has no effect on the texture color key operations. This SPH field is only + used for pixels shaders. +

    +
  • +
  • +

    +The SPH field DoesGlobalStore indicates the shader might perform a global store. +

    +
  • +
+
+ + + +
+
Note
+
When SPH.DoesGlobalStore == 0, any global store instructions (ST/ATOM/SUST) are +noop’d and a hardware exception is generated. The STL instruction may still +be used for local stores.
+
+
    +
  • +

    +The SPH field StreamOutMask selects which GS output streams are enbled + as outputs from the GS. There are four GS output streams, numbered 0 to 3. + If a stream is disabled in StreamOutMask, it is never written even if a buffer + is bound to it. +

    +
  • +
  • +

    +The SPH field DoesLoadOrStore is used to enable power optimizations by disabling the load/store + path if it is not being used. If a shader unit is only running pixel work that + has DoesLoadOrStore set to FALSE, and it has declared no additional CallReturnStack + by setting ShaderLocalMemoryCrsSize to zero, the load-store path can be + safely shut down temporarily. When DoesLoadOrStore == FALSE, LD, ST, and all the + variations thereof in the ISA, will be noop’ed by the HW. +

    +
  • +
  • +

    +The SPH field DoesFp64 is used power-off the double precision math if the compiler can + guarantee it will never be used. If all of the work running on a given Shader unit + has DoesFp64 set to FALSE, this math block will be powered down. Any double precision + instruction encountered when DoesFp64 is FALSE will be noop’ed by the HW. +

    +
  • +
+
+ + ++++ + + + + + + + + + + + + + + + + + + +
Table 4. CommonWord1 Definition
Field Bit width Type

ShaderLocalMemoryLowSize

24

U24

PerPatchAttributeCount

8

U08

+
+
    +
  • +

    +The SPH fields ShaderLocalMemoryLowSize and ShaderLocalMemoryHighSize set the + required size of thread-private memory, for variable storage, needed by the + shader program. +

    +
  • +
  • +

    +The SPH field PerPatchAttributeCount indicates the number of per-patch attributes + that are written by the tesselation init shader (and read by the subsequent tesselation + shader). Per-patch attributes are in addition to per-vertex attributes. + This field is only used on tesselation init shaders. +

    +
  • +
+
+ + + +
+
Note
+
Triangles generated by the geometry shader always have all their edge flags set +to TRUE.
+
+
+ + ++++ + + + + + + + + + + + + + + + + + + +
Table 5. CommonWord2 Definition
Field Bit width Type

ShaderLocalMemoryHighSize

24

U24

ThreadsPerInputPrimitive

8

U08

+
+
    +
  • +

    +The SPH field ThreadsPerInputPrimitive sets the maximum number of threads that + are invoked for a primitive, thereby allowing the work of one shader + to be divided amongst several shaders. This is the number of "instanced" + shaders. This field has the following shader-specific meanings: +

    +
  • +
+
+ +++ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Program Type Meaning

VERTEX

Unused

TESSELLATION_INIT

Sets the number of threads run per patch

TESSELLATION

Unused

GEOMETRY

Sets the number of threads run per primitive

PIXEL

Unused

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + +
Table 6. CommonWord3 Definition
Field Bit width Type

ShaderLocalMemoryCrsSize

24

U24

OutputTopology

4

enum

Reserved

4

-

+
+
    +
  • +

    +The SPH field ShaderLocalMemoryCrsSize sets the additional (off chip) call/return stack size (CRS_SZ). + Units are in Bytes/Warp. Minimum value 0, maximum 1 megabyte. Must be multiples of 512 bytes. +

    +
  • +
  • +

    +The SPH field OutputTopology sets the primitive topology of the vertices that are output from the + pipe stage. This field is only used with geometry shaders, where the value must be greater than zero and + has a maximum of 1024. The allowed values are: +

    +
  • +
+
+ +++ + + + + + + + + + + + + + + + + + + + +
Name Value

POINTLIST

1

LINESTRIP

6

TRIANGLESTRIP

7

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 7. CommonWord4 Definition
Field Bit width Type

MaxOutputVertexCount

12

U12

StoreReqStart

8

U08

Reserved

4

-

StoreReqEnd

8

U08

+
+
    +
  • +

    +The SPH field MaxOutputVertexCount sets the maximum number of vertices that can be output by + one shader thread. This field is only used with geometry shaders, where the + value sets the maximum number of vertices output per thread, and OUT instructions + beyond this are noop’ed. +

    +
  • +
  • +

    +The SPH fields StoreReqStart and StoreReqEnd set a range of attributes + whose corresponding Odmap values of ST or ST_LAST are treated as ST_REQ. + Normally, for an attribute whose Omap bit is TRUE and Odmap value is ST, when + the shader writes data to this output, it can not count on being able to read + it back, since the next downstream shader might have its Imap bit FALSE, + thereby causing the Bmap bit to be FALSE. By including a ST type of attribute + in the range of StoreReqStart and StoreReqEnd, the attribute’s Odmap value + is treated as ST_REQ, so an Omap bit being TRUE causes the Bmap bit to + be TRUE. This guarantees the shader program can output the value and + then read it back later. This will save register space. +

    +
  • +
  • +

    +The SPH field StoreReqStart sets the first attribute whose ST or ST_LAST + Odmap values are treated as ST_REQ. Note that Odmap values of discard are + not affected. +

    +
  • +
  • +

    +The SPH field StoreReqEnd sets the last attribute whose ST of ST_LAST Odmap + values are treated as ST_REQ. If no attributes are to have their Odmap value + treated as ST_REQ, then the SPH needs to have StoreReqStart greater + than StoreReqEnd. +

    +
  • +
+
+ + + +
+
Note
+
SPH fields StoreReqStart and StoreReqEnd are ignored for geometry and pixel +shaders. For geometry shaders, ALD.O is disallowed because a single geometry +shader thread can output multiple vertices, so it is not possible to read back +every attribute that was previously written (unlike vertex, tesselation and +tesselation init shaders).
+
+
+
+
+

SPH IMAP Definitions

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 8. ImapSystemValuesA Definition
Field Bit width Type

Reserved

4

-

ImapTessellationLodLeft

1

bool

ImapTessellationLodRight

1

bool

ImapTessellationLodBottom

1

bool

ImapTessellationLodTop

1

bool

ImapTessellationInteriorU

1

bool

ImapTessellationInteriorV

1

bool

Reserved

14

-

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 9. ImapSystemValuesB Definition
Field Bit width Type

ImapPrimitiveId

1

bool

ImapRtArrayIndex

1

bool

ImapViewportIndex

1

bool

ImapPointSize

1

bool

ImapPositionX

1

bool

ImapPositionY

1

bool

ImapPositionZ

1

bool

ImapPositionW

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 10. ImapColor Definition
Field Bit width Type

ImapColorFrontDiffuseRed

1

bool

ImapColorFrontDiffuseGreen

1

bool

ImapColorFrontDiffuseBlue

1

bool

ImapColorFrontDiffuseAlpha

1

bool

ImapColorFrontSpecularRed

1

bool

ImapColorFrontSpecularGreen

1

bool

ImapColorFrontSpecularBlue

1

bool

ImapColorFrontSpecularAlpha

1

bool

ImapColorBackDiffuseRed

1

bool

ImapColorBackDiffuseGreen

1

bool

ImapColorBackDiffuseBlue

1

bool

ImapColorBackDiffuseAlpha

1

bool

ImapColorBackSpecularRed

1

bool

ImapColorBackSpecularGreen

1

bool

ImapColorBackSpecularBlue

1

bool

ImapColorBackSpecularAlpha

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 11. ImapSystemValuesC Definition
Field Bit width Type

ImapClipDistance0

1

bool

ImapClipDistance1

1

bool

ImapClipDistance2

1

bool

ImapClipDistance3

1

bool

ImapClipDistance4

1

bool

ImapClipDistance5

1

bool

ImapClipDistance6

1

bool

ImapClipDistance7

1

bool

ImapPointSpriteS

1

bool

ImapPointSpriteT

1

bool

ImapFogCoordinate

1

bool

Reserved

1

bool

ImapTessellationEvaluationPointU

1

bool

ImapTessellationEvaluationPointV

1

bool

ImapInstanceId

1

bool

ImapVertexId

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 12. ImapPixelColor Definition
Field Bit width Type

ImapColorDiffuseRed

2

enum PixelImap

ImapColorDiffuseGreen

2

enum PixelImap

ImapColorDiffuseBlue

2

enum PixelImap

ImapColorDiffuseAlpha

2

enum PixelImap

ImapColorSpecularRed

2

enum PixelImap

ImapColorSpecularGreen

2

enum PixelImap

ImapColorSpecularBlue

2

enum PixelImap

ImapColorSpecularAlpha

2

enum PixelImap

+
+
+ + +++ + + + + + + + + + + + + + + + + + + + + + + + +
Table 13. PixelImap enum definition
Name Value

Unused

0

Constant

1

Perspective

2

ScreenLinear

3

+
+
+
+
+

SPH OMAP Definitions

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 14. OmapSystemValuesA Definition
Field Bit width Type

Reserved

4

-

OmapTessellationLodLeft

1

bool

OmapTessellationLodRight

1

bool

OmapTessellationLodBottom

1

bool

OmapTessellationLodTop

1

bool

OmapTessellationInteriorU

1

bool

OmapTessellationInteriorV

1

bool

Reserved

14

-

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 15. OmapSystemValuesB Definition
Field Bit width Type

OmapPrimitiveId

1

bool

OmapRtArrayIndex

1

bool

OmapViewportIndex

1

bool

OmapPointSize

1

bool

OmapPositionX

1

bool

OmapPositionY

1

bool

OmapPositionZ

1

bool

OmapPositionW

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 16. OmapColor Definition
Field Bit width Type

OmapColorFrontDiffuseRed

1

bool

OmapColorFrontDiffuseGreen

1

bool

OmapColorFrontDiffuseBlue

1

bool

OmapColorFrontDiffuseAlpha

1

bool

OmapColorFrontSpecularRed

1

bool

OmapColorFrontSpecularGreen

1

bool

OmapColorFrontSpecularBlue

1

bool

OmapColorFrontSpecularAlpha

1

bool

OmapColorBackDiffuseRed

1

bool

OmapColorBackDiffuseGreen

1

bool

OmapColorBackDiffuseBlue

1

bool

OmapColorBackDiffuseAlpha

1

bool

OmapColorBackSpecularRed

1

bool

OmapColorBackSpecularGreen

1

bool

OmapColorBackSpecularBlue

1

bool

OmapColorBackSpecularAlpha

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 17. OmapSystemValuesC Definition
Field Bit width Type

OmapClipDistance0

1

bool

OmapClipDistance1

1

bool

OmapClipDistance2

1

bool

OmapClipDistance3

1

bool

OmapClipDistance4

1

bool

OmapClipDistance5

1

bool

OmapClipDistance6

1

bool

OmapClipDistance7

1

bool

OmapPointSpriteS

1

bool

OmapPointSpriteT

1

bool

OmapFogCoordinate

1

bool

OmapSystemValuesReserved17

1

bool

OmapTessellationEvaluationPointU

1

bool

OmapTessellationEvaluationPointV

1

bool

OmapInstanceId

1

bool

OmapVertexId

1

bool

+
+
+
+
+

SPH Vector Definitions

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 18. ImapVector Definition
Field Bit width Type

ImapX

1

bool

ImapY

1

bool

ImapZ

1

bool

ImapW

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 19. OmapVector Definition
Field Bit width Type

OmapX

1

bool

OmapY

1

bool

OmapZ

1

bool

OmapW

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 20. ImapPixelVector Definition
Field Bit width Type

ImapX

2

enum PixelImap

ImapY

2

enum PixelImap

ImapZ

2

enum PixelImap

ImapW

2

enum PixelImap

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 21. ImapTexture Definition
Field Bit width Type

ImapS

1

bool

ImapT

1

bool

ImapR

1

bool

ImapQ

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 22. OmapTexture Definition
Field Bit width Type

OmapS

1

bool

OmapT

1

bool

OmapR

1

bool

OmapQ

1

bool

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 23. ImapPixelTexture Definition
Field Bit width Type

ImapS

2

enum PixelImap

ImapT

2

enum PixelImap

ImapR

2

enum PixelImap

ImapQ

2

enum PixelImap

+
+
+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 24. OmapTarget Definition
Field Bit width Type

OmapRed

1

bool

OmapGreen

1

bool

OmapBlue

1

bool

OmapAlpha

1

bool

+
+
+
+
+

+ + + -- cgit v1.2.3