summaryrefslogtreecommitdiff
path: root/Host-Fifo/volta/gv100/dev_pbdma.ref.txt
blob: bc5163a5a5b8316121ddc976d34a5df5dfa44c73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------

1  -  INTRODUCTION
==================

     A Host's PBDMA unit fetches pushbuffer data from memory, generates
commands, called "methods", from the fetched data, executes some of the
generated methods itself, and sends the remainder of the methods to engines.
     This manual describes the Host PBDMA register space and all Host methods.
The NV_PPBDMA space defines registers that are contained within each of Host's
PBDMA units.  Each PBDMA unit is allocated an 8KB address space for its
registers.
     The NV_UDMA space defines the Host methods.  A method consists of an
address doubleword and a data doubleword.  The address specifies the operation
to be performed.  The data is an operand.  The NV_UDMA address space contains
the addresses of the methods that are executed by a PBDMA unit.
GP_ENTRY0 and GP_ENTRY1 - GP-Entry Memory Format

     A pushbuffer contains the specifications of the operations that a GPU
context is to perform for a particular client.  Pushbuffers are stored in
memory.  A doubleword-sized (4-byte) unit of pushbuffer data is known as a
pushbuffer entry.  GP entries indicate the location of the pushbuffer data in
memory.  GP entries themselves are also stored in memory.
     A GP entry specifies the location and size of a pushbuffer segment (a
contiguous block of PB entries) in memory.  See "FIFO_DMA" in dev_ram.ref for
details about pushbuffer segments and the format of pushbuffer data.

     The NV_PPBDMA_GP_ENTRY0_GET and NV_PPBDMA_GP_ENTRY1_GET_HI fields of a GP
entry specify the 38-bit dword-address (which would make a 40-bit byte-address)
of the first pushbuffer entry of the GP entry's pushbuffer segment.  Because
each pushbuffer entry (and by extension each pushbuffer segment) is doubleword
aligned (4-byte aligned), the least significant 2 bits of the 40-bit
byte-address are not stored.  The byte-address of the first pushbuffer entry in
a GP entry's pushbuffer segment is
(GP_ENTRY1_GET_HI << 32) + (GP_ENTRY0_GET << 2).
     The NV_PPBDMA_GP_ENTRY1_LENGTH field, when non-zero, indicates the number
of pushbuffer entries contained within the GP entry's pushbuffer segment.  The
byte-address of the first pushbuffer entry beyond the pushbuffer segment is
(GP_ENTRY1_GET_HI << 32) + (GP_ENTRY0_GET << 2) + (GP_ENTRY1_LENGTH * 4).
     If NV_PPBDMA_GP_ENTRY1_LENGTH is CONTROL (0), then the GP entry is a
"control" entry, meaning this GP entry will not cause any PB data to be fetched
or executed.  In this case, the NV_PPBDMA_GP_ENTRY1_OPCODE field specifies an
operation to perform, and the NV_PPBDMA_GP_ENTRY0_OPERAND field contains the
operand.  The available operations are as follows:

     * NV_PPBDMA_GP_ENTRY1_OPCODE_NOP: no operation will be performed, but note
       that the SYNC field is still respected--see below.

     * NV_PPBDMA_GP_ENTRY1_OPCODE_GP_CRC: the ENTRY0_OPERAND field is compared
       with the cyclic redundancy check value that was calculated over previous
       GP entries (NV_PPBDMA_GP_CRC). After each comparison, the
       NV_PPBDMA_GP_CRC is cleared, whether they match or differ.  If they
       differ, then Host initiates an interrupt (NV_PPBDMA_INTR_0_GPCRC).  For
       recovery, clearing the interrupt will cause the PBDMA to continue as if
       the control entry was OPCODE_NOP.

     * NV_PPBDMA_GP_ENTRY1_OPCODE_PB_CRC: the ENTRY0_OPERAND is compared
       with the CRC value that was calculated over the previous pushbuffer
       segment (NV_PPBDMA_PB_CRC).  The PB CRC resets to 0 with each pushbuffer
       segment.  If the two CRCs differ, Host will raise the
       NV_PPBDMA_INTR_0_PBCRC interrupt.  For recovery, clearing the interrupt
       will cause the PBDMA to continue as if the control entry was OPCODE_NOP.
       Note the PB_CRC is indeterminate if an END_PB_SEGMENT PB control entry
       was used in the prior segment or if SSDM disabled the device and the
       segment had conditional fetching enabled.

     Host supports two privilege levels for channels: privileged and
non-privileged.  The privilege level is determined by the
NV_PPBDMA_CONFIG_AUTH_LEVEL field set from the corresponding NV_RAMFC_CONFIG
dword in the RAMFC.  Non-privileged channels cannot execute privileged methods,
but privileged channels can.  Any attempt to run a privileged operation from a
non-privileged channel will result in PB raising NV_PPBDMA_INTR_0_METHOD.


     The NV_PPBDMA_GP_ENTRY1_SYNC field specifies whether a pushbuffer may be
fetched before Host has finished processing the preceding PB segment.  If this
field is SYNC_PROCEED, then Host does not wait for the preceding PB segment to
be processed.  If this field is SYNC_WAIT, then Host waits until the preceding
PB segment has been processed by Host before beginning to fetch the current PB
segment.
     Host's processing of a PB segment consists of parsing PB entries into PB
instructions, decoding those instructions into control entries or method
headers, generating methods from method headers, determining whether methods are
to be executed by Host or by an engine, executing Host methods, and sending
non-Host methods and SetObject methods to engines.
     Note that in the case where the final PB entry of the preceding PB segment
is a method header representing a PB compressed method sequence of nonzero
length--that is, the compressed method sequence is split across PB segments with
all of its method data entries in the PB segment for which SYNC_WAIT is
set--then Host is considered to have finished processing the preceding PB
segment once that method header is read.  However, splitting a PB compressed
method sequence for software methods is not supported because Host will issue
the DEVICE interrupt indicating the SW method as soon as it processes the
method header, which happens prior to fetching the method data entries for that
compressed method sequence.  Thus SW cannot actually execute any of the methods
in the sequence because the method data is not yet available, leaving the PBDMA
wedged.
     When SYNC_WAIT is set, Host does not wait for any engine methods generated
from the preceding PB segment to complete.  Host does not automatically wait
until an engine is done processing all methods generated from that PB segment.
If software desires that the engine finish processing all methods generated from
one PB segment before a second PB segment is fetched, then software may place
Host methods that wait until the engine is idle in the first PB segment (like
WFI, SET_REF, or SEM_EXECUTE with RELEASE_WFI_EN set).  Alternatively, software
might put a semaphore acquire at the end of the first PB segment, and have an
engine release the semaphore.  In both cases, SYNC_WAIT must be set on the
second PB segment.  This field applies even if the NV_PPBDMA_GP_ENTRY1_LENGTH
field is zero; if SYNC_WAIT is specified in this case, no further GP entries
will be processed until the wait finishes.

     Some parts of a pushbuffer may not be executed depending on the value of
the NV_PPBDMA_SUBDEVICE_ID and SUBDEVICE_MASK.  If an entire PB segment will not
be executed due to conditional execution, Host need not even bother fetching the
PB segment.
     The NV_PPBDMA_GP_ENTRY0_FETCH field indicates whether the PB segment
specified by the GP entry should be fetched unconditionally or fetched
conditionally.  If this field is FETCH_UNCONDITIONAL, then the PB segment is
fetched unconditionally.  If this field is FETCH_CONDITIONAL, then the PB
segment is only fetched if the NV_PPBDMA_SUBDEVICE_STATUS field is
STATUS_ACTIVE.

********************************************************************************
Warning: When using subdevice masking, one must take care to synchronize
properly with any later GP entries marked FETCH_CONDITIONAL.  If GP fetching
gets too far ahead of PB processing, it is possible for a later conditional PB
segment to be discarded prior to reaching an SSDM command that sets
SUBDEVICE_STATUS to ACTIVE.  This would cause Host to execute garbage data.  One
way to avoid this would be to set the SYNC_WAIT flag on any FETCH_CONDITIONAL
segments following a subdevice reenable.
********************************************************************************

     If the PB segment is not fetched then it behaves as an OPCODE_NOP control
entry.  If a PB segment contains a SET_SUBDEVICE_MASK PB instruction that Host
must see, then the GP entry for that PB segment must specify
FETCH_UNCONDITIONAL.
     If the PB segment specifies FETCH_CONDITIONAL and the subdevice mask shows
STATUS_ACTIVE, but the PB segment contains a SET_SUBDEVICE_MASK PB instruction
that will disable the mask, the rest of the PB segment will be discarded.  In
that case, an arbitrary number of entries past the SSDM may have already updated
the PB CRC, rendering the PB CRC indeterminate.
     If Host must wait for a previous PB segment's Host processing to be
completed before examining NV_PPBDMA_SUBDEVICE_STATUS, then the GP entry should
also have its SYNC_WAIT field set.
     A PB segment marked FETCH_CONDITIONAL must not have a PB compressed method
sequence that crosses a PB segment boundary (with its header in a previous
non-conditional PB segment and its final valid data in a conditional PB
segment)--doing so will cause a NV_PPBDMA_INTR_0_PBSEG interrupt.

     Software may monitor Host's progress through the pushbuffer by reading the
channel's NV_RAMUSERD_TOP_LEVEL_GET entry from USERD, which is backed by Host's
NV_PPBDMA_TOP_LEVEL_GET register.  See "NV_PFIFO_USERD_WRITEBACK" in
dev_fifo.ref for information about how frequently this information is written
back into USERD.  If a PB segment occurs multiple times within a pushbuffer
(like a commonly used subroutine), then progress through that segment may be
less useful for monitoring, because software will not know which occurrence of
the segment is being processed.
     The NV_PPBDMA_GP_ENTRY1_LEVEL field specifies whether progress through the
GP entry's PB segment should be indicated in NV_RAMUSERD_TOP_LEVEL_GET.  If this
field is LEVEL_MAIN, then progress through the PB segment will be reported --
NV_RAMUSERD_TOP_LEVEL_GET will equal NV_RAMUSERD_GET.  If this field is
LEVEL_SUBROUTINE, then progress through this PB segment is not reported -- Host
will not alter NV_RAMUSERD_TOP_LEVEL_GET.  If this field is LEVEL_SUBROUTINE,
reads of NV_RAMUSERD_TOP_LEVEL_GET will return the last value of NV_RAMUSERD_GET
from a PB segment at LEVEL_MAIN.

     If the GP entry's opcode is OPCODE_ILLEGAL or an invalid opcode, Host will
initiate an interrupt (NV_PPBDMA_INTR_0_GPENTRY).  If a GP entry specifies a PB
segment that crosses the end of the virtual address space (0xFFFFFFFFFF), then
Host will initiate an interrupt (NV_PPBDMA_INTR_0_GPENTRY).  Invalid GP entries
are treated like traps: they will set the interrupt and freeze the PBDMA, but
the invalid GP entry is discarded.  Once the interrupt is cleared, the PBDMA
unit will simply continue with the next GP entry.
     Note a corner case exists where the PB segment described by a GP entry is
at the end of the virtual address space, or in other words, the last PB entry in
the described PB segment is the last dword in the virtual address space.  This
type of GP entry is not valid and will generate a GPENTRY interrupt.  The
PBDMA's PUT pointer describes the address of the first dword beyond the PB
segment, thus making the last dword in the virtual address space unusable for
storing a pbentry.



#define NV_PPBDMA_GP_ENTRY__SIZE                                  8 /*       */

#define NV_PPBDMA_GP_ENTRY0                              0x10000000 /* RW-4R */

#define NV_PPBDMA_GP_ENTRY0_OPERAND                            31:0 /* RWXUF */
#define NV_PPBDMA_GP_ENTRY0_FETCH                               0:0 /*       */
#define NV_PPBDMA_GP_ENTRY0_FETCH_UNCONDITIONAL          0x00000000 /*       */
#define NV_PPBDMA_GP_ENTRY0_FETCH_CONDITIONAL            0x00000001 /*       */
#define NV_PPBDMA_GP_ENTRY0_GET                                31:2 /*       */

#define NV_PPBDMA_GP_ENTRY1                              0x10000004 /* RW-4R */

#define NV_PPBDMA_GP_ENTRY1_GET_HI                              7:0 /* RWXUF */


#define NV_PPBDMA_GP_ENTRY1_LEVEL                               9:9 /* RWXUF */
#define NV_PPBDMA_GP_ENTRY1_LEVEL_MAIN                   0x00000000 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_LEVEL_SUBROUTINE             0x00000001 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_LENGTH                            30:10 /* RWXUF */
#define NV_PPBDMA_GP_ENTRY1_LENGTH_CONTROL               0x00000000 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_SYNC                              31:31 /* RWXUF */
#define NV_PPBDMA_GP_ENTRY1_SYNC_PROCEED                 0x00000000 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_SYNC_WAIT                    0x00000001 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_OPCODE                              7:0 /* RWXUF */
#define NV_PPBDMA_GP_ENTRY1_OPCODE_NOP                   0x00000000 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_OPCODE_ILLEGAL               0x00000001 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_OPCODE_GP_CRC                0x00000002 /* RW--V */
#define NV_PPBDMA_GP_ENTRY1_OPCODE_PB_CRC                0x00000003 /* RW--V */





Number of NOPs for self-modifying gpfifo

This is a formula for SW to estimate the number of NOPs needed to pad the gpfifo
such that the modification of a gp entry by the engine or by the CPU can take
effect. Here, NV_PFIFO_LB_GPBUF_CONTROL_SIZE(eng) refers to the SIZE field in the
NV_PFIFO_LB_GPBUF_CONTROL(eng) register.  (More information about the register
is available in dev_fifo.ref.)

NUM_GP_NOPS(eng) = ((NV_PFIFO_LB_GPBUF_CONTROL_SIZE(eng)+1) * NV_PFIFO_LB_ENTRY_SIZE)/ NV_PPBDMA_GP_ENTRY__SIZE





GP_BASE - Base and Limit of the Circular Buffer of GP Entries

     GP entries are stored in a buffer in memory.  The NV_PPBDMA_GP_BASE_OFFSET
and NV_PPBDMA_GP_BASE_HI_OFFSET fields specify the 37-bit address in 8-byte
granularity of the start of a circular buffer that contains GP entries (GPFIFO).
This address is a virtual (not a physical) address.  GP entries are always
GP_ENTRY__SIZE-byte aligned, so the least significant three bits of the byte
address are not stored.  The byte address of the GPFIFO base pointer is thus:

     gpfifo_base_ptr = GP_BASE + (GP_BASE_HI_OFFSET << 32)

     The number of GP entries in the circular buffer is always a power of 2.
The NV_PPBDMA_GP_BASE_HI_LIMIT2 field specifies the number of bits used to count
the memory allocated to the GP FIFO.  The LIMIT2 value specified in these
registers is Log base 2 of the number of entries in the GP FIFO.  For example,
if the number of entries is 2^16--indicating a memory area of
(2^16)*GP_ENTRY__SIZE bytes--then the value written in LIMIT2 is 16.
     The circular buffer containing GP entries cannot cross the maximum address.
If OFFSET + (1<<LIMIT2)*GP_ENTRY__SIZE - 1 > 0xFFFFFFFFFF, then Host will
initiate a CPU interrupt (NV_PPBDMA_INTR_0_GPFIFO).
     The NV_PPBDMA_GP_PUT, NV_PPBDMA_GP_GET, and NV_PPBDMA_GP_FETCH registers
(and their associated NV_RAMFC and NV_RAMUSERD entries) are relative to the
value of this register.
     These registers are part of a GPU context's state.  On a switch, the values
of these registers are saved to, and restored from, the NV_RAMFC_GP_BASE and
NV_RAMFC_GP_BASE_HI entries in the RAMFC part of the GPU context's GPU-instance
block.
     Typically, software initializes the information in NV_RAMFC_GP_BASE and
NV_RAMFC_GP_BASE_HI when the GPU context's GPU-instance block is first created.
These registers are available to software only for debug.  Software should use
them only if the GPU context is assigned to a PBDMA unit and that PBDMA unit is
stalled.  While a GPU context's Host context is not contained within a PBDMA
unit, software should use the RAMFC entries to access this information.
     A pair of these registers exists for each of Host's PBDMA units.  These
registers run on Host's internal bus clock.


#define NV_PPBDMA_GP_BASE(i)                  (0x00040048+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_BASE__SIZE_1                 14 /*       */

#define NV_PPBDMA_GP_BASE_OFFSET                               31:3 /* RW-UF */
#define NV_PPBDMA_GP_BASE_OFFSET_ZERO                    0x00000000 /* RW--V */
#define NV_PPBDMA_GP_BASE_RSVD                                  2:0 /* RW-UF */
#define NV_PPBDMA_GP_BASE_RSVD_ZERO                      0x00000000 /* RW--V */

#define NV_PPBDMA_GP_BASE_HI(i)               (0x0004004c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_BASE_HI__SIZE_1              14 /*       */

#define NV_PPBDMA_GP_BASE_HI_OFFSET                             7:0 /* RW-UF */
#define NV_PPBDMA_GP_BASE_HI_OFFSET_ZERO                 0x00000000 /* RW--V */
#define NV_PPBDMA_GP_BASE_HI_LIMIT2                           20:16 /* RW-UF */
#define NV_PPBDMA_GP_BASE_HI_LIMIT2_ZERO                 0x00000000 /* RW--V */
#define NV_PPBDMA_GP_BASE_HI_RSVDA                             15:8 /* RW-UF */
#define NV_PPBDMA_GP_BASE_HI_RSVDA_ZERO                  0x00000000 /* RW--V */
#define NV_PPBDMA_GP_BASE_HI_RSVDB                            31:21 /* RW-UF */
#define NV_PPBDMA_GP_BASE_HI_RSVDB_ZERO                  0x00000000 /* RW--V */


GP_FETCH - Pointer to the next GP-Entry to be Fetched

     Host does not fetch all GP entries with a single request to the memory
subsystem.  Host fetches GP entries in batches.  The NV_PPBDMA_GP_FETCH register
indicates the index of the next GP entry to be fetched by Host.  The actual
40-bit virtual address of the specified GP entry is computed as follows:
     fetch address = GP_FETCH_ENTRY * NV_PPBDMA_GP_ENTRY__SIZE + GP_BASE
     If NV_PPBDMA_GP_PUT==NV_PPBDMA_GP_FETCH, then requests to fetch the entire
GP circular buffer have been issued, and Host cannot make more requests until
NV_PPBDMA_GP_PUT is changed.  Host may finish fetching GP entries long before it
has finished processing the PB segments specified by those entries.
Software should not use NV_PPBDMA_GP_FETCH to determine whether the GP circular
buffer is full (it should use NV_PPBDMA_GP_GET instead).  NV_PPBDMA_GP_FETCH
represents the current extent of prefetching of GP entries; prefetched entries
may be discarded and refetched later.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_GP_FETCH entry of
the RAMFC part of the GPU context's GPU-instance block.
     A PBDMA unit maintains this register.  Typically, software does not need to
access this register.  This register is available to software only for debug.
Because Host may fetch GP entries long before it is ready to process the
entries, and because Host may discard GP entries that it has fetched, software
should not use NV_PPBDMA_GP_FETCH to monitor Host's progress (software should
use NV_PPBDMA_GP_GET for monitoring).  Software should use this register only if
the GPU context is assigned to a PBDMA unit and that PBDMA unit is stalled.
While a GPU context's Host context is not contained within a PBDMA unit,
software should use NV_RAMFC_GP_FETCH to access this information.
     If after a PRI write, or after this register has been restored from RAMFC
memory, the value equals or exceeds the size of the circular buffer that stores
GP entries (1<<NV_PPBDMA_GP_BASE_HI_LIMIT2), Host will initiate an interrupt
(NV_PPBDMA_INTR_*_GPPTR), and stall.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal bus clock.  This register was introduced in
Fermi.


#define NV_PPBDMA_GP_FETCH(i)                 (0x00040050+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_FETCH__SIZE_1                14 /*       */

#define NV_PPBDMA_GP_FETCH_ENTRY                               31:0 /* RW-UF */
#define NV_PPBDMA_GP_FETCH_ENTRY_ZERO                    0x00000000 /* RW--V */



GP_GET - Pointer to the next GP-Entry to be Processed

     After a GP entry is fetched, it needs to be processed.  Typically, a GP
entry is processed by fetching the segment of pushbuffer data specified by that
GP entry, parsing the pushbuffer data into PB instructions, decoding
instructions into PB control entries or method headers, and generating methods
from method headers and their corresponding method data entries.
     The NV_PPBDMA_GP_GET register contains the index of the GP entry for the
next PB segment to begin being processed. Once the next GP entry has
begun processing, that GP entry is committed and will not be refetched, and
NV_PPBDMA_GP_GET is incremented to indicate that the memory location is no
longer referenced.
     NV_PPBDMA_GP_GET is not an address, but rather an index into the GP FIFO,
offset from the beginning of the GP circular buffer in memory (defined by
NV_PPBDMA_GP_BASE).  The actual 40-bit address is computed as follows:
     GP_GET address = GP_GET_ENTRY * NV_PPBDMA_GP_ENTRY__SIZE + GP_BASE
     If it is desired that user-level software be prevented from writing GP
entries, GP entries may be stored in privileged pages of memory.  Since
NV_PPBDMA_GP_GET is an index, not an address, user-level software (which may be
able to alter NV_PPBDMA_GP_GET) cannot move GP_GET outside of the memory area
defined by NV_PPBDMA_GP_BASE.
     While the circular buffer containing GP entries is full, the CPU cannot
write any more GP entries.  There is no extra state bit to distinguish between a
full GP buffer and an empty GP buffer.  If NV_PPBDMA_GP_PUT equals
NV_PPBDMA_GP_GET-1, then the buffer is full.  If NV_PPBDMA_GP_PUT equals
NV_PPBDMA_GP_GET, then the GP circular buffer is empty, and there are no more GP
entries for Host to process.  Because of these definitions of full and empty,
the GP circular buffer must always have at least one entry that is empty.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_GP_GET entry of
the RAMFC part of the GPU context's GPU-instance block.  Host stores GP entries
that have been fetched but have not been processed in Host's Latency Buffer.
     Typically, software initializes this information using NV_RAMFC_GP_GET
when the GPU context is first created.  Hardware maintains the value of this
register.  Software usually accesses this information using NV_RAMUSERD_GP_GET.
This register is available to software only for debug--software should use the
register directly only if the GPU context is assigned to a PBDMA unit and that
PBDMA unit is stalled.  While a GPU context is not assigned to a PBDMA unit and
not bound to a channel, software should use NV_RAMFC_GP_GET to access this
information.
     If after a PRI write, or after this register has been restored from RAMFC
memory, the value equals or exceeds the size of the circular buffer that stores
GP entries (1<<NV_PPBDMA_GP_BASE_HI_LIMIT2), Host will initiate an interrupt
(NV_PPBDMA_INTR_*_GPPTR), and stall.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_GP_GET(i)                   (0x00040014+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_GET__SIZE_1                  14 /*       */

#define NV_PPBDMA_GP_GET_ENTRY                                 31:0 /* RW-UF */
#define NV_PPBDMA_GP_GET_ENTRY_ZERO                      0x00000000 /* RW--V */


GP_PUT - Pointer to the next GP-Entry to be Written

     Typically, the CPU writes GP entries to a circular buffer, and Host reads
them from that buffer.  Host should not read entries before they have been
written.
     The NV_PPBDMA_GP_PUT register contains the index of the next GP entry
that the CPU will write to memory.  NV_PPBDMA_GP_PUT points past the last entry
that has been written.  NV_PPBDMA_GP_PUT is an offset from the beginning of the
GP circular buffer in memory (NV_PPBDMA_GP_BASE).  The actual 40-bit address is
computed as follows:
     GP_PUT address = GP_PUT_ENTRY * NV_PPBDMA_GP_ENTRY__SIZE + GP_BASE
     If NV_PPBDMA_GP_PUT==NV_PPBDMA_GP_GET-1, then the buffer is full.  While
the buffer is full, the CPU can write no more GP entries.  If NV_PPBDMA_GP_PUT
equals NV_PPBDMA_GP_GET, then the buffer is empty.  While the buffer is empty,
Host can process no more GP entries.  Because of these definitions of full and
empty, the GP circular buffer must always have at least one empty entry.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from the NV_RAMFC_GP_PUT entry of
the RAMFC part of the GPU context's GPU-instance block.
     Typically, software alters GP_PUT by writing to NV_RAMUSERD_GP_PUT.  This
register is not immediately synchronized with NV_RAMUSERD_GP_PUT--there will be a
delay in that synchronization until internal reads of the pushbuffer are
guaranteed to be ordered behind the write (soft-flush).  This
register is available to software only for debug.  Software should use this
register only if the GPU context is assigned to a PBDMA unit and that PBDMA unit
is stalled.  While a GPU context is not assigned to a PBDMA unit and is not
bound to a channel, software should use NV_RAMFC_GP_PUT to access this
information.
     If after a PRI write, or after this register has been restored from RAMFC
memory, the value equals or exceeds the size of the circular buffer that stores
GP entries (1<<NV_PPBDMA_GP_BASE_HI_LIMIT2), Host will initiate an interrupt
(NV_PPBDMA_INTR_*_GPPTR), and stall.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_GP_PUT(i)                   (0x00040000+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_PUT__SIZE_1                  14 /*       */

#define NV_PPBDMA_GP_PUT_ENTRY                                 31:0 /* RW-UF */
#define NV_PPBDMA_GP_PUT_ENTRY_ZERO                      0x00000000 /* RW--V */



PB_FETCH - Pointer to the next PB Data to be Fetched

     As directed by GP entries, Host fetches pushbuffer data for a channel,
processes the data, and sends methods generated from the data to engines.  Each
GP entry specifies a range of addresses from which Host is to fetch pushbuffer
data.
     Typically, PB segments are too large for Host to fetch the entire
segment at one time.  The NV_PPBDMA_PB_FETCH_ADDR and NV_PPBDMA_PB_FETCH_HI_ADDR
fields contain the next address from which Host will fetch pushbuffer data.
PB compressed method sequences have variable sizes.  Until PB data is parsed,
Host does not know where one PB compressed method sequence ends and another
begins.  PB_FETCH may point to the middle of a compressed method sequence.
Before Host begins fetching PB data for a new GP entry, it sets this field to
the value from the GP entry's NV_PPBDMA_GP_ENTRY0_GET and
NV_PPBDMA_GP_ENTRY1_GET_HI entries so that Host will start fetching from the new
PB segment.
     The NV_PPBDMA_PB_FETCH_HI_LENGTH field contains the number of PB entries in
the PB segment for which no fetch request has been issued.  Before Host begins
fetching PB data for a new GP entry, it sets this field to the value from the GP
entry's NV_PPBDMA_GP_ENTRY1_LENGTH field.
     The NV_PPBDMA_PB_FETCH_CONDITIONAL field indicates whether the PB
segment specified by the GP entry should be fetched unconditionally, or should
be fetched only if the NV_PPBDMA_SUBDEVICE_STATUS field is STATUS_ACTIVE.
Before Host begins fetching PB data for a new GP entry, it sets this field to
the value from the GP entry's NV_PPBDMA_GP_ENTRY0_FETCH field.
     The NV_PPBDMA_PB_FETCH_HI_SYNC field specifies whether a pushbuffer may be
fetched before Host has finished processing the preceding PB segment.
Before Host begins fetching PB data for a new GP entry, it sets this field to
the value from the GP entry's NV_PPBDMA_GP_ENTRY1_SYNC field.
     The NV_PPBDMA_PB_FETCH_HI_LEVEL field specifies whether progress through
the GP entry's PB segment should be indicated in
NV_RAMUSERD_TOP_LEVEL_GET.  If LEVEL is SUBROUTINE, progress is not reflected in
TOP_LEVEL_GET.  Before Host begins fetching PB data for a new GP entry, it sets
this field to the value from the GP entry's NV_PPBDMA_GP_ENTRY1_LEVEL field.
     These registers are part of a GPU context's state.  On a switch, the
register values are saved to and restored from the NV_RAMFC_PB_FETCH and
NV_RAMFC_PB_FETCH_HI entries of the RAMFC part of the GPU context's
GPU-instance block.
     Hardware maintains these registers.  Typically, software does not access
them directly; they are available to software only for debug.  Because Host
may fetch pushbuffer data long before it is ready to process the data, and
because Host may discard pushbuffer data that it has fetched, software should
not use PB_FETCH to monitor Host's progress.  Software should use
these registers only if the GPU context is assigned to a PBDMA unit and that
PBDMA unit is stalled.  While a GPU context's Host context is not contained
within a PBDMA unit, software should use NV_RAMFC_PB_FETCH and
NV_RAMFC_PB_FETCH_HI to access this information.
     A pair of these registers exists for each of Host's PBDMA units.  These
registers run on Host's internal domain clock.


#define NV_PPBDMA_PB_FETCH(i)                 (0x00040054+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PB_FETCH__SIZE_1                14 /*       */

#define NV_PPBDMA_PB_FETCH_CONDITIONAL                          0:0 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_CONDITIONAL_FALSE             0x00000000 /* RW--V */
#define NV_PPBDMA_PB_FETCH_CONDITIONAL_TRUE              0x00000001 /* RW--V */

#define NV_PPBDMA_PB_FETCH_ADDR                                31:2 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_ADDR_ZERO                     0x00000000 /* RW--V */

#define NV_PPBDMA_PB_FETCH_HI(i)              (0x00040058+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PB_FETCH_HI__SIZE_1             14 /*       */

#define NV_PPBDMA_PB_FETCH_HI_ADDR                              7:0 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_HI_ADDR_ZERO                  0x00000000 /* RW--V */


#define NV_PPBDMA_PB_FETCH_HI_LEVEL                             9:9 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_HI_LEVEL_MAIN                 0x00000000 /* RW--V */
#define NV_PPBDMA_PB_FETCH_HI_LEVEL_SUBROUTINE           0x00000001 /* RW--V */

#define NV_PPBDMA_PB_FETCH_HI_SYNC                            10:10 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_HI_SYNC_PROCEED               0x00000000 /* RW--V */
#define NV_PPBDMA_PB_FETCH_HI_SYNC_WAIT                  0x00000001 /* RW--V */

#define NV_PPBDMA_PB_FETCH_HI_LENGTH                          31:11 /* RW-UF */
#define NV_PPBDMA_PB_FETCH_HI_LENGTH_ZERO                0x00000000 /* RW--V */


GET - Pointer to the next PB Data to be Processed

     The NV_PPBDMA_GET and NV_PPBDMA_GET_HI registers contain the virtual
address of the next pushbuffer data to be processed, called the "GET" pointer.
GET may point to the middle of a PB compressed method sequence.
     Pushbuffer data that has been fetched but has not been processed is stored
in Host's Latency Buffer.  When a channel's context is restored from memory to
Host, if that channel's Latency Buffer data has been preserved, then Host will
continue fetching pushbuffer data from PB_FETCH (which is stored in the
NV_PPBDMA_PB_FETCH and NV_PPBDMA_PB_FETCH_HI registers described above).  If
that Latency Buffer data has been lost, then Host will continue fetching
pushbuffer data from the GET address.  Typically, Latency Buffer data is
preserved if there are more engines than Host has PBDMA units for serving
engines.
     These registers are part of a GPU context's state.  On a switch, the
register values are saved to, and restored from, the NV_RAMFC_PB_GET and
NV_RAMFC_PB_GET_HI entries of the RAMFC part of the GPU context's GPU-instance
block.
     Hardware maintains the values of these registers.  Typically, software
accesses this information using NV_RAMUSERD_GET and NV_RAMUSERD_GET_HI.  These
registers are available to software only for debug.  Software should use them
only if the GPU context is assigned to a PBDMA unit.  While a GPU context is not
assigned to a PBDMA unit and is not bound to a channel, software should use
NV_RAMFC_PB_GET and NV_RAMFC_PB_GET_HI to access this information instead.
     If after a PRI write, or after this register has been restored from RAMFC
memory, the value exceeds the value of NV_PPBDMA_PUT, Host will initiate an
interrupt (NV_PPBDMA_INTR_0_PBPTR), and stall.
     A pair of these registers exists for each of Host's PBDMA units.  These
registers run on Host's internal domain clock.


#define NV_PPBDMA_GET(i)                      (0x00040018+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GET__SIZE_1                     14 /*       */

#define NV_PPBDMA_GET_OFFSET                                   31:2 /* RW-UF */
#define NV_PPBDMA_GET_OFFSET_ZERO                        0x00000000 /* RW--V */

#define NV_PPBDMA_GET_HI(i)                   (0x0004001c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GET_HI__SIZE_1                  14 /*       */

#define NV_PPBDMA_GET_HI_OFFSET                                 7:0 /* RW-UF */
#define NV_PPBDMA_GET_HI_OFFSET_ZERO                     0x00000000 /* RW--V */


PUT - Pointer to the End of the PB Segment

     Each GP entry specifies a range of addresses from which Host is to fetch
pushbuffer data.  This range of addresses defines a PB segment.  The
NV_PPBDMA_PUT and NV_PPBDMA_PUT_HI registers contain the PUT field, which
specifies the address of the first memory location after the end of the
PB segment currently being processed.  Host will stop fetching the
PB segment when it reaches this address.
     These registers are part of a GPU context's state.  On a switch, the values
of these registers are saved to and restored from the NV_RAMFC_PB_PUT and
NV_RAMFC_PB_PUT_HI entries of the RAMFC part of the GPU context's GPU-instance
block.
     Hardware maintains these registers.  Typically, software may access this
information through NV_RAMUSERD_PUT and NV_RAMUSERD_PUT_HI.  Software should
generally not access these registers directly; they are available to software
only for debug.  Software should use them only if the GPU context is assigned
to a PBDMA unit.  While a GPU context is not assigned to a PBDMA unit and is not
bound to a channel, software should use NV_RAMFC_PB_PUT and NV_RAMFC_PB_PUT_HI
to access this information.
     A pair of these registers exists for each of Host's PBDMA units.  These
registers run on Host's internal domain clock.


#define NV_PPBDMA_PUT(i)                      (0x0004005c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PUT__SIZE_1                     14 /*       */

#define NV_PPBDMA_PUT_OFFSET                                   31:2 /* RW-UF */
#define NV_PPBDMA_PUT_OFFSET_ZERO                        0x00000000 /* RW--V */
#define NV_PPBDMA_PUT_RSVD                                      1:0 /* R-IUF */
#define NV_PPBDMA_PUT_RSVD_ZERO                          0x00000000 /* R-I-V */

#define NV_PPBDMA_PUT_HI(i)                   (0x00040060+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PUT_HI__SIZE_1                  14 /*       */

#define NV_PPBDMA_PUT_HI_OFFSET                                 7:0 /* RW-UF */
#define NV_PPBDMA_PUT_HI_OFFSET_ZERO                     0x00000000 /* RW--V */


TOP_LEVEL_GET - Pointer to next top-level (non-subroutine) PB Data to be Processed

     Software may use Host's GET pointers to monitor Host's progress fetching
and processing the pushbuffer.  However, pushbuffers may contain segments that
are used at many different places within the pushbuffer (for example, a commonly
called subroutine).  If a segment is used in many different places, it may be
less helpful to know that Host is in the middle of such a lower-level segment.
Host contains a mechanism (NV_PPBDMA_GP_ENTRY1_LEVEL_SUBROUTINE) to allow
software to specify that some segments be ignored for GET pointer monitoring.
TOP_LEVEL_GET reflects GET for the last address in a segment that is not ignored
for monitoring.
     The NV_PPBDMA_TOP_LEVEL_GET and NV_PPBDMA_TOP_LEVEL_GET_HI registers hold
the last value obtained from a GP_ENTRY for NV_PPBDMA_GET and NV_PPBDMA_GET_HI
respectively that had the NV_PPBDMA_GP_ENTRY1_LEVEL set to LEVEL_MAIN.  If Host
has not yet encountered a GP entry with LEVEL_MAIN, then the
TOP_LEVEL_GET_HI_VALID field is FALSE.  VALID becomes TRUE only after the first
method has been fetched from the LEVEL_MAIN segment, and becomes FALSE again
when the channel is switched out.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_PB_TOP_LEVEL_GET and
NV_RAMFC_PB_TOP_LEVEL_GET_HI entries of the RAMFC part of the GPU context's
GPU-instance block.
     Hardware maintains this register.  Typically, software accesses this
information by reading NV_RAMUSERD_TOP_LEVEL_GET first and then
NV_RAMUSERD_TOP_LEVEL_GET_HI.  The TOP_LEVEL_GET registers are available to
software only for debug.  Software should only directly use these registers if
the GPU context is assigned to a PBDMA unit.  While a GPU context is not
assigned to a PBDMA unit and not bound to a channel, software should use
NV_RAMFC_PB_TOP_LEVEL_GET and NV_RAMFC_PB_TOP_LEVEL_GET_HI to access this
information.
     A pair of these registers exists for each of Host's PBDMA units.  These
registers run on Host's internal domain clock.



#define NV_PPBDMA_TOP_LEVEL_GET(i)            (0x00040020+(i)*8192) /* RW-4A */
#define NV_PPBDMA_TOP_LEVEL_GET__SIZE_1           14 /*       */

#define NV_PPBDMA_TOP_LEVEL_GET_OFFSET                         31:2 /* RW-UF */
#define NV_PPBDMA_TOP_LEVEL_GET_OFFSET_ZERO              0x00000000 /* RW--V */
#define NV_PPBDMA_TOP_LEVEL_GET_RSVD                            1:0 /* R-IUF */
#define NV_PPBDMA_TOP_LEVEL_GET_RSVD_ZERO                0x00000000 /* R-I-V */

#define NV_PPBDMA_TOP_LEVEL_GET_HI(i)         (0x00040024+(i)*8192) /* RW-4A */
#define NV_PPBDMA_TOP_LEVEL_GET_HI__SIZE_1        14 /*       */

#define NV_PPBDMA_TOP_LEVEL_GET_HI_OFFSET                       7:0 /* RW-UF */
#define NV_PPBDMA_TOP_LEVEL_GET_HI_OFFSET_ZERO           0x00000000 /* RW--V */
#define NV_PPBDMA_TOP_LEVEL_GET_HI_VALID                      31:31 /* RWIUF */
#define NV_PPBDMA_TOP_LEVEL_GET_HI_VALID_FALSE           0x00000000 /* RWI-V */
#define NV_PPBDMA_TOP_LEVEL_GET_HI_VALID_TRUE            0x00000001 /* RW--V */


GP_CRC - CRC Value over GP Entries

     The NV_PPBDMA_GP_CRC register contains a cyclic redundancy check value that
was calculated from GP entries.  It may be used for debug to determine whether
GP entries have been properly fetched and whether the data returned is expected.
     The IEEE 802.3 CRC-32 polynomial is used to calculate CRC values.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_GP_CRC entry of
the RAMFC part of the GPU context's GPU-instance block.
     Hardware maintains the value of this register.  Software may use special GP
entries (NV_PPBDMA_GP_ENTRY1_OPCODE_GP_CRC) to check and clear this CRC value.
This register is available to software only for debug.  Software should use this
register only if the GPU context is assigned to a PBDMA unit and that PBDMA unit
is stalled.  While a GPU context's Host context is not contained within a PBDMA
unit, software should use NV_RAMFC_GP_CRC to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was introduced in
Fermi.


#define NV_PPBDMA_GP_CRC(i)                   (0x00040074+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_CRC__SIZE_1                  14 /*       */

#define NV_PPBDMA_GP_CRC_VALUE                                 31:0 /* RW-UF */
#define NV_PPBDMA_GP_CRC_VALUE_ZERO                      0x00000000 /* RW--V */



PB_HEADER - The PB Instruction Currently Being Processed

     The NV_PPBDMA_PB_HEADER register contains information about the PB
instruction (either a PB method header or a PB control entry) currently being
processed.  It also contains information about the PB segment from which the PB
instruction was fetched.  Not all of the PB instruction's information is stored
in this register.

     Note that the information stored in the PB_HEADER register is a dynamic
representation of the instruction being processed.  It does not contain an exact
copy of the original PB entry in which the instruction was found.  For instance
if the instruction is a PB incrementing method header, the VALUE field of the
NV_PPBDMA_PB_COUNT register stores the number of method data entries left to be
consumed, and thus is decremented for each method generated.

     The NV_PPBDMA_PB_HEADER_TYPE field indicates the specific type of method
header or control entry currently being processed.  The TYPE may be an
incrementing method header (TYPE_INC), a non-incrementing method header
(TYPE_NON_INC), an increment-once method header (TYPE_INC_ONCE), an
immediate-data method header (TYPE_IMMD), a SET_SUBDEVICE_MASK control entry
(TYPE_SSDM), a STORE_SUBDEVICE_MASK control entry (TYPE_STORE_SDM), a
USE_SUBDEVICE_MASK control entry (TYPE_USE_SDM), or an end-of-pushbuffer-segment
control entry (TYPE_END_SEG).  See "FIFO_DMA" in dev_ram.ref for details about
these types of PB instructions.  Note when PB_HEADER_TYPE is TYPE_INC_ONCE, this
field will be updated to TYPE_NON_INC after the first method in the compressed
sequence has been generated.
     The NV_PPBDMA_PB_HEADER_METHOD field contains the current method address.
While processing an incrementing method header and its method data entries, this
field will increment after each method is generated.
     The NV_PPBDMA_PB_HEADER_SUBCHANNEL field identifies the subchannel that
methods generated from the current instruction are targeting (if applicable).
Note that the mapping from subchannels to engines is fixed for each runlist
type.
     The NV_PPBDMA_PB_HEADER_LEVEL field indicates whether the current PB
instruction is within a PB segment that is being used for progress monitoring.
If this field is LEVEL_MAIN, then progress through the current PB segment is
available in NV_PPBDMA_TOP_LEVEL_GET.  If this field is LEVEL_SUBROUTINE, the
progress through the current PB segment does not affect TOP_LEVEL_GET.  The
value of this field comes from the GP entry that specified the PB segment.
     The NV_PPBDMA_PB_HEADER_FINAL field indicates that the PB entry in which
the current PB instruction was found is the final PB entry of a PB segment.
This field is used by hardware for tracking PB segment boundaries.
     The NV_PPBDMA_PB_HEADER_FIRST field indicates whether this PB instruction
is the first PB instruction of a new PB segment.  This field is used by hardware
for tracking.
     The NV_PPBDMA_PB_HEADER_CONDITIONAL field indicates whether this PB
instruction is from a conditionally fetched PB segment.  If this PB instruction
changes the subdevice mask to not match, then the remainder of this PB segment
is not processed.

     This register is part of a channel's state.  On a switch, the value of this
register is saved to and restored from the NV_RAMFC_PB_HEADER field of the RAMFC
part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_PB_HEADER instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_PB_HEADER(i)                (0x00040084+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PB_HEADER__SIZE_1               14 /*       */

#define NV_PPBDMA_PB_HEADER_METHOD_OR_SDMASK                   15:2 /* RW-UF */
#define NV_PPBDMA_PB_HEADER_METHOD                             13:2 /*       */
#define NV_PPBDMA_PB_HEADER_METHOD_ZERO                  0x00000000 /*       */
#define NV_PPBDMA_PB_HEADER_SDMASK                             15:4 /*       */
#define NV_PPBDMA_PB_HEADER_SUBCHANNEL                        18:16 /* RW-UF */
#define NV_PPBDMA_PB_HEADER_SUBCHANNEL_ZERO              0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_LEVEL                             20:20 /* RW-VF */
#define NV_PPBDMA_PB_HEADER_LEVEL_MAIN                   0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_LEVEL_SUBROUTINE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_HEADER_FIRST                             22:22 /* RW-VF */
#define NV_PPBDMA_PB_HEADER_FIRST_FALSE                  0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_FIRST_TRUE                   0x00000001 /* RW--V */
#define NV_PPBDMA_PB_HEADER_CONDITIONAL                       23:23 /* RW-VF */
#define NV_PPBDMA_PB_HEADER_CONDITIONAL_FALSE            0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_CONDITIONAL_TRUE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_HEADER_FINAL                             24:24 /* RW-VF */
#define NV_PPBDMA_PB_HEADER_FINAL_FALSE                  0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_FINAL_TRUE                   0x00000001 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE                              31:29 /* RW-UF */
#define NV_PPBDMA_PB_HEADER_TYPE_SSDM                    0x00000000 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_INC                     0x00000001 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_STORE_SDM               0x00000002 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_NON_INC                 0x00000003 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_IMMD                    0x00000004 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_INC_ONCE                0x00000005 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_USE_SDM                 0x00000006 /* RW--V */
#define NV_PPBDMA_PB_HEADER_TYPE_END_SEG                 0x00000007 /* RW--V */



PB_COUNT - PB Entry Processor Remaining Count

     Multiple method address/data pairs may be generated from a single PB method
header.  The number of methods generated from a PB method header is indicated by
the header's count field.  A single PB entry may require many cycles to process.
A channel may be switched out while Host is in the middle of processing a PB
compressed method sequence.  The NV_PPBDMA_PB_COUNT register along with
NV_PPBDMA_PB_HEADER contains information about the PB method header currently
being processed.
     The VALUE field of the NV_PPBDMA_PB_COUNT register contains the number of
method data entries remaining to be processed in the current compressed method
sequence.  When PB_COUNT_VALUE is 0, there are no more remaining method data
entries to process, and the next PB entry in the pushbuffer data stream is
interpreted as the next PB instruction.  When PB_COUNT_VALUE is nonzero, the
next PB entry in the PB data stream is interpreted as method data for use in
generating the next method address/data pair.  After each method data entry is
processed, PB_COUNT_VALUE is decremented.
     A PBDMA unit may contain up to three PB entries that have not yet begun
being parsed into PB instructions or method data.  This raw pushbuffer data is
stored in NV_PPBDMA_PB_DATA*.  The NV_PPBDMA_PB_COUNT_DATAVAL* fields indicate
whether or not the NV_PPBDMA_PB_DATA* registers contain valid PB entries.  Each
PB entry can be from separate PB segments, and therefore may have different
GP-entry attributes.  The attributes for each PB entry are stored in the
remaining fields (LEVEL*, CONDITIONAL*, and FINAL*) in this register; see
the above documentation for the associated NV_PPBDMA_PB_HEADER fields.
     If the PB instruction being processed by Host's PB instruction processor is
an immediate-data method header, then instead of a count value, PB_COUNT_VALUE
contains a value to be used as the data part of a method address/data pair.
     See "FIFO_DMA" in dev_ram.ref for details about compressed method
sequences and method headers.

     When the RAMFC in the instance block of a new channel is initialized, the
PB_COUNT_VALUE field should be cleared to allow the first PB entry to be decoded
as a PB instruction rather than as method data.
     This register is part of a channel's state.  On a switch, the value of this
register is saved to and restored from the NV_RAMFC_PB_COUNT field of the RAMFC
part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_PB_COUNT instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_PB_COUNT(i)                 (0x00040088+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PB_COUNT__SIZE_1                14 /*       */

#define NV_PPBDMA_PB_COUNT_VALUE                               12:0 /* RW-UF */
#define NV_PPBDMA_PB_COUNT_VALUE_ZERO                    0x00000000 /* RW--V */

#define NV_PPBDMA_PB_COUNT_DATAVAL0                           16:16 /* RW-UF */
#define NV_PPBDMA_PB_COUNT_DATAVAL0_FALSE                0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_DATAVAL0_TRUE                 0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL0                             18:18 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_LEVEL0_MAIN                   0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL0_SUBROUTINE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL0                       14:14 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL0_FALSE            0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL0_TRUE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL0                             15:15 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_FINAL0_FALSE                  0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL0_TRUE                   0x00000001 /* RW--V */

#define NV_PPBDMA_PB_COUNT_DATAVAL1                           20:20 /* RW-UF */
#define NV_PPBDMA_PB_COUNT_DATAVAL1_FALSE                0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_DATAVAL1_TRUE                 0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL1                             22:22 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_LEVEL1_MAIN                   0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL1_SUBROUTINE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL1                       28:28 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL1_FALSE            0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL1_TRUE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL1                             29:29 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_FINAL1_FALSE                  0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL1_TRUE                   0x00000001 /* RW--V */

#define NV_PPBDMA_PB_COUNT_DATAVAL2                           24:24 /* RW-UF */
#define NV_PPBDMA_PB_COUNT_DATAVAL2_FALSE                0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_DATAVAL2_TRUE                 0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL2                             26:26 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_LEVEL2_MAIN                   0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_LEVEL2_SUBROUTINE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL2                       30:30 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL2_FALSE            0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_CONDITIONAL2_TRUE             0x00000001 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL2                             31:31 /* RW-VF */
#define NV_PPBDMA_PB_COUNT_FINAL2_FALSE                  0x00000000 /* RW--V */
#define NV_PPBDMA_PB_COUNT_FINAL2_TRUE                   0x00000001 /* RW--V */

PB_CRC - CRC Value over PB Entries

     The NV_PPBDMA_PB_CRC register contains a cyclic redundancy check value
calculated from PB entries.  It may be used for debug to determine whether PB
entries have been properly fetched and whether the data returned is expected.
The NV_PPBDMA_PB_CRC register is cleared at the beginning of each new PB
segment.  Note the CRC is indeterminate if an END_PB_SEGMENT instruction was
used in the prior segment (or if the subdevice is disabled via SSDM and the
segment was marked for conditional fetching) because Host may have already
calculated the CRC for an arbitrary number of PB entries before processing the
END_PB_SEGMENT or SSDM control entry.
     The IEEE 802.3 CRC-32 polynomial is used to calculate CRC values.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to and restored from the NV_RAMFC_PB_CRC entry of the
RAMFC part of the GPU context's GPU-instance block.
     This register is maintained by hardware.  Software may use special GP
entries (NV_PPBDMA_GP_ENTRY1_OPCODE_PB_CRC) to check (and clear) the CRC value
for the previous PB segment.  Typically, software does not access this
register--it is available to software only for debug.  Software should use it
only if the GPU context is assigned to a PBDMA unit and that PBDMA unit is
stalled.  While a GPU context's Host state is not contained within a PBDMA unit,
software should use NV_RAMFC_PB_CRC to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was introduced in
Fermi.



#define NV_PPBDMA_PB_CRC(i)                   (0x00040098+(i)*8192) /* RW-4A */
#define NV_PPBDMA_PB_CRC__SIZE_1                  14 /*       */

#define NV_PPBDMA_PB_CRC_VALUE                                 31:0 /* RW-UF */
#define NV_PPBDMA_PB_CRC_VALUE_ZERO                      0x00000000 /* RW--V */


SUBDEVICE - Subdevice Identifier and Status Register

     The NV_PPBDMA_SUBDEVICE register is used to differentiate between GPU
contexts using the same pushbuffer.  For example, two different GPUs in a SLI
configuration might use the same pushbuffer, or two different GPU contexts doing
stereo rendering might use the same pushbuffer.  Using this register and
SET_SUBDEVICE_MASK PB instructions, software can specify that a set of methods
be sent to the engine only for a subset of the channels sharing the pushbuffer.
     The SET_SUBDEVICE_MASK instruction (see dev_ram.ref) compares its mask
operand with the value in this register, if SUBDEVICE_CHANNEL_DMA is set to
ENABLED.  If the logical-AND of the current SUBDEVICE_ID and the mask is
non-zero, and if SUBDEVICE_CHANNEL_DMA is ENABLED, SUBDEVICE_STATUS is set to
ACTIVE, and Host will send methods to the engine.  If the current SUBDEVICE_ID
is not in the mask, SUBDEVICE_STATUS will be set to INACTIVE, and Host will not
send any methods to the engine.
     The NV_PPBDMA_SUBDEVICE_STATUS field indicates whether methods are being
filtered.  If this field is INACTIVE, later methods are not being generated,
decoded, executed by Host, or sent to an engine.  If this field is ACTIVE,
methods are being processed normally.
     The NV_PPBDMA_SUBDEVICE_CHANNEL_DMA field controls whether filtering
methods according to the SUBDEVICE_ID is enabled.  If this field is DISABLE,
then SUBDEVICE_STATUS will always be set to ACTIVE, and all methods will be sent
to the engine.  If a SET_SUBDEVICE_MASK or USE_SUBDEVICE_MASK instruction is sent
while this field is DISABLE, Host will generate an interrupt
(NV_PPBDMA_INTR_0_PBENTRY).
     The NV_PPBDMA_SUBDEVICE_STORED_MASK field contains a subdevice mask value
to be used later by a USE_SUBDEVICE_MASK instruction.  This field is loaded by a
STORE_SUBDEVICE_MASK instruction.  See dev_ram.ref for details.
     This register is part of a GPU context's state.  Each channel has its own
NV_PPBDMA_SUBDEVICE register value.  On a switch, the NV_PPBDMA_SUBDEVICE value
is saved to and restored from the NV_RAMFC_SUBDEVICE entry of the RAMFC part of
the GPU context's GPU-instance block.
     Typically, software initializes this information in NV_RAMFC_SUBDEVICE when
the GPU context is first created.  This register is available to software only
for debug.  Software should use this register only if the GPU context is
assigned to a PBDMA unit, and if that PBDMA unit is stalled.  While a GPU
context's Host state is not contained within a PBDMA unit, software should use
NV_RAMFC_SUBDEVICE to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  It was introduced with the NV36
Channel DMA class.


#define NV_PPBDMA_SUBDEVICE(i)                (0x00040094+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SUBDEVICE__SIZE_1               14 /*       */

#define NV_PPBDMA_SUBDEVICE_ID                                 11:0 /* RW-UF */
#define NV_PPBDMA_SUBDEVICE_ID_ENABLE                    0x00000FFF /* RW--V */
#define NV_PPBDMA_SUBDEVICE_STORED_MASK                       27:16 /* RW-UF */
#define NV_PPBDMA_SUBDEVICE_STORED_MASK_ENABLE           0x00000FFF /* RW--V */
#define NV_PPBDMA_SUBDEVICE_STATUS                            28:28 /* RW-UF */
#define NV_PPBDMA_SUBDEVICE_STATUS_INACTIVE              0x00000000 /* RW--V */
#define NV_PPBDMA_SUBDEVICE_STATUS_ACTIVE                0x00000001 /* RW--V */
#define NV_PPBDMA_SUBDEVICE_CHANNEL_DMA                       29:29 /* RW-UF */
#define NV_PPBDMA_SUBDEVICE_CHANNEL_DMA_DISABLE          0x00000000 /* RW--V */
#define NV_PPBDMA_SUBDEVICE_CHANNEL_DMA_ENABLE           0x00000001 /* RW--V */


METHODn - Method FIFO Address Registers

     The NV_PPBDMA_METHOD registers contain the method header information for
the PBDMA unit's tiny Method FIFO (called "Cache1" in the Tesla architecture).
The format of these registers does not match the method headers as present in
the pushbuffer, but they contain the necessary information for Host to process
each method.  Method addresses generated from PB method headers and their
associated method data entries are stored in the Method FIFO until Host is ready
to process them.  Method addresses indicate an operation to be performed by Host
or by an engine.  The corresponding data for these methods are stored in
NV_PPBDMA_DATA registers.  Compressed method sequences (method headers and their
associated method data entries) are expanded into these registers such that each
method data entry corresponds to a method address/data pair in the method FIFO.
     The size of the method FIFO is given by the METHOD_FIFO_SIZE define; this
size is hard-coded and will remain constant for any given architecture.
     The NV_PPBDMA_METHOD0 register contains the first method to be executed.
METHOD1 contains the second method to be executed, and so forth.

     The NV_PPBDMA_METHOD_SUBCH field contains the subchannel to which the
method is targeted.  Subchannels are associated with engines according to a
fixed mapping and with class identifiers via the NV_UDMA_OBJECT method.


     The NV_PPBDMA_METHOD_FIRST field indicates whether the header for this
method is the first method header of a PB segment (as specified by a GP entry).
     The NV_PPBDMA_METHOD_VALID field indicates whether this queue entry is
valid.  If this field is VALID_FALSE, then the entry is empty.
     For some engines, Host may send two method address/data pairs in a cycle if
the addresses of the two methods are the same or if the address of the second
method is the address of the first method incremented.  The
NV_PPBDMA_METHOD_DUAL field indicates that a method may be paired with the
following entry.  If the engine that a method targets cannot support dual
methods, or if the method address indicates a Host-executed method, then methods
may be sent one at a time even if the first method is marked DUAL.  When
generating methods from PB method headers and their associated method data
entries, Host sets this field deterministically (independently of the rate at
which the PBDMA unit receives PB data from memory).  If the
NV_PPBDMA_METHOD_DUAL field is DUAL_TRUE, then the NV_PPBDMA_METHOD_INCR field
indicates whether the method address of the second is equal to the address of
the first incremented. In the case of an incrementing method, DUAL_TRUE and
INCR_TRUE will only be set if the method address is even.

     This register is part of a GPU context's state.  On a switch, the values of
these registers are saved to, and restored from, the NV_RAMFC_METHOD* fields
of the RAMFC part of the GPU context's GPU-instance block.
     Hardware maintains this information.  Software should use this register
only if the GPU context is assigned to a PBDMA unit and that PBDMA unit is
stalled.  While a GPU context's Host state is not contained within a PBDMA unit,
software should use NV_RAMFC_METHOD* to access this information.
     When a PBDMA unit is stalled due to a software method, software may use
these registers to determine the method address/data pairs that software is to
execute.  After executing a software method, to indicate to hardware that the
method has been executed, software should set the METHOD_VALID field to FALSE
before clearing the NV_PPBDMA_INTR_*_DEVICE register field.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_METHOD0(i)                  (0x000400c0+(i)*8192) /* RW-4A */
#define NV_PPBDMA_METHOD0__SIZE_1                 14 /*       */
#define NV_PPBDMA_METHOD0_INCR                                  0:0 /* RW-UF */
#define NV_PPBDMA_METHOD0_INCR_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_INCR_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD0_ADDR                                 13:2 /* RW-UF */
#define NV_PPBDMA_METHOD0_ADDR_NULL                      0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_SUBCH                               18:16 /* RW-UF */
#define NV_PPBDMA_METHOD0_SUBCH_ZERO                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_FIRST                               22:22 /* RW-UF */
#define NV_PPBDMA_METHOD0_FIRST_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_FIRST_TRUE                     0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD0_DUAL                                23:23 /* RW-UF */
#define NV_PPBDMA_METHOD0_DUAL_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_DUAL_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD0_VALID                               31:31 /* RW-UF */
#define NV_PPBDMA_METHOD0_VALID_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD0_VALID_TRUE                     0x00000001 /* RW--V */

#define NV_PPBDMA_METHOD1(i)                  (0x000400c8+(i)*8192) /* RW-4A */
#define NV_PPBDMA_METHOD1__SIZE_1                 14 /*       */
#define NV_PPBDMA_METHOD1_INCR                                  0:0 /* RW-UF */
#define NV_PPBDMA_METHOD1_INCR_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_INCR_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD1_ADDR                                 13:2 /* RW-UF */
#define NV_PPBDMA_METHOD1_ADDR_NULL                      0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_SUBCH                               18:16 /* RW-UF */
#define NV_PPBDMA_METHOD1_SUBCH_ZERO                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_FIRST                               22:22 /* RW-UF */
#define NV_PPBDMA_METHOD1_FIRST_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_FIRST_TRUE                     0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD1_DUAL                                23:23 /* RW-UF */
#define NV_PPBDMA_METHOD1_DUAL_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_DUAL_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD1_VALID                               31:31 /* RW-UF */
#define NV_PPBDMA_METHOD1_VALID_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD1_VALID_TRUE                     0x00000001 /* RW--V */

#define NV_PPBDMA_METHOD2(i)                  (0x000400d0+(i)*8192) /* RW-4A */
#define NV_PPBDMA_METHOD2__SIZE_1                 14 /*       */
#define NV_PPBDMA_METHOD2_INCR                                  0:0 /* RW-UF */
#define NV_PPBDMA_METHOD2_INCR_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_INCR_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD2_ADDR                                 13:2 /* RW-UF */
#define NV_PPBDMA_METHOD2_ADDR_NULL                      0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_SUBCH                               18:16 /* RW-UF */
#define NV_PPBDMA_METHOD2_SUBCH_ZERO                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_FIRST                               22:22 /* RW-UF */
#define NV_PPBDMA_METHOD2_FIRST_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_FIRST_TRUE                     0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD2_DUAL                                23:23 /* RW-UF */
#define NV_PPBDMA_METHOD2_DUAL_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_DUAL_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD2_VALID                               31:31 /* RW-UF */
#define NV_PPBDMA_METHOD2_VALID_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD2_VALID_TRUE                     0x00000001 /* RW--V */

#define NV_PPBDMA_METHOD3(i)                  (0x000400d8+(i)*8192) /* RW-4A */
#define NV_PPBDMA_METHOD3__SIZE_1                 14 /*       */
#define NV_PPBDMA_METHOD3_INCR                                  0:0 /* RW-UF */
#define NV_PPBDMA_METHOD3_INCR_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_INCR_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD3_ADDR                                 13:2 /* RW-UF */
#define NV_PPBDMA_METHOD3_ADDR_NULL                      0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_SUBCH                               18:16 /* RW-UF */
#define NV_PPBDMA_METHOD3_SUBCH_ZERO                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_FIRST                               22:22 /* RW-UF */
#define NV_PPBDMA_METHOD3_FIRST_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_FIRST_TRUE                     0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD3_DUAL                                23:23 /* RW-UF */
#define NV_PPBDMA_METHOD3_DUAL_FALSE                     0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_DUAL_TRUE                      0x00000001 /* RW--V */
#define NV_PPBDMA_METHOD3_VALID                               31:31 /* RW-UF */
#define NV_PPBDMA_METHOD3_VALID_FALSE                    0x00000000 /* RW--V */
#define NV_PPBDMA_METHOD3_VALID_TRUE                     0x00000001 /* RW--V */

DATAn - Method FIFO Data Registers

     The NV_PPBDMA_DATA registers contain the data part of a PBDMA unit's tiny
method FIFO (Cache1).  Method data from the pushbuffer is stored in Host's
method FIFO until the PBDMA unit is ready to process it.
     NV_PPBDMA_DATA(0) contains the data for the first method.  DATA(1) contains
data for the second method, and so forth.
     This register is part of a GPU context's state.  On a switch, the values of
these registers are saved to, and restored from, the NV_RAMFC_DATA*
fields of the RAMFC part of the GPU context's GPU-instance block.
     Hardware maintains this information.  Software should use this register
only if the GPU context is assigned to a PBDMA unit and that PBDMA unit is
stalled.  While a GPU context's Host state is not contained within a PBDMA unit,
software should use NV_RAMFC_DATA* to access this information.
     When a PBDMA unit is stalled due to a software method, software may use
these registers to determine the data part of the method address/data pairs that
software is to execute.  When handling a software method, software need only set
the method's NV_PPBDMA_METHOD_VALID bit to VALID_FALSE.  It need not move or
alter the contents of this register.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_DATA0(i)                    (0x000400c4+(i)*8192) /* RW-4A */
#define NV_PPBDMA_DATA0__SIZE_1                   14 /*       */
#define NV_PPBDMA_DATA0_VALUE                                  31:0 /* RW-UF */
#define NV_PPBDMA_DATA0_VALUE_ZERO                       0x00000000 /* RW--V */

#define NV_PPBDMA_DATA1(i)                    (0x000400cc+(i)*8192) /* RW-4A */
#define NV_PPBDMA_DATA1__SIZE_1                   14 /*       */
#define NV_PPBDMA_DATA1_VALUE                                  31:0 /* RW-UF */
#define NV_PPBDMA_DATA1_VALUE_ZERO                       0x00000000 /* RW--V */

#define NV_PPBDMA_DATA2(i)                    (0x000400d4+(i)*8192) /* RW-4A */
#define NV_PPBDMA_DATA2__SIZE_1                   14 /*       */
#define NV_PPBDMA_DATA2_VALUE                                  31:0 /* RW-UF */
#define NV_PPBDMA_DATA2_VALUE_ZERO                       0x00000000 /* RW--V */

#define NV_PPBDMA_DATA3(i)                    (0x000400dc+(i)*8192) /* RW-4A */
#define NV_PPBDMA_DATA3__SIZE_1                   14 /*       */
#define NV_PPBDMA_DATA3_VALUE                                  31:0 /* RW-UF */
#define NV_PPBDMA_DATA3_VALUE_ZERO                       0x00000000 /* RW--V */

TARGET [register] - Target Engine

     The NV_PPBDMA_TARGET_ENGINE field contains the last non-software engine
that received data from the Method Processor.  This register is used to
determine if an inter-engine subchannel switch has happened.  Methods executed
by Host (not by an engine), regardless of their subchannel, do not affect the
value of this field.
     The imaginary software engine is treated specially.  A method is directed
at the software engine by setting its NV_FIFO_DMA_*_SUBCHANNEL field to one of
the SW subchannels 5-7; see dev_ram.ref.  When such a method is encountered, the
PBDMA unit freezes and raises the NV_PPBDMA_INTR_0_DEVICE interrupt.  CPU
software handles the method, marks the method as having been executed by setting
NV_PPBDMA_METHOD0_VALID to FALSE, and clears the interrupt to allow the PBDMA to
continue processing subsequent methods.  When initializing a channel, SW should
set the ENGINE field in NV_RAMFC_TARGET to match the engine that the channel
will serve.  If the ENGINE is not a valid engine for the runqueue, Host will
force the field to the lowest numbered engine served by the runqueue.  If the
ENGINE still does not match the first encountered engine method on the channel,
Host will WFI on the engine specified by the TARGET entry in RAMFC before
submitting the first method to the engine targeted by SUBCHANNEL field of the
method.

     The NV_PPBDMA_TARGET_ENG_CTX_VALID field indicates whether a valid non-CE
engine context exists for the channel loaded on the PBDMA.  The field is
populated by the value in the corresponding field of the NV_RAMFC_TARGET entry
and is not modified by HW.  When initializing a channel in a TSG for which a
valid engine context exists, SW should set the channel's NV_RAMFC_TARGET
ENG_CTX_VALID field to TRUE.  If a valid engine context does not exist at
channel creation time, the field should be set to FALSE.  When a valid engine
context is created for the TSG, the RAMFC field must be set TRUE for all
channels in the TSG.  Prior to a TSG's engine context being deleted, the TSG's
channels must be disabled or unbound and the TSG preempted, followed by setting
the channels' RAMFC ENG_CTX_VALID fields to FALSE.  The RAMFC field for a
channel should only be updated when the channel is disabled and idle.
     The NV_PPBDMA_TARGET_CE_CTX_VALID field indicates whether a valid copy
engine method buffer exists for the channel loaded on the PBDMA.  The field is
populated by the value in the corresponding field of the NV_RAMFC_TARGET entry
and is not modified by HW.  When initializing a channel, SW should set the
channel's NV_RAMFC_TARGET CE_CTX_VALID field to TRUE if the copy engine method
buffer for the channel's TSG runqueue has already been created; see
NV_RAMIN_ENG_METHOD_BUFFER_ADDR_* in dev_ram.ref.  If a valid method buffer has
not been created, the field should be set to FALSE.  When a method buffer is
created for the TSG runqueue, this RAMFC field must be set to TRUE for all
channels in the TSG that target the runqueue.  Prior to deallocating the method
buffer for a TSG runqueue, all channels in the TSG that map to the runqueue must
be disabled or unbound and the TSG preempted, followed by setting the channels'
RAMFC CE_CTX_VALID fields to FALSE.  The RAMFC field for a channel should only
be updated when the channel is disabled and idle.
     If Host receives an engine method for an engine that has the corresponding
NV_PPBDMA_TARGET_*_CTX_VALID field set to FALSE, Host will raise the stalling
PBDMA interrupt NV_PPBDMA_INTR_1_CTXNOTVALID.

     Host sets NV_PPBDMA_TARGET_SHOULD_SEND_HOST_TSG_EVENT whenever the PBDMA
sends any method to the graphics engine.  When set, Host must eventually send a
HOST_TSG_EVENT at a TSG event point: the channel runs out of work, a TSG yield
is reached, or a semaphore acquire fails.  Therefore, as a performance
optimization, Host will initiate a context load immediately following the RAMFC
load in preparation for sending the HOST_TSG_EVENT.  SHOULD_SEND_HOST_TSG_EVENT
is cleared once Host issues a HOST_TSG_EVENT method or when Host does a
subchannel switch to the PBDMA's grcopy.  Note if the clear occurs due to the
latter case, the initial context load may have been needless.
     Host sets NV_PPBDMA_TARGET_NEEDS_HOST_TSG_EVENT to TRUE when in a TSG, the
TARGET_ENGINE is NV_ENGINE_GRAPHICS, and the PBDMA needs to send the target
engine a HOST_TSG_EVENT internal method.  When TRUE on channel load, Host will
send the HOST_TSG_EVENT prior to sending any other engine methods.  This is
somewhat like having another entry in the NV_PPBDMA_METHODn/DATAn Host method
fifo that comes before the 0th entry.  However, Host may process other Host
methods concurrently with attempting to send the HOST_TSG_EVENT.  Note that when
this field is TRUE, the PBDMA will initiate a context load immediately after the
RAMFC is loaded unless a context load is already in progress because of
CTX_RELOAD or the other PBDMA sharing the runlist.  On channel creation,
software should initialize this field to FALSE in the corresponding
NV_RAMFC_TARGET entry.  This bit is required for Pascal SCG functional
correctness--when Host cannot send a HOST_TSG_EVENT due to backpressure on the
method interface to FE, it must remember the fact that it still needs to send a
HOST_TSG_EVENT if the PBDMA channel switches out.  Dropping a HOST_TSG_EVENT can
result in a hang in FE if the current pipe is in compute mode and the other pipe
has methods to send.
     The HOST_TSG_EVENT_REASON field indicates the reason for which a
HOST_TSG_EVENT internal method must be sent when NEEDS_HOST_TSG_EVENT is TRUE.
These defines match those of the NV_PMETHOD_HOST_TSG_EVENT_REASON field of the
internal method; see internal_methods.ref.

     This register is part of a GPU context's state.  During a channel switch,
the value of this register is saved to and restored from the NV_RAMFC_TARGET
entry of the GPU context's GPU-instance block.
     This information is maintained by Hardware.  Typically, software does not
access this register.  This register is available for debug purposes.  Software
should use this register only if the GPU context is assigned to a PBDMA unit and
that PBDMA unit is stalled.  While a GPU context's Host state is not contained
within a PBDMA unit, software should use NV_RAMFC_TARGET to access this
information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_TARGET(i)                   (0x000400ac+(i)*8192) /* RW-4A */
#define NV_PPBDMA_TARGET__SIZE_1                  14 /*       */

#define NV_PPBDMA_TARGET_ENGINE                                 4:0 /* RW-UF */
#define NV_PPBDMA_TARGET_ENGINE_SW               31 /* RW--V */

#define NV_PPBDMA_TARGET_ENG_CTX_VALID                        16:16 /* RW-UF */
#define NV_PPBDMA_TARGET_ENG_CTX_VALID_TRUE                       1 /* RW--V */
#define NV_PPBDMA_TARGET_ENG_CTX_VALID_FALSE                      0 /* RW--V */

#define NV_PPBDMA_TARGET_CE_CTX_VALID                         17:17 /* RW-UF */
#define NV_PPBDMA_TARGET_CE_CTX_VALID_TRUE                        1 /* RW--V */
#define NV_PPBDMA_TARGET_CE_CTX_VALID_FALSE                       0 /* RW--V */

#define NV_PPBDMA_TARGET_HOST_TSG_EVENT_REASON                25:24 /* RW-UF */
#define NV_PPBDMA_TARGET_HOST_TSG_EVENT_REASON_PBDMA_IDLE       0x0 /* RW--V */
#define NV_PPBDMA_TARGET_HOST_TSG_EVENT_REASON_SEMAPHORE_ACQUIRE_FAILURE 0x1 /* RW--V */
#define NV_PPBDMA_TARGET_HOST_TSG_EVENT_REASON_TSG_YIELD        0x2 /* RW--V */
#define NV_PPBDMA_TARGET_HOST_TSG_EVENT_REASON_HOST_SUBCHANNEL_SWITCH    0x3 /* RW--V */

#define NV_PPBDMA_TARGET_SHOULD_SEND_HOST_TSG_EVENT           29:29 /* RW-UF */
#define NV_PPBDMA_TARGET_SHOULD_SEND_HOST_TSG_EVENT_TRUE          1 /* RW--V */
#define NV_PPBDMA_TARGET_SHOULD_SEND_HOST_TSG_EVENT_FALSE         0 /* RW--V */

#define NV_PPBDMA_TARGET_NEEDS_HOST_TSG_EVENT                 31:31 /* RW-UF */
#define NV_PPBDMA_TARGET_NEEDS_HOST_TSG_EVENT_TRUE                1 /* RW--V */
#define NV_PPBDMA_TARGET_NEEDS_HOST_TSG_EVENT_FALSE               0 /* RW--V */


METHOD_CRC - Method CRC Value

     The NV_PPBDMA_METHOD_CRC register contains a cyclic redundancy check value
calculated from the methods sent to Host's Crossbar.  This therefore excludes
software methods and Host-only methods. It may be used for debug
to determine whether the correct methods have been sent.  A method CRC can
detect errors in the fetching of GP data, the fetching of PB data, and the
generation of methods from PB data.  If Host fetched GP data incorrectly,
fetched PB data incorrectly, or generated methods from PB data incorrectly it is
unlikely that the CRC value calculated by Host would match the CRC value
calculated by software.
     The IEEE 802.3 CRC-32 polynomial (x^32 + x^26 + x^23 + x^22 + x^16 + x^12 +
x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1) is used to calculate CRC
values.  Methods can be sent to Host's Crossbar as single methods, or dual
methods.  The CRC is calculated as if dual methods were always sent as two
single methods.  Each method consists of a subchannel identifier (3 bits), a
method address (12 bits), and method data (32 bits).  For the CRC calculation, a
method is organized into a 6-byte value.  Bytes are added to the CRC from the
least significant byte to the most significant byte.


     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_METHOD_CRC field of
the RAMFC part of the GPU context's GPU-instance block.
     This information is maintained by hardware.  Software may use special
methods (NV_UDMA_CRC_CHECK) to check and clear the CRC value.  Typically,
software does not access this register directly.  This register is available to
software only for debug.  Software should use this register only if the GPU
context is assigned to a PBDMA unit and that PBDMA unit is stalled.  While a GPU
context's Host state is not contained within a PBDMA unit, software should use
NV_RAMFC_METHOD_CRC to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was introduced in
Fermi.



#define NV_PPBDMA_METHOD_CRC(i)               (0x000400b0+(i)*8192) /* RW-4A */
#define NV_PPBDMA_METHOD_CRC__SIZE_1              14 /*       */

#define NV_PPBDMA_METHOD_CRC_VALUE                             31:0 /* RW-UF */
#define NV_PPBDMA_METHOD_CRC_VALUE_ZERO                  0x00000000 /* RW--V */

REF - Reference Count

     Software may use Reference Counts to monitor Host's progress processing a
pushbuffer.  The pushbuffer specifies that the Reference Count be written.  For
synchronization, software might wait until the Reference Count has a
particular value before proceeding.
     The NV_PPBDMA_REF register holds a 32-bit Reference Count value that can be
written with the NV_UDMA_SET_REF method.  The value written to the register is
from the NV_UDMA_SET_REF method's parameter.  The value is not written to this
register until the target engine reports that it is idle and the memory
subsystem has been flushed.  Waiting for the engine to become idle and the
memory subsystem to be flushed ensures that all previous instructions in the
current channel context have completed execution.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_REF entry of the
RAMFC part of the GPU context's GPU-instance block.
     Typically, while a GPU context is bound to a channel, software uses
NV_RAMUSERD_REF to access this information.  Typically, software does not access
this register directly.  This register is available to software only for debug.
Software should use this register only if the GPU context is assigned to a PBDMA
unit and that PBDMA unit is stalled.  While a GPU context is not assigned to a
PBDMA unit and is not bound to a channel, software should use NV_RAMFC_REF to
access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_REF(i)                      (0x00040028+(i)*8192) /* RW-4A */
#define NV_PPBDMA_REF__SIZE_1                     14 /*       */

#define NV_PPBDMA_REF_CNT                                      31:0 /* RW-UF */
#define NV_PPBDMA_REF_CNT_ZERO                           0x00000000 /* RW--V */



RUNTIME - Active run time on Host

     The NV_PPBDMA_RUNTIME register contains the amount of time a GPU context
has been actively running within Host.  This is not the amount of time that the
GPU context has been actively running on an engine.  The amount of time is
measured in 1024 ns ticks from the PTIMER.  Software may set this value to 0 and
can later read the value to see whether the GPU context ran.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_RUNTIME entry of the
RAMFC part of the GPU context's GPU-instance block.
     This information is maintained by hardware.  Software may read this
register at any time.  Software should write this register only if the GPU
context is assigned to a PBDMA unit and that PBDMA unit is stalled.  While a GPU
context's Host state is not contained within a PBDMA unit, software should use
NV_RAMFC_RUNTIME to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_RUNTIME(i)                  (0x0004002c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_RUNTIME__SIZE_1                 14 /*       */

#define NV_PPBDMA_RUNTIME_VALUE                                31:0 /* RW-UF */
#define NV_PPBDMA_RUNTIME_VALUE_ZERO                     0x00000000 /* RW--V */


SEM_ADDR_LO [register] - Semaphore Address Low Backing Register

     Semaphores are synchronization primitives located in memory; see the
documentation above the NV_UDMA_SEM_ADDR_LO method description for a brief
overview.
     The NV_PPBDMA_SEM_ADDR_LO register specifies the least significant bits of
a semaphore's virtual memory address.  This register is written to via the
NV_UDMA_SEM_ADDR_LO method.  See the method documentation of
NV_UDMA_SEM_ADDR_LO for information regarding usage and behavior.
     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_SEM_ADDR_LO field of
the RAMFC part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_SEM_ADDR_LO instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_SEM_ADDR_LO(i)              (0x0004003c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SEM_ADDR_LO__SIZE_1             14 /*       */

#define NV_PPBDMA_SEM_ADDR_LO_ADDR                             31:2 /* RW-UF */
#define NV_PPBDMA_SEM_ADDR_LO_ADDR_ZERO                  0x00000000 /* RW--V */


SEM_ADDR_HI [register] - Semaphore Address High Backing Register

     The NV_PPBDMA_SEM_ADDR_HI register contains the most significant 8 bits of
a semaphore's 40-bit virtual memory address.  This register is written to via
the NV_UDMA_SEM_ADDR_HI method.  See the method documentation of
NV_UDMA_SEM_ADDR_HI for information regarding usage and behavior.
     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_SEM_ADDR_HI field of
the RAMFC part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_SEM_ADDR_HI instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_SEM_ADDR_HI(i)              (0x00040038+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SEM_ADDR_HI__SIZE_1             14 /*       */

#define NV_PPBDMA_SEM_ADDR_HI_ADDR                              7:0 /* RW-UF */
#define NV_PPBDMA_SEM_ADDR_HI_ADDR_ZERO                  0x00000000 /* RW--V */


SEM_PAYLOAD_LO [register] - Semaphore Payload Low Backing Register

     The NV_PPBDMA_SEM_PAYLOAD_LO register contains the lowest 32 bits of the
semaphore payload.  The payload is used to either write to the semaphore or
provide an operand for a semaphore operation.  This register is written to via
the NV_UDMA_SEM_PAYLOAD_LO method.  See the method documentation of
NV_UDMA_SEM_PAYLOAD_LO for information regarding usage and behavior.
     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_SEM_PAYLOAD_LO field
of the RAMFC part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_SEM_PAYLOAD_LO instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_SEM_PAYLOAD_LO(i)           (0x00040040+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SEM_PAYLOAD_LO__SIZE_1          14 /*       */

#define NV_PPBDMA_SEM_PAYLOAD_LO_DATA                          31:0 /* RW-VF */
#define NV_PPBDMA_SEM_PAYLOAD_LO_DATA_ZERO               0x00000000 /* RW--V */


SEM_PAYLOAD_HI [register] - Semaphore Payload High Backing Register

     The NV_PPBDMA_SEM_PAYLOAD_HI register contains the highest 32 bits of the
semaphore payload.  The payload is used to either write to the semaphore or
provide an operand for a semaphore operation.  This register is written to via
the NV_UDMA_SEM_PAYLOAD_HI method.  See the method documentation of
NV_UDMA_SEM_PAYLOAD_HI for information regarding usage and behavior.
     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_SEM_PAYLOAD_HI field of
the RAMFC part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_SEM_PAYLOAD_HI instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_SEM_PAYLOAD_HI(i)           (0x0004009c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SEM_PAYLOAD_HI__SIZE_1          14 /*       */

#define NV_PPBDMA_SEM_PAYLOAD_HI_DATA                          31:0 /* RW-VF */
#define NV_PPBDMA_SEM_PAYLOAD_HI_DATA_ZERO               0x00000000 /* RW--V */


SEM_EXECUTE [register] - Semaphore Operation Backing Register

     The NV_PPBDMA_SEM_EXECUTE register contains a type of semaphore operation
to be performed and additional parameters for that operation.  This register is
written to via the NV_UDMA_SEM_EXECUTE method.
     A semaphore operation is launched by executing the NV_UDMA_SEM_EXECUTE
method.  This semaphore operation uses the semaphore address from the
NV_PPBDMA_SEM_ADDR_LO and NV_PPBDMA_SEM_ADDR_HI registers, and uses the
payload value from the NV_PPBDMA_SEM_PAYLOAD_LO and NV_PPBDMA_SEM_PAYLOAD_HI
registers.  However, after the semaphore operation has completed, these
registers may be updated individually by other semaphore methods; that is, they
do not retain an accurate view of the most recently executed semaphore
operation.  See the method documentation of NV_UDMA_SEM_EXECUTE for information
regarding usage and behavior.
     During execution of the semaphore operation, the ACQUIRE_FAIL field of the
NV_PPBDMA_SEM_EXECUTE register indicates whether or not an attempt to acquire a
semaphore has failed or faulted.  This field is used by Host to determine
whether the NV_PPBDMA_ACQUIRE_DEADLINE register should be updated.  If the
value of this field is FALSE, this means an acquire has not yet been attempted,
and Host will set ACQUIRE_DEADLINE to a new value.  If this field is TRUE, this
means an acquire has been attempted and has failed, and Host will not modify
ACQUIRE_DEADLINE.
     The ACQUIRE_FAIL field also indicates whether, during the execution of a
NV_UDMA_CLEAR_FAULTED method, an attempt to clear a _FAULTED bit of a channel's
NV_PCCSR_CHANNEL register has failed or not. If this field is FALSE, this might
mean a CLEAR_FAULTED has not yet been attempted, and Host will set
ACQUIRE_DEADLINE to a new value. If the CLEAR_FAULTED method fails, the field
is set to TRUE.  By reading the PPBDMA_METHOD0 register, SW can determine the
method for which the field is in use. Host will set this field to FALSE when the
CLEAR_FAULTED method succeeds or its timeout is triggered.
     Note that during execution of a semaphore operation, the value of the
NV_PPBDMA_SEM_EXECUTE register is the same as the value of NV_PPBDMA_DATA0,
with the exception of the NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL field.  If
software modifies NV_PPBDMA_DATA0 during execution of a NV_UDMA_SEM_EXECUTE
method, it must be careful to update the NV_PPBDMA_SEM_EXECUTE register to be
consistent with the DATA0 register.
     This register is part of a channel's state.  When the channel is switched
out, the value of this register is saved to, and restored from, the
NV_RAMFC_SEM_EXECUTE field of the RAMFC part of the channel's instance block.
     Software typically does not access this register directly, unless this is
being done while debugging.  Software can directly access this register without
the risk of race conditions when the channel is loaded on a PBDMA unit and that
PBDMA unit is stalled.  While a channel is not loaded on a PBDMA unit, software
can read from the NV_RAMFC_SEM_EXECUTE instance block field to access this
information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.


#define NV_PPBDMA_SEM_EXECUTE(i)              (0x00040044+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SEM_EXECUTE__SIZE_1             14 /*       */

#define NV_PPBDMA_SEM_EXECUTE_OPERATION                         2:0 /* RWXVF */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_ACQUIRE          0x00000000 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_RELEASE          0x00000001 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ   0x00000002 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ     0x00000003 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_ACQ_AND          0x00000004 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_ACQ_NOR          0x00000005 /* -W--V */
#define NV_PPBDMA_SEM_EXECUTE_OPERATION_REDUCTION        0x00000006 /* -W--V */

#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG              12:12 /* RW-VF */
#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS     0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN      0x00000001 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL                    19:19 /* RWXVF */
#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL_FALSE         0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL_TRUE          0x00000001 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_RELEASE_WFI                     20:20 /* RW-VF */
#define NV_PPBDMA_SEM_EXECUTE_RELEASE_WFI_DIS            0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_RELEASE_WFI_EN             0x00000001 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_PAYLOAD_SIZE                    24:24 /* RWXVF */
#define NV_PPBDMA_SEM_EXECUTE_PAYLOAD_SIZE_32BIT         0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_PAYLOAD_SIZE_64BIT         0x00000001 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_RELEASE_TIMESTAMP               25:25 /* RW-VF */
#define NV_PPBDMA_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS      0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_RELEASE_TIMESTAMP_EN       0x00000001 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_REDUCTION                       30:27 /* RWXVF */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IMIN             0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IMAX             0x00000001 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IXOR             0x00000002 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IAND             0x00000003 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IOR              0x00000004 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_IADD             0x00000005 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_INC              0x00000006 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_DEC              0x00000007 /* RW--V */

#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_FORMAT                31:31 /* RW-VF */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED    0x00000000 /* RW--V */
#define NV_PPBDMA_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED  0x00000001 /* RW--V */


ACQUIRE_DEADLINE - Deadline for Semaphore Acquire and Clear Faulted Timeouts

     The NV_PPBDMA_ACQUIRE_DEADLINE register contains timeout information used
by the NV_UDMA_SEM_EXECUTE and NV_UDMA_CLEAR_FAULTED methods.

     During execution of a semaphore acquire operation, the timeout period from
NV_PPBDMA_ACQUIRE_TIMEOUT is added to the current time from PTIMER to compute
the time at which the acquire will time out.  This timeout time is stored in
NV_PPBDMA_ACQUIRE_DEADLINE_TIMESTAMP.
     Whenever an acquire is retried, the current time from the PTIMER is
compared with the value in this register.  The comparison is circular.  If an
acquire attempt fails to match, and if the current time is not between the start
time (STARTTIME = ACQUIRE_DEADLINE - ACQUIRE_TIMEOUT) and ACQUIRE_DEADLINE in
the circle of 32-bit unsigned integers, then the deadline was missed, and Host
will raise the NV_PPBDMA_INTR_0_ACQUIRE interrupt.

     During execution of a CLEAR_FAULTED method, if the targeted channel has not
reported FAULTED and NV_PFIFO_CLEAR_FAULTED_TIMEOUT_DETECTION is ENABLED, the
value in NV_PFIFO_CLEAR_FAULTED_TIMEOUT_PERIOD is added to the current time from
PTIMER to compute the time at which the CLEAR_FAULTED will time out.  This
timeout time is stored in NV_PPBDMA_ACQUIRE_DEADLINE_TIMESTAMP.
     The CLEAR_FAULTED method will be retried approximately every microsecond
while its containing channel is loaded and active on the PBDMA.  When
CLEAR_FAULTED is retried and its targeted FAULTED bit is still FALSE, the
current time from PTIMER is compared against the ACQUIRE_DEADLINE_TIMESTAMP.  If
the 32 least-significant microseconds of the PTIMER time exceeds the TIMESTAMP
in a circular 32-bit comparison, the deadline was missed, and Host will raise
the NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR interrupt.

     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_ACQUIRE_DEADLINE
field of the RAMFC part of the channel's instance block.
     The value of this register is maintained by hardware.  Software typically
does not access this register directly, unless this is being done while
debugging.  Software can directly access this register without the risk of race
conditions when the channel is loaded on a PBDMA unit and that PBDMA unit is
stalled.  While a channel is not loaded on a PBDMA unit, software can read from
the NV_RAMFC_ACQUIRE_DEADLINE instance block field to access this information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_ACQUIRE_DEADLINE(i)         (0x00040034+(i)*8192) /* RW-4A */
#define NV_PPBDMA_ACQUIRE_DEADLINE__SIZE_1        14 /*       */

#define NV_PPBDMA_ACQUIRE_DEADLINE_TIMESTAMP                   31:0 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_DEADLINE_TIMESTAMP_ZERO        0x00000000 /* RW--V */


ACQUIRE - Acquire Periods

     The NV_UDMA_SEM_EXECUTE method may specify a semaphore acquire operation,
which involves not continuing channel execution until a given semaphore has a
particular value.  If a semaphore acquire fails (polling the semaphore reveals
it does not have the desired value), the PBDMA unit may either switch out to a
different channel, or keep trying to acquire the semaphore; see the
documentation for the NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG field.  If the
channel does not switch out and continues trying to acquire the semaphore, then
the NV_PPBDMA_ACQUIRE_RETRY register controls how long to wait between attempts
to acquire the semaphore.
     The NV_PPBDMA_ACQUIRE_RETRY_MAN and RETRY_EXP fields specify the minimum
number of internal-domain cycles that Host will wait before retrying a failed
Semaphore Acquire operation.  The wait period is MAN*2^EXP nvclk cycles.
Increasing the period between acquire attempts will reduce the memory throughput
consumed, but may increase the time between when the semaphore is released and
when it is acquired.
     The NV_PPBDMA_ACQUIRE_TIMEOUT_MAN and TIMEOUT_EXP fields specify the
maximum number of 1024ns periods that an acquire attempt can fail before an
acquire timeout interrupt is initiated.  The acquire timeout period is
1024*MAN*2^EXP ns.  TIMEOUT_EN specifies whether acquire timeouts are enabled.
The timeout period is limited to a maximum of 0x7FFF8000 so that
NV_PPBDMA_ACQUIRE_DEADLINE can fit into a single 32-bit register.
     This register is part of a channel's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_ACQUIRE field of the
RAMFC part of the channel's instance block.
     Typically, this register is initialized in NV_RAMFC_ACQUIRE when the
channel is first created.  Software typically does not access this register
directly, unless this is being done while debugging.  Software can directly
access this register without the risk of race conditions when the channel is
loaded on a PBDMA unit and that PBDMA unit is stalled.  While a channel is not
loaded on a PBDMA unit, software can read from the NV_RAMFC_ACQUIRE instance
block field to access this information.
     One of this type of register exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_ACQUIRE(i)                  (0x00040030+(i)*8192) /* RW-4A */
#define NV_PPBDMA_ACQUIRE__SIZE_1                 14 /*       */

#define NV_PPBDMA_ACQUIRE_RETRY_MAN                             6:0 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_RETRY_MAN_2                    0x00000002 /* RW--V */
#define NV_PPBDMA_ACQUIRE_RETRY_EXP                            10:7 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_RETRY_EXP_2                    0x00000002 /* RW--V */

#define NV_PPBDMA_ACQUIRE_TIMEOUT_EXP                         14:11 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_EXP_MAX                0x0000000F /* RW--V */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_MAN                         30:15 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_MAN_MAX                0x0000FFFF /* RW--V */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_EN                          31:31 /* RW-UF */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_EN_DISABLE             0x00000000 /* RW--V */
#define NV_PPBDMA_ACQUIRE_TIMEOUT_EN_ENABLE              0x00000001 /* RW--V */


^L
STATUS - PBDMA Unit Status Register

     The NV_PPBDMA_STATUS register contains the status of a PBDMA unit (Pusher,
Cache1, and Puller).
     The NV_PPBDMA_STATUS_GPF field contains the status of the PBDMA unit's
GP-Entry fetching.  If this field is GPF_EMPTY, then GP_GET equals GP_PUT, so
there are no more GP entries to be fetched.  If this field is GPF_SUSPENDED,
then GP-Entry fetching has been suspended (either by Host's Scheduler or by a
stalling interrupt condition).
If this field is GPF_BLOCKED, then the GP-Entry fetching is blocked from issuing new
GP-entry fetch requests because Host's Latency Buffer will not accept them
(either there is no space in Host's Latency Buffer to store the return data, or
Host's FB-request Arbiter is not accepting requests from the Latency Buffer).
Otherwise, this field is GPF_BUSY.
     The NV_PPBDMA_STATUS_GPP field contains the status of the PBDMA unit's
GP-Entry processing.  If this field is GPP_EMPTY, then the PBDMA unit has no
GP-Entry to process.  If this field is GPP_SUSPENDED, then GP-Entry processing
has been suspended (either by Host's Scheduler or by a stalling interrupt
condition).  If this field is GPP_BLOCKED, then GP-Entry processing is
blocked from issuing new pushbuffer read requests because the Latency
Buffer will not accept them.  Otherwise, this field is GPP_BUSY.
     The NV_PPBDMA_STATUS_PBP field contains the status of the PBDMA unit's
pushbuffer data processing.  If this field is PBP_EMPTY, then the PBDMA unit has
no pushbuffer data to process.  If this field is PBP_SUSPENDED, then
pushbuffer's processing operations have been suspended (either by Host's
Scheduler or a stalling interrupt condition). If this field is
PBP_BLOCKED, then pushbuffer processing is blocked because Host's
method FIFO is full.  Otherwise, this field is PBP_BUSY.
     The NV_PPBDMA_STATUS_MP field contains the status of a PBDMA unit's method
processing.  If this field is MP_EMPTY, then Host's method FIFO is empty.  If
this field is MP_SUSPENDED, then method processing has been suspended (either by
Host's Scheduler, by a NV_UDMA_YIELD method, or by an inter-engine
subchannel switch).  If this field is MP_BLOCKED, then method
processing is blocked from making progress either because of a
semaphore acquire, a FB flush, because Host's Crossbar is not accepting methods, or
because Host's Semaphore Processor or Run-List Processor is not accepting a
request or notification.  Otherwise, this field is MP_BUSY.


     The NV_PPBDMA_STATUS_PBDMA field contains the state of the PBDMA unit as a
whole.  If this field is PBDMA_EMPTY, then all of the PBDMA unit's sub-blocks
are reporting that they are empty.  If this field is PBDMA_SUSPENDED, then all
of the PBDMA unit's
sub-blocks are reporting that they are suspended.  If this field is
PBDMA_BLOCKED, then all of the PBDMA unit's sub-blocks are reporting that they
are blocked from making progress.  Otherwise, this field is PBDMA_BUSY.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on Host's internal domain
clock.  This register is new for Fermi.
     While NV_PPBDMA_CHANNEL_VALID is FALSE, no channel is present in
the PBDMA, so, like other non-configuration NV_PPBDMA registers, while
NV_PPBDMA_CHANNEL_VALID is FALSE, this register should be ignored.


#define NV_PPBDMA_STATUS(i)                   (0x00040100+(i)*8192) /* R--4A */
#define NV_PPBDMA_STATUS__SIZE_1                  14 /*       */

#define NV_PPBDMA_STATUS_GPF                                    3:0 /* R-IUF */
#define NV_PPBDMA_STATUS_GPF_EMPTY                       0x00000000 /* R-I-V */
#define NV_PPBDMA_STATUS_GPF_SUSPENDED                   0x00000001 /* R---V */
#define NV_PPBDMA_STATUS_GPF_BLOCKED                     0x00000002 /* R---V */
#define NV_PPBDMA_STATUS_GPF_BUSY                        0x00000008 /* R---V */
#define NV_PPBDMA_STATUS_GPP                                    7:4 /* R-IUF */
#define NV_PPBDMA_STATUS_GPP_EMPTY                       0x00000000 /* R-I-V */
#define NV_PPBDMA_STATUS_GPP_SUSPENDED                   0x00000001 /* R---V */
#define NV_PPBDMA_STATUS_GPP_BLOCKED                     0x00000002 /* R---V */
#define NV_PPBDMA_STATUS_GPP_BUSY                        0x00000008 /* R---V */
#define NV_PPBDMA_STATUS_PBP                                   11:8 /* R-IUF */
#define NV_PPBDMA_STATUS_PBP_EMPTY                       0x00000000 /* R-I-V */
#define NV_PPBDMA_STATUS_PBP_SUSPENDED                   0x00000001 /* R---V */
#define NV_PPBDMA_STATUS_PBP_BLOCKED                     0x00000002 /* R---V */
#define NV_PPBDMA_STATUS_PBP_BUSY                        0x00000008 /* R---V */
#define NV_PPBDMA_STATUS_MP                                   15:12 /* R-IUF */
#define NV_PPBDMA_STATUS_MP_EMPTY                        0x00000000 /* R-I-V */
#define NV_PPBDMA_STATUS_MP_SUSPENDED                    0x00000001 /* R---V */
#define NV_PPBDMA_STATUS_MP_BLOCKED                      0x00000002 /* R---V */
#define NV_PPBDMA_STATUS_MP_BUSY                         0x00000008 /* R---V */
#define NV_PPBDMA_STATUS_PBDMA                                31:28 /* R-IUF */
#define NV_PPBDMA_STATUS_PBDMA_EMPTY                     0x00000000 /* R-I-V */
#define NV_PPBDMA_STATUS_PBDMA_SUSPENDED                 0x00000001 /* R---V */
#define NV_PPBDMA_STATUS_PBDMA_BLOCKED                   0x00000002 /* R---V */
#define NV_PPBDMA_STATUS_PBDMA_BUSY                      0x00000008 /* R---V */



CHANNEL - Channel Identifier

     The NV_PPBDMA_CHANNEL register contains the channel number that is
currently assigned to a PBDMA unit. If VALID is FALSE, then this PBDMA unit
does not contain any valid state. After loading state from RAMFC, VALID
is set to TRUE. After saving the state to RAMFC, or during the load of RAMFC,
VALID is set to FALSE.
     This information is maintained by Hardware.  This register is available for
debug purposes.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.


#define NV_PPBDMA_CHANNEL(i)                  (0x00040120+(i)*8192) /* RW-4A */
#define NV_PPBDMA_CHANNEL__SIZE_1                 14 /*       */

#define NV_PPBDMA_CHANNEL_CHID                                 11:0 /*       */
#define NV_PPBDMA_CHANNEL_CHID_HW                11:0 /* RWXUF */
#define NV_PPBDMA_CHANNEL_VALID                               13:13 /* RWIVF */
#define NV_PPBDMA_CHANNEL_VALID_FALSE                    0x00000000 /* RWI-V */
#define NV_PPBDMA_CHANNEL_VALID_TRUE                     0x00000001 /* RW--V */



GP_SHADOW_0 and GP_SHADOW_1 - Last Received GP-Entry Header

     The NV_PPBDMA_GP_SHADOW_* registers contain the last GP entry that was
received by the PBDMA unit.  This is the data at NV_PPBDMA_GP_GET-8.  If the
PBDMA unit is indicating an invalid GP entry (NV_PPBDMA_INTR_0_GPENTRY), then
this register will contain that entry.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.


#define NV_PPBDMA_GP_SHADOW_0(i)              (0x00040110+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_SHADOW_0__SIZE_1             14 /*       */

#define NV_PPBDMA_GP_SHADOW_0_VALUE                            31:0 /* RWXUF */

#define NV_PPBDMA_GP_SHADOW_1(i)              (0x00040114+(i)*8192) /* RW-4A */
#define NV_PPBDMA_GP_SHADOW_1__SIZE_1             14 /*       */

#define NV_PPBDMA_GP_SHADOW_1_VALUE                            31:0 /* RWXUF */


HDR_SHADOW - Last fetched Pushbuffer-Entry Header

     The NV_PPBDMA_HDR_SHADOW register contains the raw PB instruction
corresponding to the information in NV_PPBDMA_PB_HEADER.  If the PBDMA unit is
indicating an invalid PB entry (NV_PPBDMA_INTR_0_PBENTRY), then this register
will contain the raw data for that entry.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.


#define NV_PPBDMA_HDR_SHADOW(i)               (0x00040118+(i)*8192) /* RW-4A */
#define NV_PPBDMA_HDR_SHADOW__SIZE_1              14 /*       */

#define NV_PPBDMA_HDR_SHADOW_VALUE                             31:0 /* RWXUF */



MEM_OP_* [registers] - Memory-Operation Operand Backing Registers

     The NV_PPBDMA_MEM_OP_* registers contain bits 95:0 of the operands
to a memory management operation.  Memory management operations are
triggered by NV_UDMA_MEM_OP_D methods; see NV_UDMA_MEM_OP* below for the method
documentation.
     These registers are part of a GPU context's state.  On a switch, the values
of these registers are saved to, and restored from, the NV_RAMFC_MEM_OP_A,
NV_RAMFC_MEM_OP_B, and NV_RAMFC_MEM_OP_C fields of the RAMFC part of the GPU
context's GPU-instance block.
     Software uses NV_UDMA_MEM_OP_* methods to alter this information.
Typically, software does not access this register directly.  This register is
available to software only for debug.  Software should use this register only if
the GPU context is assigned to a PBDMA unit and that PBDMA unit is stalled.
While a GPU context's Host state is not contained within a PBDMA unit, software
should use NV_RAMFC_MEM_OP_C to access this information.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  These registers were added
and/or moved for Pascal (MEM_OP_A used to exist at offsets 400a0 + i*8192).



#define NV_PPBDMA_MEM_OP_A(i)                 (0x00040004+(i)*8192) /* RW-4A */
#define NV_PPBDMA_MEM_OP_A__SIZE_1                14 /*       */
#define NV_PPBDMA_MEM_OP_A_DATA                                31:0 /* RW-UF */
#define NV_PPBDMA_MEM_OP_B(i)                 (0x00040064+(i)*8192) /* RW-4A */
#define NV_PPBDMA_MEM_OP_B__SIZE_1                14 /*       */
#define NV_PPBDMA_MEM_OP_B_DATA                                31:0 /* RW-UF */
#define NV_PPBDMA_MEM_OP_C(i)                 (0x000400a0+(i)*8192) /* RW-4A */
#define NV_PPBDMA_MEM_OP_C__SIZE_1                14 /*       */
#define NV_PPBDMA_MEM_OP_C_DATA                                31:0 /* RW-UF */


SIGNATURE - RAMFC Signature Register

     This register contains a value that specifies which Host class ID software
expects the hardware to support, and indicates if the RAMFC might be valid.  It
is intended for debug and as a runtime check that RM is exposing the proper Host
class ID for the chip.
     When the RAMFC part of a GPU context's instance block is restored into
Host, if the HW field does not contain the class ID specified by
HW_HOST_CLASS_ID or the value HW_VALID, then Host will freeze and initiate an
NV_PPBDMA_INTR_*_SIGNATURE interrupt.  Host's class ID can be queried at runtime
from NV_PFIFO_CFG2_HOST_CLASS_ID; see dev_fifo.ref.  Note the Host class is also
known as "channel_gpfifo".  HW_VALID (0xface) is meant to be used by RM to ease
transitions between Host classes for new architectures.  The HW field does not
provide a direct check for Host methods sent by a given user mode driver;
attempting to send methods from a mismatching Host class may or may not work
depending on the method.
     The SW field is for use by software.  Host is not affected by the value.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_SIGNATURE field of
the RAMFC part of the GPU context's GPU-instance block.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was added for
Fermi.



#define NV_PPBDMA_SIGNATURE(i)                (0x00040010+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SIGNATURE__SIZE_1               14 /*       */
#define NV_PPBDMA_SIGNATURE_HW                                 15:0 /* RW-UF */
#define NV_PPBDMA_SIGNATURE_HW_VALID                     0x0000face /* RW--V */
#define NV_PPBDMA_SIGNATURE_HW_HOST_CLASS_ID                 50031 /* RW--V */
#define NV_PPBDMA_SIGNATURE_SW                                31:16 /* RW-UF */
#define NV_PPBDMA_SIGNATURE_SW_ZERO                      0x00000000 /* RW--V */


USERD - Address of User-Driver Accessible State

     A user driver is permitted access to some, but not all, of a GPU context's
state (for example, GP_PUT).  NV_PPBDMA_USERD contains the physical address of a
block of memory that contains the state the user-driver may access.  This block
is NV_RAMUSERD_CHAN_SIZE-byte aligned.  Please see the NV_RAMUSERD section of
"dev_ram.ref" for a description of the user-driver accessible state.
     TARGET - The aperture of the physical address space in which USERD resides.
     ADDR - The low bits of the block-aligned (right shifted) USERD address.
This field corresponds to the low 32 bits of the byte address with the low bits
corresponding to its block alignment masked off.
     HI_ADDR - The high bits of the USERD address.  This field specifies bits
32+ of the USERD byte-aligned address.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_USERD and
NV_RAMFC_USERD_HI fields of the RAMFC part of the GPU context's GPU-instance
block.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was added for
Fermi.


#define NV_PPBDMA_USERD(i)                    (0x00040008+(i)*8192) /* RW-4A */
#define NV_PPBDMA_USERD__SIZE_1                   14 /*       */
#define NV_PPBDMA_USERD_TARGET                                  1:0 /* RW-UF */
#define NV_PPBDMA_USERD_TARGET_VID_MEM                   0x00000000 /* RW--V */
#define NV_PPBDMA_USERD_TARGET_VID_MEM_NVLINK_COHERENT   0x00000001 /* RW--V */
#define NV_PPBDMA_USERD_TARGET_SYS_MEM_COHERENT          0x00000002 /* RW--V */
#define NV_PPBDMA_USERD_TARGET_SYS_MEM_NONCOHERENT       0x00000003 /* RW--V */
#define NV_PPBDMA_USERD_ADDR            31:9 /* RW-UF */
#define NV_PPBDMA_USERD_ADDR_ZERO                        0x00000000 /* RW--V */

#define NV_PPBDMA_USERD_HI(i)                 (0x0004000c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_USERD_HI__SIZE_1                14 /*       */
#define NV_PPBDMA_USERD_HI_ADDR                                 7:0 /* RW-UF */
#define NV_PPBDMA_USERD_HI_ADDR_ZERO                     0x00000000 /* RW--V */


CONFIG - Miscellaneous Configuration Register

     The CONFIG register is used to configure miscellaneous functions of a PBDMA on a
per-channel basis.  Software can configure these bits via the corresponding
NV_RAMFC_CONFIG dword in each channel's RAMFC.
     The L2_EVICT field controls the l2_class field for memory requests from a PBDMA unit.
     The CE_SPLIT field controls Host taking large copies and splitting them into smaller
copies to allow fast Copy Engine (CE) switching.  If the field value is ENABLE, Host will analyze
each copy command to determine if the copy should be split into smaller copies, and may
modify the commands sent to the CE.


If the field value is DISABLE, Host will not modify the copy commands sent to the CE.
If the field is written from ENABLE to DISABLE while Host is in the middle of splitting a copy,
Host will continue splitting the current copy until the whole copy has been split.  Future
copies, however, will not be split while the field remains set to DISABLE.
     The THROTTLE_MODE field controls how much work Host sends to the CE.  The
goal is to send enough work to keep the Copy Engine busy while Host switches
away to another channel to check on a semaphore, while at the same time
maintaining the CE preemption latency below 10 microseconds.  When the field is
set to THROTTLE, Host will limit the number of copies it sends to the CE.  This
is legacy behavior and is needed on PCIE GEN3 systems.  Setting the field to
NO_THROTTLE will prevent Host from limiting the amount of work that Host sends
to the CE.  NVLINK2 and PCIE GEN4_LITE systems should have the field set to
NO_THROTTLE.
Note: Because this is a static setting, if a system slowdown occurs and the link
is downgraded, preemption latency may exceed 10 microseconds.
     The AUTH_LEVEL field specifies the authorization level of the channel.
When AUTH_LEVEL is NON_PRIVILEGED, the channel will not be able to execute
privileged operations via Host methods on its pushbuffer.  Any attempt to do so
will result in the NV_PPBDMA_INTR_*_METHOD interrupt being raised.  When
AUTH_LEVEL is PRIVILEGED, the channel will be able to execute all methods.
     The USERD_WRITEBACK field controls whether USERD will be written back to
memory.  Regardless of the setting here, USERD is always written back to memory
when the channel switches off of the PBDMA.  When USERD_WRITEBACK is ENABLE,
USERD will also be written back to memory whenever the PBDMA falls idle or the
writeback timer configured via NV_PFIFO_USERD_WRITEBACK_TIMER expires.  When the
field value is DISABLE, the writeback only occurs on channel save.  Note GP_PUT
does not get written back to memory because it is written by software;
otherwise, GP_PUT updates could be lost on writeback.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to, and restored from, the NV_RAMFC_CONFIG field of the RAMFC
part of the GPU context's GPU-instance block.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.  This register was added for
Fermi.

#define NV_PPBDMA_CONFIG(i)                 (0x000400f4+(i)*8192) /* R--4A */
#define NV_PPBDMA_CONFIG__SIZE_1                14 /*       */


#define NV_PPBDMA_CONFIG_L2_EVICT                             1:0 /* R--VF */
#define NV_PPBDMA_CONFIG_L2_EVICT_FIRST                0x00000000 /* R---V */
#define NV_PPBDMA_CONFIG_L2_EVICT_NORMAL               0x00000001 /* R---V */


#define NV_PPBDMA_CONFIG_CE_SPLIT                             4:4 /* R--VF */
#define NV_PPBDMA_CONFIG_CE_SPLIT_ENABLE               0x00000000 /* R---V */
#define NV_PPBDMA_CONFIG_CE_SPLIT_DISABLE              0x00000001 /* R---V */
#define NV_PPBDMA_CONFIG_CE_THROTTLE_MODE                     5:5 /* R--VF */
#define NV_PPBDMA_CONFIG_CE_THROTTLE_MODE_THROTTLE     0x00000000 /* R---V */
#define NV_PPBDMA_CONFIG_CE_THROTTLE_MODE_NO_THROTTLE  0x00000001 /* R---V */
#define NV_PPBDMA_CONFIG_AUTH_LEVEL                           8:8 /* R--VF */
#define NV_PPBDMA_CONFIG_AUTH_LEVEL_NON_PRIVILEGED     0x00000000 /* R---V */
#define NV_PPBDMA_CONFIG_AUTH_LEVEL_PRIVILEGED         0x00000001 /* R---V */
#define NV_PPBDMA_CONFIG_USERD_WRITEBACK                    12:12 /* R--VF */
#define NV_PPBDMA_CONFIG_USERD_WRITEBACK_DISABLE       0x00000000 /* R---V */
#define NV_PPBDMA_CONFIG_USERD_WRITEBACK_ENABLE        0x00000001 /* R---V */


     After a channel switch, the first method Host will send to the graphics or
copy engine is a NV_PMETHOD_SET_CHANNEL_INFO method.  The lower 16 bits of the
payload of this method (defined in internal_methods.ref) will consist of the
lower 16 bit value from this register.  The upper 16 bits of the payload will
be populated by Host with the channel ID.
     The lower 16 bits of the value of this method is expected to be set in
RAMFC by writing 32 bits to the offset specified as NV_RAMFC_SET_CHANNEL_INFO
at channel allocation. When generating the method, Host will ignore the upper
16 bits of the register value and populate the upper 16 bits of the method
payload with the channel ID. The register value should only change if the
channel is preempted and not loaded on a PBDMA.
     The VEID field is used to specify the Virtual Engine ID (VEID) for the
channel.  A VEID is a collection of independent compute or graphics state which
shares execution resources and a context image.  Each channel in a TSG can be
for a different VEID; any channels sharing a VEID will share WFI behavior.
     The RESERVED field is reserved for Host and any value written in these
upper 16 bits by SW is ignored by Host when generating the internal method
NV_PMETHOD_SET_CHANNEL_INFO.
     The SET_CHANNEL_INFO data should be set in RAMFC via the
NV_RAMFC_SET_CHANNEL_INFO entry rather than through this register.
     This register is part of a GPU context's state.  On a switch, the value of
this register is saved to and restored from the NV_RAMFC_SET_CHANNEL_INFO
field of the RAMFC part of the GPU context's GPU-instance block.
     One of these registers exists for each of Host's PBDMA units.  This
register runs on Host's internal domain clock.



#define NV_PPBDMA_SET_CHANNEL_INFO(i)             (0x000400fc+(i)*8192) /* RW-4A */
#define NV_PPBDMA_SET_CHANNEL_INFO__SIZE_1            14 /*       */

#define NV_PPBDMA_SET_CHANNEL_INFO_VALUE                           31:0 /* RW--F */

#define NV_PPBDMA_SET_CHANNEL_INFO_SCG_TYPE                         0:0 /*       */
#define NV_PPBDMA_SET_CHANNEL_INFO_SCG_TYPE_GRAPHICS_COMPUTE0 0x00000000 /*       */
#define NV_PPBDMA_SET_CHANNEL_INFO_SCG_TYPE_COMPUTE1          0x00000001 /*       */

#define NV_PPBDMA_SET_CHANNEL_INFO_VEID     ((6-1)+8):8 /*       */

#define NV_PPBDMA_SET_CHANNEL_INFO_RESERVED                        31:16 /*       */
HCE_CTRL - Misc Additional HCE State

     HCE_CTRL is used for misc. HCE state that needs to be channel swapped
in addition to the normal CE CLASS state.
Some of the state bits are part of the MP/SP blocks' interactions with the
HCE Handling logic.
  SP_AWAITS_HCEH indicates that the SP block is waiting for HCEH to finish
processing an HCE trigger method.
  HCE_RENDER_DISABLED indicates that CE class rendering has been turned off.
  HCE_SUBCHSW indicates that methods have been sent to HCE, and thus GR
will need to flush its caches when the next GR method in this channel
flows down to GR (indicated by interface bit).
  HCE_PRIV_MODE indicates that physical launchDMA copies are allowed.
  NOP_RCVD indicates that HCE logic has decoded a NOP method, and will
send the NOP to CE when permitted (see the LAUNCH_DMA_RCVD description).
  LAUNCH_DMA_RCVD indicates that the HCE logic has decoded a launchdma
method from MP, and it will be sent to CE when CE has returned enough
credits, and other criteria are met.
  PM_TRIGGER_RCVD indicates that HCE logic has decoded a pm_trigger method
and wants to send it to CE.
  SET_RENDER_ENABLE_C_RCVD indicates that HCE logic has decoded a
set_render_enable method, and is in the process of updating the render enable
state for CE.  Note, this is not strictly necessary as channel state, but it
is useful for debug while the channel is loaded.




#define NV_PPBDMA_HCE_CTRL(i)                 (0x000400e4+(i)*8192) /* RW-4A */
#define NV_PPBDMA_HCE_CTRL__SIZE_1                14 /*       */
#define NV_PPBDMA_HCE_CTRL_SP_AWAITS_HCEH                       0:0 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_SP_AWAITS_HCEH_NO             0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_SP_AWAITS_HCEH_YES            0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_RENDER_DISABLED                  2:2 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_HCE_RENDER_DISABLED_NO        0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_RENDER_DISABLED_YES       0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_SUBCHSW                          4:4 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_HCE_SUBCHSW_NO                0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_SUBCHSW_YES               0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_PRIV_MODE                        5:5 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_HCE_PRIV_MODE_NO              0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_HCE_PRIV_MODE_YES             0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_LAUNCH_DMA_RCVD                    16:16 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_LAUNCH_DMA_RCVD_NO            0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_LAUNCH_DMA_RCVD_YES           0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_NOP_RCVD                           17:17 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_NOP_RCVD_NO                   0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_NOP_RCVD_YES                  0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_RCVD                    18:18 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_RCVD_NO            0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_RCVD_YES           0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_END_RCVD                19:19 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_END_RCVD_NO        0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_PM_TRIGGER_END_RCVD_YES       0x00000001 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_SET_RENDER_ENABLE_C_RCVD           20:20 /* RW-UF */
#define NV_PPBDMA_HCE_CTRL_SET_RENDER_ENABLE_C_RCVD_NO   0x00000000 /* RW--V */
#define NV_PPBDMA_HCE_CTRL_SET_RENDER_ENABLE_C_RCVD_YES  0x00000001 /* RW--V */
TIMEOUT - Timeout Period Register

     The NV_PPBDMA_TIMEOUT register contains a value used for detecting
timeouts.  The timeout value is in microsecond ticks.

The timeouts that use this value are:
GPfifo fetch timeouts to FB for acks, reqs, rdats.
PBDMA connection to LB.
GPfifo processor timeouts to FB for acks, reqs, rdats.
Method processor timeouts to FB for acks, reqs, rdats.
The init value was changed to 64K us

     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.



#define NV_PPBDMA_TIMEOUT(i)                  (0x0004012c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_TIMEOUT__SIZE_1                 14 /*       */

#define NV_PPBDMA_TIMEOUT_PERIOD                               31:0 /* RWEUF */
#define NV_PPBDMA_TIMEOUT_PERIOD_INIT                    0x00010000 /* RWE-V */
#define NV_PPBDMA_TIMEOUT_PERIOD_MAX                     0xffffffff /* RW--V */

6  -  INTERRUPT REGISTERS
=========================

     The interrupt registers control the interrupts for the local devices.
Interrupts are set by an event and are cleared by software.

INTR_0 - PBDMA Unit Interrupt Register

     The NV_PPBDMA_INTR_* registers are a PBDMA unit's interrupt register.  The
logical-OR of this register feeds into the NV_PFIFO_INTR_* register.  If a field
in this register is PENDING, then the corresponding interrupt condition has
occurred, and software has not yet indicated to hardware that the exception has
been handled.  If a field is NON_PENDING then there are no exceptions of the
corresponding type that have not been handled.  Software writes RESET to one of
these fields to indicate that a pending interrupt has been handled.
     Software cannot set bits in this register.  Attempting to write a bit to a
one actually clears the interrupt source.  In this way, software can clear
individual bits in this register.  When software recognizes an interrupt, and
services it, it can then clear the individual source by writing that single bit
in this register to RESET.  Then it can read the register and see if all bits
are clear.  If not, it can service other interrupts in this reg.  This is
especially important since some of these bits are asynchronous to others in this
register.  While an interrupt service routine (ISR) is clearing an interrupt,
other interrupts may occur.
     Interrupts differ in severity.  Some interrupts (like software interrupts)
are expected in the normal operation of the GPU, and do not indicate that any
GPU context has been damaged, or hung.  Some interrupts (like timeouts) do not
indicate damage, but indicate that deadlock might have occurred.  Some interrupts
indicate that an error has occurred that might have damaged a GPU context, but
has not damaged any of the others.  Finally some interrupts indicate that any
or all of the active GPU contexts have been damaged.
     This register is for interrupts that cause a PBDMA unit to stall
(non-stalling non-switching interrupts are stored on a per-channel basis).
Bits in this register being set to PENDING will prevent the contents of the
PBDMA unit from being switched out.  Until software handles these interrupts
and writes the bits to RESET, the PBDMA will be frozen.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on Host's internal domain
clock.  This register is new for Fermi.

Interrupt field summary for INTR_0, INTR_EN_0, and INTR_STALL:



#define NV_PPBDMA_INTR_0(i)                   (0x00040108+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_0__SIZE_1                  14 /*       */

     The NV_PPBDMA_INTR_*_MEMREQ field indicates that a memory request was not
accepted within NV_PPBDMA_TIMEOUT_PERIOD.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMREQ                                 0:0 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMREQ_NOT_PENDING              0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMREQ_PENDING                  0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMREQ_RESET                    0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMACK_TIMEOUT field indicates that a PBDMA unit
has not received a MMU acknowledge within NV_PPBDMA_TIMEOUT_PERIOD.  This is an
unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMACK_TIMEOUT                         1:1 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMACK_TIMEOUT_NOT_PENDING      0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMACK_TIMEOUT_PENDING          0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMACK_TIMEOUT_RESET            0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMACK_EXTRA field indicates that a PBDMA unit
received more MMU acknowledges than it was expecting, or received an
acknowledge with an unexpected subidentifier.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMACK_EXTRA                           2:2 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMACK_EXTRA_NOT_PENDING        0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMACK_EXTRA_PENDING            0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMACK_EXTRA_RESET              0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMDAT_TIMEOUT field indicates that read data was
not received within NV_PPBDMA_TIMEOUT_PERIOD.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMDAT_TIMEOUT                         3:3 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMDAT_TIMEOUT_NOT_PENDING      0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMDAT_TIMEOUT_PENDING          0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMDAT_TIMEOUT_RESET            0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMDAT_EXTRA field indicates that a PBDMA unit
received more data than it requested, or received read data with an unexpected
sub-identifier.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMDAT_EXTRA                           4:4 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMDAT_EXTRA_NOT_PENDING        0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMDAT_EXTRA_PENDING            0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMDAT_EXTRA_RESET              0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMFLUSH field indicates a PBDMA unit issued a FB
flush request due to a NV_UDMA_FB_FLUSH method, and did not receive a flush
acknowledge within NV_PPBDMA_TIMEOUT_PERIOD.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMFLUSH                               5:5 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMFLUSH_NOT_PENDING            0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMFLUSH_PENDING                0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMFLUSH_RESET                  0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_MEMOP field indicates that a PBDMA unit issued a
memory request due to a NV_UDMA_MEM_OP_D method, and did not receive an
acknowledge within NV_PPBDMA_TIMEOUT_PERIOD.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_MEMOP                                  6:6 /* RWIUF */
#define NV_PPBDMA_INTR_0_MEMOP_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_MEMOP_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_MEMOP_RESET                     0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_LBCONNECT field indicates that a request to connect to
a Latency Buffer was not acknowledged within NV_PPBDMA_TIMEOUT_PERIOD.  This
is an unrecoverable error.

#define NV_PPBDMA_INTR_0_LBCONNECT                              7:7 /* RWIUF */
#define NV_PPBDMA_INTR_0_LBCONNECT_NOT_PENDING           0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_LBCONNECT_PENDING               0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_LBCONNECT_RESET                 0x00000001 /* -W--C */


     The NV_PPBDMA_INTR_*_LBACK_TIMEOUT field indicates that a PBDMA unit did
not receive an acknowledge to a memory request within NV_PPBDMA_TIMEOUT_PERIOD.
This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_LBACK_TIMEOUT                          9:9 /* RWIUF */
#define NV_PPBDMA_INTR_0_LBACK_TIMEOUT_NOT_PENDING       0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_LBACK_TIMEOUT_PENDING           0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_LBACK_TIMEOUT_RESET             0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_LBACK_EXTRA field indicates that a PBDMA received
more acknowledges from the Latency Buffer than was expected, or that it
received more acknowledges than it was expecting.  This is an unrecoverable
error.

#define NV_PPBDMA_INTR_0_LBACK_EXTRA                          10:10 /* RWIUF */
#define NV_PPBDMA_INTR_0_LBACK_EXTRA_NOT_PENDING         0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_LBACK_EXTRA_PENDING             0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_LBACK_EXTRA_RESET               0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_0_LBDAT_TIMEOUT field indicates that a PBDMA has not
received read data for a request within NV_PPBDMA_TIMEOUT_PERIOD.  This is an
unrecoverable error.

#define NV_PPBDMA_INTR_0_LBDAT_TIMEOUT                        11:11 /* RWIUF */
#define NV_PPBDMA_INTR_0_LBDAT_TIMEOUT_NOT_PENDING       0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_LBDAT_TIMEOUT_PENDING           0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_LBDAT_TIMEOUT_RESET             0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_LBDAT_EXTRA field indicates that a PBDMA received
more data from the Latency Buffer than expected, or has received read data
with an unexpected sub-identifier.  This is an unrecoverable error.

#define NV_PPBDMA_INTR_0_LBDAT_EXTRA                          12:12 /* RWIUF */
#define NV_PPBDMA_INTR_0_LBDAT_EXTRA_NOT_PENDING         0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_LBDAT_EXTRA_PENDING             0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_LBDAT_EXTRA_RESET               0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_GPFIFO field indicates that a PBDMA unit encountered
an invalid GPFIFO (circular buffer of GP-Entries).  A GPFIFO that crosses the
end of the memory address space (0xFFFFFFFFFF) is invalid.  The invalid value
will be in NV_PPBDMA_GP_BASE and NV_PPBDMA_GP_BASE_HI. Fixing this and clearing
the interrupt will allow the PBDMA unit to continue.  The error is limited to
the channel.

#define NV_PPBDMA_INTR_0_GPFIFO                               13:13 /* RWIUF */
#define NV_PPBDMA_INTR_0_GPFIFO_NOT_PENDING              0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_GPFIFO_PENDING                  0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_GPFIFO_RESET                    0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_GPPTR field indicates that a PBDMA unit encountered
invalid GP pointers (either NV_PPBDMA_GP_PUT, NV_PPBDMA_GP_FETCH, or
NV_PPBDMA_GP_GET).  These pointers are invalid if they are not between zero and
one less than the size of the circular buffer that contains GP entries:
1<<NV_PPBDMA_GP_BASE_HI_LIMIT2.  Fixing the invalid pointer and clearing the
interrupt will allow the PBDMA unit to continue.  The error is limited to the
channel.

#define NV_PPBDMA_INTR_0_GPPTR                                14:14 /* RWIUF */
#define NV_PPBDMA_INTR_0_GPPTR_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_GPPTR_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_GPPTR_RESET                     0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_GPENTRY field indicates that a PBDMA unit encountered
an invalid GP entry.  The invalid entry will be in NV_PPBDMA_GP_SHADOW_*.
Invalid GP entries are treated like traps, they will set the interrupt and
freeze the PBDMA, but the invalid entry is discarded. Once the interrupt is
cleared, the PBDMA unit will simply continue with the next GP entry.  The
GP_CRC is not updated by the discarded entry.  Important: Graceful interrupt
recovery is only possible if a GP entry with a length of ZERO caused this
interrupt.  For NON-ZERO length GP entries, this interrupt is fatal. The error
is limited to the channel.

#define NV_PPBDMA_INTR_0_GPENTRY                              15:15 /* RWIUF */
#define NV_PPBDMA_INTR_0_GPENTRY_NOT_PENDING             0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_GPENTRY_PENDING                 0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_GPENTRY_RESET                   0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_GPCRC field indicates that the cyclic redundancy check
value measured over GP entries did not match the expected value.  This interrupt
is for debug, and indicates that the memory subsystem returned corrupted data on
previous GP fetches.  The NV_PPBDMA_GP_CRC register is cleared independent of
the comparison succeeding, so clearing the interrupt will continue as if the CRC
had passed.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_GPCRC                                16:16 /* RWIUF */
#define NV_PPBDMA_INTR_0_GPCRC_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_GPCRC_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_GPCRC_RESET                     0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_PBPTR field indicates that a PBDMA unit encountered an
invalid PB pointer.  NV_PPBDMA_GET is invalid if it is not less than
NV_PPBDMA_PUT.  Fixing the invalid pointer and clearing the interrupt will allow
the PBDMA unit to continue.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_PBPTR                                17:17 /* RWIUF */
#define NV_PPBDMA_INTR_0_PBPTR_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_PBPTR_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_PBPTR_RESET                     0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_PBENTRY field indicates that a PBDMA unit has
encountered an invalid PB entry.  This can occur when Host expects the PB entry
to be a PB instruction, and any of the following happen:

     * The PB entry does not decode properly into a PB instruction.
     * The decoded instruction is in an obsolete format or is otherwise not
       valid (see "FIFO_DMA" in dev_ram.ref).
     * The decoded instruction is either an incrementing method header or an
       increment-once method header, and the header's COUNT field would cause
       the method addresses for the generated method sequence to exceed the
       maximum method address, thus the method addresses would wrap.

     The expected recovery procedure for handling a PBENTRY interrupt is
described below:

     1. In order to determine the cause of a PBENTRY interrupt while an error is
        pending:
          1a. Examine the NV_PPBDMA_HDR_SHADOW register for proper encoding.
              This register contains the raw PB entry that triggered the PBENTRY
              interrupt.  If its contents are not properly encoded then this was
              the cause of the interrupt.
          1b. If the raw PB entry is properly encoded then the PB header is
              invalid for some other reason.  This means the PB entry was
              decoded before the PBENTRY interrupt was triggered, and the
              NV_PPBDMA_PB_HEADER register will contain the decoded PB entry.
     2. Regardless of the cause of the PBENTRY interrupt, one must update the
        NV_PPBDMA_PB_HEADER register to contain a valid header.
     3. If the valid updated header is a PB method header, then the VALUE field
        of the NV_PPBDMA_PB_COUNT register must also be updated to reflect the
        number of subsequent PB entries to interpret as method data (note that
        the other fields of PB_COUNT should be left alone; this requires a
        read-modify-write of this register).  If this value is incorrect, then
        the pushbuffer decoding will become out of sync between headers and
        data.  Note that when decoding PB method headers normally, the HW sets
        NV_PPBDMA_PB_COUNT_VALUE to the NV_FIFO_DMA_METHOD_COUNT field value of
        the raw PB entry.
     4. For consistency, NV_PPBDMA_HDR_SHADOW should be fixed too, but that is
        not required for proper HW operation (the HW ignores
        NV_PPBDMA_HDR_SHADOW).
     5. Clear the PBENTRY interrupt after fixing the state to allow the PBDMA
        unit to continue.

     The PBENTRY error is limited to the channel.  Note that while a PBENTRY
interrupt is pending on a given channel, one cannot assume that any
method/address pair generated from the preceding PB entries on that channel has
executed yet (the PB entries themselves are processed in order, but this
processing consists only of executing PB control entries and generating the
method/address pairs from the PB method headers and PB method data dwords; see
dev_ram.ref for the difference between control entries and methods).

#define NV_PPBDMA_INTR_0_PBENTRY                              18:18 /* RWIUF */
#define NV_PPBDMA_INTR_0_PBENTRY_NOT_PENDING             0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_PBENTRY_PENDING                 0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_PBENTRY_RESET                   0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_PBCRC field indicates that the cyclic redundancy check
value measured over a PB segment did not match the expected value.  This
interrupt is for debug, and indicates that the memory subsystem returned
corrupted data on previous PB fetches.  The NV_PPBDMA_PB_CRC register is cleared
at the start of each new segment, independent of the comparison succeeding, so
clearing the interrupt will continue as if the CRC had passed.  The error is
limited to the channel.

#define NV_PPBDMA_INTR_0_PBCRC                                19:19 /* RWIUF */
#define NV_PPBDMA_INTR_0_PBCRC_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_PBCRC_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_PBCRC_RESET                     0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_CLEAR_FAULTED_ERROR field indicates that a PBDMA unit
encountered a Host CLEAR_FAULTED method and the target FAULT bit for the target
chid specified in the method payload was not set within the
NV_PFIFO_CLEAR_FAULTED_TIMEOUT_PERIOD.  This is intended to catch SW errors
where a CLEAR_FAULTED method targets the wrong channel or a channel that has
already had its fault cleared.  Please refer to the description of the
NV_UDMA_CLEAR_FAULTED method in section 9 (HOST METHODS) for details.

     When PENDING, the PBDMA is stalled and remains loaded on the channel.  The
address of the invalid method will be in NV_PPBDMA_METHOD0, and its data will be
in NV_PPBDMA_DATA0.  Fixing the invalid method in NV_PPBDMA_METHOD0 (or changing
it to NV_UDMA_NOP) and clearing the interrupt will allow the PBDMA unit to
continue.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR                  20:20 /* RWIUF */
#define NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR_NOT_PENDING 0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR_PENDING     0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR_RESET       0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_METHOD field indicates that a PBDMA unit encountered
a method that could not be processed for one of the following reasons:
     * The method is an internal method; that is, its address is in the
       NV_PMETHOD range (see internal_methods.ref)
     * The method address is not in the range of engine methods, but it is not a
       valid Host method either
     * The method is NV_UDMA_ILLEGAL
     * The method attempted to perform a privileged operation, but
       NV_PPBDMA_CONFIG_AUTH_LEVEL is NON_PRIVILEGED
     * An NV_UDMA_YIELD method with an unknown OP is encountered
     * A Host SYNCPOINT method is encountered.  Syncpoints are only supported on
       Tegra parts.

     The address of the invalid method will be in NV_PPBDMA_METHOD0, and its
data will be in NV_PPBDMA_DATA0.  Fixing the invalid method in
NV_PPBDMA_METHOD0 (or changing it to NV_UDMA_NOP) and clearing the interrupt
will allow the PBDMA unit to continue.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_METHOD                               21:21 /* RWIUF */
#define NV_PPBDMA_INTR_0_METHOD_NOT_PENDING              0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_METHOD_PENDING                  0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_METHOD_RESET                    0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_METHODCRC field indicates that the cyclic redundancy
check value measured over methods sent to Host's crossbar did not match the
expected value.  This interrupt is for debug, and indicates that the PBDMA unit
sent incorrect methods to the engine. There is no use continuing with the
corrupted method stream, but for debug purposes execution may continue if the
crc from the NV_UDMA_CRC_CHECK method (from NV_PPBDMA_DATA0) is copied over the
NV_PPBDMA_METHOD_CRC register before clearing the interrupt.  The error is
limited to the channel.

#define NV_PPBDMA_INTR_0_METHODCRC                            22:22 /* RWIUF */
#define NV_PPBDMA_INTR_0_METHODCRC_NOT_PENDING           0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_METHODCRC_PENDING               0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_METHODCRC_RESET                 0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_DEVICE field indicates a SW-class method.  More
specifically, it indicates that the method's subchannel specified a SW engine or
a non-existent engine.  Note the subchannel-to-engine mapping is fixed, and that
it is not possible to specify a non-existent engine--see NV_UDMA_OBJECT.  The
method information is in NV_PPBDMA_METHOD0 and NV_PPBDMA_DATA0.  For a software
method, METHOD0_SUBCH will be 5, 6, or 7. After handling the SW-class method, SW
should clear the METHOD0_VALID field to FALSE or replace the method ADDR with
NV_UDMA_NOP.  Consecutive SW-class methods in the method FIFO
(NV_PPBDMA_{METHOD,DATA}{1,2,3}) may also be handled and replaced with NOPs or
their VALID fields cleared up to the first non-SW method.

#define NV_PPBDMA_INTR_0_DEVICE                               23:23 /* RWIUF */
#define NV_PPBDMA_INTR_0_DEVICE_NOT_PENDING              0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_DEVICE_PENDING                  0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_DEVICE_RESET                    0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_ENG_RESET field indicates that an engine was reset
while the PBDMA unit was processing a channel from a runlist which serves the
engine.  The interrupt is not triggered if PBDMA is in halted state while the
engine is reset. However, if the engine remains in reset, when the PBDMA continues,
the interrupt will be fired.  This is a potentially fatal condition for the
channel which was loaded on the PBDMA while the engine was reset.  The PBDMA which
encountered the interrupt will stall and prevent the channel which was loaded at
the time the interrupt fired from being swapped out until the interrupt is cleared.
To unblock the PBDMA, SW needs to do the following:

     1. Disable all the channels in the TSG
     2. Initiate a preempt (but do not poll for completion yet)
     3. Clear the interrupt bit
     4. Poll for preempt completion
     5. Tear down the context

Note the TSG ID can be obtained by reading NV_PFIFO_PBDMA_STATUS_ID;
see dev_fifo.ref.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_ENG_RESET                            24:24 /* RWIUF */
#define NV_PPBDMA_INTR_0_ENG_RESET_NOT_PENDING           0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_ENG_RESET_PENDING               0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_ENG_RESET_RESET                 0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_SEMAPHORE field indicates that a PBDMA unit has
encountered a NV_UDMA_SEM_EXECUTE method whose data field (which indicates the
details of the semaphore operation) is invalid.  The method will be in
NV_PPBDMA_METHOD0.  The method data is in both NV_PPBDMA_DATA0 and
NV_PPBDMA_SEM_EXECUTE. Any changes to NV_PPBDMA_METHOD0 or NV_PPBDMA_DATA0
should also be reflected consistently in NV_PPBDMA_SEM_EXECUTE.  After fixing
the method and/or data, clearing the interrupt will allow the PBDMA unit to
continue.  The error is limited to the channel.

#define NV_PPBDMA_INTR_0_SEMAPHORE                            25:25 /* RWIUF */
#define NV_PPBDMA_INTR_0_SEMAPHORE_NOT_PENDING           0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_SEMAPHORE_PENDING               0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_SEMAPHORE_RESET                 0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_ACQUIRE field indicates that a semaphore acquire did
not occur within the maximum period (as specified by the
NV_PPBDMA_ACQUIRE_TIMEOUT register).  The method will be in NV_PPBDMA_METHOD0.
The method data is in both NV_PPBDMA_DATA0 and NV_PPBDMA_SEM_EXECUTE. Any
changes to NV_PPBDMA_METHOD0 or NV_PPBDMA_DATA0 should also be reflected
consistently in NV_PPBDMA_SEM_EXECUTE.  Because the timeout counter is not
automatically reset after an acquire failure, clearing the interrupt may result
in a subsequent ACQUIRE timeout on the next acquire attempt.  To prevent this,
one should choose one of the following cleanup options before clearing the
interrupt:
1 - Preempt/unbind the channel
2 - NOP the semaphore method
3 - Release the semaphore
4 - Clear the SEM_EXECUTE_ACQUIRE_FAIL bit to restart the counter.
After fixing the method and/or data, clearing the
interrupt will allow the PBDMA unit to continue.  The error is limited to the
channel.

#define NV_PPBDMA_INTR_0_ACQUIRE                              26:26 /* RWIUF */
#define NV_PPBDMA_INTR_0_ACQUIRE_NOT_PENDING             0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_ACQUIRE_PENDING                 0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_ACQUIRE_RESET                   0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_PRI field indicates that a PRI write access to a
register occurred while a valid channel is loaded on PBDMA and the PBDMA is not
IDLE or frozen for an interrupt. This interrupt will occur only if the PRI access
will cause the PBDMA unit to operate incorrectly.  Clearing the interrupt will
allow the PBDMA unit to continue, however the PBDMA state will be corrupted.
Depending on the register, this may be an unrecoverable error, or may be limited
to the channel.

#define NV_PPBDMA_INTR_0_PRI                                  27:27 /* RWIUF */
#define NV_PPBDMA_INTR_0_PRI_NOT_PENDING                 0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_PRI_PENDING                     0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_PRI_RESET                       0x00000001 /* -W--C */




     The NV_PPBDMA_INTR_*_PBSEG field indicates that a PBDMA unit encountered a
PB compressed method sequence that begins in a non-conditionally-fetched PB
segment and ends in a conditionally-fetched PB segment.  That is, the first valid
PB entry of a conditionally-fetched PB segment is interpreted as method data.
This is likely to corrupt the pushbuffer data stream.  Clearing the interrupt will
allow the PBDMA unit to continue.  The error is limited to the channel.

Note: Although the PBDMA will continue after the interrupt is cleared, it might
have a faulty method stream after this interrupt. This is generally fatal to the
context and an RC will be needed.

#define NV_PPBDMA_INTR_0_PBSEG                                30:30 /* RWIUF */
#define NV_PPBDMA_INTR_0_PBSEG_NOT_PENDING               0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_PBSEG_PENDING                   0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_PBSEG_RESET                     0x00000001 /* -W--C */

      The NV_PPBDMA_INTR_*_SIGNATURE field indicates that an invalid Host class
ID was specified in NV_RAMFC_SIGNATURE when a channel's RAMFC was loaded.  This
usually indicates SW is attempting to use the wrong Host class for the current
chip.  The invalid value will be in NV_PPBDMA_SIGNATURE_HW.  Fixing the invalid
value and clearing the interrupt will allow the PBDMA unit to continue.  The
error is limited to the channel.  Note that attempting to use methods from a
mismatched Host class may or may not work depending on the method, but will not
necessarily cause an interrupt.

#define NV_PPBDMA_INTR_0_SIGNATURE                            31:31 /* RWIUF */
#define NV_PPBDMA_INTR_0_SIGNATURE_NOT_PENDING           0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_0_SIGNATURE_PENDING               0x00000001 /* R---V */
#define NV_PPBDMA_INTR_0_SIGNATURE_RESET                 0x00000001 /* -W--C */


INTR_1 is a continuation of INTR_0.
Added for Kepler to handle HCE interrupts.
Interrupts related to HCE occupy the least significant bits of the register and
any new HCE interrupt should be added to the available least significant bit.
New non-HCE PBDMA interrupts should be added to the available most significant
bit of the register. If a new class of interrupts needs to be added, its
interrupts can be added from bit 8 or 16.


#define NV_PPBDMA_INTR_1(i)                   (0x00040148+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_1__SIZE_1                  14 /*       */

     The INTR_*_HCE_RE_ILLEGAL_OP field indicates that a PBDMA encountered
a render enable method with an invalid render enable operation.
The sent invalid op can be found in the pbdma's NV_PPBDMA_HCE_DBG1_MTHD_DATA
register.

#define NV_PPBDMA_INTR_1_HCE_RE_ILLEGAL_OP                      0:0 /* RWIUF */
#define NV_PPBDMA_INTR_1_HCE_RE_ILLEGAL_OP_NOT_PENDING   0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_HCE_RE_ILLEGAL_OP_PENDING       0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_HCE_RE_ILLEGAL_OP_RESET         0x00000001 /* -W--C */

     The INTR_*_HCE_RE_ALIGNB field indicates that a PBDMA unit encountered
a Set_Render_Enable_C Copy Engine Class method while the Render_Enable_B value
was not aligned.
This is effectively a CE Launch Check.
This error is limited to the channel.

#define NV_PPBDMA_INTR_1_HCE_RE_ALIGNB                          1:1 /* RWIUF */
#define NV_PPBDMA_INTR_1_HCE_RE_ALIGNB_NOT_PENDING       0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_HCE_RE_ALIGNB_PENDING           0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_HCE_RE_ALIGNB_RESET             0x00000001 /* -W--C */

     The INTR_*_HCE_PRIV field indicates that a PBDMA unit encountered
a LaunchDMA Copy Engine Class method set up to access the physical memory aperture,
but the PRIV_MODE bit in the RAMFC for the loaded channel was NOT set.
This is effectively a CE Launch Check.
This error is limited to the channel.

#define NV_PPBDMA_INTR_1_HCE_PRIV                               2:2 /* RWIUF */
#define NV_PPBDMA_INTR_1_HCE_PRIV_NOT_PENDING            0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_HCE_PRIV_PENDING                0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_HCE_PRIV_RESET                  0x00000001 /* -W--C */

     The INTR_*_HCE_ILLEGAL_MTHD field indicates that a PBDMA encountered
a method bound for CE that is not decoded in the CE CLASS.
The method and its data that triggered the error can be found in the pbdma's
NV_PPBDMA_HCE_DBG0_MTHD_ADDR and NV_PPBDMA_HCE_DBG1_MTHD_DATA registers.

#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_MTHD                       3:3 /* RWIUF */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_MTHD_NOT_PENDING    0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_MTHD_PENDING        0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_MTHD_RESET          0x00000001 /* -W--C */

     The INTR_*_HCE_ILLEGAL_CLASS field indicates that a PBDMA encountered
a SetObject method that specifies an unrecognized class ID.
The sent illegal class ID can be found in NV_PPBDMA_HCE_DBG1_MTHD_DATA.

#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_CLASS                      4:4 /* RWIUF */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_CLASS_NOT_PENDING   0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_CLASS_PENDING       0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_HCE_ILLEGAL_CLASS_RESET         0x00000001 /* -W--C */

     The NV_PPBDMA_INTR_*_CTXNOTVALID field indicates error conditions related
to the NV_PPBDMA_TARGET_*_CTX_VALID fields for a channel.  The following
conditions trigger the interrupt:

     * The PBDMA unit encountered an engine method or SetObject but the
       corresponding CTX_VALID bit for the targeted engine is FALSE, or
     * At channel start/resume, all preemptable engines have CTX_VALID FALSE but:
         - CTX_RELOAD is set in NV_PCCSR_CHANNEL_STATUS,
         - NV_PPBDMA_TARGET_SHOULD_SEND_HOST_TSG_EVENT is TRUE, or
         - NV_PPBDMA_TARGET_NEEDS_HOST_TSG_EVENT is TRUE

The PBDMA which encountered the interrupt will stall and prevent the channel
which was loaded at the time the interrupt fired from being swapped out until
the interrupt is cleared.  The field is left NOT_PENDING and the interrupt is
not raised if the PBDMA is currently halted.  This allows SW to unblock the
PBDMA and recover via the below procedure.  SW may read METHOD0, CHANNEL_STATUS,
and TARGET to determine whether the interrupt was due to an engine method,
CTX_RELOAD, SHOULD_SEND_HOST_TSG_EVENT, or NEEDS_HOST_TSG_EVENT.  If METHOD0
VALID is TRUE, lazy context creation can be used or the TSG may be destroyed.
If METHOD0 VALID is FALSE, the error is likely a bug in SW, and the TSG
will have to be destroyed.

Recovery procedure:

     1. Determine which CHID and TSG hit the interrupt, and read NV_PPBDMA_METHOD0,
        NV_PCCSR_CHANNEL_STATUS, and NV_PPBDMA_TARGET to find out whether the
        interrupt was due to an engine method or not.
     2. Disable all channels in the containing TSG by writing ENABLE_CLR to TRUE
        in their channel RAM entries in NV_PCCSR_CHANNEL (see dev_fifo.ref).
     3. Initiate a preempt of the TSG via NV_PFIFO_PREEMPT or
        NV_PFIFO_RUNLIST_PREEMPT.  This must be done prior to clearing the
        interrupt or it will just fire again.
     4. Set the channel's relevant NV_PPBDMA_TARGET_*_CTX_VALID bit to TRUE
        by writing the PRI register directly.  Even though no context is valid,
        this is required to allow the interrupt to be cleared.  This must be
        done prior to clearing the interrupt even if SW intends to create a
        context on the fly via step 7c.
     5. Clear the interrupt by writing CTXNOTVALID_RESET to NV_PPBDMA_INTR_1.
     6. Poll for the preempt to complete.  Note: If other interrupts have fired,
        those must be cleared as well before the preempt will complete.
        The preempt must finish before any channel or context is torn down.
     7. Destroy the TSG, or dynamically allocate the engine context as follows:
          7a. Allocate an engine context
          7b. Add its pointer to NV_RAMIN and set up NV_PRAMIN (dev_ram.ref)
              for all channels in the TSG
          7c. Set the relevant CTX_VALID to TRUE in NV_RAMFC_TARGET for all
              channels in the TSG
          7d. Re-enable the channels by writing ENABLE_SET_TRUE to each
              NV_PCCSR_CHANNEL in the TSG

Alternatively, SCHED_DISABLE can be used in lieu of disabling the TSG channels.
The error is limited to the channel.
     Warning: If NV_PPBDMA_INTR_STALL_1_CTXNOTVALID is DISABLED, this error is
non-recoverable.

#define NV_PPBDMA_INTR_1_CTXNOTVALID                         31:31 /* RWIUF */
#define NV_PPBDMA_INTR_1_CTXNOTVALID_NOT_PENDING        0x00000000 /* R-I-V */
#define NV_PPBDMA_INTR_1_CTXNOTVALID_PENDING            0x00000001 /* R---V */
#define NV_PPBDMA_INTR_1_CTXNOTVALID_RESET              0x00000001 /* -W--C */



INTR_EN_0 - PBDMA-Unit Interrupt Enable Register

     The NV_PPBDMA_INTR_EN_0 register controls which PBDMA interrupt conditions
are enabled.  If a field is DISABLED, then the corresponding interrupt in
NV_PPBDMA_INTR_0 is disabled.  If a field is ENABLED, then the corresponding
interrupt in NV_PPBDMA_INTR_0 is enabled.
     The masking of interrupts by this register is done after the
NV_PPBDMA_INTR_0 register.  This register stops interrupts from being reported;
it does not stop bits in the NV_PPBDMA_INTR_0 register from being set.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.


#define NV_PPBDMA_INTR_EN_0(i)                (0x0004010c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_EN_0__SIZE_1               14 /*       */

#define NV_PPBDMA_INTR_EN_0_MEMREQ                              0:0 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMREQ_DISABLED              0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMREQ_ENABLED               0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMACK_TIMEOUT                      1:1 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMACK_TIMEOUT_DISABLED      0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMACK_TIMEOUT_ENABLED       0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMACK_EXTRA                        2:2 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMACK_EXTRA_DISABLED        0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMACK_EXTRA_ENABLED         0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMDAT_TIMEOUT                      3:3 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMDAT_TIMEOUT_DISABLED      0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMDAT_TIMEOUT_ENABLED       0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMDAT_EXTRA                        4:4 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMDAT_EXTRA_DISABLED        0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMDAT_EXTRA_ENABLED         0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMFLUSH                            5:5 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMFLUSH_DISABLED            0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMFLUSH_ENABLED             0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_MEMOP                               6:6 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_MEMOP_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_MEMOP_ENABLED                0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_LBCONNECT                           7:7 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_LBCONNECT_DISABLED           0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_LBCONNECT_ENABLED            0x00000001 /* RW--V */


#define NV_PPBDMA_INTR_EN_0_LBACK_TIMEOUT                       9:9 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_LBACK_TIMEOUT_DISABLED       0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_LBACK_TIMEOUT_ENABLED        0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_LBACK_EXTRA                       10:10 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_LBACK_EXTRA_DISABLED         0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_LBACK_EXTRA_ENABLED          0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_LBDAT_TIMEOUT                     11:11 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_LBDAT_TIMEOUT_DISABLED       0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_LBDAT_TIMEOUT_ENABLED        0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_LBDAT_EXTRA                       12:12 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_LBDAT_EXTRA_DISABLED         0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_LBDAT_EXTRA_ENABLED          0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_GPFIFO                            13:13 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_GPFIFO_DISABLED              0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_GPFIFO_ENABLED               0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_GPPTR                             14:14 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_GPPTR_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_GPPTR_ENABLED                0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_GPENTRY                           15:15 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_GPENTRY_DISABLED             0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_GPENTRY_ENABLED              0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_GPCRC                             16:16 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_GPCRC_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_GPCRC_ENABLED                0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_PBPTR                             17:17 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_PBPTR_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_PBPTR_ENABLED                0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_PBENTRY                           18:18 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_PBENTRY_DISABLED             0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_PBENTRY_ENABLED              0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_PBCRC                             19:19 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_PBCRC_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_PBCRC_ENABLED                0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_CLEAR_FAULTED_ERROR               20:20 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_CLEAR_FAULTED_ERROR_DISABLED 0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_CLEAR_FAULTED_ERROR_ENABLED  0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_METHOD                            21:21 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_METHOD_DISABLED              0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_METHOD_ENABLED               0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_METHODCRC                         22:22 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_METHODCRC_DISABLED           0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_METHODCRC_ENABLED            0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_DEVICE                            23:23 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_DEVICE_DISABLED              0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_DEVICE_ENABLED               0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_ENG_RESET                         24:24 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_ENG_RESET_DISABLED           0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_ENG_RESET_ENABLED            0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_SEMAPHORE                         25:25 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_SEMAPHORE_DISABLED           0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_SEMAPHORE_ENABLED            0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_ACQUIRE                           26:26 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_ACQUIRE_DISABLED             0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_ACQUIRE_ENABLED              0x00000001 /* RW--V */
#define NV_PPBDMA_INTR_EN_0_PRI                               27:27 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_PRI_DISABLED                 0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_PRI_ENABLED                  0x00000001 /* RW--V */


#define NV_PPBDMA_INTR_EN_0_PBSEG                             30:30 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_PBSEG_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_PBSEG_ENABLED                0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_0_SIGNATURE                         31:31 /* RWEUF */
#define NV_PPBDMA_INTR_EN_0_SIGNATURE_DISABLED           0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_0_SIGNATURE_ENABLED            0x00000001 /* RW--V */

INTR_EN_1 is a continuation of INTR_EN_0.
Added for Kepler to handle HCE interrupts.


#define NV_PPBDMA_INTR_EN_1(i)                   (0x0004014c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_EN_1__SIZE_1                  14 /*       */

#define NV_PPBDMA_INTR_EN_1_HCE_RE_ILLEGAL_OP                      0:0 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_HCE_RE_ILLEGAL_OP_DISABLED      0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_HCE_RE_ILLEGAL_OP_ENABLED       0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_1_HCE_RE_ALIGNB                          1:1 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_HCE_RE_ALIGNB_DISABLED          0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_HCE_RE_ALIGNB_ENABLED           0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_1_HCE_PRIV                               2:2 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_HCE_PRIV_DISABLED               0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_HCE_PRIV_ENABLED                0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_MTHD                       3:3 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_MTHD_DISABLED       0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_MTHD_ENABLED        0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_CLASS                      4:4 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_CLASS_DISABLED      0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_HCE_ILLEGAL_CLASS_ENABLED       0x00000001 /* RW--V */

#define NV_PPBDMA_INTR_EN_1_CTXNOTVALID                          31:31 /* RWEUF */
#define NV_PPBDMA_INTR_EN_1_CTXNOTVALID_DISABLED            0x00000000 /* RWE-V */
#define NV_PPBDMA_INTR_EN_1_CTXNOTVALID_ENABLED             0x00000001 /* RW--V */



INTR_STALL - PBDMA-Unit Interrupt Stall Control Register

     The NV_PPBDMA_INTR_STALL register controls whether an interrupt causes the
PBDMA unit to stop and stall.  If an interrupt's field is STALL_*_ENABLED, then
the interrupt causes the PBDMA to stall.  If an interrupt's field is
STALL_*_DISABLED then the interrupt does not cause the PBDMA unit to stall.
     This register is intended for verification.  In normal operation, the
register should be left at the default value, meaning all interrupts cause the
PBDMA unit to stall.
     One of these registers exists for each of Host's PBDMA units.  This
register is not context switched.  This register runs on the internal-domain
clock.


#define NV_PPBDMA_INTR_STALL(i)                (0x0004013c+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_STALL__SIZE_1               14 /*       */

#define NV_PPBDMA_INTR_STALL_MEMREQ                              0:0 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMREQ_DISABLED              0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMREQ_ENABLED               0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMACK_TIMEOUT                      1:1 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMACK_TIMEOUT_DISABLED      0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMACK_TIMEOUT_ENABLED       0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMACK_EXTRA                        2:2 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMACK_EXTRA_DISABLED        0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMACK_EXTRA_ENABLED         0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMDAT_TIMEOUT                      3:3 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMDAT_TIMEOUT_DISABLED      0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMDAT_TIMEOUT_ENABLED       0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMDAT_EXTRA                        4:4 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMDAT_EXTRA_DISABLED        0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMDAT_EXTRA_ENABLED         0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMFLUSH                            5:5 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMFLUSH_DISABLED            0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMFLUSH_ENABLED             0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_MEMOP                               6:6 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_MEMOP_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_MEMOP_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_LBCONNECT                           7:7 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_LBCONNECT_DISABLED           0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_LBCONNECT_ENABLED            0x00000001 /* RWE-V */


#define NV_PPBDMA_INTR_STALL_LBACK_TIMEOUT                       9:9 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_LBACK_TIMEOUT_DISABLED       0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_LBACK_TIMEOUT_ENABLED        0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_LBACK_EXTRA                       10:10 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_LBACK_EXTRA_DISABLED         0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_LBACK_EXTRA_ENABLED          0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_LBDAT_TIMEOUT                     11:11 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_LBDAT_TIMEOUT_DISABLED       0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_LBDAT_TIMEOUT_ENABLED        0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_LBDAT_EXTRA                       12:12 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_LBDAT_EXTRA_DISABLED         0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_LBDAT_EXTRA_ENABLED          0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_GPFIFO                            13:13 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_GPFIFO_DISABLED              0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_GPFIFO_ENABLED               0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_GPPTR                             14:14 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_GPPTR_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_GPPTR_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_GPENTRY                           15:15 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_GPENTRY_DISABLED             0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_GPENTRY_ENABLED              0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_GPCRC                             16:16 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_GPCRC_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_GPCRC_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_PBPTR                             17:17 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_PBPTR_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_PBPTR_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_PBENTRY                           18:18 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_PBENTRY_DISABLED             0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_PBENTRY_ENABLED              0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_PBCRC                             19:19 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_PBCRC_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_PBCRC_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_CLEAR_FAULTED_ERROR               20:20 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_CLEAR_FAULTED_ERROR_DISABLED 0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_CLEAR_FAULTED_ERROR_ENABLED  0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_METHOD                            21:21 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_METHOD_DISABLED              0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_METHOD_ENABLED               0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_METHODCRC                         22:22 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_METHODCRC_DISABLED           0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_METHODCRC_ENABLED            0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_DEVICE                            23:23 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_DEVICE_DISABLED              0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_DEVICE_ENABLED               0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_ENG_RESET                         24:24 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_ENG_RESET_DISABLED           0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_ENG_RESET_ENABLED            0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_SEMAPHORE                         25:25 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_SEMAPHORE_DISABLED           0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_SEMAPHORE_ENABLED            0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_ACQUIRE                           26:26 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_ACQUIRE_DISABLED             0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_ACQUIRE_ENABLED              0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_PRI                               27:27 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_PRI_DISABLED                 0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_PRI_ENABLED                  0x00000001 /* RWE-V */



#define NV_PPBDMA_INTR_STALL_PBSEG                             30:30 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_PBSEG_DISABLED               0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_PBSEG_ENABLED                0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_SIGNATURE                         31:31 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_SIGNATURE_DISABLED           0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_SIGNATURE_ENABLED            0x00000001 /* RWE-V */


INTR_STALL_1 - PBDMA-Unit HCE Interrupt Stall Control Register

     The NV_PPBDMA_INTR_STALL_1 register controls whether an interrupt causes
the PBDMA unit to stop and stall on HCE interrupts.  All HCE interrupts
that are reported by the PBDMA are launch check interrupts and are immediately
dropped when encountered.  Host will latch the last interrupting method and data
in HCE_DBG0 and HCE_DBG1.  If stalling is ENABLED here, an interrupt will stall
the pbdma regardless of whether the interrupt is enabled or not via INTR_EN_1.
     Warning: Do not disable stalling for CTXNOTVALID.  Doing so will cause
undefined behavior if the interrupt condition occurs.


#define NV_PPBDMA_INTR_STALL_1(i)              (0x00040140+(i)*8192) /* RW-4A */
#define NV_PPBDMA_INTR_STALL_1__SIZE_1             14 /*       */

#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ILLEGAL_OP                 0:0 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ILLEGAL_OP_DISABLED 0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ILLEGAL_OP_ENABLED  0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ALIGNB                     1:1 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ALIGNB_DISABLED     0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_HCE_RE_ALIGNB_ENABLED      0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_1_HCE_PRIV                          2:2 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_HCE_PRIV_DISABLED          0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_HCE_PRIV_ENABLED           0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_MTHD                  3:3 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_MTHD_DISABLED  0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_MTHD_ENABLED   0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_CLASS                 4:4 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_CLASS_DISABLED 0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_HCE_ILLEGAL_CLASS_ENABLED  0x00000001 /* RWE-V */

#define NV_PPBDMA_INTR_STALL_1_CTXNOTVALID                     31:31 /* RWEUF */
#define NV_PPBDMA_INTR_STALL_1_CTXNOTVALID_DISABLED       0x00000000 /* RW--V */
#define NV_PPBDMA_INTR_STALL_1_CTXNOTVALID_ENABLED        0x00000001 /* RWE-V */


HCE_DBG0 - Last HCE Method Address

    HCE_DBG0 Stores the method address seen by the HCE Handler that caused
an HCE interrupt (PBDMA_INTR_1).
    Only valid to read when a PBDMA_INTR_1 register has an interrupt
pending and the PBDMA_STALL_1 register is set for the corresponding
interrupt. Without the stall bit, Host will continue to process
methods, so other methods might trigger interrupts.  Consequently,
the contents of this register may be unpredictable.


#define NV_PPBDMA_HCE_DBG0(i)                  (0x00040150+(i)*8192) /* R--4A */
#define NV_PPBDMA_HCE_DBG0__SIZE_1                 14 /*       */

#define NV_PPBDMA_HCE_DBG0_MTHD_ADDR                            13:2 /* R-EUF */
#define NV_PPBDMA_HCE_DBG0_MTHD_ADDR_VAL0                 0x00000000 /* R-E-V */

HCE_DBG1 - Last HCE Method Data

    HCE_DBG1 Stores the method data seen by the HCE Handler that caused
an HCE interrupt (PBDMA_INTR_1).
    Only valid to read when a PBDMA_INTR_1 register has an interrupt
pending and the PBDMA_STALL_1 register is set for the corresponding interrupt.


#define NV_PPBDMA_HCE_DBG1(i)                  (0x00040154+(i)*8192) /* R--4A */
#define NV_PPBDMA_HCE_DBG1__SIZE_1                 14 /*       */

#define NV_PPBDMA_HCE_DBG1_MTHD_DATA                            31:0 /* R-EUF */
#define NV_PPBDMA_HCE_DBG1_MTHD_DATA_VAL0                 0x00000000 /* R-E-V */


9  -  HOST METHODS (NV_UDMA)
============================

     This section describes the types of methods that are executed by Host.  In
DMA mode, Host reads the pushbuffer data and generates method address/data pairs
from that data.
     Terminology:
Host method - the methods listed here, below (left-shifted) address 0x100
Host-only method - any Host method excluding SetObject, which also sends the
                   method to the engine specified by the subchannel field
non-Host method - engine or SW method; excludes SetObject


OBJECT [method] (SetObject) - Assign Object to Engine via Subchannel Method

     The NV_UDMA_OBJECT method, generally known as SetObject, SET_OBJECT, or
occasionally SetObj, verifies the engine targeted by the method's subchannel
field supports the specified class ID.
     The NVCLASS field specifies the object's class identifier.  The target
engine for the check is determined by the NV_FIFO_DMA_*_SUBCHANNEL specified in
the method header.  See dev_ram.ref for specifics regarding the subchannel
mapping and for information regarding subchannel switching.  On copy engines,
Host ensures that the object specified in NVCLASS is supported on the HCE, and
will raise INTR_*_HCE_ILLEGAL_CLASS if it is not; a CE SetObject is otherwise a
no-op and is not sent to the copy engine.  This method is not used to verify the
chip's Host class ID.  Use the NV_PPBDMA_SIGNATURE_HW field in
NV_RAMFC_SIGNATURE for that.
     SetObject is a misnomer: the GPU provides no mechanism for SW to select any
other class interface than the one a given chip supports.  SetObject is not
required by any engine.
     No subchannel-object mapping is stored in Host.  Each engine is
responsible for maintaining its class identifier state if multiple classes are
supported.  In a TSG on a runlist targeting such a hypothetical engine, the
SetObject method need only be sent once for a given subchannel on the engine
because all channels in the TSG share a context.  After the SetObject, all
channels targeting the same engine in the TSG will use the same class binding.



#define NV_UDMA_OBJECT                                   0x00000000 /* -W-4R */

#define NV_UDMA_OBJECT_NVCLASS                                 15:0 /* -W-VF */


ILLEGAL [method] - Illegal Method

     By reserving an opcode for an ILLEGAL method, triggering an error can be
guaranteed to be future-compatible. This triggers the NV_PPBDMA_INTR_0_METHOD
interrupt. This can be thought of as a software method for the channel class
XX6f, but a different interrupt bit is set (METHOD instead of DEVICE).


#define NV_UDMA_ILLEGAL                                  0x00000004 /* -W-4R */

#define NV_UDMA_ILLEGAL_HANDLE                                 31:0 /* -W-VF */


NOP [method] - No Operation Method

     This method is discarded upon execution.



#define NV_UDMA_NOP                                      0x00000008 /* -W-4R */

#define NV_UDMA_NOP_HANDLE                                     31:0 /* -W-VF */





Host Semaphore Methods

     NVIDIA semaphores provide a basic synchronization mechanism for the GPU.
(They do not behave like classic Dijkstra semaphores; instead, they provide a
conditional barrier.)  A semaphore refers to a 4-byte or 8-byte payload value
in memory, the location of which is referred to as the semaphore address.  A
semaphore release writes a payload to the semaphore or performs a reduction
operation on the semaphore using the payload.  A release may optionally write a
timestamp, in which case 16 bytes are written at the semaphore address.  A
semaphore acquire waits for the semaphore to reach a given condition before
allowing a channel to proceed.  Five Host methods, described below, are
provided to perform semaphore releases and acquires:

     SEM_ADDR_LO - Set semaphore address least significant bits
     SEM_ADDR_HI - Set semaphore address most significant bits
     SEM_PAYLOAD_LO - Set the lower 32 bits of the semaphore payload
     SEM_PAYLOAD_HI - Set the upper 32 bits of the semaphore payload
     SEM_EXECUTE - Configure and begin execution of the release or acquire

SEM_ADDR_LO [method] - Set Semaphore Address Low Method

     The NV_UDMA_SEM_ADDR_LO method sets the least significant bits of the
address of a semaphore.

     The NV_UDMA_SEM_ADDR_LO_OFFSET field contains bits 31:2 of a semaphore
address.  Since the smallest supported semaphore is 4-byte aligned, Host will
not store bits 1:0 of the address.
     Host will keep the lowest two bits of the SEM_ADDR_LO method reserved so
software can directly pack the low 32 bits of an address into the method
without needing to mask off the lowest two bits.
     Note that software is required to align all semaphore addresses according
to their respective sizes, and Host enforces this requirement with the
NV_PPBDMA_INTR_0_SEMAPHORE interrupt.  See the documentation below for the
NV_UDMA_SEM_EXECUTE method and its fields PAYLOAD_SIZE and RELEASE_TIMESTAMP.
     While the channel is loaded on a PBDMA unit, the OFFSET value is stored in
the NV_PPBDMA_SEM_ADDR_LO register.  Otherwise, this value is stored in the
NV_RAMFC_SEM_ADDR_LO field of the RAMFC part of the channel's instance block.


#define NV_UDMA_SEM_ADDR_LO                              0x0000005C /* -W-4R */

#define NV_UDMA_SEM_ADDR_LO_OFFSET                             31:2 /* -W-VF */


SEM_ADDR_HI [method] - Set Semaphore Address High Method

     The NV_UDMA_SEM_ADDR_HI method sets the most significant bits of the
address of a semaphore.

     The NV_UDMA_SEM_ADDR_HI_OFFSET field contains bits 39:32 of the address of
a semaphore.
     While the channel is loaded on a PBDMA unit, the OFFSET value is stored in
the NV_PPBDMA_SEM_ADDR_HI register.  Otherwise, this value is stored in the
NV_RAMFC_SEM_ADDR_HI field of the RAMFC part of the channel's instance block.


#define NV_UDMA_SEM_ADDR_HI                              0x00000060 /* -W-4R */

#define NV_UDMA_SEM_ADDR_HI_OFFSET                              7:0 /* -W-VF */


SEM_PAYLOAD_LO [method] - Set Semaphore Payload Low Method

     The NV_UDMA_SEM_PAYLOAD_LO method sets the lower 32 bits of the semaphore
payload.  This value is used according to the NV_UDMA_SEM_EXECUTE_OPERATION
field described below.

     While the channel is loaded on a PBDMA unit, the PAYLOAD_LO value is
stored in the NV_PPBDMA_SEM_PAYLOAD_LO register.  Otherwise, this value is
stored in the NV_RAMFC_SEM_PAYLOAD_LO field of the RAMFC part of the channel's
instance block.


#define NV_UDMA_SEM_PAYLOAD_LO                           0x00000064 /* -W-4R */

#define NV_UDMA_SEM_PAYLOAD_LO_PAYLOAD                         31:0 /* -W-VF */


SEM_PAYLOAD_HI [method] - Set Semaphore Payload High Method

     The NV_UDMA_SEM_PAYLOAD_HI method sets the upper 32 bits of the semaphore
payload.  This value is used according to the NV_UDMA_SEM_EXECUTE_OPERATION
field described below.

     While the channel is loaded on a PBDMA unit, the PAYLOAD_HI value is
stored in the NV_PPBDMA_SEM_PAYLOAD_HI register.  Otherwise, this value is
stored in the NV_RAMFC_SEM_PAYLOAD_HI field of the channel's instance block.


#define NV_UDMA_SEM_PAYLOAD_HI                           0x00000068 /* -W-4R */

#define NV_UDMA_SEM_PAYLOAD_HI_PAYLOAD                         31:0 /* -W-VF */


SEM_EXECUTE [method] - Semaphore Execute Method

     The NV_UDMA_SEM_EXECUTE method specifies a synchronization operation and
initiates that operation.  To use a semaphore, set the semaphore's address with
the NV_UDMA_SEM_ADDR_LO/_HI methods, set the semaphore payload with the
NV_UDMA_SEM_PAYLOAD_LO/_HI methods, and then initiate the semaphore operation
with an NV_UDMA_SEM_EXECUTE method.

Semaphore operation and payload size:

     The NV_UDMA_SEM_EXECUTE_OPERATION field specifies the semaphore operation.
RELEASE and REDUCTION cause a semaphore release to occur, potentially allowing
future acquires to succeed and causing a timestamp to be written if
RELEASE_TIMESTAMP is EN.
     For iGPU cases where a semaphore release can be mapped to an onchip syncpoint,
the SIZE must be 4Bytes to avoid double incrementing the target syncpoint.
Timestamping should also be disabled to avoid unwanted behavior.
     An operation of ACQUIRE, ACQ_STRICT_GEQ, ACQ_CIRC_GEQ, ACQ_AND, or ACQ_NOR
causes Host to perform a semaphore acquire, meaning that Host will not process
any subsequent methods in the channel until the acquire succeeds.  When the
semaphore value does not satisfy the conditions of the acquire, the semaphore
acquire is said to have failed.  In this case, the PBDMA unit will switch to
the next pending channel on its runqueue within the same TSG, if it has not
reached the end of the runqueue, but otherwise may either start again switching
to channels on its runqueue within the same TSG or switch to another TSG; see
the documentation below for the NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG field.
Upon switching back into a channel waiting on a semaphore the PBDMA unit
continues to poll the semaphore address.  When the channel is loaded on the
PBDMA unit, the NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL register field can be read
for debug purposes in order to determine whether an acquire has failed or not.
     If OPERATION is ACQUIRE, the acquire succeeds when the semaphore value is
equal to the payload value.  The PAYLOAD_SIZE controls the size of the memory
read performed by Host and the comparison.  If PAYLOAD_SIZE is 32BIT then a 32
bit memory read is performed and the return value is compared to PAYLOAD_LO.
If PAYLOAD_SIZE is 64BIT then a single 64 bit memory read is performed and the
return value is compared to PAYLOAD_LO/_HI.
     If OPERATION is ACQ_STRICT_GEQ, the acquire succeeds when (SV >= PV),
where SV is the semaphore value in memory, PV is the payload value, and >= is
an unsigned greater-than-or-equal-to comparison.
     If OPERATION is ACQ_CIRC_GEQ, the acquire succeeds when the two's
complement signed representation of the semaphore value minus the payload value
is non-negative; that is, when the semaphore value is within half a range
greater than or equal to the payload value, modulo that range.  The
PAYLOAD_SIZE field determines if Host is doing a 32 bit comparison or a 64 bit
comparison.  So in other words, the condition is met when the PAYLOAD_SIZE is
32BIT and the semaphore value is within the range [payload,
((payload+(2^(32-1)))-1)], modulo 2^32, or when the PAYLOAD_SIZE is 64BIT and
the semaphore value is within the range [payload, ((payload+(2^(64-1)))-1)],
modulo 2^64.
     If OPERATION is ACQ_AND, the acquire succeeds when the bitwise-AND of the
semaphore value and the payload value is not zero.  The PAYLOAD_SIZE field
determines if a 32 bit or 64 bit value is read from memory, and compared to.
     If OPERATION is ACQ_NOR, the acquire succeeds when the bitwise-NOR of the
semaphore value and the payload value is not zero.  PAYLOAD_SIZE determines if
a 32 bit or 64 bit value is read from memory, and compared to.
     If OPERATION is RELEASE, then Host simply writes the payload value to the
semaphore structure in memory at the SEM_ADDR_LO/_HI address.  The exact value
written depends on the operation defined.  If PAYLOAD_SIZE is 32BIT then a 32
bit payload value from PAYLOAD_LO is used.  If PAYLOAD_SIZE is 64BIT then a 64
bit payload specified by PAYLOAD_LO/_HI is used.
     If OPERATION is REDUCTION, then Host sends the memory system an
instruction to perform the atomic reduction operation specified in the
REDUCTION field on the memory value, using the PAYLOAD_LO/_HI payload value as
the operand.  The PAYLOAD_SIZE field determines if a 32 bit or 64 bit
reduction is performed.  Note that if the semaphore address refers to a page
whose PTE has ATOMIC_DISABLE set, the operation will result in an
ATOMIC_VIOLATION fault.
     Note that if the PAYLOAD_SIZE is 64BIT, the semaphore address is required
to be 8-byte aligned.  If RELEASE_TIMESTAMP is EN while the operation is a
RELEASE or REDUCTION operation, the semaphore address is required to be 16-byte
aligned.  The semaphore address is not required to be 16-byte aligned during an
acquire operation.  If the semaphore address is not aligned according to the
field values Host will raise the NV_PPBDMA_INTR_0_SEMAPHORE interrupt.
     For iGPU cases where a semaphore release can be mapped to an onchip syncpoint,
the SIZE must be 4Bytes to avoid double incrementing the target syncpoint.
Timestamping should also be disabled to avoid unwanted behavior.

Semaphore switch option:

     The NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG field specifies whether or not
Host should switch to processing another TSG if the acquire fails.  If every
channel within the same TSG has no work (is waiting on a semaphore acquire, is
idle, is unbound, or is disabled), the TSG can make no further progress until
one of the relevant semaphores is released.  Because it may be a long time
before the release, it may be more efficient for the PBDMA unit to switch off
the blocked TSG prior to the runqueue timeslice expiring, so that it can serve
a different TSG that is not waiting, or so that it can poll other semaphores on
other TSGs whose channels are waiting on acquires.
     When a semaphore acquire fails, the PBDMA unit will always switch to
another channel within the same TSG, provided that it has not completed a
traversal through all the TSG's channels.  If every pending channel in the TSG
is waiting on a semaphore acquire, the Host scheduler is able to identify a lack
of progress for the entire TSG by the time it has completed a traversal through
all those channels.  In this case the value of ACQUIRE_SWITCH_TSG for each of
these channels determines whether the PBDMA will switch to another TSG or start
another traversal through the same TSG.
     If ACQUIRE_SWITCH_TSG is DIS for any of the channels in the TSG, the Host
scheduler will ignore any lack of progress and continue processing the TSG,
until either every channel in the TSG runs out of work or the timeslice
expires.  If ACQUIRE_SWITCH_TSG is EN for every pending channel in the TSG, the
Host scheduler will recognize a lack of progress for the whole TSG, and will
switch to the next serviceable TSG on the runqueue, if possible.
     In the case described above, if there isn't a different serviceable TSG
on the runlist, then the current channel's TSG will continue to be scheduled
and the acquire retry will be naturally delayed by the time it takes for Host's
runlist processing to return to the same channel.  This retry delay may be too
short, in which case the runlist search can be throttled to increase the delay
by configuring NV_PFIFO_ACQ_PRETEST; see dev_fifo.ref.  Note that if the
channel remains switched in, the prefetched pushbuffer data is not discarded,
so setting ACQUIRE_SWITCH_TSG_EN cannot deterministically be depended on to
cause the discarding of prefetched pushbuffer data.
     Also note that when switching between channels within a TSG, Host does not
wait on any timer (such as NV_PFIFO_ACQ_PRETEST or NV_PPBDMA_ACQUIRE_RETRY),
but is instead throttled by the time it takes to switch channels.  Host will
honor the ACQUIRE_RETRY time, but only if the same channel is rescheduled
without a channel switch.

Semaphore wait-for-idle option:

     The NV_UDMA_SEM_EXECUTE_RELEASE_WFI field applies only to releases and
reductions.  It specifies whether Host should wait until the engine to which
the channel last sent methods is idle (in other words, until all previous
methods in the channel have been completed) before writing to memory as part of
the release or reduction operation.  If this field is RELEASE_WFI_EN, then Host
waits for the engine to be idle, inserts a system memory barrier, and then
updates the value in memory.  If this field is RELEASE_WFI_DIS, Host performs
the semaphore operation on the memory without waiting for the engine to be
idle, and without using a system memory barrier.

Semaphore timestamp option:

     The NV_UDMA_SEM_EXECUTE_RELEASE_TIMESTAMP specifies whether a timestamp
should be written by a release in addition to the payload.  If
RELEASE_TIMESTAMP is DIS, then only the semaphore payload will be written.  If
the field is EN then both the semaphore payload and a nanosecond timestamp will
be written.  In this case, the semaphore address must be 16-byte aligned; see
the related note at NV_UDMA_SEM_ADDR_LO.  If RELEASE_TIMESTAMP is EN and
SEM_ADDR_LO is not 16-byte aligned, then Host will initiate an interrupt
(NV_PPBDMA_INTR_0_SEMAPHORE).  When a 16-byte semaphore is written, the
semaphore timestamp will be written before the semaphore payload so that when
an acquire succeeds, the timestamp write will have completed.  This ensures SW
will not get an out-of-date timestamp on platforms which guarantee ordering
within a 16-byte aligned region.  The timestamp value is snapped from the
NV_PTIMER_TIME_1/0 registers; see dev_timer.ref.
     For iGPU cases where a semaphore release can be mapped to an onchip syncpoint,
the SIZE must be 4Bytes to avoid double incrementing the target syncpoint.
Timestamping should also be disabled for a syncpoint-backed release to avoid
unexpected behavior.

     Below is the little endian format of 16-byte semaphores in memory:

    ---- ------------------- -------------------
    byte Data(Little endian) Data(Little endian)
         PAYLOAD_SIZE=32BIT  PAYLOAD_SIZE=64BIT
    ---- ------------------- -------------------
      0  Payload[ 7: 0]      Payload[ 7: 0]
      1  Payload[15: 8]      Payload[15: 8]
      2  Payload[23:16]      Payload[23:16]
      3  Payload[31:24]      Payload[31:24]
      4  0                   Payload[39:32]
      5  0                   Payload[47:40]
      6  0                   Payload[55:48]
      7  0                   Payload[63:56]
      8  timer[ 7: 0]        timer[ 7: 0]
      9  timer[15: 8]        timer[15: 8]
     10  timer[23:16]        timer[23:16]
     11  timer[31:24]        timer[31:24]
     12  timer[39:32]        timer[39:32]
     13  timer[47:40]        timer[47:40]
     14  timer[55:48]        timer[55:48]
     15  timer[63:56]        timer[63:56]
    ---- ------------------- -------------------


Semaphore reduction operations:

     The NV_UDMA_SEM_EXECUTE_REDUCTION field specifies the reduction operation
to perform on the semaphore memory value, using the semaphore payload from
SEM_PAYLOAD_LO/HI as an operand, when the OPERATION field is
OPERATION_REDUCTION.  Based on the PAYLOAD_SIZE field the semaphore value and
the payload are interpreted as 32bit or 64bit integers and the reduction
operation is performed according to the signedness specified via the
REDUCTION_FORMAT field described below.  The reduction operation leaves the
modified value in the semaphore memory according to the operation as follows:

REDUCTION_IMIN - the minimum of the value and payload
REDUCTION_IMAX - the maximum of the value and payload
REDUCTION_IXOR - the bitwise exclusive or (XOR) of the value and payload
REDUCTION_IAND - the bitwise AND of the value and payload
REDUCTION_IOR  - the bitwise OR of the value and payload
REDUCTION_IADD - the sum of the value and payload
REDUCTION_INC  - the value incremented by 1, or reset to 0 if the incremented
                 value would exceed the payload
REDUCTION_DEC  - the value decremented by 1, or reset back to the payload
                 if the original value is already 0 or exceeds the payload

Note that INC and DEC are somewhat surprising: they can be used to repeatedly
loop the semaphore value when performed successively with the same payload p.
INC repeatedly iterates from 0 to p inclusive, resetting to 0 once exceeding p.
DEC repeatedly iterates down from p to 0 inclusive, resetting back to p once
the value would otherwise underflow.  Therefore, an INC or DEC reduction with
payload 0 effectively releases a semaphore by setting its value to 0.

The reduction opcode assignment matches the enumeration in the XBAR translator
(to avoid extra remapping of hardware), but this does not match the graphics FE
reduction opcodes used by graphics backend semaphores.  The reduction operation
itself is performed by L2.

Semaphore signedness option:

     The NV_UDMA_SEM_EXECUTE_REDUCTION_FORMAT field specifies whether the
values involved in a reduction operation will be interpreted as signed or
unsigned.

The following table summarizes each reduction operation, and the signedness and
payload size supported for each operation:

         signedness
  r op   32b   64b   function (v = memory value, p = semaphore payload)
  -----+-----+-----+---------------------------------------------------
  IMIN   U,S   U,S   v = (v < p) ? v : p
  IMAX   U,S   U,S   v = (v > p) ? v : p
  IXOR   N/A   N/A   v = v ^ p
  IAND   N/A   N/A   v = v & p
  IOR    N/A   N/A   v = v | p
  IADD   U,S   U     v = v + p
  INC    U     inv   v = (v >= p) ? 0 : v + 1
  DEC    U     inv   v = (v == 0 || v > p) ? p : v - 1   (from L2 IAS)

An operation with signedness "N/A" will ignore the value of REDUCTION_FORMAT
when executing, and either value of REDUCTION_FORMAT is valid.  If an operation
is "U only" this means a signed version of this operation is not supported, and
if it is marked "inv" then it is unsupported for any signedness.  If Host sees
an unsupported reduction op (in other words, is expected to run a reduction op
while PAYLOAD_SIZE and REDUCTION_FORMAT are set to unsupported values for that
op), Host will raise the NV_PPBDMA_INTR_0_SEMAPHORE interrupt.

Example: A signed 32-bit IADD reduction operation is valid.  A signed 64-bit
IADD reduction operation is unsupported and will trigger an interrupt if sent to
Host.  A 64-bit INC (or DEC) operation is not supported and will trigger an
interrupt if sent to Host.

Legal semaphore operation combinations:

     For iGPU cases where a semaphore release can be mapped to an onchip syncpoint,
the SIZE must be 4Bytes to avoid double incrementing the target syncpoint.
Timestamping should also be disabled for a syncpoint-backed release to avoid
unexpected behavior.

     The following table diagrams the types of semaphore operations that are
possible.  In the columns, "x" matches any field value.  ACQ refers to any of
the ACQUIRE, ACQ_STRICT_GEQ, ACQ_CIRC_GEQ, ACQ_AND, and ACQ_NOR operations.
REL refers to either a RELEASE or a REDUCTION operation.

  OP  SWITCH WFI PAYLOAD_SIZE TIMESTAMP  Description
  --- ------ --- ------------ --------- --------------------------------------------------------------
  ACQ    0    x             0         x  acquire; 4B (32 bit comparison); retry on fail
  ACQ    0    x             1         x  acquire; 8B (64 bit comparison); retry on fail
  ACQ    1    x             0         x  acquire; 4B (32 bit comparison); switch on fail
  ACQ    1    x             1         x  acquire; 8B (64 bit comparison); switch on fail
  REL    x    0             0         1  do not WFI & release 4B payload + timestamp semaphore
  REL    x    0             1         1  do not WFI & release 8B payload + timestamp semaphore
  REL    x    1             0         1  WFI & release 4B payload + timestamp semaphore
  REL    x    1             1         1  WFI & release 8B payload + timestamp semaphore
  REL    x    0             0         0  do not WFI & release doubleword (4B) semaphore payload
  REL    x    0             1         0  do not WFI & release quadword (8B) semaphore payload
  REL    x    1             0         0  WFI & release doubleword (4B) semaphore payload
  REL    x    1             1         0  WFI & release quadword (8B) semaphore payload
  --- ------ --- ------------ --------- --------------------------------------------------------------

     While the channel is loaded on a PBDMA unit, information from this method
is stored in the NV_PPBDMA_SEM_EXECUTE register.  Otherwise, this information
is stored in the NV_RAMFC_SEM_EXECUTE field of the RAMFC part of the channel's
instance block.

Undefined bits:

     Bits in the NV_UDMA_SEM_EXECUTE method data that are not used by the
specified OPERATION should be set to 0.  When non-zero, their behavior is
undefined.



#define NV_UDMA_SEM_EXECUTE                              0x0000006C /* -W-4R */

#define NV_UDMA_SEM_EXECUTE_OPERATION                           2:0 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_OPERATION_ACQUIRE            0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_RELEASE            0x00000001 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ     0x00000002 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ       0x00000003 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_ACQ_AND            0x00000004 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_ACQ_NOR            0x00000005 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_OPERATION_REDUCTION          0x00000006 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG                12:12 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS       0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN        0x00000001 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_RELEASE_WFI                       20:20 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_RELEASE_WFI_DIS              0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_RELEASE_WFI_EN               0x00000001 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_PAYLOAD_SIZE                      24:24 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_PAYLOAD_SIZE_32BIT           0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_PAYLOAD_SIZE_64BIT           0x00000001 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_RELEASE_TIMESTAMP                 25:25 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS        0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_RELEASE_TIMESTAMP_EN         0x00000001 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_REDUCTION                         30:27 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IMIN               0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IMAX               0x00000001 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IXOR               0x00000002 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IAND               0x00000003 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IOR                0x00000004 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_IADD               0x00000005 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_INC                0x00000006 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_DEC                0x00000007 /* -W--V */

#define NV_UDMA_SEM_EXECUTE_REDUCTION_FORMAT                  31:31 /* -W-VF */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED      0x00000000 /* -W--V */
#define NV_UDMA_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED    0x00000001 /* -W--V */


NON_STALL_INT [method] - Non-Stalling Interrupt Method

     The NON_STALL_INT method causes the NV_PFIFO_INTR_0_CHANNEL_INTR field
to be set to PENDING in the channel's interrupt register, as well as
NV_PFIFO_INTR_HIER_* registers.  This will cause an interrupt if it is
enabled.  Host does not stall the execution of the GPU context's
method, does not switch out the GPU context, and does not disable switching the
GPU context.
     A NON_STALL_INT method's data (NV_UDMA_NON_STALL_INT_HANDLE) is ignored.
     Software should handle all of a channel's non-stalling interrupts before it
unbinds the channel from the GPU context.


#define NV_UDMA_NON_STALL_INT                            0x00000020 /* -W-4R */

#define NV_UDMA_NON_STALL_INT_HANDLE                           31:0 /* -W-VF */




MEM_OP methods: membars, and cache and TLB management.

     MEM_OP_A, MEM_OP_B, and MEM_OP_C set up state for performing a memory
operation.  MEM_OP_D sets additional state, specifies the type of memory
operation to perform, and triggers sending the mem op to HUB.  To avoid
unexpected behavior for future revisions of the MEM_OP methods, all 4 methods
should be sent for each requested mem op, with irrelevant fields set to 0.
Note that hardware does not enforce the requirement that unrelated fields be set
to 0, but ignoring this advice could break forward compatibility.
     Host does not wait until an engine is idle before beginning to execute
this method.
     While a GPU context is bound to a channel and assigned to a PBDMA unit,
the NV_UDMA_MEM_OP_A-C values are stored in the NV_PPBDMA_MEM_OP_A-C registers
respectively.  While the GPU context is not assigned to a PBDMA unit, these
values are stored in the respective NV_RAMFC_MEM_OP_A-C fields of the RAMFC part
of the GPU context's instance block in memory.

Usage, operations, and configuration:

     MEM_OP_D_OPERATION specifies the type of memory operation to perform.  This
field determines the value of the opcode on the Host/FB interface.  When Host
encounters the MEM_OP_D method, Host sends the specified request to the FB and
waits for an indication that the request has completed before beginning to
process the next method.  To issue a memory operation, first issue the 3
MEM_OP_A-C methods to configure the operation as documented below.  Then send
MEM_OP_D to complete the configuration and trigger the operation.  The
operations available for MEM_OP_D_OPERATION are as follows:
     MEMBAR - perform a memory barrier; see below.
     MMU_TLB_INVALIDATE - invalidate page translation and attribute data from
the given page directory that are cached in the Memory-Management Unit TLBs.
     MMU_TLB_INVALIDATE_TARGETED - invalidate page translation and attributes
data corresponding to a specific page in a given page directory.
     L2_SYSMEM_INVALIDATE - invalidate data from system memory cached in L2.
     L2_PEERMEM_INVALIDATE - invalidate peer-to-peer data in the L2 cache.
     L2_CLEAN_COMPTAGS - clean the L2 compression tag cache.
     L2_FLUSH_DIRTY - flush dirty lines from L2.
     L2_WAIT_FOR_SYS_PENDING_READS - ensure all sysmem reads are past the point
of being modified by a write through a reflected mapping.  To do this, L2 drains
all sysmem reads to the point where they cannot be modified by future
non-blocking writes to reflected sysmem.  L2 will block any new sysmem read
requests and drain out all read responses.  Note VC's with sysmem read requests
at the head would stall any request till the flush is complete.  The niso-nb vc
does not have sysmem read requests so it would continue to flow.  L2 will ack
that the sys flush is complete and unblock all VC's.  Note this operation is a
NOP on tegra chips.
     ACCESS_COUNTER_CLR - clear page access counters.

     Depending on the operation given in MEM_OP_D_OPERATION, the other fields of
all four MEM_OP methods are interpreted differently:

MMU_TLB_INVALIDATE*
-------------------

     When the operation is MMU_TLB_INVALIDATE or MMU_TLB_INVALIDATE_TARGETED,
then Host will initiate a TLB invalidate as described above.  The MEM_OP
configuration fields specify what to invalidate, where to perform the
invalidate, and optionally trigger a replay or cancel event for replayable
faults buffered within the TLBs as part of UVM page management.
     When the operation is MMU_TLB_INVALIDATE_TARGETED,
MEM_OP_C_TLB_INVALIDATE_PDB must be ONE, and the TLB_INVALIDATE_TARGET_ADDR_LO
and HI fields must be filled in to specify the target page.
     These operations are privileged and can only be executed from channels
with NV_PPBDMA_CONFIG_AUTH_LEVEL set to PRIVILEGED.  This is configured via the
NV_RAMFC_CONFIG dword in the channel's RAMFC during channel setup.

     MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID and
MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID identify the GPC and uTLB
within that GPC respectively that should perform the cancel operation when
MEM_OP_C_TLB_INVALIDATE_REPLAY is CANCEL_TARGETED.  These field values should be
copied from the GPC_ID and CLIENT fields from the associated
NV_UVM_FAULT_BUF_ENTRY packet or NV_PFIFO_INTR_MMU_FAULT_INFO(i) entry.  The
CLIENT_UNIT_ID corresponds to the values specified by NV_PFAULT_CLIENT_GPC_* in
dev_fault.ref. These fields are used with the CANCEL_TARGETED operation. The
fields also overlap with CANCEL_MMU_ENGINE_ID, and are interpreted as
CANCEL_MMU_ENGINE_ID during a replay of type REPLAY_CANCEL_VA_GLOBAL. For other
replay operations, these fields must be 0.

     MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID specifies the associated
MMU_ENGINE_ID of the requests targeted by a REPLAY_CANCEL_VA_GLOBAL
operation. The field is ignored if the replay operation is not
REPLAY_CANCEL_VA_GLOBAL. This field overlaps with CANCEL_TARGET_GPC_ID and
CANCEL_TARGET_CLIENT_UNIT_ID fields.

     MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE is aliased/repurposed
     with MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID field
     when MEM_OP_C_TLB_INVALIDATE_REPLAY (below) is anything other
     than CANCEL_TARGETED or CANCEL_VA_GLOBAL or
     CANCEL_VA_TARGETED. In the invalidation size enabled replay type
     cases, the actual region to be invalidated is calculated as
     4K*(2^INVALIDATION_SIZE) i.e.,
     4K*(2^CANCEL_TARGET_CLIENT_UNIT_ID); client unit id and gpc id
     are not applicable.

     MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR controls whether a Hub SYSMEMBAR
operation is performed after waiting for all outstanding acks to complete, after
the TLB is invalidated.  Note if ACK_TYPE is ACK_TYPE_NONE then this field is
ignored and no MEMBAR will be performed.  This is provided as a SW optimization
so that SW does not need to perform a NV_UDMA_MEM_OP_D_OPERATION_MEMBAR op with
MEMBAR_TYPE SYS_MEMBAR after the TLB_INVALIDATE.  This field must be 0 if
TLB_INVALIDATE_GPC is DISABLE.

     MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI:MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO
specifies the 4k aligned virtual address of the page whose translation to
invalidate within the TLBs.  These fields are valid only when OPERATION is
MMU_TLB_INVALIDATE_TARGETED; otherwise, they must be set to 0.

     MEM_OP_C_TLB_INVALIDATE_PDB controls whether a TLB invalidate should apply
to a particular page directory or to all of them.  If PDB is ALL, then all page
directories are invalidated.  If PDB is ONE, then the PDB address and aperture
are specified in the PDB_ADDR_LO:PDB_ADDR_HI and PDB_APERTURE fields.
Note that ALL does not make sense when OPERATION is MMU_TLB_INVALIDATE_TARGETED;
the behavior in that case is undefined.

     MEM_OP_C_TLB_INVALIDATE_GPC controls whether the GPC-MMU and uTLB entries
should be invalidated in addition to the Hub-MMU TLB (Note: the Hub TLB is
always invalidated). Set it to INVALIDATE_GPC_ENABLE to invalidate the GPC TLBs.
The REPLAY, ACK_TYPE, and SYSMEMBAR fields are only used by the GPC TLB and so
are ignored if INVALIDATE_GPC is DISABLE.

     MEM_OP_C_TLB_INVALIDATE_REPLAY specifies the type of replay to perform in
addition to the invalidate.  A replay causes all replayable faults outstanding
in the TLB to attempt their translations again.  Once a TLB acks a replay, that
TLB may start accepting new translations again.  The replay flavors are as
follows:
     NONE - do not replay any replayable faults on invalidate.
     START - initiate a replay across all TLBs, but don't wait for completion.
          The replay will be acked as soon as the invalidate is processed, but
          replays themselves are in flight and not necessarily translated.
     START_ACK_ALL - initiate the replay and wait until it completes.
          The replay will be acked after all pending transactions in the replay
          fifo have been translated.  New requests will remain stalled in the
          gpcmmu until all transactions in the replay fifo have completed and
          there are no pending faults left in the replay fifo.
     CANCEL_TARGETED - initiate a cancel-replay on a targeted uTLB, causing any
          replayable translations buffered in that uTLB to become non-replayable
          if they fault again.  In this case, the first faulting translation
          will be reported in the NV_PFIFO_INTR_MMU_FAULT registers and will
          raise PFIFO_INTR_0_MMU_FAULT.  The specific TLB to target for the
          cancel is specified in the CANCEL_TARGET fields.  Note the TLB
          invalidate still applies globally to all TLBs.
     CANCEL_GLOBAL - like CANCEL_TARGETED, but all TLBs will cancel-replay.
     CANCEL_VA_GLOBAL - initiates a cancel operation that cancels all requests
          with the matching mmu_engine_id and access_type that land in the
          specified 4KB aligned virtual address within the scope of specified
          PDB. All other requests are replayed. If the specified engine is not
          bound, or if the PDB of the specified engine does not match the
          specified PDB, all requests will be replayed and none will be canceled.

     MEM_OP_C_TLB_INVALIDATE_ACK_TYPE controls which sort of ACK the uTLBs wait
for after having issued a membar to L2.  ACK_TYPE_NONE does not perform any sort
of membar.  ACK_TYPE_INTRANODE waits for an ack from the XBAR.
ACK_TYPE_GLOBALLY waits for an L2 ACK.  ACK_TYPE_GLOBALLY is equivalent to a
MEMBAR operation from the engine, or a SYS_MEMBAR if
MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR is EN.

     MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL specifies which levels in the page
directory hierarchy of the TLB cache to invalidate.  The levels are numbered
from the bottom up, with the PTE being at the bottom with level 1.  The
specified level and all those below it in the hierarchy -- that is, all those
with a lower numbered level -- are invalidated.  ALL (the 0 default) is
special-cased to indicate the top level; this causes the invalidate to apply to
the entire page mapping structure. The field is ignored if the replay operation
is REPLAY_CANCEL_VA_GLOBAL.

     MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE specifies the associated ACCESS_TYPE of
the requests targeted by a REPLAY_CANCEL_VA_GLOBAL operation. This field
overlaps with the INVALIDATE_PAGE_TABLE_LEVEL field, and is ignored if the
replay operation is not REPLAY_CANCEL_VA_GLOBAL. The ACCESS_TYPE field can get
one of the following values:
     READ - the cancel_va_global should be performed on all pending read requests.
     WRITE - the cancel_va_global should be performed on all pending write requests.
     ATOMIC_STRONG - the cancel_va_global should be performed on all pending
         strong atomic requests.
     ATOMIC_WEAK - the cancel_va_global should be performed on all pending
         weak atomic requests.
     ATOMIC_ALL - the cancel_va_global should be performed on all pending atomic
         requests.
     WRITE_AND_ATOMIC - the cancel_va_global should be performed on all pending
         write and atomic requests.
     ALL - the cancel_va_global should be performed on all pending requests.


     MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE specifies the target aperture of the
page directory for which TLB entries should be invalidated.  This field must be
0 when TLB_INVALIDATE_PDB is ALL.

     MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO specifies the low 20 bits of the
4k-block-aligned PDB (base address of the page directory) when
TLB_INVALIDATE_PDB is ONE; otherwise this field must be 0.  The PDB byte address
should be 4k aligned and right-shifted by 12 before being split and packed into
the ADDR fields.  Note that the PDB_ADDR_LO field starts at bit 12, so it is
possible to set MEM_OP_C to the low 32 bits of the byte address, mask off the
low 12, and then or in the rest of the configuration fields.

     MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI contains the high bits of the PDB when
TLB_INVALIDATE_PDB is ONE.  Otherwise this field must be 0.

UVM handling of replayable faults:

     The following example illustrates how TLB invalidate may be used by the
UVM driver:
     1. When the TLB invalidate completes, all memory accesses using the old
        TLB entries prior to the invalidate will finish translation (but not
        completion), and any new virtual accesses will trigger new
        translations.  The outstanding in-flight translations are allowed to
        fault but will not indefinitely stall the invalidate.
     2. When the TLB invalidate completes, in-flight memory accesses using the
        old physical translations may not yet be visible to other GPU clients
        (such as CopyEngine) or to the CPU. Accesses coming from clients that
        support recoverable faults (such as TEX and GCC) can be made visible by
        requesting the MMU to perform a membar using the ACK_TYPE and SYSMEMBAR
        fields.
          a. If ACK_TYPE is NONE the SYSMEMBAR field is ignored and no membar
             is performed.
          b. If ACK_TYPE is INTRANODE the invalidate will wait until all
             in-flight physical accesses using the old translations are visible
             to XBAR clients on the blocking VC.
          c. If ACK_TYPE is GLOBALLY the invalidate will wait until all
             in-flight physical accesses using the old translations are at the
             point of coherence in L2, meaning writes will be visible to all
             other GPU clients and reads will not be mutable by them.
          d. If the SYSMEMBAR field is set to EN then a Hub SYSMEMBAR will also
             be performed following the ACK_TYPE membar. This is the equivalent
             of performing a NV_UDMA_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR.
     3. If fault replay was requested then all pending recoverable faults in
        the TLB replay list will be retranslated. This includes all faults
        discovered while the invalidate was pending. This replay may generate
        more recoverable faults.
     4. If fault replay cancel was requested then another replay is attempted of
        all pending replayable faults on the targeted TLB(s). If any of these
        re-fault they are discarded (sticky NACK or ACK/TRAP sent back to the
        client depending on the setting of NV_PGPC_PRI_MMU_DEBUG_CTRL).



MEMBAR
------

     When the operation is MEMBAR, Host will perform a memory barrier operation.
All other fields must be set to 0 except for MEM_OP_C_MEMBAR_TYPE.  When
MEMBAR_TYPE is MEMBAR, then a memory barrier will be performed with respect to
other clients on the GPU. When it is SYS_MEMBAR, the memory barrier will also be
performed with respect to the CPU and peer GPUs.

     MEMBAR - This issues a MEMBAR operation following all reads, writes, and
atomics currently in flight from the PBDMA. The MEMBAR operation will push all
such accesses already in flight on the same VC as the PBDMA to a point of GPU
coherence before proceeding. After this operation is complete, reads from any
GPU client will see prior writes from this PBDMA, and writes from any GPU client
cannot modify the return data of earlier reads from this PBDMA. This is true
regardless of whether those accesses target vidmem, sysmem, or peer mem.
     WARNING: This only guarantees accesses from the same VC as the PBDMA that
are already in flight are coherent. Accesses from clients such as SM or a
non-PBDMA engine must already be at some point of coherency before this
operation to be coherent.

     SYS_MEMBAR - This implies the MEMBAR type above but in addition to having
accesses reach coherence with all GPU clients, this further waits for accesses
to be coherent with respect to the CPU and peer GPUs as well.  After this
operation is complete, reads from the CPU or peer GPUs will see prior writes
from this PBDMA, and writes from the CPU or peer GPUs cannot modify the return
data of earlier reads from this PBDMA (with the exception of CPU reflected
writes, which can modify earlier reads). Note SYS_MEMBAR is really only needed
to guarantee ordering with off-chip clients. For on-chip clients such as the
graphics engine or copy engine, accesses to sysmem will be coherent with just a
MEMBAR operation.  SYS_MEMBAR provides the same function as
OPERATION_SYSMEMBAR_FLUSH on previous architectures.
     WARNING: As described above, SYS_MEMBAR will not prevent CPU reflected
writes issued after the SYS_MEMBAR from clobbering the return data of reads
issued before the SYS_MEMBAR.  To handle this case, the SYS_MEMBAR must be
followed with a separate L2_WAIT_FOR_SYS_PENDING_READS mem op.



L2*
---

     These values initiate a cache management operation -- see above.  All other
fields must be 0; there are no configuration options.




The ACCESS_COUNTER_CLR operation
--------------------------------
     When MEM_OP_D_OPERATION is ACCESS_COUNTER_CLR, Host will request to clear
the page access counters. There are two types of access counters - MIMC and
MOMC. This operation can be issued to clear all counters of all types, all
counters of a specified type (MIMC or MOMC), or a specific counter indicated by
counter type, bank and notify tag.
     This operation is privileged and can only be executed from channels with
NV_PPBDMA_CONFIG_AUTH_LEVEL set to PRIVILEGED.  This is configured via the
NV_RAMFC_CONFIG dword in the channel's RAMFC during channel setup.

The operation uses the following fields in the MEM_OP_* methods:
ACCESS_COUNTER_CLR_TYPE (TY)           : type of the access counter clear
                                         operation
ACCESS_COUNTER_CLR_TARGETED_TYPE (T)   : type of the access counter for
                                         targeted operation
ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG : 20 bits notify tag of the access
                                         counter for targeted operation
ACCESS_COUNTER_CLR_TARGETED_BANK       : 4 bits bank number of the access
                                         counter for targeted operation





MEM_OP method field defines:

MEM_OP_A [method] - Memory Operation Method 1/4 - see above for documentation

#define NV_UDMA_MEM_OP_A                                             0x00000028 /* -W-4R */

#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID        5:0 /* -W-VF */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE                   5:0 /* -W-VF */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID               10:6 /* -W-VF */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID                6:0 /* -W-VF */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR                         11:11 /* -W-VF */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN                 0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS                0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO                    31:12 /* -W-VF */


MEM_OP_B [method] - Memory Operation Method 2/4 - see above for documentation

#define NV_UDMA_MEM_OP_B                                             0x0000002c /* -W-4R */

#define NV_UDMA_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI                     31:0 /* -W-VF */


MEM_OP_C [method] - Memory Operation Method 3/4 - see above for documentation

#define NV_UDMA_MEM_OP_C                                             0x00000030 /* -W-4R */

Membar configuration field.  Note: overlaps MMU_TLB_INVALIDATE* config fields.
#define NV_UDMA_MEM_OP_C_MEMBAR_TYPE                                        2:0 /* -W-VF */
#define NV_UDMA_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR                      0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_MEMBAR_TYPE_MEMBAR                          0x00000001 /* -W--V */
Invalidate TLB entries for ONE page directory base, or for ALL of them.
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB                                 0:0 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_ONE                      0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_ALL                      0x00000001 /* -W--V */
Invalidate GPC MMU TLB entries or not (Hub-MMU entries are always invalidated).
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_GPC                                 1:1 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE                   0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE                  0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY                              4:2 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE                  0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_START                 0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL         0x00000002 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED       0x00000003 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL         0x00000004 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL      0x00000005 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE                            6:5 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE                0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY            0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE           0x00000002 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE                         9:7 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ                 0 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE                1 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG        2 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD               3 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK          4 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL           5 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC     6 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL                  7 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL                    9:7 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL         0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY    0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0  0x00000002 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1  0x00000003 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2  0x00000004 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3  0x00000005 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4  0x00000006 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5  0x00000007 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE                          11:10 /* -W-VF */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM             0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT    0x00000002 /* -W--V */
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 /* -W--V */
Address[31:12] of page directory for which TLB entries should be invalidated.
#define NV_UDMA_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO                       31:12 /* -W-VF */

#define NV_UDMA_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG       19:0 /* -W-VF */

MEM_OP_D [method] - Memory Operation Method 4/4 - see above for documentation
(Must be preceded by MEM_OP_A-C.)

#define NV_UDMA_MEM_OP_D                                             0x00000034 /* -W-4R */

Address[58:32] of page directory for which TLB entries should be invalidated.
#define NV_UDMA_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI                        26:0 /* -W-VF */
#define NV_UDMA_MEM_OP_D_OPERATION                                        31:27 /* -W-VF */
#define NV_UDMA_MEM_OP_D_OPERATION_MEMBAR                            0x00000005 /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE                0x00000009 /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED       0x0000000a /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010 /* -W--V */
#define NV_UDMA_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS     0x00000015 /* -W--V */

#define NV_UDMA_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR                0x00000016 /* -W--V */

#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE                            1:0 /* -W-VF */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC                0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC                0x00000001 /* -W--V */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL                 0x00000002 /* -W--V */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED            0x00000003 /* -W--V */

#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE                   2:2 /* -W-VF */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC       0x00000000 /* -W--V */
#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC       0x00000001 /* -W--V */

#define NV_UDMA_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK                   6:3 /* -W-VF */


SET_REF [method] - Set Reference Count Method

     The SET_REF method allows the user to set the reference count
(NV_PPBDMA_REF_CNT) to a value.  The reference count may be monitored to track
Host's progress through the pushbuffer.  Instead of monitoring
NV_RAMUSERD_TOP_LEVEL_GET, software may put into the method stream SET_REF
methods that set the reference count to ever increasing values, and then read
NV_RAMUSERD_REF to determine how far in the stream Host has gone.
     Before the reference count value is altered, Host waits for the engine to
be idle (to have completed executing all earlier methods), issues a SysMemBar
flush, and waits for the flush to complete.
     While the GPU context is bound to a channel and assigned to a PBDMA unit,
the reference count value is stored in the NV_PPBDMA_REF register.  While the
GPU context is not assigned to a PBDMA unit, the reference count value is stored
in the NV_RAMFC_REF field of the RAMFC portion of the GPU context's GPU-instance
block.


#define NV_UDMA_SET_REF                                  0x00000050 /* -W-4R */

#define NV_UDMA_SET_REF_CNT                                    31:0 /* -W-VF */



CRC_CHECK [method] - Method-CRC Check Method

     When debugging a problem in a real chip, it may be useful to determine
whether a PBDMA unit has sent the proper methods toward the engine.  The
CRC_CHECK method checks whether the cyclic redundancy check value
calculated over previous methods has an expected value.  If the value in the
NV_PPBDMA_METHOD_CRC register is not equal to NV_UDMA_CRC_CHECK_VALUE, then
Host initiates an interrupt (NV_PPBDMA_INTR_0_METHODCRC) and stalls.  After
each comparison, the NV_PPBDMA_METHOD_CRC register is cleared.
     The IEEE 802.3 CRC-32 polynomial (0x04c11db7) is used to calculate CRC
values.  The CRC is calculated over the method subchannel, method address, and
method data of methods sent to an engine.  Host can send both single and dual
methods to engines.  The CRC is calculated as if dual methods were sent as
two single methods.  The CRC is calculated on the byte-stream in little-endian
order.


Pseudocode for CRC calculation is:

          // Per-byte CRC lookup table; filled in by init().
          // NOTE(review): assumes NVR_U32 is a 32-bit unsigned type, so that
          // (old_crc >> 24) ^ byte always indexes within table[0..255] -- confirm.
          static NVR_U32 table[256];

          // Build the lookup table: table[i] is the remainder obtained by
          // shifting byte i (placed in the top 8 bits) through the polynomial
          // 0x04c11db7 one bit at a time, most-significant bit first.
          void init() {
              for (NVR_U32 i = 0; i < 256; i++) { // create crc value for every byte
                  NVR_U32 crc = i << 24;
                  for (int j = 0; j < 8; j++) {   // for every bit in the byte
                      if (crc & 0x80000000) crc = (crc << 1) ^ 0x04c11db7;
                      else                  crc = (crc << 1);
                  }
                  table[i] = crc;
              }
          }

          // Fold one byte into the running CRC and return the updated CRC.
          // The table must have been populated by init() beforehand.
          NVR_U32 new_crc(unsigned char byte, NVR_U32 old_crc) {
              NVR_U32 crc_top_byte = old_crc >> 24;   // current top byte of the CRC
              crc_top_byte ^= byte;                   // combine with the incoming byte
              NVR_U32 new_crc = (old_crc << 8) ^ table[crc_top_byte];
              return new_crc;
          }

     This method is used for debug.
     This method was added in Fermi.


#define NV_UDMA_CRC_CHECK                                0x0000007c /* -W-4R */

#define NV_UDMA_CRC_CHECK_VALUE                                31:0 /* -W-VF */


YIELD [method] - Yield Method

     The YIELD method causes a channel to yield the remainder of its timeslice.
The method's OP field specifies whether the channel's PBDMA timeslice, the
channel's runlist timeslice, or no timeslice is yielded.
     If YIELD_OP_RUNLIST_TIMESLICE, then Host will act as if the channel's
runlist or TSG timeslice expired.  Host will exit the TSG and switch to the next
channel after the TSG on the runlist.  If there is no such channel to switch to,
then YIELD_OP_RUNLIST_TIMESLICE will not cause a switch.
     When the PBDMA executes a YIELD_OP_RUNLIST_TIMESLICE method, it guarantees
that it will not execute further methods from the same channel or TSG until the
channel is restarted by the scheduler.  However, note that this does not yield
the engine timeslice; if the engine is preemptable, the context will continue
to run on the engine until the remainder of its timeslice expires before Host
will attempt to preempt it.  Also if there is an outstanding ctx load either
due to ctx_reload or from the other PBDMA in the SCG case, then yielding won't
take place until the outstanding ctx load finishes or aborts due to a preempt.
When the ctx load does complete on the other PBDMA, it is possible for that
PBDMA to execute some small number of additional methods before the runlist
yield takes effect and that PBDMA halts work for its channel.
     If NV_UDMA_YIELD_OP_TSG, and if the channel is part of a TSG, then Host
will switch to the next channel in the same TSG, and if the channel is not part
of a TSG then this will be treated similarly to YIELD_OP_NOP. If there is only
one channel with work in the TSG, Host will simply reschedule the same channel
in the TSG. YIELD_OP_TSG does not cause the scheduler to leave the TSG. The TSG
timeslice (TSG timeslice is equivalent to runlist timeslice for TSGs) counter
continues to increment through the channel switch and does not restart after
executing the yield method.  When the PBDMA executes a Yield method, it
guarantees that it will not execute the method following that Yield until the
channel is restarted by the scheduler.
     YIELD_OP_NOP is simply a NOP.  Neither timeslice is yielded. This was kept
for compatibility with existing tests; NV_UDMA_NOP is the preferred NOP, but
also see the universal NOP PB instruction.  See the description of
NV_FIFO_DMA_NOP in the "FIFO_DMA" section of dev_ram.ref.

     If an unknown OP is specified, Host will raise an NV_PPBDMA_INTR_*_METHOD
interrupt.


#define NV_UDMA_YIELD                                    0x00000080 /* -W-4R */

#define NV_UDMA_YIELD_OP                                        1:0 /* -W-VF */
#define NV_UDMA_YIELD_OP_NOP                             0x00000000 /* -W--V */
#define NV_UDMA_YIELD_OP_RUNLIST_TIMESLICE               0x00000002 /* -W--V */
#define NV_UDMA_YIELD_OP_TSG                             0x00000003 /* -W--V */


WFI [method] - Wait-for-Idle Method

     The WFI (Wait-For-Idle) method will stall Host from processing any more
methods on the channel until the engine to which the channel last sent methods
is idle.  Note that the subchannel encoded in the method header is ignored (as
it is for all Host-only methods) and does NOT specify which engine to idle.  In
Kepler, this is only relevant on runlists that serve multiple engines
(specifically, the graphics runlist, which also serves GR COPY).
     The WFI method has a single field SCOPE which specifies the level of WFI
the Host method performs. ALL waits for all work in the engine from the same
context to be idle across all classes and subchannels.  CURRENT_VEID causes the
WFI to only apply to work from the same VEID as the current channel.  Note for
engines that do not support VEIDs, CURRENT_VEID works identically to ALL.
     Note that Host methods ignore the subchannel field in the method.  A Host
WFI method always applies to the engine the channel last sent methods to.  If a
WFI with ALL is specified and the channel last sent work to the GRCE, this will
only guarantee that GRCE has no work in progress.  It is possible that the GR
context will have work in progress from other VEIDs, or even the current VEID if
the current channel targets GRCE and has never sent FE methods before.  This
means that if SW wants to idle the graphics pipe for all VEIDs, SW must send a
method to GR immediately before the WFI method.  A GR_NOP is sufficient.
     Note also that even if the current NV_PPBDMA_TARGET is GRAPHICS and not
GRCE, there are cases where Host can trivially complete a WFI without sending
the NV_PMETHOD_HOST_WFI internal method to FE.  This can happen when

1. the runlist timeslices to a different TSG just before the WFI method,
2. the other TSG does a ctxsw request due to methods for FE, and
3. FECS reports non-preempted in the ctx ack, so CTX_RELOAD doesn't get set.

In that case, when the channel switches back onto the PBDMA, the PBDMA rightly
concludes that there is no way the context could be non-idle for that channel,
and therefore filters out the WFI, even if the other PBDMA is sending work to
other VEIDs.  As in the subchannel case, a GR_NOP preceding the WFI is
sufficient to ensure that a SCOPE_ALL_VEID WFI will be sent to FE regardless of
timeslicing as long as the NOP and the WFI are submitted as part of the same
GP_PUT update.  This is ensured by the semantics of the channel state
SHOULD_SEND_HOST_TSG_EVENT behaving like CTX_RELOAD: the GR_NOP causes the PBDMA
to set the SHOULD_SEND_HOST_TSG_EVENT state, so even a channel or context switch
will still result in the PBDMA having the engine context loaded.  Thus the WFI
will cause the HOST_WFI internal method to be sent to FE.


#define NV_UDMA_WFI                                      0x00000078 /* -W-4R */

#define NV_UDMA_WFI_SCOPE                                       0:0 /* -W-VF */
#define NV_UDMA_WFI_SCOPE_CURRENT_VEID                   0x00000000 /* -W--V */
#define NV_UDMA_WFI_SCOPE_ALL                            0x00000001 /* -W--V */
#define NV_UDMA_WFI_SCOPE_ALL_VEID                       0x00000001 /*       */



CLEAR_FAULTED [method] - Clear Faulted Method

     The CLEAR_FAULTED method clears a channel's PCCSR PBDMA_FAULTED or
ENG_FAULTED bit. These bits are set by Host in response to a PBDMA fault or
engine fault respectively on the specified channel; see dev_fifo.ref.

     The CHID field specifies the ID of the channel whose FAULTED bit is to be
cleared.

     The TYPE field specifies which FAULTED bit is to be cleared: either
PBDMA_FAULTED or ENG_FAULTED.

     When Host receives a CLEAR_FAULTED method for a channel, the corresponding
PCCSR FAULTED bit for the channel should be set. However, due to a race between
SW seeing the fault message from MMU and handling the fault and sending the
CLEAR_FAULTED method versus Host seeing the fault from CE or MMU and setting the
FAULTED bit, it is possible for the CLEAR_FAULTED method to arrive before the
FAULTED bit is set. Host will handle a CLEAR_FAULTED method according to the
following cases:

     a. The FAULTED bit specified by TYPE is set. Host will clear the bit and
retire the CLEAR_FAULTED method.

     b. If the bit is not set, the PBDMA will continue to retry the
CLEAR_FAULTED method on every PTIMER microsecond tick by rechecking the FAULTED
bit of the target channel. Once the bit is set, the PBDMA will clear the bit and
retire the method. The execution of the fault handling channel will stall on the
CLEAR_FAULTED method until the FAULTED bit for the target channel is set. The
PBDMA will retry the CLEAR_FAULTED method approximately every microsecond.

     c. If the fault handling channel's timeslice expires while stalled on a
CLEAR_FAULTED method, the channel will switch out. Once rescheduled, the
channel will resume retrying the CLEAR_FAULTED method.

     d. To avoid indefinitely waiting for the CLEAR_FAULTED method to retire
(likely due to wrongly injected CLEAR_FAULTED method due to a SW bug), Host
has a timeout mechanism to inform SW of a potential bug. This timeout is
controlled by NV_PFIFO_CLEAR_FAULTED_TIMEOUT; see dev_fifo.ref for details.

     e. When a CLEAR_FAULTED timeout is detected, Host will raise a stalling
interrupt by setting the NV_PPBDMA_INTR_0_CLEAR_FAULTED_ERROR field. The
address of the invalid CLEAR_FAULTED method will be in NV_PPBDMA_METHOD0, and
its payload will be in NV_PPBDMA_DATA0.

     Note: setting the timeout value too low could result in false stalling
interrupts to SW. The timeout should be set equal to NV_PFIFO_FB_TIMEOUT_PERIOD.

     Note the CLEAR_FAULTED timeout mechanism uses the same PBDMA registers and
RAMFC fields as the semaphore acquire timeout mechanism:
NV_PPBDMA_SEM_EXECUTE_ACQUIRE_FAIL is set TRUE when the first attempt fails, and
the NV_PPBDMA_ACQUIRE_DEADLINE is loaded with the sum of the current PTIMER and
the NV_PFIFO_CLEAR_FAULTED_TIMEOUT.  The ACQUIRE_FAIL bit is reset to FALSE when
the CLEAR_FAULTED method times out or succeeds.


#define NV_UDMA_CLEAR_FAULTED                            0x00000084 /* -W-4R */

#define NV_UDMA_CLEAR_FAULTED_CHID                             11:0 /* -W-VF */
#define NV_UDMA_CLEAR_FAULTED_TYPE                            31:31 /* -W-VF */
#define NV_UDMA_CLEAR_FAULTED_TYPE_PBDMA_FAULTED         0x00000000 /* -W--V */
#define NV_UDMA_CLEAR_FAULTED_TYPE_ENG_FAULTED           0x00000001 /* -W--V */



     Addresses that are not defined in this device are reserved.  Those below
0x100 are reserved for future Host methods.  Addresses 0x100 and beyond are
reserved for the engines served by Host.