1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include "dc.h"
27#include "dc_link.h"
28#include "../display_mode_lib.h"
29#include "display_mode_vba_31.h"
30#include "../dml_inline_defs.h"
31
32
33
34
35
36
37
38
39
40
/*
 * Bits-per-pixel marker values and DCN 3.1 width limits.
 *
 * NOTE(review): none of these macros is referenced in the visible part of
 * this file, so the descriptions below are inferred from the names only -
 * confirm against the use sites.
 *
 * BPP_INVALID                     - presumably "no valid BPP available".
 * BPP_BLENDED_PIPE                - presumably marks a pipe that is blended
 *                                   into another one; all 32 bits set.
 * DCN31_MAX_DSC_IMAGE_WIDTH       - presumably the widest image DSC accepts.
 * DCN31_MAX_FMT_420_BUFFER_WIDTH  - presumably the widest line the 4:2:0
 *                                   format buffer accepts.
 */
41#define BPP_INVALID 0
42#define BPP_BLENDED_PIPE 0xffffffff
43#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
45
46
47
48
49
50
51
/*
 * NOTE(review): presumably the smallest VStartup value the model considers;
 * not referenced in the visible part of this file - confirm at the use site.
 */
52#define __DML_VBA_MIN_VSTARTUP__ 9
53
54
/*
 * Expands to 102, written as the sum of two separate contributions.
 * NOTE(review): what each addend stands for, and the unit, are not visible
 * in this part of the file - confirm before changing either number.
 */
55#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
56
57
/*
 * NOTE(review): presumably a margin multiplier (15% headroom) applied when a
 * minimum DCFCLK is derived; not referenced in the visible part of this
 * file - confirm at the use site.
 */
58#define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
/*
 * Pipe - per-pipe inputs bundled for CalculatePrefetchSchedule(), which
 * receives a pointer to one of these as its myPipe argument.
 *
 * The field notes describe only the uses visible in this part of the file;
 * a field without a note is not referenced in the visible code.
 */
60typedef struct {
61 double DPPCLK; /* divides the DPP cycle count in DSTXAfterScaler; CalculatePrefetchSchedule() returns true early when 0 */
62 double DISPCLK; /* divides the DISPCLK cycle count in DSTXAfterScaler; same early return when 0 */
63 double PixelClock; /* LineTime = HTotal / PixelClock; also scales both cycle counts */
64 double DCFCLKDeepSleep; /* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
65 unsigned int DPPPerPlane;
66 bool ScalerEnabled; /* true: add DPPCLKDelaySCL, false: add DPPCLKDelaySCLLBOnly */
67 enum scan_direction_class SourceScan;
68 unsigned int BlockWidth256BytesY;
69 unsigned int BlockHeight256BytesY;
70 unsigned int BlockWidth256BytesC;
71 unsigned int BlockHeight256BytesC;
72 unsigned int InterlaceEnable; /* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
73 unsigned int NumberOfCursors; /* each cursor adds DPPCLKDelayCNVCCursor to the DPP cycle count */
74 unsigned int VBlank; /* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
75 unsigned int HTotal; /* numerator of LineTime; also forwarded with VBlank */
76 unsigned int DCCEnable;
77 bool ODMCombineIsEnabled;
78 enum source_format_class SourcePixelFormat;
79 int BytePerPixelY;
80 int BytePerPixelC;
81 bool ProgressiveToInterlaceUnitInOPP; /* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
82} Pipe;
83
/*
 * NOTE(review): these two macros repeat, token for token, the definitions
 * made earlier in this file. An identical redefinition is accepted by the
 * preprocessor, so this is harmless but redundant; if one copy is ever
 * edited the other must be edited the same way (or removed).
 */
84#define BPP_INVALID 0
85#define BPP_BLENDED_PIPE 0xffffffff
86
87static bool CalculateBytePerPixelAnd256BBlockSizes(
88 enum source_format_class SourcePixelFormat,
89 enum dm_swizzle_mode SurfaceTiling,
90 unsigned int *BytePerPixelY,
91 unsigned int *BytePerPixelC,
92 double *BytePerPixelDETY,
93 double *BytePerPixelDETC,
94 unsigned int *BlockHeight256BytesY,
95 unsigned int *BlockHeight256BytesC,
96 unsigned int *BlockWidth256BytesY,
97 unsigned int *BlockWidth256BytesC);
98static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
99static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
100static unsigned int dscceComputeDelay(
101 unsigned int bpc,
102 double BPP,
103 unsigned int sliceWidth,
104 unsigned int numSlices,
105 enum output_format_class pixelFormat,
106 enum output_encoder_class Output);
107static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
108static bool CalculatePrefetchSchedule(
109 struct display_mode_lib *mode_lib,
110 double HostVMInefficiencyFactor,
111 Pipe *myPipe,
112 unsigned int DSCDelay,
113 double DPPCLKDelaySubtotalPlusCNVCFormater,
114 double DPPCLKDelaySCL,
115 double DPPCLKDelaySCLLBOnly,
116 double DPPCLKDelayCNVCCursor,
117 double DISPCLKDelaySubtotal,
118 unsigned int DPP_RECOUT_WIDTH,
119 enum output_format_class OutputFormat,
120 unsigned int MaxInterDCNTileRepeaters,
121 unsigned int VStartup,
122 unsigned int MaxVStartup,
123 unsigned int GPUVMPageTableLevels,
124 bool GPUVMEnable,
125 bool HostVMEnable,
126 unsigned int HostVMMaxNonCachedPageTableLevels,
127 double HostVMMinPageSize,
128 bool DynamicMetadataEnable,
129 bool DynamicMetadataVMEnabled,
130 int DynamicMetadataLinesBeforeActiveRequired,
131 unsigned int DynamicMetadataTransmittedBytes,
132 double UrgentLatency,
133 double UrgentExtraLatency,
134 double TCalc,
135 unsigned int PDEAndMetaPTEBytesFrame,
136 unsigned int MetaRowByte,
137 unsigned int PixelPTEBytesPerRow,
138 double PrefetchSourceLinesY,
139 unsigned int SwathWidthY,
140 double VInitPreFillY,
141 unsigned int MaxNumSwathY,
142 double PrefetchSourceLinesC,
143 unsigned int SwathWidthC,
144 double VInitPreFillC,
145 unsigned int MaxNumSwathC,
146 int swath_width_luma_ub,
147 int swath_width_chroma_ub,
148 unsigned int SwathHeightY,
149 unsigned int SwathHeightC,
150 double TWait,
151 double *DSTXAfterScaler,
152 double *DSTYAfterScaler,
153 double *DestinationLinesForPrefetch,
154 double *PrefetchBandwidth,
155 double *DestinationLinesToRequestVMInVBlank,
156 double *DestinationLinesToRequestRowInVBlank,
157 double *VRatioPrefetchY,
158 double *VRatioPrefetchC,
159 double *RequiredPrefetchPixDataBWLuma,
160 double *RequiredPrefetchPixDataBWChroma,
161 bool *NotEnoughTimeForDynamicMetadata,
162 double *Tno_bw,
163 double *prefetch_vmrow_bw,
164 double *Tdmdl_vm,
165 double *Tdmdl,
166 double *TSetup,
167 int *VUpdateOffsetPix,
168 double *VUpdateWidthPix,
169 double *VReadyOffsetPix);
170static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
171static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
172static void CalculateDCCConfiguration(
173 bool DCCEnabled,
174 bool DCCProgrammingAssumesScanDirectionUnknown,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceWidthLuma,
177 unsigned int SurfaceWidthChroma,
178 unsigned int SurfaceHeightLuma,
179 unsigned int SurfaceHeightChroma,
180 double DETBufferSize,
181 unsigned int RequestHeight256ByteLuma,
182 unsigned int RequestHeight256ByteChroma,
183 enum dm_swizzle_mode TilingFormat,
184 unsigned int BytePerPixelY,
185 unsigned int BytePerPixelC,
186 double BytePerPixelDETY,
187 double BytePerPixelDETC,
188 enum scan_direction_class ScanOrientation,
189 unsigned int *MaxUncompressedBlockLuma,
190 unsigned int *MaxUncompressedBlockChroma,
191 unsigned int *MaxCompressedBlockLuma,
192 unsigned int *MaxCompressedBlockChroma,
193 unsigned int *IndependentBlockLuma,
194 unsigned int *IndependentBlockChroma);
195static double CalculatePrefetchSourceLines(
196 struct display_mode_lib *mode_lib,
197 double VRatio,
198 double vtaps,
199 bool Interlace,
200 bool ProgressiveToInterlaceUnitInOPP,
201 unsigned int SwathHeight,
202 unsigned int ViewportYStart,
203 double *VInitPreFill,
204 unsigned int *MaxNumSwath);
205static unsigned int CalculateVMAndRowBytes(
206 struct display_mode_lib *mode_lib,
207 bool DCCEnable,
208 unsigned int BlockHeight256Bytes,
209 unsigned int BlockWidth256Bytes,
210 enum source_format_class SourcePixelFormat,
211 unsigned int SurfaceTiling,
212 unsigned int BytePerPixel,
213 enum scan_direction_class ScanDirection,
214 unsigned int SwathWidth,
215 unsigned int ViewportHeight,
216 bool GPUVMEnable,
217 bool HostVMEnable,
218 unsigned int HostVMMaxNonCachedPageTableLevels,
219 unsigned int GPUVMMinPageSize,
220 unsigned int HostVMMinPageSize,
221 unsigned int PTEBufferSizeInRequests,
222 unsigned int Pitch,
223 unsigned int DCCMetaPitch,
224 unsigned int *MacroTileWidth,
225 unsigned int *MetaRowByte,
226 unsigned int *PixelPTEBytesPerRow,
227 bool *PTEBufferSizeNotExceeded,
228 int *dpte_row_width_ub,
229 unsigned int *dpte_row_height,
230 unsigned int *MetaRequestWidth,
231 unsigned int *MetaRequestHeight,
232 unsigned int *meta_row_width,
233 unsigned int *meta_row_height,
234 int *vm_group_bytes,
235 unsigned int *dpte_group_bytes,
236 unsigned int *PixelPTEReqWidth,
237 unsigned int *PixelPTEReqHeight,
238 unsigned int *PTERequestSize,
239 int *DPDE0BytesFrame,
240 int *MetaPTEBytesFrame);
241static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
242static void CalculateRowBandwidth(
243 bool GPUVMEnable,
244 enum source_format_class SourcePixelFormat,
245 double VRatio,
246 double VRatioChroma,
247 bool DCCEnable,
248 double LineTime,
249 unsigned int MetaRowByteLuma,
250 unsigned int MetaRowByteChroma,
251 unsigned int meta_row_height_luma,
252 unsigned int meta_row_height_chroma,
253 unsigned int PixelPTEBytesPerRowLuma,
254 unsigned int PixelPTEBytesPerRowChroma,
255 unsigned int dpte_row_height_luma,
256 unsigned int dpte_row_height_chroma,
257 double *meta_row_bw,
258 double *dpte_row_bw);
259
260static void CalculateFlipSchedule(
261 struct display_mode_lib *mode_lib,
262 double HostVMInefficiencyFactor,
263 double UrgentExtraLatency,
264 double UrgentLatency,
265 unsigned int GPUVMMaxPageTableLevels,
266 bool HostVMEnable,
267 unsigned int HostVMMaxNonCachedPageTableLevels,
268 bool GPUVMEnable,
269 double HostVMMinPageSize,
270 double PDEAndMetaPTEBytesPerFrame,
271 double MetaRowBytes,
272 double DPTEBytesPerRow,
273 double BandwidthAvailableForImmediateFlip,
274 unsigned int TotImmediateFlipBytes,
275 enum source_format_class SourcePixelFormat,
276 double LineTime,
277 double VRatio,
278 double VRatioChroma,
279 double Tno_bw,
280 bool DCCEnable,
281 unsigned int dpte_row_height,
282 unsigned int meta_row_height,
283 unsigned int dpte_row_height_chroma,
284 unsigned int meta_row_height_chroma,
285 double *DestinationLinesToRequestVMInImmediateFlip,
286 double *DestinationLinesToRequestRowInImmediateFlip,
287 double *final_flip_bw,
288 bool *ImmediateFlipSupportedForPipe);
289static double CalculateWriteBackDelay(
290 enum source_format_class WritebackPixelFormat,
291 double WritebackHRatio,
292 double WritebackVRatio,
293 unsigned int WritebackVTaps,
294 int WritebackDestinationWidth,
295 int WritebackDestinationHeight,
296 int WritebackSourceHeight,
297 unsigned int HTotal);
298
299static void CalculateVupdateAndDynamicMetadataParameters(
300 int MaxInterDCNTileRepeaters,
301 double DPPCLK,
302 double DISPCLK,
303 double DCFClkDeepSleep,
304 double PixelClock,
305 int HTotal,
306 int VBlank,
307 int DynamicMetadataTransmittedBytes,
308 int DynamicMetadataLinesBeforeActiveRequired,
309 int InterlaceEnable,
310 bool ProgressiveToInterlaceUnitInOPP,
311 double *TSetup,
312 double *Tdmbf,
313 double *Tdmec,
314 double *Tdmsks,
315 int *VUpdateOffsetPix,
316 double *VUpdateWidthPix,
317 double *VReadyOffsetPix);
318
319static void CalculateWatermarksAndDRAMSpeedChangeSupport(
320 struct display_mode_lib *mode_lib,
321 unsigned int PrefetchMode,
322 unsigned int NumberOfActivePlanes,
323 unsigned int MaxLineBufferLines,
324 unsigned int LineBufferSize,
325 unsigned int WritebackInterfaceBufferSize,
326 double DCFCLK,
327 double ReturnBW,
328 bool SynchronizedVBlank,
329 unsigned int dpte_group_bytes[],
330 unsigned int MetaChunkSize,
331 double UrgentLatency,
332 double ExtraLatency,
333 double WritebackLatency,
334 double WritebackChunkSize,
335 double SOCCLK,
336 double DRAMClockChangeLatency,
337 double SRExitTime,
338 double SREnterPlusExitTime,
339 double SRExitZ8Time,
340 double SREnterPlusExitZ8Time,
341 double DCFCLKDeepSleep,
342 unsigned int DETBufferSizeY[],
343 unsigned int DETBufferSizeC[],
344 unsigned int SwathHeightY[],
345 unsigned int SwathHeightC[],
346 unsigned int LBBitPerPixel[],
347 double SwathWidthY[],
348 double SwathWidthC[],
349 double HRatio[],
350 double HRatioChroma[],
351 unsigned int vtaps[],
352 unsigned int VTAPsChroma[],
353 double VRatio[],
354 double VRatioChroma[],
355 unsigned int HTotal[],
356 double PixelClock[],
357 unsigned int BlendingAndTiming[],
358 unsigned int DPPPerPlane[],
359 double BytePerPixelDETY[],
360 double BytePerPixelDETC[],
361 double DSTXAfterScaler[],
362 double DSTYAfterScaler[],
363 bool WritebackEnable[],
364 enum source_format_class WritebackPixelFormat[],
365 double WritebackDestinationWidth[],
366 double WritebackDestinationHeight[],
367 double WritebackSourceHeight[],
368 bool UnboundedRequestEnabled,
369 int unsigned CompressedBufferSizeInkByte,
370 enum clock_change_support *DRAMClockChangeSupport,
371 double *UrgentWatermark,
372 double *WritebackUrgentWatermark,
373 double *DRAMClockChangeWatermark,
374 double *WritebackDRAMClockChangeWatermark,
375 double *StutterExitWatermark,
376 double *StutterEnterPlusExitWatermark,
377 double *Z8StutterExitWatermark,
378 double *Z8StutterEnterPlusExitWatermark,
379 double *MinActiveDRAMClockChangeLatencySupported);
380
381static void CalculateDCFCLKDeepSleep(
382 struct display_mode_lib *mode_lib,
383 unsigned int NumberOfActivePlanes,
384 int BytePerPixelY[],
385 int BytePerPixelC[],
386 double VRatio[],
387 double VRatioChroma[],
388 double SwathWidthY[],
389 double SwathWidthC[],
390 unsigned int DPPPerPlane[],
391 double HRatio[],
392 double HRatioChroma[],
393 double PixelClock[],
394 double PSCL_THROUGHPUT[],
395 double PSCL_THROUGHPUT_CHROMA[],
396 double DPPCLK[],
397 double ReadBandwidthLuma[],
398 double ReadBandwidthChroma[],
399 int ReturnBusWidth,
400 double *DCFCLKDeepSleep);
401
402static void CalculateUrgentBurstFactor(
403 int swath_width_luma_ub,
404 int swath_width_chroma_ub,
405 unsigned int SwathHeightY,
406 unsigned int SwathHeightC,
407 double LineTime,
408 double UrgentLatency,
409 double CursorBufferSize,
410 unsigned int CursorWidth,
411 unsigned int CursorBPP,
412 double VRatio,
413 double VRatioC,
414 double BytePerPixelInDETY,
415 double BytePerPixelInDETC,
416 double DETBufferSizeY,
417 double DETBufferSizeC,
418 double *UrgentBurstFactorCursor,
419 double *UrgentBurstFactorLuma,
420 double *UrgentBurstFactorChroma,
421 bool *NotEnoughUrgentLatencyHiding);
422
423static void UseMinimumDCFCLK(
424 struct display_mode_lib *mode_lib,
425 int MaxInterDCNTileRepeaters,
426 int MaxPrefetchMode,
427 double FinalDRAMClockChangeLatency,
428 double SREnterPlusExitTime,
429 int ReturnBusWidth,
430 int RoundTripPingLatencyCycles,
431 int ReorderingBytes,
432 int PixelChunkSizeInKByte,
433 int MetaChunkSize,
434 bool GPUVMEnable,
435 int GPUVMMaxPageTableLevels,
436 bool HostVMEnable,
437 int NumberOfActivePlanes,
438 double HostVMMinPageSize,
439 int HostVMMaxNonCachedPageTableLevels,
440 bool DynamicMetadataVMEnabled,
441 enum immediate_flip_requirement ImmediateFlipRequirement,
442 bool ProgressiveToInterlaceUnitInOPP,
443 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
444 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
445 int VTotal[],
446 int VActive[],
447 int DynamicMetadataTransmittedBytes[],
448 int DynamicMetadataLinesBeforeActiveRequired[],
449 bool Interlace[],
450 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
451 double RequiredDISPCLK[][2],
452 double UrgLatency[],
453 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
454 double ProjectedDCFCLKDeepSleep[][2],
455 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
456 double TotalVActivePixelBandwidth[][2],
457 double TotalVActiveCursorBandwidth[][2],
458 double TotalMetaRowBandwidth[][2],
459 double TotalDPTERowBandwidth[][2],
460 unsigned int TotalNumberOfActiveDPP[][2],
461 unsigned int TotalNumberOfDCCActiveDPP[][2],
462 int dpte_group_bytes[],
463 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
464 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
465 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
466 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
467 int BytePerPixelY[],
468 int BytePerPixelC[],
469 int HTotal[],
470 double PixelClock[],
471 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
472 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
473 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
474 bool DynamicMetadataEnable[],
475 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
476 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
477 double ReadBandwidthLuma[],
478 double ReadBandwidthChroma[],
479 double DCFCLKPerState[],
480 double DCFCLKState[][2]);
481
482static void CalculatePixelDeliveryTimes(
483 unsigned int NumberOfActivePlanes,
484 double VRatio[],
485 double VRatioChroma[],
486 double VRatioPrefetchY[],
487 double VRatioPrefetchC[],
488 unsigned int swath_width_luma_ub[],
489 unsigned int swath_width_chroma_ub[],
490 unsigned int DPPPerPlane[],
491 double HRatio[],
492 double HRatioChroma[],
493 double PixelClock[],
494 double PSCL_THROUGHPUT[],
495 double PSCL_THROUGHPUT_CHROMA[],
496 double DPPCLK[],
497 int BytePerPixelC[],
498 enum scan_direction_class SourceScan[],
499 unsigned int NumberOfCursors[],
500 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
501 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
502 unsigned int BlockWidth256BytesY[],
503 unsigned int BlockHeight256BytesY[],
504 unsigned int BlockWidth256BytesC[],
505 unsigned int BlockHeight256BytesC[],
506 double DisplayPipeLineDeliveryTimeLuma[],
507 double DisplayPipeLineDeliveryTimeChroma[],
508 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
509 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
510 double DisplayPipeRequestDeliveryTimeLuma[],
511 double DisplayPipeRequestDeliveryTimeChroma[],
512 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
513 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
514 double CursorRequestDeliveryTime[],
515 double CursorRequestDeliveryTimePrefetch[]);
516
517static void CalculateMetaAndPTETimes(
518 int NumberOfActivePlanes,
519 bool GPUVMEnable,
520 int MetaChunkSize,
521 int MinMetaChunkSizeBytes,
522 int HTotal[],
523 double VRatio[],
524 double VRatioChroma[],
525 double DestinationLinesToRequestRowInVBlank[],
526 double DestinationLinesToRequestRowInImmediateFlip[],
527 bool DCCEnable[],
528 double PixelClock[],
529 int BytePerPixelY[],
530 int BytePerPixelC[],
531 enum scan_direction_class SourceScan[],
532 int dpte_row_height[],
533 int dpte_row_height_chroma[],
534 int meta_row_width[],
535 int meta_row_width_chroma[],
536 int meta_row_height[],
537 int meta_row_height_chroma[],
538 int meta_req_width[],
539 int meta_req_width_chroma[],
540 int meta_req_height[],
541 int meta_req_height_chroma[],
542 int dpte_group_bytes[],
543 int PTERequestSizeY[],
544 int PTERequestSizeC[],
545 int PixelPTEReqWidthY[],
546 int PixelPTEReqHeightY[],
547 int PixelPTEReqWidthC[],
548 int PixelPTEReqHeightC[],
549 int dpte_row_width_luma_ub[],
550 int dpte_row_width_chroma_ub[],
551 double DST_Y_PER_PTE_ROW_NOM_L[],
552 double DST_Y_PER_PTE_ROW_NOM_C[],
553 double DST_Y_PER_META_ROW_NOM_L[],
554 double DST_Y_PER_META_ROW_NOM_C[],
555 double TimePerMetaChunkNominal[],
556 double TimePerChromaMetaChunkNominal[],
557 double TimePerMetaChunkVBlank[],
558 double TimePerChromaMetaChunkVBlank[],
559 double TimePerMetaChunkFlip[],
560 double TimePerChromaMetaChunkFlip[],
561 double time_per_pte_group_nom_luma[],
562 double time_per_pte_group_vblank_luma[],
563 double time_per_pte_group_flip_luma[],
564 double time_per_pte_group_nom_chroma[],
565 double time_per_pte_group_vblank_chroma[],
566 double time_per_pte_group_flip_chroma[]);
567
568static void CalculateVMGroupAndRequestTimes(
569 unsigned int NumberOfActivePlanes,
570 bool GPUVMEnable,
571 unsigned int GPUVMMaxPageTableLevels,
572 unsigned int HTotal[],
573 int BytePerPixelC[],
574 double DestinationLinesToRequestVMInVBlank[],
575 double DestinationLinesToRequestVMInImmediateFlip[],
576 bool DCCEnable[],
577 double PixelClock[],
578 int dpte_row_width_luma_ub[],
579 int dpte_row_width_chroma_ub[],
580 int vm_group_bytes[],
581 unsigned int dpde0_bytes_per_frame_ub_l[],
582 unsigned int dpde0_bytes_per_frame_ub_c[],
583 int meta_pte_bytes_per_frame_ub_l[],
584 int meta_pte_bytes_per_frame_ub_c[],
585 double TimePerVMGroupVBlank[],
586 double TimePerVMGroupFlip[],
587 double TimePerVMRequestVBlank[],
588 double TimePerVMRequestFlip[]);
589
590static void CalculateStutterEfficiency(
591 struct display_mode_lib *mode_lib,
592 int CompressedBufferSizeInkByte,
593 bool UnboundedRequestEnabled,
594 int ConfigReturnBufferSizeInKByte,
595 int MetaFIFOSizeInKEntries,
596 int ZeroSizeBufferEntries,
597 int NumberOfActivePlanes,
598 int ROBBufferSizeInKByte,
599 double TotalDataReadBandwidth,
600 double DCFCLK,
601 double ReturnBW,
602 double COMPBUF_RESERVED_SPACE_64B,
603 double COMPBUF_RESERVED_SPACE_ZS,
604 double SRExitTime,
605 double SRExitZ8Time,
606 bool SynchronizedVBlank,
607 double Z8StutterEnterPlusExitWatermark,
608 double StutterEnterPlusExitWatermark,
609 bool ProgressiveToInterlaceUnitInOPP,
610 bool Interlace[],
611 double MinTTUVBlank[],
612 int DPPPerPlane[],
613 unsigned int DETBufferSizeY[],
614 int BytePerPixelY[],
615 double BytePerPixelDETY[],
616 double SwathWidthY[],
617 int SwathHeightY[],
618 int SwathHeightC[],
619 double NetDCCRateLuma[],
620 double NetDCCRateChroma[],
621 double DCCFractionOfZeroSizeRequestsLuma[],
622 double DCCFractionOfZeroSizeRequestsChroma[],
623 int HTotal[],
624 int VTotal[],
625 double PixelClock[],
626 double VRatio[],
627 enum scan_direction_class SourceScan[],
628 int BlockHeight256BytesY[],
629 int BlockWidth256BytesY[],
630 int BlockHeight256BytesC[],
631 int BlockWidth256BytesC[],
632 int DCCYMaxUncompressedBlock[],
633 int DCCCMaxUncompressedBlock[],
634 int VActive[],
635 bool DCCEnable[],
636 bool WritebackEnable[],
637 double ReadBandwidthPlaneLuma[],
638 double ReadBandwidthPlaneChroma[],
639 double meta_row_bw[],
640 double dpte_row_bw[],
641 double *StutterEfficiencyNotIncludingVBlank,
642 double *StutterEfficiency,
643 int *NumberOfStutterBurstsPerFrame,
644 double *Z8StutterEfficiencyNotIncludingVBlank,
645 double *Z8StutterEfficiency,
646 int *Z8NumberOfStutterBurstsPerFrame,
647 double *StutterPeriod);
648
649static void CalculateSwathAndDETConfiguration(
650 bool ForceSingleDPP,
651 int NumberOfActivePlanes,
652 unsigned int DETBufferSizeInKByte,
653 double MaximumSwathWidthLuma[],
654 double MaximumSwathWidthChroma[],
655 enum scan_direction_class SourceScan[],
656 enum source_format_class SourcePixelFormat[],
657 enum dm_swizzle_mode SurfaceTiling[],
658 int ViewportWidth[],
659 int ViewportHeight[],
660 int SurfaceWidthY[],
661 int SurfaceWidthC[],
662 int SurfaceHeightY[],
663 int SurfaceHeightC[],
664 int Read256BytesBlockHeightY[],
665 int Read256BytesBlockHeightC[],
666 int Read256BytesBlockWidthY[],
667 int Read256BytesBlockWidthC[],
668 enum odm_combine_mode ODMCombineEnabled[],
669 int BlendingAndTiming[],
670 int BytePerPixY[],
671 int BytePerPixC[],
672 double BytePerPixDETY[],
673 double BytePerPixDETC[],
674 int HActive[],
675 double HRatio[],
676 double HRatioChroma[],
677 int DPPPerPlane[],
678 int swath_width_luma_ub[],
679 int swath_width_chroma_ub[],
680 double SwathWidth[],
681 double SwathWidthChroma[],
682 int SwathHeightY[],
683 int SwathHeightC[],
684 unsigned int DETBufferSizeY[],
685 unsigned int DETBufferSizeC[],
686 bool ViewportSizeSupportPerPlane[],
687 bool *ViewportSizeSupport);
688static void CalculateSwathWidth(
689 bool ForceSingleDPP,
690 int NumberOfActivePlanes,
691 enum source_format_class SourcePixelFormat[],
692 enum scan_direction_class SourceScan[],
693 int ViewportWidth[],
694 int ViewportHeight[],
695 int SurfaceWidthY[],
696 int SurfaceWidthC[],
697 int SurfaceHeightY[],
698 int SurfaceHeightC[],
699 enum odm_combine_mode ODMCombineEnabled[],
700 int BytePerPixY[],
701 int BytePerPixC[],
702 int Read256BytesBlockHeightY[],
703 int Read256BytesBlockHeightC[],
704 int Read256BytesBlockWidthY[],
705 int Read256BytesBlockWidthC[],
706 int BlendingAndTiming[],
707 int HActive[],
708 double HRatio[],
709 int DPPPerPlane[],
710 double SwathWidthSingleDPPY[],
711 double SwathWidthSingleDPPC[],
712 double SwathWidthY[],
713 double SwathWidthC[],
714 int MaximumSwathHeightY[],
715 int MaximumSwathHeightC[],
716 int swath_width_luma_ub[],
717 int swath_width_chroma_ub[]);
718
719static double CalculateExtraLatency(
720 int RoundTripPingLatencyCycles,
721 int ReorderingBytes,
722 double DCFCLK,
723 int TotalNumberOfActiveDPP,
724 int PixelChunkSizeInKByte,
725 int TotalNumberOfDCCActiveDPP,
726 int MetaChunkSize,
727 double ReturnBW,
728 bool GPUVMEnable,
729 bool HostVMEnable,
730 int NumberOfActivePlanes,
731 int NumberOfDPP[],
732 int dpte_group_bytes[],
733 double HostVMInefficiencyFactor,
734 double HostVMMinPageSize,
735 int HostVMMaxNonCachedPageTableLevels);
736
737static double CalculateExtraLatencyBytes(
738 int ReorderingBytes,
739 int TotalNumberOfActiveDPP,
740 int PixelChunkSizeInKByte,
741 int TotalNumberOfDCCActiveDPP,
742 int MetaChunkSize,
743 bool GPUVMEnable,
744 bool HostVMEnable,
745 int NumberOfActivePlanes,
746 int NumberOfDPP[],
747 int dpte_group_bytes[],
748 double HostVMInefficiencyFactor,
749 double HostVMMinPageSize,
750 int HostVMMaxNonCachedPageTableLevels);
751
752static double CalculateUrgentLatency(
753 double UrgentLatencyPixelDataOnly,
754 double UrgentLatencyPixelMixedWithVMData,
755 double UrgentLatencyVMDataOnly,
756 bool DoUrgentLatencyAdjustment,
757 double UrgentLatencyAdjustmentFabricClockComponent,
758 double UrgentLatencyAdjustmentFabricClockReference,
759 double FabricClockSingle);
760
761static void CalculateUnboundedRequestAndCompressedBufferSize(
762 unsigned int DETBufferSizeInKByte,
763 int ConfigReturnBufferSizeInKByte,
764 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
765 int TotalActiveDPP,
766 bool NoChromaPlanes,
767 int MaxNumDPP,
768 int CompressedBufferSegmentSizeInkByteFinal,
769 enum output_encoder_class *Output,
770 bool *UnboundedRequestEnabled,
771 int *CompressedBufferSizeInkByte);
772
773static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
774
/*
 * dml31_recalculate - run the full recalculation sequence on @mode_lib.
 *
 * The four stages are invoked strictly in this order, each receiving the
 * same @mode_lib pointer; nothing is returned, so all results are left in
 * whatever state the stages write through @mode_lib.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

	/* Trace the entry into the final (and largest) stage in debug builds. */
#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
785
786static unsigned int dscceComputeDelay(
787 unsigned int bpc,
788 double BPP,
789 unsigned int sliceWidth,
790 unsigned int numSlices,
791 enum output_format_class pixelFormat,
792 enum output_encoder_class Output)
793{
794
795
796
797
798
799
800
801
802
803 unsigned int rcModelSize = 8192;
804
805
806 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
807
808 if (pixelFormat == dm_420)
809 pixelsPerClock = 2;
810 else if (pixelFormat == dm_444)
811 pixelsPerClock = 1;
812 else if (pixelFormat == dm_n422)
813 pixelsPerClock = 2;
814
815 else
816 pixelsPerClock = 1;
817
818
819 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
820
821
822 if (bpc == 8)
823 D = 81;
824 else if (bpc == 10)
825 D = 89;
826 else
827 D = 113;
828
829
830 w = sliceWidth / pixelsPerClock;
831
832
833 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
834 s = 0;
835 else
836 s = 1;
837
838
839 ix = initalXmitDelay + 45;
840 wx = (w + 2) / 3;
841 P = 3 * wx - w;
842 l0 = ix / w;
843 a = ix + P * l0;
844 ax = (a + 2) / 3 + D + 6 + 1;
845 L = (ax + wx - 1) / wx;
846 if ((ix % w) == 0 && P != 0)
847 lstall = 1;
848 else
849 lstall = 0;
850 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
851
852
853 pixels = Delay * 3 * pixelsPerClock;
854 return pixels;
855}
856
857static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
858{
859 unsigned int Delay = 0;
860
861 if (pixelFormat == dm_420) {
862
863 Delay = Delay + 2;
864
865 Delay = Delay + 0;
866
867 Delay = Delay + 3;
868
869 Delay = Delay + 2;
870
871 Delay = Delay + 12;
872
873 Delay = Delay + 13;
874
875 Delay = Delay + 2;
876
877 Delay = Delay + 7;
878
879 Delay = Delay + 3;
880
881 Delay = Delay + 2;
882
883 Delay = Delay + 1;
884
885 Delay = Delay + 1;
886 } else if (pixelFormat == dm_n422) {
887
888 Delay = Delay + 2;
889
890 Delay = Delay + 1;
891
892 Delay = Delay + 5;
893
894 Delay = Delay + 25;
895
896 Delay = Delay + 2;
897
898 Delay = Delay + 10;
899
900 Delay = Delay + 2;
901
902 Delay = Delay + 1;
903
904 Delay = Delay + 1;
905 } else {
906
907 Delay = Delay + 2;
908
909 Delay = Delay + 0;
910
911 Delay = Delay + 3;
912
913 Delay = Delay + 12;
914
915 Delay = Delay + 2;
916
917 Delay = Delay + 7;
918
919 Delay = Delay + 1;
920
921 Delay = Delay + 2;
922
923 Delay = Delay + 1;
924 }
925
926 return Delay;
927}
928
929static bool CalculatePrefetchSchedule(
930 struct display_mode_lib *mode_lib,
931 double HostVMInefficiencyFactor,
932 Pipe *myPipe,
933 unsigned int DSCDelay,
934 double DPPCLKDelaySubtotalPlusCNVCFormater,
935 double DPPCLKDelaySCL,
936 double DPPCLKDelaySCLLBOnly,
937 double DPPCLKDelayCNVCCursor,
938 double DISPCLKDelaySubtotal,
939 unsigned int DPP_RECOUT_WIDTH,
940 enum output_format_class OutputFormat,
941 unsigned int MaxInterDCNTileRepeaters,
942 unsigned int VStartup,
943 unsigned int MaxVStartup,
944 unsigned int GPUVMPageTableLevels,
945 bool GPUVMEnable,
946 bool HostVMEnable,
947 unsigned int HostVMMaxNonCachedPageTableLevels,
948 double HostVMMinPageSize,
949 bool DynamicMetadataEnable,
950 bool DynamicMetadataVMEnabled,
951 int DynamicMetadataLinesBeforeActiveRequired,
952 unsigned int DynamicMetadataTransmittedBytes,
953 double UrgentLatency,
954 double UrgentExtraLatency,
955 double TCalc,
956 unsigned int PDEAndMetaPTEBytesFrame,
957 unsigned int MetaRowByte,
958 unsigned int PixelPTEBytesPerRow,
959 double PrefetchSourceLinesY,
960 unsigned int SwathWidthY,
961 double VInitPreFillY,
962 unsigned int MaxNumSwathY,
963 double PrefetchSourceLinesC,
964 unsigned int SwathWidthC,
965 double VInitPreFillC,
966 unsigned int MaxNumSwathC,
967 int swath_width_luma_ub,
968 int swath_width_chroma_ub,
969 unsigned int SwathHeightY,
970 unsigned int SwathHeightC,
971 double TWait,
972 double *DSTXAfterScaler,
973 double *DSTYAfterScaler,
974 double *DestinationLinesForPrefetch,
975 double *PrefetchBandwidth,
976 double *DestinationLinesToRequestVMInVBlank,
977 double *DestinationLinesToRequestRowInVBlank,
978 double *VRatioPrefetchY,
979 double *VRatioPrefetchC,
980 double *RequiredPrefetchPixDataBWLuma,
981 double *RequiredPrefetchPixDataBWChroma,
982 bool *NotEnoughTimeForDynamicMetadata,
983 double *Tno_bw,
984 double *prefetch_vmrow_bw,
985 double *Tdmdl_vm,
986 double *Tdmdl,
987 double *TSetup,
988 int *VUpdateOffsetPix,
989 double *VUpdateWidthPix,
990 double *VReadyOffsetPix)
991{
992 bool MyError = false;
993 unsigned int DPPCycles, DISPCLKCycles;
994 double DSTTotalPixelsAfterScaler;
995 double LineTime;
996 double dst_y_prefetch_equ;
997 double Tsw_oto;
998 double prefetch_bw_oto;
999 double Tvm_oto;
1000 double Tr0_oto;
1001 double Tvm_oto_lines;
1002 double Tr0_oto_lines;
1003 double dst_y_prefetch_oto;
1004 double TimeForFetchingMetaPTE = 0;
1005 double TimeForFetchingRowInVBlank = 0;
1006 double LinesToRequestPrefetchPixelData = 0;
1007 unsigned int HostVMDynamicLevelsTrips;
1008 double trip_to_mem;
1009 double Tvm_trips;
1010 double Tr0_trips;
1011 double Tvm_trips_rounded;
1012 double Tr0_trips_rounded;
1013 double Lsw_oto;
1014 double Tpre_rounded;
1015 double prefetch_bw_equ;
1016 double Tvm_equ;
1017 double Tr0_equ;
1018 double Tdmbf;
1019 double Tdmec;
1020 double Tdmsks;
1021 double prefetch_sw_bytes;
1022 double bytes_pp;
1023 double dep_bytes;
1024 int max_vratio_pre = 4;
1025 double min_Lsw;
1026 double Tsw_est1 = 0;
1027 double Tsw_est3 = 0;
1028
1029 if (GPUVMEnable == true && HostVMEnable == true) {
1030 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1031 } else {
1032 HostVMDynamicLevelsTrips = 0;
1033 }
1034#ifdef __DML_VBA_DEBUG__
1035 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
1036#endif
1037 CalculateVupdateAndDynamicMetadataParameters(
1038 MaxInterDCNTileRepeaters,
1039 myPipe->DPPCLK,
1040 myPipe->DISPCLK,
1041 myPipe->DCFCLKDeepSleep,
1042 myPipe->PixelClock,
1043 myPipe->HTotal,
1044 myPipe->VBlank,
1045 DynamicMetadataTransmittedBytes,
1046 DynamicMetadataLinesBeforeActiveRequired,
1047 myPipe->InterlaceEnable,
1048 myPipe->ProgressiveToInterlaceUnitInOPP,
1049 TSetup,
1050 &Tdmbf,
1051 &Tdmec,
1052 &Tdmsks,
1053 VUpdateOffsetPix,
1054 VUpdateWidthPix,
1055 VReadyOffsetPix);
1056
1057 LineTime = myPipe->HTotal / myPipe->PixelClock;
1058 trip_to_mem = UrgentLatency;
1059 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1060
1061#ifdef __DML_VBA_ALLOW_DELTA__
1062 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1063#else
1064 if (DynamicMetadataVMEnabled == true) {
1065#endif
1066 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1067 } else {
1068 *Tdmdl = TWait + UrgentExtraLatency;
1069 }
1070
1071#ifdef __DML_VBA_ALLOW_DELTA__
1072 if (DynamicMetadataEnable == false) {
1073 *Tdmdl = 0.0;
1074 }
1075#endif
1076
1077 if (DynamicMetadataEnable == true) {
1078 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1079 *NotEnoughTimeForDynamicMetadata = true;
1080 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1081 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1082 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1083 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1084 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1085 } else {
1086 *NotEnoughTimeForDynamicMetadata = false;
1087 }
1088 } else {
1089 *NotEnoughTimeForDynamicMetadata = false;
1090 }
1091
1092 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1093
1094 if (myPipe->ScalerEnabled)
1095 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1096 else
1097 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1098
1099 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1100
1101 DISPCLKCycles = DISPCLKDelaySubtotal;
1102
1103 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1104 return true;
1105
1106 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1107
1108#ifdef __DML_VBA_DEBUG__
1109 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1110 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1111 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1112 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1113 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1114 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1115 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1116 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1117#endif
1118
1119 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1120
1121 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1122 *DSTYAfterScaler = 1;
1123 else
1124 *DSTYAfterScaler = 0;
1125
1126 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1127 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1128 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1129
1130#ifdef __DML_VBA_DEBUG__
1131 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1132#endif
1133
1134 MyError = false;
1135
1136 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1137 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1138 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1139
1140#ifdef __DML_VBA_ALLOW_DELTA__
1141 if (!myPipe->DCCEnable) {
1142 Tr0_trips = 0.0;
1143 Tr0_trips_rounded = 0.0;
1144 }
1145#endif
1146
1147 if (!GPUVMEnable) {
1148 Tvm_trips = 0.0;
1149 Tvm_trips_rounded = 0.0;
1150 }
1151
1152 if (GPUVMEnable) {
1153 if (GPUVMPageTableLevels >= 3) {
1154 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1155 } else {
1156 *Tno_bw = 0;
1157 }
1158 } else if (!myPipe->DCCEnable) {
1159 *Tno_bw = LineTime;
1160 } else {
1161 *Tno_bw = LineTime / 4;
1162 }
1163
1164 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1165 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1166 else
1167 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1168
1169 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1170 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1171
1172 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
1173 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1174 Tsw_oto = Lsw_oto * LineTime;
1175
1176 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1177
1178#ifdef __DML_VBA_DEBUG__
1179 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1180 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1181 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1182 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1183 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1184 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1185#endif
1186
1187 if (GPUVMEnable == true)
1188 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1189 else
1190 Tvm_oto = LineTime / 4.0;
1191
1192 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1193 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips,
1194 LineTime - Tvm_oto,
1195 LineTime / 4);
1196 } else {
1197 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1198 }
1199
1200#ifdef __DML_VBA_DEBUG__
1201 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1202 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1203 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1204 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1205 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1206 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1207 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1208 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1209 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1210#endif
1211
1212 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1213 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1214 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1215 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1216 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1217 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1218
1219 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1220
1221 if (prefetch_sw_bytes < dep_bytes)
1222 prefetch_sw_bytes = 2 * dep_bytes;
1223
1224 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1225 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1226 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1227 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1228 dml_print("DML: LineTime: %f\n", LineTime);
1229 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1230
1231 dml_print("DML: LineTime: %f\n", LineTime);
1232 dml_print("DML: VStartup: %d\n", VStartup);
1233 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1234 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1235 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1236 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1237 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1238 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1239 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1240 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1241 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1242 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1243 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1244
1245 *PrefetchBandwidth = 0;
1246 *DestinationLinesToRequestVMInVBlank = 0;
1247 *DestinationLinesToRequestRowInVBlank = 0;
1248 *VRatioPrefetchY = 0;
1249 *VRatioPrefetchC = 0;
1250 *RequiredPrefetchPixDataBWLuma = 0;
1251 if (dst_y_prefetch_equ > 1) {
1252 double PrefetchBandwidth1;
1253 double PrefetchBandwidth2;
1254 double PrefetchBandwidth3;
1255 double PrefetchBandwidth4;
1256
1257 if (Tpre_rounded - *Tno_bw > 0) {
1258 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1259 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1260 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1261 } else {
1262 PrefetchBandwidth1 = 0;
1263 }
1264
1265 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1266 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1267 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1268 }
1269
1270 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1271 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1272 else
1273 PrefetchBandwidth2 = 0;
1274
1275 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1276 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1277 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1278 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1279 } else {
1280 PrefetchBandwidth3 = 0;
1281 }
1282
1283#ifdef __DML_VBA_DEBUG__
1284 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1285 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1286 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1287#endif
1288 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1289 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1290 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1291 }
1292
1293 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1294 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1295 else
1296 PrefetchBandwidth4 = 0;
1297
1298 {
1299 bool Case1OK;
1300 bool Case2OK;
1301 bool Case3OK;
1302
1303 if (PrefetchBandwidth1 > 0) {
1304 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1305 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1306 Case1OK = true;
1307 } else {
1308 Case1OK = false;
1309 }
1310 } else {
1311 Case1OK = false;
1312 }
1313
1314 if (PrefetchBandwidth2 > 0) {
1315 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1316 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1317 Case2OK = true;
1318 } else {
1319 Case2OK = false;
1320 }
1321 } else {
1322 Case2OK = false;
1323 }
1324
1325 if (PrefetchBandwidth3 > 0) {
1326 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1327 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1328 Case3OK = true;
1329 } else {
1330 Case3OK = false;
1331 }
1332 } else {
1333 Case3OK = false;
1334 }
1335
1336 if (Case1OK) {
1337 prefetch_bw_equ = PrefetchBandwidth1;
1338 } else if (Case2OK) {
1339 prefetch_bw_equ = PrefetchBandwidth2;
1340 } else if (Case3OK) {
1341 prefetch_bw_equ = PrefetchBandwidth3;
1342 } else {
1343 prefetch_bw_equ = PrefetchBandwidth4;
1344 }
1345
1346#ifdef __DML_VBA_DEBUG__
1347 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1348 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1349 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1350 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1351#endif
1352
1353 if (prefetch_bw_equ > 0) {
1354 if (GPUVMEnable == true) {
1355 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1356 } else {
1357 Tvm_equ = LineTime / 4;
1358 }
1359
1360 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1361 Tr0_equ = dml_max4(
1362 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1363 Tr0_trips,
1364 (LineTime - Tvm_equ) / 2,
1365 LineTime / 4);
1366 } else {
1367 Tr0_equ = (LineTime - Tvm_equ) / 2;
1368 }
1369 } else {
1370 Tvm_equ = 0;
1371 Tr0_equ = 0;
1372 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1373 }
1374 }
1375
1376 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1377 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1378 TimeForFetchingMetaPTE = Tvm_oto;
1379 TimeForFetchingRowInVBlank = Tr0_oto;
1380 *PrefetchBandwidth = prefetch_bw_oto;
1381 } else {
1382 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1383 TimeForFetchingMetaPTE = Tvm_equ;
1384 TimeForFetchingRowInVBlank = Tr0_equ;
1385 *PrefetchBandwidth = prefetch_bw_equ;
1386 }
1387
1388 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1389
1390 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1391
1392#ifdef __DML_VBA_ALLOW_DELTA__
1393 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1394
1395
1396 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0);
1397#else
1398 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1399#endif
1400
1401#ifdef __DML_VBA_DEBUG__
1402 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1403 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1404 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1405 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1406 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1407 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1408 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1409#endif
1410
1411 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1412
1413 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1414 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1415#ifdef __DML_VBA_DEBUG__
1416 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1417 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1418 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1419#endif
1420 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1421 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1422 *VRatioPrefetchY = dml_max(
1423 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1424 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1425 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1426 } else {
1427 MyError = true;
1428 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1429 *VRatioPrefetchY = 0;
1430 }
1431#ifdef __DML_VBA_DEBUG__
1432 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1433 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1434 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1435#endif
1436 }
1437
1438 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1439 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1440
1441#ifdef __DML_VBA_DEBUG__
1442 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1443 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1444 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1445#endif
1446 if ((SwathHeightC > 4)) {
1447 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1448 *VRatioPrefetchC = dml_max(
1449 *VRatioPrefetchC,
1450 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1451 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1452 } else {
1453 MyError = true;
1454 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1455 *VRatioPrefetchC = 0;
1456 }
1457#ifdef __DML_VBA_DEBUG__
1458 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1459 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1460 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1461#endif
1462 }
1463
1464#ifdef __DML_VBA_DEBUG__
1465 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1466 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1467 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1468#endif
1469
1470 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1471
1472#ifdef __DML_VBA_DEBUG__
1473 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1474#endif
1475
1476 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1477 / LineTime;
1478 } else {
1479 MyError = true;
1480 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1481 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1482 *VRatioPrefetchY = 0;
1483 *VRatioPrefetchC = 0;
1484 *RequiredPrefetchPixDataBWLuma = 0;
1485 *RequiredPrefetchPixDataBWChroma = 0;
1486 }
1487
1488 dml_print(
1489 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1490 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1491 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1492 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1493 dml_print(
1494 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1495 (double) LinesToRequestPrefetchPixelData * LineTime);
1496 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1497 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1498 (double) myPipe->HTotal)) * LineTime);
1499 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1500 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1501 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1502 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1503 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1504
1505 } else {
1506 MyError = true;
1507 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1508 }
1509
1510 {
1511 double prefetch_vm_bw;
1512 double prefetch_row_bw;
1513
1514 if (PDEAndMetaPTEBytesFrame == 0) {
1515 prefetch_vm_bw = 0;
1516 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1517#ifdef __DML_VBA_DEBUG__
1518 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1519 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1520 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1521 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1522#endif
1523 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1524#ifdef __DML_VBA_DEBUG__
1525 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1526#endif
1527 } else {
1528 prefetch_vm_bw = 0;
1529 MyError = true;
1530 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1531 }
1532
1533 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1534 prefetch_row_bw = 0;
1535 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1536 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1537
1538#ifdef __DML_VBA_DEBUG__
1539 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1540 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1541 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1542 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1543#endif
1544 } else {
1545 prefetch_row_bw = 0;
1546 MyError = true;
1547 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1548 }
1549
1550 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1551 }
1552
1553 if (MyError) {
1554 *PrefetchBandwidth = 0;
1555 TimeForFetchingMetaPTE = 0;
1556 TimeForFetchingRowInVBlank = 0;
1557 *DestinationLinesToRequestVMInVBlank = 0;
1558 *DestinationLinesToRequestRowInVBlank = 0;
1559 *DestinationLinesForPrefetch = 0;
1560 LinesToRequestPrefetchPixelData = 0;
1561 *VRatioPrefetchY = 0;
1562 *VRatioPrefetchC = 0;
1563 *RequiredPrefetchPixDataBWLuma = 0;
1564 *RequiredPrefetchPixDataBWChroma = 0;
1565 }
1566
1567 return MyError;
1568}
1569
/*
 * RoundToDFSGranularityUp - snap a clock to the next 4 * VCOSpeed / N step.
 * @Clock:    requested clock value
 * @VCOSpeed: VCO speed, in the same unit as @Clock
 *
 * Divides 4 * VCOSpeed by @Clock, passes the quotient through
 * dml_floor(..., 1) and returns 4 * VCOSpeed divided by that result.
 * Presumably dml_floor(x, 1) rounds x down to a whole number, which makes
 * the returned value at least @Clock for positive inputs -- confirm against
 * the helper's definition.
 *
 * NOTE(review): if dml_floor() yields 0 (e.g. @Clock larger than
 * 4 * VCOSpeed under the assumption above) the final expression divides by
 * zero; nothing here guards against that.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1574
/*
 * RoundToDFSGranularityDown - snap a clock to the previous 4 * VCOSpeed / N step.
 * @Clock:    requested clock value
 * @VCOSpeed: VCO speed, in the same unit as @Clock
 *
 * Divides 4 * VCOSpeed by @Clock, passes the quotient through
 * dml_ceil(..., 1) and returns 4 * VCOSpeed divided by that result.
 * Presumably dml_ceil(x, 1) rounds x up to a whole number, which makes the
 * returned value at most @Clock for positive inputs -- confirm against the
 * helper's definition.
 *
 * NOTE(review): a @Clock of 0 makes the inner quotient a division by zero;
 * nothing here guards against that.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_times_four / divider;
}
1579
1580static void CalculateDCCConfiguration(
1581 bool DCCEnabled,
1582 bool DCCProgrammingAssumesScanDirectionUnknown,
1583 enum source_format_class SourcePixelFormat,
1584 unsigned int SurfaceWidthLuma,
1585 unsigned int SurfaceWidthChroma,
1586 unsigned int SurfaceHeightLuma,
1587 unsigned int SurfaceHeightChroma,
1588 double DETBufferSize,
1589 unsigned int RequestHeight256ByteLuma,
1590 unsigned int RequestHeight256ByteChroma,
1591 enum dm_swizzle_mode TilingFormat,
1592 unsigned int BytePerPixelY,
1593 unsigned int BytePerPixelC,
1594 double BytePerPixelDETY,
1595 double BytePerPixelDETC,
1596 enum scan_direction_class ScanOrientation,
1597 unsigned int *MaxUncompressedBlockLuma,
1598 unsigned int *MaxUncompressedBlockChroma,
1599 unsigned int *MaxCompressedBlockLuma,
1600 unsigned int *MaxCompressedBlockChroma,
1601 unsigned int *IndependentBlockLuma,
1602 unsigned int *IndependentBlockChroma)
1603{
1604 int yuv420;
1605 int horz_div_l;
1606 int horz_div_c;
1607 int vert_div_l;
1608 int vert_div_c;
1609
1610 int swath_buf_size;
1611 double detile_buf_vp_horz_limit;
1612 double detile_buf_vp_vert_limit;
1613
1614 int MAS_vp_horz_limit;
1615 int MAS_vp_vert_limit;
1616 int max_vp_horz_width;
1617 int max_vp_vert_height;
1618 int eff_surf_width_l;
1619 int eff_surf_width_c;
1620 int eff_surf_height_l;
1621 int eff_surf_height_c;
1622
1623 int full_swath_bytes_horz_wc_l;
1624 int full_swath_bytes_horz_wc_c;
1625 int full_swath_bytes_vert_wc_l;
1626 int full_swath_bytes_vert_wc_c;
1627 int req128_horz_wc_l;
1628 int req128_horz_wc_c;
1629 int req128_vert_wc_l;
1630 int req128_vert_wc_c;
1631 int segment_order_horz_contiguous_luma;
1632 int segment_order_horz_contiguous_chroma;
1633 int segment_order_vert_contiguous_luma;
1634 int segment_order_vert_contiguous_chroma;
1635
1636 typedef enum {
1637 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1638 } RequestType;
1639 RequestType RequestLuma;
1640 RequestType RequestChroma;
1641
1642 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1643 horz_div_l = 1;
1644 horz_div_c = 1;
1645 vert_div_l = 1;
1646 vert_div_c = 1;
1647
1648 if (BytePerPixelY == 1)
1649 vert_div_l = 0;
1650 if (BytePerPixelC == 1)
1651 vert_div_c = 0;
1652 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1653 horz_div_l = 0;
1654 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1655 horz_div_c = 0;
1656
1657 if (BytePerPixelC == 0) {
1658 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1659 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1660 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1661 } else {
1662 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1663 detile_buf_vp_horz_limit = (double) swath_buf_size
1664 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1665 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1666 detile_buf_vp_vert_limit = (double) swath_buf_size
1667 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1668 }
1669
1670 if (SourcePixelFormat == dm_420_10) {
1671 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1672 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1673 }
1674
1675 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1676 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1677
1678 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1679 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1680 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1681 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1682 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1683 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1684 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1685 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1686
1687 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1688 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1689 if (BytePerPixelC > 0) {
1690 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1691 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1692 } else {
1693 full_swath_bytes_horz_wc_c = 0;
1694 full_swath_bytes_vert_wc_c = 0;
1695 }
1696
1697 if (SourcePixelFormat == dm_420_10) {
1698 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1699 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1700 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1701 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1702 }
1703
1704 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1705 req128_horz_wc_l = 0;
1706 req128_horz_wc_c = 0;
1707 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1708 req128_horz_wc_l = 0;
1709 req128_horz_wc_c = 1;
1710 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1711 req128_horz_wc_l = 1;
1712 req128_horz_wc_c = 0;
1713 } else {
1714 req128_horz_wc_l = 1;
1715 req128_horz_wc_c = 1;
1716 }
1717
1718 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1719 req128_vert_wc_l = 0;
1720 req128_vert_wc_c = 0;
1721 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1722 req128_vert_wc_l = 0;
1723 req128_vert_wc_c = 1;
1724 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1725 req128_vert_wc_l = 1;
1726 req128_vert_wc_c = 0;
1727 } else {
1728 req128_vert_wc_l = 1;
1729 req128_vert_wc_c = 1;
1730 }
1731
1732 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1733 segment_order_horz_contiguous_luma = 0;
1734 } else {
1735 segment_order_horz_contiguous_luma = 1;
1736 }
1737 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1738 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1739 segment_order_vert_contiguous_luma = 0;
1740 } else {
1741 segment_order_vert_contiguous_luma = 1;
1742 }
1743 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1744 segment_order_horz_contiguous_chroma = 0;
1745 } else {
1746 segment_order_horz_contiguous_chroma = 1;
1747 }
1748 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1749 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1750 segment_order_vert_contiguous_chroma = 0;
1751 } else {
1752 segment_order_vert_contiguous_chroma = 1;
1753 }
1754
1755 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1756 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1757 RequestLuma = REQ_256Bytes;
1758 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1759 RequestLuma = REQ_128BytesNonContiguous;
1760 } else {
1761 RequestLuma = REQ_128BytesContiguous;
1762 }
1763 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1764 RequestChroma = REQ_256Bytes;
1765 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1766 RequestChroma = REQ_128BytesNonContiguous;
1767 } else {
1768 RequestChroma = REQ_128BytesContiguous;
1769 }
1770 } else if (ScanOrientation != dm_vert) {
1771 if (req128_horz_wc_l == 0) {
1772 RequestLuma = REQ_256Bytes;
1773 } else if (segment_order_horz_contiguous_luma == 0) {
1774 RequestLuma = REQ_128BytesNonContiguous;
1775 } else {
1776 RequestLuma = REQ_128BytesContiguous;
1777 }
1778 if (req128_horz_wc_c == 0) {
1779 RequestChroma = REQ_256Bytes;
1780 } else if (segment_order_horz_contiguous_chroma == 0) {
1781 RequestChroma = REQ_128BytesNonContiguous;
1782 } else {
1783 RequestChroma = REQ_128BytesContiguous;
1784 }
1785 } else {
1786 if (req128_vert_wc_l == 0) {
1787 RequestLuma = REQ_256Bytes;
1788 } else if (segment_order_vert_contiguous_luma == 0) {
1789 RequestLuma = REQ_128BytesNonContiguous;
1790 } else {
1791 RequestLuma = REQ_128BytesContiguous;
1792 }
1793 if (req128_vert_wc_c == 0) {
1794 RequestChroma = REQ_256Bytes;
1795 } else if (segment_order_vert_contiguous_chroma == 0) {
1796 RequestChroma = REQ_128BytesNonContiguous;
1797 } else {
1798 RequestChroma = REQ_128BytesContiguous;
1799 }
1800 }
1801
1802 if (RequestLuma == REQ_256Bytes) {
1803 *MaxUncompressedBlockLuma = 256;
1804 *MaxCompressedBlockLuma = 256;
1805 *IndependentBlockLuma = 0;
1806 } else if (RequestLuma == REQ_128BytesContiguous) {
1807 *MaxUncompressedBlockLuma = 256;
1808 *MaxCompressedBlockLuma = 128;
1809 *IndependentBlockLuma = 128;
1810 } else {
1811 *MaxUncompressedBlockLuma = 256;
1812 *MaxCompressedBlockLuma = 64;
1813 *IndependentBlockLuma = 64;
1814 }
1815
1816 if (RequestChroma == REQ_256Bytes) {
1817 *MaxUncompressedBlockChroma = 256;
1818 *MaxCompressedBlockChroma = 256;
1819 *IndependentBlockChroma = 0;
1820 } else if (RequestChroma == REQ_128BytesContiguous) {
1821 *MaxUncompressedBlockChroma = 256;
1822 *MaxCompressedBlockChroma = 128;
1823 *IndependentBlockChroma = 128;
1824 } else {
1825 *MaxUncompressedBlockChroma = 256;
1826 *MaxCompressedBlockChroma = 64;
1827 *IndependentBlockChroma = 64;
1828 }
1829
1830 if (DCCEnabled != true || BytePerPixelC == 0) {
1831 *MaxUncompressedBlockChroma = 0;
1832 *MaxCompressedBlockChroma = 0;
1833 *IndependentBlockChroma = 0;
1834 }
1835
1836 if (DCCEnabled != true) {
1837 *MaxUncompressedBlockLuma = 0;
1838 *MaxCompressedBlockLuma = 0;
1839 *IndependentBlockLuma = 0;
1840 }
1841}
1842
1843static double CalculatePrefetchSourceLines(
1844 struct display_mode_lib *mode_lib,
1845 double VRatio,
1846 double vtaps,
1847 bool Interlace,
1848 bool ProgressiveToInterlaceUnitInOPP,
1849 unsigned int SwathHeight,
1850 unsigned int ViewportYStart,
1851 double *VInitPreFill,
1852 unsigned int *MaxNumSwath)
1853{
1854 struct vba_vars_st *v = &mode_lib->vba;
1855 unsigned int MaxPartialSwath;
1856
1857 if (ProgressiveToInterlaceUnitInOPP)
1858 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1859 else
1860 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1861
1862 if (!v->IgnoreViewportPositioning) {
1863
1864 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1865
1866 if (*VInitPreFill > 1.0)
1867 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1868 else
1869 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1870 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1871
1872 } else {
1873
1874 if (ViewportYStart != 0)
1875 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1876
1877 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1878
1879 if (*VInitPreFill > 1.0)
1880 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1881 else
1882 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1883 }
1884
1885#ifdef __DML_VBA_DEBUG__
1886 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1887 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1888 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1889 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1890 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1891 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1892 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1893 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1894 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1895#endif
1896 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1897}
1898
1899static unsigned int CalculateVMAndRowBytes(
1900 struct display_mode_lib *mode_lib,
1901 bool DCCEnable,
1902 unsigned int BlockHeight256Bytes,
1903 unsigned int BlockWidth256Bytes,
1904 enum source_format_class SourcePixelFormat,
1905 unsigned int SurfaceTiling,
1906 unsigned int BytePerPixel,
1907 enum scan_direction_class ScanDirection,
1908 unsigned int SwathWidth,
1909 unsigned int ViewportHeight,
1910 bool GPUVMEnable,
1911 bool HostVMEnable,
1912 unsigned int HostVMMaxNonCachedPageTableLevels,
1913 unsigned int GPUVMMinPageSize,
1914 unsigned int HostVMMinPageSize,
1915 unsigned int PTEBufferSizeInRequests,
1916 unsigned int Pitch,
1917 unsigned int DCCMetaPitch,
1918 unsigned int *MacroTileWidth,
1919 unsigned int *MetaRowByte,
1920 unsigned int *PixelPTEBytesPerRow,
1921 bool *PTEBufferSizeNotExceeded,
1922 int *dpte_row_width_ub,
1923 unsigned int *dpte_row_height,
1924 unsigned int *MetaRequestWidth,
1925 unsigned int *MetaRequestHeight,
1926 unsigned int *meta_row_width,
1927 unsigned int *meta_row_height,
1928 int *vm_group_bytes,
1929 unsigned int *dpte_group_bytes,
1930 unsigned int *PixelPTEReqWidth,
1931 unsigned int *PixelPTEReqHeight,
1932 unsigned int *PTERequestSize,
1933 int *DPDE0BytesFrame,
1934 int *MetaPTEBytesFrame)
1935{
1936 struct vba_vars_st *v = &mode_lib->vba;
1937 unsigned int MPDEBytesFrame;
1938 unsigned int DCCMetaSurfaceBytes;
1939 unsigned int MacroTileSizeBytes;
1940 unsigned int MacroTileHeight;
1941 unsigned int ExtraDPDEBytesFrame;
1942 unsigned int PDEAndMetaPTEBytesFrame;
1943 unsigned int PixelPTEReqHeightPTEs = 0;
1944 unsigned int HostVMDynamicLevels = 0;
1945 double FractionOfPTEReturnDrop;
1946
1947 if (GPUVMEnable == true && HostVMEnable == true) {
1948 if (HostVMMinPageSize < 2048) {
1949 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1950 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1951 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1952 } else {
1953 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1954 }
1955 }
1956
1957 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1958 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1959 if (ScanDirection != dm_vert) {
1960 *meta_row_height = *MetaRequestHeight;
1961 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1962 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1963 } else {
1964 *meta_row_height = *MetaRequestWidth;
1965 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1966 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1967 }
1968 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1969 if (GPUVMEnable == true) {
1970 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1971 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1972 } else {
1973 *MetaPTEBytesFrame = 0;
1974 MPDEBytesFrame = 0;
1975 }
1976
1977 if (DCCEnable != true) {
1978 *MetaPTEBytesFrame = 0;
1979 MPDEBytesFrame = 0;
1980 *MetaRowByte = 0;
1981 }
1982
1983 if (SurfaceTiling == dm_sw_linear) {
1984 MacroTileSizeBytes = 256;
1985 MacroTileHeight = BlockHeight256Bytes;
1986 } else {
1987 MacroTileSizeBytes = 65536;
1988 MacroTileHeight = 16 * BlockHeight256Bytes;
1989 }
1990 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1991
1992 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1993 if (ScanDirection != dm_vert) {
1994 *DPDE0BytesFrame = 64
1995 * (dml_ceil(
1996 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1997 / (8 * 2097152),
1998 1) + 1);
1999 } else {
2000 *DPDE0BytesFrame = 64
2001 * (dml_ceil(
2002 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2003 / (8 * 2097152),
2004 1) + 1);
2005 }
2006 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
2007 } else {
2008 *DPDE0BytesFrame = 0;
2009 ExtraDPDEBytesFrame = 0;
2010 }
2011
2012 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2013
2014#ifdef __DML_VBA_DEBUG__
2015 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2016 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2017 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2018 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2019 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2020#endif
2021
2022 if (HostVMEnable == true) {
2023 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2024 }
2025#ifdef __DML_VBA_DEBUG__
2026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2027#endif
2028
2029 if (SurfaceTiling == dm_sw_linear) {
2030 PixelPTEReqHeightPTEs = 1;
2031 *PixelPTEReqHeight = 1;
2032 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
2033 *PTERequestSize = 64;
2034 FractionOfPTEReturnDrop = 0;
2035 } else if (MacroTileSizeBytes == 4096) {
2036 PixelPTEReqHeightPTEs = 1;
2037 *PixelPTEReqHeight = MacroTileHeight;
2038 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2039 *PTERequestSize = 64;
2040 if (ScanDirection != dm_vert)
2041 FractionOfPTEReturnDrop = 0;
2042 else
2043 FractionOfPTEReturnDrop = 7 / 8;
2044 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
2045 PixelPTEReqHeightPTEs = 16;
2046 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2047 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2048 *PTERequestSize = 128;
2049 FractionOfPTEReturnDrop = 0;
2050 } else {
2051 PixelPTEReqHeightPTEs = 1;
2052 *PixelPTEReqHeight = MacroTileHeight;
2053 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2054 *PTERequestSize = 64;
2055 FractionOfPTEReturnDrop = 0;
2056 }
2057
2058 if (SurfaceTiling == dm_sw_linear) {
2059 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2060 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2061 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2062 } else if (ScanDirection != dm_vert) {
2063 *dpte_row_height = *PixelPTEReqHeight;
2064 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2065 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2066 } else {
2067 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2068 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2069 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2070 }
2071
2072 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2073 *PTEBufferSizeNotExceeded = true;
2074 } else {
2075 *PTEBufferSizeNotExceeded = false;
2076 }
2077
2078 if (GPUVMEnable != true) {
2079 *PixelPTEBytesPerRow = 0;
2080 *PTEBufferSizeNotExceeded = true;
2081 }
2082
2083 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2084
2085 if (HostVMEnable == true) {
2086 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2087 }
2088
2089 if (HostVMEnable == true) {
2090 *vm_group_bytes = 512;
2091 *dpte_group_bytes = 512;
2092 } else if (GPUVMEnable == true) {
2093 *vm_group_bytes = 2048;
2094 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2095 *dpte_group_bytes = 512;
2096 } else {
2097 *dpte_group_bytes = 2048;
2098 }
2099 } else {
2100 *vm_group_bytes = 0;
2101 *dpte_group_bytes = 0;
2102 }
2103 return PDEAndMetaPTEBytesFrame;
2104}
2105
2106static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2107{
2108 struct vba_vars_st *v = &mode_lib->vba;
2109 unsigned int j, k;
2110 double HostVMInefficiencyFactor = 1.0;
2111 bool NoChromaPlanes = true;
2112 int ReorderBytes;
2113 double VMDataOnlyReturnBW;
2114 double MaxTotalRDBandwidth = 0;
2115 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2116
2117 v->WritebackDISPCLK = 0.0;
2118 v->DISPCLKWithRamping = 0;
2119 v->DISPCLKWithoutRamping = 0;
2120 v->GlobalDPPCLK = 0.0;
2121
2122 {
2123 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2124 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2125 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2126 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2127 if (v->HostVMEnable != true) {
2128 v->ReturnBW = dml_min(
2129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2131 } else {
2132 v->ReturnBW = dml_min(
2133 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2134 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2135 }
2136 }
2137
2138
2139
2140
2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2142 if (v->WritebackEnable[k]) {
2143 v->WritebackDISPCLK = dml_max(
2144 v->WritebackDISPCLK,
2145 dml31_CalculateWriteBackDISPCLK(
2146 v->WritebackPixelFormat[k],
2147 v->PixelClock[k],
2148 v->WritebackHRatio[k],
2149 v->WritebackVRatio[k],
2150 v->WritebackHTaps[k],
2151 v->WritebackVTaps[k],
2152 v->WritebackSourceWidth[k],
2153 v->WritebackDestinationWidth[k],
2154 v->HTotal[k],
2155 v->WritebackLineBufferSize));
2156 }
2157 }
2158
2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2160 if (v->HRatio[k] > 1) {
2161 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2162 v->MaxDCHUBToPSCLThroughput,
2163 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2164 } else {
2165 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2166 }
2167
2168 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2169 * dml_max(
2170 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2171 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2172
2173 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2174 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2175 }
2176
2177 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2178 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2179 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2180 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2181 } else {
2182 if (v->HRatioChroma[k] > 1) {
2183 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2184 v->MaxDCHUBToPSCLThroughput,
2185 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2186 } else {
2187 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2188 }
2189 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2190 * dml_max3(
2191 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2192 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2193 1.0);
2194
2195 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2196 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2197 }
2198
2199 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2200 }
2201 }
2202
2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2204 if (v->BlendingAndTiming[k] != k)
2205 continue;
2206 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2207 v->DISPCLKWithRamping = dml_max(
2208 v->DISPCLKWithRamping,
2209 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2210 * (1 + v->DISPCLKRampingMargin / 100));
2211 v->DISPCLKWithoutRamping = dml_max(
2212 v->DISPCLKWithoutRamping,
2213 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2214 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2215 v->DISPCLKWithRamping = dml_max(
2216 v->DISPCLKWithRamping,
2217 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2218 * (1 + v->DISPCLKRampingMargin / 100));
2219 v->DISPCLKWithoutRamping = dml_max(
2220 v->DISPCLKWithoutRamping,
2221 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2222 } else {
2223 v->DISPCLKWithRamping = dml_max(
2224 v->DISPCLKWithRamping,
2225 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2226 v->DISPCLKWithoutRamping = dml_max(
2227 v->DISPCLKWithoutRamping,
2228 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2229 }
2230 }
2231
2232 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2233 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2234
2235 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2236 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2237 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2238 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2239 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2240 v->DISPCLKDPPCLKVCOSpeed);
2241 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2242 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2243 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2244 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2245 } else {
2246 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2247 }
2248 v->DISPCLK = v->DISPCLK_calculated;
2249 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2250
2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2252 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2253 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2254 }
2255 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2257 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2258 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2259 }
2260
2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2262 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2263 }
2264
2265
2266 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2267 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2268
2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2270 CalculateBytePerPixelAnd256BBlockSizes(
2271 v->SourcePixelFormat[k],
2272 v->SurfaceTiling[k],
2273 &v->BytePerPixelY[k],
2274 &v->BytePerPixelC[k],
2275 &v->BytePerPixelDETY[k],
2276 &v->BytePerPixelDETC[k],
2277 &v->BlockHeight256BytesY[k],
2278 &v->BlockHeight256BytesC[k],
2279 &v->BlockWidth256BytesY[k],
2280 &v->BlockWidth256BytesC[k]);
2281 }
2282
2283 CalculateSwathWidth(
2284 false,
2285 v->NumberOfActivePlanes,
2286 v->SourcePixelFormat,
2287 v->SourceScan,
2288 v->ViewportWidth,
2289 v->ViewportHeight,
2290 v->SurfaceWidthY,
2291 v->SurfaceWidthC,
2292 v->SurfaceHeightY,
2293 v->SurfaceHeightC,
2294 v->ODMCombineEnabled,
2295 v->BytePerPixelY,
2296 v->BytePerPixelC,
2297 v->BlockHeight256BytesY,
2298 v->BlockHeight256BytesC,
2299 v->BlockWidth256BytesY,
2300 v->BlockWidth256BytesC,
2301 v->BlendingAndTiming,
2302 v->HActive,
2303 v->HRatio,
2304 v->DPPPerPlane,
2305 v->SwathWidthSingleDPPY,
2306 v->SwathWidthSingleDPPC,
2307 v->SwathWidthY,
2308 v->SwathWidthC,
2309 v->dummyinteger3,
2310 v->dummyinteger4,
2311 v->swath_width_luma_ub,
2312 v->swath_width_chroma_ub);
2313
2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2315 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2316 * v->VRatio[k];
2317 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2318 * v->VRatioChroma[k];
2319 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2320 }
2321
2322
2323 CalculateDCFCLKDeepSleep(
2324 mode_lib,
2325 v->NumberOfActivePlanes,
2326 v->BytePerPixelY,
2327 v->BytePerPixelC,
2328 v->VRatio,
2329 v->VRatioChroma,
2330 v->SwathWidthY,
2331 v->SwathWidthC,
2332 v->DPPPerPlane,
2333 v->HRatio,
2334 v->HRatioChroma,
2335 v->PixelClock,
2336 v->PSCL_THROUGHPUT_LUMA,
2337 v->PSCL_THROUGHPUT_CHROMA,
2338 v->DPPCLK,
2339 v->ReadBandwidthPlaneLuma,
2340 v->ReadBandwidthPlaneChroma,
2341 v->ReturnBusWidth,
2342 &v->DCFCLKDeepSleep);
2343
2344
2345 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2346 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2347 v->DSCCLK_calculated[k] = 0.0;
2348 } else {
2349 if (v->OutputFormat[k] == dm_420)
2350 v->DSCFormatFactor = 2;
2351 else if (v->OutputFormat[k] == dm_444)
2352 v->DSCFormatFactor = 1;
2353 else if (v->OutputFormat[k] == dm_n422)
2354 v->DSCFormatFactor = 2;
2355 else
2356 v->DSCFormatFactor = 1;
2357 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2358 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2359 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2360 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2361 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2362 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2363 else
2364 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2365 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2366 }
2367 }
2368
2369
2370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2371 double BPP = v->OutputBpp[k];
2372
2373 if (v->DSCEnabled[k] && BPP != 0) {
2374 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2375 v->DSCDelay[k] = dscceComputeDelay(
2376 v->DSCInputBitPerComponent[k],
2377 BPP,
2378 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2379 v->NumberOfDSCSlices[k],
2380 v->OutputFormat[k],
2381 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2382 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2383 v->DSCDelay[k] = 2
2384 * (dscceComputeDelay(
2385 v->DSCInputBitPerComponent[k],
2386 BPP,
2387 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2388 v->NumberOfDSCSlices[k] / 2.0,
2389 v->OutputFormat[k],
2390 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2391 } else {
2392 v->DSCDelay[k] = 4
2393 * (dscceComputeDelay(
2394 v->DSCInputBitPerComponent[k],
2395 BPP,
2396 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2397 v->NumberOfDSCSlices[k] / 4.0,
2398 v->OutputFormat[k],
2399 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2400 }
2401 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2402 } else {
2403 v->DSCDelay[k] = 0;
2404 }
2405 }
2406
2407 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2408 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2409 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2410 v->DSCDelay[k] = v->DSCDelay[j];
2411
2412
2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2414 unsigned int PDEAndMetaPTEBytesFrameY;
2415 unsigned int PixelPTEBytesPerRowY;
2416 unsigned int MetaRowByteY;
2417 unsigned int MetaRowByteC;
2418 unsigned int PDEAndMetaPTEBytesFrameC;
2419 unsigned int PixelPTEBytesPerRowC;
2420 bool PTEBufferSizeNotExceededY;
2421 bool PTEBufferSizeNotExceededC;
2422
2423 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2424 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2425 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2426 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2427 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2428 } else {
2429 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2430 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2431 }
2432
2433 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2434 mode_lib,
2435 v->DCCEnable[k],
2436 v->BlockHeight256BytesC[k],
2437 v->BlockWidth256BytesC[k],
2438 v->SourcePixelFormat[k],
2439 v->SurfaceTiling[k],
2440 v->BytePerPixelC[k],
2441 v->SourceScan[k],
2442 v->SwathWidthC[k],
2443 v->ViewportHeightChroma[k],
2444 v->GPUVMEnable,
2445 v->HostVMEnable,
2446 v->HostVMMaxNonCachedPageTableLevels,
2447 v->GPUVMMinPageSize,
2448 v->HostVMMinPageSize,
2449 v->PTEBufferSizeInRequestsForChroma,
2450 v->PitchC[k],
2451 v->DCCMetaPitchC[k],
2452 &v->MacroTileWidthC[k],
2453 &MetaRowByteC,
2454 &PixelPTEBytesPerRowC,
2455 &PTEBufferSizeNotExceededC,
2456 &v->dpte_row_width_chroma_ub[k],
2457 &v->dpte_row_height_chroma[k],
2458 &v->meta_req_width_chroma[k],
2459 &v->meta_req_height_chroma[k],
2460 &v->meta_row_width_chroma[k],
2461 &v->meta_row_height_chroma[k],
2462 &v->dummyinteger1,
2463 &v->dummyinteger2,
2464 &v->PixelPTEReqWidthC[k],
2465 &v->PixelPTEReqHeightC[k],
2466 &v->PTERequestSizeC[k],
2467 &v->dpde0_bytes_per_frame_ub_c[k],
2468 &v->meta_pte_bytes_per_frame_ub_c[k]);
2469
2470 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2471 mode_lib,
2472 v->VRatioChroma[k],
2473 v->VTAPsChroma[k],
2474 v->Interlace[k],
2475 v->ProgressiveToInterlaceUnitInOPP,
2476 v->SwathHeightC[k],
2477 v->ViewportYStartC[k],
2478 &v->VInitPreFillC[k],
2479 &v->MaxNumSwathC[k]);
2480 } else {
2481 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2482 v->PTEBufferSizeInRequestsForChroma = 0;
2483 PixelPTEBytesPerRowC = 0;
2484 PDEAndMetaPTEBytesFrameC = 0;
2485 MetaRowByteC = 0;
2486 v->MaxNumSwathC[k] = 0;
2487 v->PrefetchSourceLinesC[k] = 0;
2488 }
2489
2490 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2491 mode_lib,
2492 v->DCCEnable[k],
2493 v->BlockHeight256BytesY[k],
2494 v->BlockWidth256BytesY[k],
2495 v->SourcePixelFormat[k],
2496 v->SurfaceTiling[k],
2497 v->BytePerPixelY[k],
2498 v->SourceScan[k],
2499 v->SwathWidthY[k],
2500 v->ViewportHeight[k],
2501 v->GPUVMEnable,
2502 v->HostVMEnable,
2503 v->HostVMMaxNonCachedPageTableLevels,
2504 v->GPUVMMinPageSize,
2505 v->HostVMMinPageSize,
2506 v->PTEBufferSizeInRequestsForLuma,
2507 v->PitchY[k],
2508 v->DCCMetaPitchY[k],
2509 &v->MacroTileWidthY[k],
2510 &MetaRowByteY,
2511 &PixelPTEBytesPerRowY,
2512 &PTEBufferSizeNotExceededY,
2513 &v->dpte_row_width_luma_ub[k],
2514 &v->dpte_row_height[k],
2515 &v->meta_req_width[k],
2516 &v->meta_req_height[k],
2517 &v->meta_row_width[k],
2518 &v->meta_row_height[k],
2519 &v->vm_group_bytes[k],
2520 &v->dpte_group_bytes[k],
2521 &v->PixelPTEReqWidthY[k],
2522 &v->PixelPTEReqHeightY[k],
2523 &v->PTERequestSizeY[k],
2524 &v->dpde0_bytes_per_frame_ub_l[k],
2525 &v->meta_pte_bytes_per_frame_ub_l[k]);
2526
2527 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2528 mode_lib,
2529 v->VRatio[k],
2530 v->vtaps[k],
2531 v->Interlace[k],
2532 v->ProgressiveToInterlaceUnitInOPP,
2533 v->SwathHeightY[k],
2534 v->ViewportYStartY[k],
2535 &v->VInitPreFillY[k],
2536 &v->MaxNumSwathY[k]);
2537 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2538 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2539 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2540
2541 CalculateRowBandwidth(
2542 v->GPUVMEnable,
2543 v->SourcePixelFormat[k],
2544 v->VRatio[k],
2545 v->VRatioChroma[k],
2546 v->DCCEnable[k],
2547 v->HTotal[k] / v->PixelClock[k],
2548 MetaRowByteY,
2549 MetaRowByteC,
2550 v->meta_row_height[k],
2551 v->meta_row_height_chroma[k],
2552 PixelPTEBytesPerRowY,
2553 PixelPTEBytesPerRowC,
2554 v->dpte_row_height[k],
2555 v->dpte_row_height_chroma[k],
2556 &v->meta_row_bw[k],
2557 &v->dpte_row_bw[k]);
2558 }
2559
2560 v->TotalDCCActiveDPP = 0;
2561 v->TotalActiveDPP = 0;
2562 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2563 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2564 if (v->DCCEnable[k])
2565 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2566 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2567 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2568 NoChromaPlanes = false;
2569 }
2570
2571 ReorderBytes = v->NumberOfChannels
2572 * dml_max3(
2573 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2574 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2575 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2576
2577 VMDataOnlyReturnBW = dml_min(
2578 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2579 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2580 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2581 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2582
2583#ifdef __DML_VBA_DEBUG__
2584 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2585 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2586 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2587 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2588 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2589 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2590 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2591 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2592 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2593 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2594 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2595#endif
2596
2597 if (v->GPUVMEnable && v->HostVMEnable)
2598 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2599
2600 v->UrgentExtraLatency = CalculateExtraLatency(
2601 v->RoundTripPingLatencyCycles,
2602 ReorderBytes,
2603 v->DCFCLK,
2604 v->TotalActiveDPP,
2605 v->PixelChunkSizeInKByte,
2606 v->TotalDCCActiveDPP,
2607 v->MetaChunkSize,
2608 v->ReturnBW,
2609 v->GPUVMEnable,
2610 v->HostVMEnable,
2611 v->NumberOfActivePlanes,
2612 v->DPPPerPlane,
2613 v->dpte_group_bytes,
2614 HostVMInefficiencyFactor,
2615 v->HostVMMinPageSize,
2616 v->HostVMMaxNonCachedPageTableLevels);
2617
2618 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2619
2620 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2621 if (v->BlendingAndTiming[k] == k) {
2622 if (v->WritebackEnable[k] == true) {
2623 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2624 + CalculateWriteBackDelay(
2625 v->WritebackPixelFormat[k],
2626 v->WritebackHRatio[k],
2627 v->WritebackVRatio[k],
2628 v->WritebackVTaps[k],
2629 v->WritebackDestinationWidth[k],
2630 v->WritebackDestinationHeight[k],
2631 v->WritebackSourceHeight[k],
2632 v->HTotal[k]) / v->DISPCLK;
2633 } else
2634 v->WritebackDelay[v->VoltageLevel][k] = 0;
2635 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2636 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2637 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2638 v->WritebackDelay[v->VoltageLevel][k],
2639 v->WritebackLatency
2640 + CalculateWriteBackDelay(
2641 v->WritebackPixelFormat[j],
2642 v->WritebackHRatio[j],
2643 v->WritebackVRatio[j],
2644 v->WritebackVTaps[j],
2645 v->WritebackDestinationWidth[j],
2646 v->WritebackDestinationHeight[j],
2647 v->WritebackSourceHeight[j],
2648 v->HTotal[k]) / v->DISPCLK);
2649 }
2650 }
2651 }
2652 }
2653
2654 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2655 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2656 if (v->BlendingAndTiming[k] == j)
2657 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2658
2659 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2660 v->MaxVStartupLines[k] =
2661 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2662 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2663 v->VTotal[k] - v->VActive[k]
2664 - dml_max(
2665 1.0,
2666 dml_ceil(
2667 (double) v->WritebackDelay[v->VoltageLevel][k]
2668 / (v->HTotal[k] / v->PixelClock[k]),
2669 1));
2670 if (v->MaxVStartupLines[k] > 1023)
2671 v->MaxVStartupLines[k] = 1023;
2672
2673#ifdef __DML_VBA_DEBUG__
2674 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2675 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2676 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2677#endif
2678 }
2679
2680 v->MaximumMaxVStartupLines = 0;
2681 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2682 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2683
2684
2685
2686
2687
2688 v->UrgentLatency = CalculateUrgentLatency(
2689 v->UrgentLatencyPixelDataOnly,
2690 v->UrgentLatencyPixelMixedWithVMData,
2691 v->UrgentLatencyVMDataOnly,
2692 v->DoUrgentLatencyAdjustment,
2693 v->UrgentLatencyAdjustmentFabricClockComponent,
2694 v->UrgentLatencyAdjustmentFabricClockReference,
2695 v->FabricClock);
2696
2697 v->FractionOfUrgentBandwidth = 0.0;
2698 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2699
2700 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2701
2702 do {
2703 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2704 bool DestinationLineTimesForPrefetchLessThan2 = false;
2705 bool VRatioPrefetchMoreThan4 = false;
2706 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2707 MaxTotalRDBandwidth = 0;
2708
2709 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2710
2711 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2712 Pipe myPipe;
2713
2714 myPipe.DPPCLK = v->DPPCLK[k];
2715 myPipe.DISPCLK = v->DISPCLK;
2716 myPipe.PixelClock = v->PixelClock[k];
2717 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2718 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2719 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2720 myPipe.SourceScan = v->SourceScan[k];
2721 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2722 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2723 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2724 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2725 myPipe.InterlaceEnable = v->Interlace[k];
2726 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2727 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2728 myPipe.HTotal = v->HTotal[k];
2729 myPipe.DCCEnable = v->DCCEnable[k];
2730 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2731 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2732 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2733 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2734 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2735 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2736 v->ErrorResult[k] = CalculatePrefetchSchedule(
2737 mode_lib,
2738 HostVMInefficiencyFactor,
2739 &myPipe,
2740 v->DSCDelay[k],
2741 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2742 v->DPPCLKDelaySCL,
2743 v->DPPCLKDelaySCLLBOnly,
2744 v->DPPCLKDelayCNVCCursor,
2745 v->DISPCLKDelaySubtotal,
2746 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2747 v->OutputFormat[k],
2748 v->MaxInterDCNTileRepeaters,
2749 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2750 v->MaxVStartupLines[k],
2751 v->GPUVMMaxPageTableLevels,
2752 v->GPUVMEnable,
2753 v->HostVMEnable,
2754 v->HostVMMaxNonCachedPageTableLevels,
2755 v->HostVMMinPageSize,
2756 v->DynamicMetadataEnable[k],
2757 v->DynamicMetadataVMEnabled,
2758 v->DynamicMetadataLinesBeforeActiveRequired[k],
2759 v->DynamicMetadataTransmittedBytes[k],
2760 v->UrgentLatency,
2761 v->UrgentExtraLatency,
2762 v->TCalc,
2763 v->PDEAndMetaPTEBytesFrame[k],
2764 v->MetaRowByte[k],
2765 v->PixelPTEBytesPerRow[k],
2766 v->PrefetchSourceLinesY[k],
2767 v->SwathWidthY[k],
2768 v->VInitPreFillY[k],
2769 v->MaxNumSwathY[k],
2770 v->PrefetchSourceLinesC[k],
2771 v->SwathWidthC[k],
2772 v->VInitPreFillC[k],
2773 v->MaxNumSwathC[k],
2774 v->swath_width_luma_ub[k],
2775 v->swath_width_chroma_ub[k],
2776 v->SwathHeightY[k],
2777 v->SwathHeightC[k],
2778 TWait,
2779 &v->DSTXAfterScaler[k],
2780 &v->DSTYAfterScaler[k],
2781 &v->DestinationLinesForPrefetch[k],
2782 &v->PrefetchBandwidth[k],
2783 &v->DestinationLinesToRequestVMInVBlank[k],
2784 &v->DestinationLinesToRequestRowInVBlank[k],
2785 &v->VRatioPrefetchY[k],
2786 &v->VRatioPrefetchC[k],
2787 &v->RequiredPrefetchPixDataBWLuma[k],
2788 &v->RequiredPrefetchPixDataBWChroma[k],
2789 &v->NotEnoughTimeForDynamicMetadata[k],
2790 &v->Tno_bw[k],
2791 &v->prefetch_vmrow_bw[k],
2792 &v->Tdmdl_vm[k],
2793 &v->Tdmdl[k],
2794 &v->TSetup[k],
2795 &v->VUpdateOffsetPix[k],
2796 &v->VUpdateWidthPix[k],
2797 &v->VReadyOffsetPix[k]);
2798
2799#ifdef __DML_VBA_DEBUG__
2800 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2801#endif
2802 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2803 }
2804
2805 v->NoEnoughUrgentLatencyHiding = false;
2806 v->NoEnoughUrgentLatencyHidingPre = false;
2807
2808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2809 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2810 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2811 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2812 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2813
2814 CalculateUrgentBurstFactor(
2815 v->swath_width_luma_ub[k],
2816 v->swath_width_chroma_ub[k],
2817 v->SwathHeightY[k],
2818 v->SwathHeightC[k],
2819 v->HTotal[k] / v->PixelClock[k],
2820 v->UrgentLatency,
2821 v->CursorBufferSize,
2822 v->CursorWidth[k][0],
2823 v->CursorBPP[k][0],
2824 v->VRatio[k],
2825 v->VRatioChroma[k],
2826 v->BytePerPixelDETY[k],
2827 v->BytePerPixelDETC[k],
2828 v->DETBufferSizeY[k],
2829 v->DETBufferSizeC[k],
2830 &v->UrgBurstFactorCursor[k],
2831 &v->UrgBurstFactorLuma[k],
2832 &v->UrgBurstFactorChroma[k],
2833 &v->NoUrgentLatencyHiding[k]);
2834
2835 CalculateUrgentBurstFactor(
2836 v->swath_width_luma_ub[k],
2837 v->swath_width_chroma_ub[k],
2838 v->SwathHeightY[k],
2839 v->SwathHeightC[k],
2840 v->HTotal[k] / v->PixelClock[k],
2841 v->UrgentLatency,
2842 v->CursorBufferSize,
2843 v->CursorWidth[k][0],
2844 v->CursorBPP[k][0],
2845 v->VRatioPrefetchY[k],
2846 v->VRatioPrefetchC[k],
2847 v->BytePerPixelDETY[k],
2848 v->BytePerPixelDETC[k],
2849 v->DETBufferSizeY[k],
2850 v->DETBufferSizeC[k],
2851 &v->UrgBurstFactorCursorPre[k],
2852 &v->UrgBurstFactorLumaPre[k],
2853 &v->UrgBurstFactorChromaPre[k],
2854 &v->NoUrgentLatencyHidingPre[k]);
2855
2856 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2857 + dml_max3(
2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2859 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2860 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2861 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2862 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2863 v->DPPPerPlane[k]
2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2867
2868 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2869 + dml_max3(
2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2871 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2872 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2873 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2874 + v->cursor_bw_pre[k]);
2875
2876#ifdef __DML_VBA_DEBUG__
2877 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2878 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2879 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2880 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2881 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2882
2883 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2884 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2885
2886 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2888 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2889 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2890 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2891 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2893 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2894 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2895 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2896#endif
2897
2898 if (v->DestinationLinesForPrefetch[k] < 2)
2899 DestinationLineTimesForPrefetchLessThan2 = true;
2900
2901 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2902 VRatioPrefetchMoreThan4 = true;
2903
2904 if (v->NoUrgentLatencyHiding[k] == true)
2905 v->NoEnoughUrgentLatencyHiding = true;
2906
2907 if (v->NoUrgentLatencyHidingPre[k] == true)
2908 v->NoEnoughUrgentLatencyHidingPre = true;
2909 }
2910
2911 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2912
2913#ifdef __DML_VBA_DEBUG__
2914 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2915 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2916 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2917#endif
2918
2919 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2920 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2921 v->PrefetchModeSupported = true;
2922 else {
2923 v->PrefetchModeSupported = false;
2924 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2925 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2926 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2927 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2928 }
2929
2930
2931
2932
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2935 v->PrefetchModeSupported = false;
2936 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2937 }
2938 }
2939
2940 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2941 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2942 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2943 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2944 - dml_max(
2945 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2946 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2947 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2948 v->DPPPerPlane[k]
2949 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2950 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2951 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2952 }
2953
2954 v->TotImmediateFlipBytes = 0;
2955 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2956 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2957 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2958 }
2959 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2960 CalculateFlipSchedule(
2961 mode_lib,
2962 HostVMInefficiencyFactor,
2963 v->UrgentExtraLatency,
2964 v->UrgentLatency,
2965 v->GPUVMMaxPageTableLevels,
2966 v->HostVMEnable,
2967 v->HostVMMaxNonCachedPageTableLevels,
2968 v->GPUVMEnable,
2969 v->HostVMMinPageSize,
2970 v->PDEAndMetaPTEBytesFrame[k],
2971 v->MetaRowByte[k],
2972 v->PixelPTEBytesPerRow[k],
2973 v->BandwidthAvailableForImmediateFlip,
2974 v->TotImmediateFlipBytes,
2975 v->SourcePixelFormat[k],
2976 v->HTotal[k] / v->PixelClock[k],
2977 v->VRatio[k],
2978 v->VRatioChroma[k],
2979 v->Tno_bw[k],
2980 v->DCCEnable[k],
2981 v->dpte_row_height[k],
2982 v->meta_row_height[k],
2983 v->dpte_row_height_chroma[k],
2984 v->meta_row_height_chroma[k],
2985 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2986 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2987 &v->final_flip_bw[k],
2988 &v->ImmediateFlipSupportedForPipe[k]);
2989 }
2990
2991 v->total_dcn_read_bw_with_flip = 0.0;
2992 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2994 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2995 + dml_max3(
2996 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2997 v->DPPPerPlane[k] * v->final_flip_bw[k]
2998 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2999 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
3000 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
3001 v->DPPPerPlane[k]
3002 * (v->final_flip_bw[k]
3003 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
3004 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
3005 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
3006 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
3007 + dml_max3(
3008 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
3009 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
3010 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
3011 v->DPPPerPlane[k]
3012 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
3013 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
3014 }
3015 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
3016
3017 v->ImmediateFlipSupported = true;
3018 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
3019#ifdef __DML_VBA_DEBUG__
3020 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
3021#endif
3022 v->ImmediateFlipSupported = false;
3023 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
3024 }
3025 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3026 if (v->ImmediateFlipSupportedForPipe[k] == false) {
3027#ifdef __DML_VBA_DEBUG__
3028 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
3029 __func__, k);
3030#endif
3031 v->ImmediateFlipSupported = false;
3032 }
3033 }
3034 } else {
3035 v->ImmediateFlipSupported = false;
3036 }
3037
3038 v->PrefetchAndImmediateFlipSupported =
3039 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
3040 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
3041 v->ImmediateFlipSupported)) ? true : false;
3042#ifdef __DML_VBA_DEBUG__
3043 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
3044 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3045 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3046 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3047 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3048 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3049#endif
3050 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3051
3052 v->VStartupLines = v->VStartupLines + 1;
3053 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3054 ASSERT(v->PrefetchAndImmediateFlipSupported);
3055
3056
3057 CalculateUnboundedRequestAndCompressedBufferSize(
3058 v->DETBufferSizeInKByte[0],
3059 v->ConfigReturnBufferSizeInKByte,
3060 v->UseUnboundedRequesting,
3061 v->TotalActiveDPP,
3062 NoChromaPlanes,
3063 v->MaxNumDPP,
3064 v->CompressedBufferSegmentSizeInkByte,
3065 v->Output,
3066 &v->UnboundedRequestEnabled,
3067 &v->CompressedBufferSizeInkByte);
3068
3069
3070 {
3071 enum clock_change_support DRAMClockChangeSupport;
3072 CalculateWatermarksAndDRAMSpeedChangeSupport(
3073 mode_lib,
3074 PrefetchMode,
3075 v->NumberOfActivePlanes,
3076 v->MaxLineBufferLines,
3077 v->LineBufferSize,
3078 v->WritebackInterfaceBufferSize,
3079 v->DCFCLK,
3080 v->ReturnBW,
3081 v->SynchronizedVBlank,
3082 v->dpte_group_bytes,
3083 v->MetaChunkSize,
3084 v->UrgentLatency,
3085 v->UrgentExtraLatency,
3086 v->WritebackLatency,
3087 v->WritebackChunkSize,
3088 v->SOCCLK,
3089 v->DRAMClockChangeLatency,
3090 v->SRExitTime,
3091 v->SREnterPlusExitTime,
3092 v->SRExitZ8Time,
3093 v->SREnterPlusExitZ8Time,
3094 v->DCFCLKDeepSleep,
3095 v->DETBufferSizeY,
3096 v->DETBufferSizeC,
3097 v->SwathHeightY,
3098 v->SwathHeightC,
3099 v->LBBitPerPixel,
3100 v->SwathWidthY,
3101 v->SwathWidthC,
3102 v->HRatio,
3103 v->HRatioChroma,
3104 v->vtaps,
3105 v->VTAPsChroma,
3106 v->VRatio,
3107 v->VRatioChroma,
3108 v->HTotal,
3109 v->PixelClock,
3110 v->BlendingAndTiming,
3111 v->DPPPerPlane,
3112 v->BytePerPixelDETY,
3113 v->BytePerPixelDETC,
3114 v->DSTXAfterScaler,
3115 v->DSTYAfterScaler,
3116 v->WritebackEnable,
3117 v->WritebackPixelFormat,
3118 v->WritebackDestinationWidth,
3119 v->WritebackDestinationHeight,
3120 v->WritebackSourceHeight,
3121 v->UnboundedRequestEnabled,
3122 v->CompressedBufferSizeInkByte,
3123 &DRAMClockChangeSupport,
3124 &v->UrgentWatermark,
3125 &v->WritebackUrgentWatermark,
3126 &v->DRAMClockChangeWatermark,
3127 &v->WritebackDRAMClockChangeWatermark,
3128 &v->StutterExitWatermark,
3129 &v->StutterEnterPlusExitWatermark,
3130 &v->Z8StutterExitWatermark,
3131 &v->Z8StutterEnterPlusExitWatermark,
3132 &v->MinActiveDRAMClockChangeLatencySupported);
3133
3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3135 if (v->WritebackEnable[k] == true) {
3136 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3137 0,
3138 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3139 } else {
3140 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3141 }
3142 }
3143 }
3144
3145
3146 CalculatePixelDeliveryTimes(
3147 v->NumberOfActivePlanes,
3148 v->VRatio,
3149 v->VRatioChroma,
3150 v->VRatioPrefetchY,
3151 v->VRatioPrefetchC,
3152 v->swath_width_luma_ub,
3153 v->swath_width_chroma_ub,
3154 v->DPPPerPlane,
3155 v->HRatio,
3156 v->HRatioChroma,
3157 v->PixelClock,
3158 v->PSCL_THROUGHPUT_LUMA,
3159 v->PSCL_THROUGHPUT_CHROMA,
3160 v->DPPCLK,
3161 v->BytePerPixelC,
3162 v->SourceScan,
3163 v->NumberOfCursors,
3164 v->CursorWidth,
3165 v->CursorBPP,
3166 v->BlockWidth256BytesY,
3167 v->BlockHeight256BytesY,
3168 v->BlockWidth256BytesC,
3169 v->BlockHeight256BytesC,
3170 v->DisplayPipeLineDeliveryTimeLuma,
3171 v->DisplayPipeLineDeliveryTimeChroma,
3172 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3173 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3174 v->DisplayPipeRequestDeliveryTimeLuma,
3175 v->DisplayPipeRequestDeliveryTimeChroma,
3176 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3177 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3178 v->CursorRequestDeliveryTime,
3179 v->CursorRequestDeliveryTimePrefetch);
3180
3181 CalculateMetaAndPTETimes(
3182 v->NumberOfActivePlanes,
3183 v->GPUVMEnable,
3184 v->MetaChunkSize,
3185 v->MinMetaChunkSizeBytes,
3186 v->HTotal,
3187 v->VRatio,
3188 v->VRatioChroma,
3189 v->DestinationLinesToRequestRowInVBlank,
3190 v->DestinationLinesToRequestRowInImmediateFlip,
3191 v->DCCEnable,
3192 v->PixelClock,
3193 v->BytePerPixelY,
3194 v->BytePerPixelC,
3195 v->SourceScan,
3196 v->dpte_row_height,
3197 v->dpte_row_height_chroma,
3198 v->meta_row_width,
3199 v->meta_row_width_chroma,
3200 v->meta_row_height,
3201 v->meta_row_height_chroma,
3202 v->meta_req_width,
3203 v->meta_req_width_chroma,
3204 v->meta_req_height,
3205 v->meta_req_height_chroma,
3206 v->dpte_group_bytes,
3207 v->PTERequestSizeY,
3208 v->PTERequestSizeC,
3209 v->PixelPTEReqWidthY,
3210 v->PixelPTEReqHeightY,
3211 v->PixelPTEReqWidthC,
3212 v->PixelPTEReqHeightC,
3213 v->dpte_row_width_luma_ub,
3214 v->dpte_row_width_chroma_ub,
3215 v->DST_Y_PER_PTE_ROW_NOM_L,
3216 v->DST_Y_PER_PTE_ROW_NOM_C,
3217 v->DST_Y_PER_META_ROW_NOM_L,
3218 v->DST_Y_PER_META_ROW_NOM_C,
3219 v->TimePerMetaChunkNominal,
3220 v->TimePerChromaMetaChunkNominal,
3221 v->TimePerMetaChunkVBlank,
3222 v->TimePerChromaMetaChunkVBlank,
3223 v->TimePerMetaChunkFlip,
3224 v->TimePerChromaMetaChunkFlip,
3225 v->time_per_pte_group_nom_luma,
3226 v->time_per_pte_group_vblank_luma,
3227 v->time_per_pte_group_flip_luma,
3228 v->time_per_pte_group_nom_chroma,
3229 v->time_per_pte_group_vblank_chroma,
3230 v->time_per_pte_group_flip_chroma);
3231
3232 CalculateVMGroupAndRequestTimes(
3233 v->NumberOfActivePlanes,
3234 v->GPUVMEnable,
3235 v->GPUVMMaxPageTableLevels,
3236 v->HTotal,
3237 v->BytePerPixelC,
3238 v->DestinationLinesToRequestVMInVBlank,
3239 v->DestinationLinesToRequestVMInImmediateFlip,
3240 v->DCCEnable,
3241 v->PixelClock,
3242 v->dpte_row_width_luma_ub,
3243 v->dpte_row_width_chroma_ub,
3244 v->vm_group_bytes,
3245 v->dpde0_bytes_per_frame_ub_l,
3246 v->dpde0_bytes_per_frame_ub_c,
3247 v->meta_pte_bytes_per_frame_ub_l,
3248 v->meta_pte_bytes_per_frame_ub_c,
3249 v->TimePerVMGroupVBlank,
3250 v->TimePerVMGroupFlip,
3251 v->TimePerVMRequestVBlank,
3252 v->TimePerVMRequestFlip);
3253
3254
3255 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3256 if (PrefetchMode == 0) {
3257 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3258 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3259 v->MinTTUVBlank[k] = dml_max(
3260 v->DRAMClockChangeWatermark,
3261 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3262 } else if (PrefetchMode == 1) {
3263 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3264 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3265 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3266 } else {
3267 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3268 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3269 v->MinTTUVBlank[k] = v->UrgentWatermark;
3270 }
3271 if (!v->DynamicMetadataEnable[k])
3272 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3273 }
3274
3275
3276 v->ActiveDPPs = 0;
3277 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3278 CalculateDCCConfiguration(v->DCCEnable[k], false,
3279 v->SourcePixelFormat[k],
3280 v->SurfaceWidthY[k],
3281 v->SurfaceWidthC[k],
3282 v->SurfaceHeightY[k],
3283 v->SurfaceHeightC[k],
3284 v->DETBufferSizeInKByte[0] * 1024,
3285 v->BlockHeight256BytesY[k],
3286 v->BlockHeight256BytesC[k],
3287 v->SurfaceTiling[k],
3288 v->BytePerPixelY[k],
3289 v->BytePerPixelC[k],
3290 v->BytePerPixelDETY[k],
3291 v->BytePerPixelDETC[k],
3292 v->SourceScan[k],
3293 &v->DCCYMaxUncompressedBlock[k],
3294 &v->DCCCMaxUncompressedBlock[k],
3295 &v->DCCYMaxCompressedBlock[k],
3296 &v->DCCCMaxCompressedBlock[k],
3297 &v->DCCYIndependentBlock[k],
3298 &v->DCCCIndependentBlock[k]);
3299 }
3300
3301
3302 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3303 bool isInterlaceTiming;
3304 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3305#ifdef __DML_VBA_DEBUG__
3306 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3307#endif
3308
3309 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3310
3311#ifdef __DML_VBA_DEBUG__
3312 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3313 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3314 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3315 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3316#endif
3317
3318 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3319 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3320 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3321 }
3322
3323 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3324
3325 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3326 - v->VFrontPorch[k])
3327 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3328 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3329
3330 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3331
3332 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3333 <= (isInterlaceTiming ?
3334 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3335 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3336 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3337 } else {
3338 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3339 }
3340#ifdef __DML_VBA_DEBUG__
3341 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3342 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3343 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3344 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3345 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3346 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3347 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3348 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3349 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3350 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3351 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3352#endif
3353 }
3354
3355 {
3356
3357 double TotalWRBandwidth = 0;
3358 double MaxPerPlaneVActiveWRBandwidth = 0;
3359 double WRBandwidth = 0;
3360 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3361 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3362 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3363 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3364 } else if (v->WritebackEnable[k] == true) {
3365 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3366 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3367 }
3368 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3369 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3370 }
3371
3372 v->TotalDataReadBandwidth = 0;
3373 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3374 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3375 }
3376 }
3377
3378 CalculateStutterEfficiency(
3379 mode_lib,
3380 v->CompressedBufferSizeInkByte,
3381 v->UnboundedRequestEnabled,
3382 v->ConfigReturnBufferSizeInKByte,
3383 v->MetaFIFOSizeInKEntries,
3384 v->ZeroSizeBufferEntries,
3385 v->NumberOfActivePlanes,
3386 v->ROBBufferSizeInKByte,
3387 v->TotalDataReadBandwidth,
3388 v->DCFCLK,
3389 v->ReturnBW,
3390 v->COMPBUF_RESERVED_SPACE_64B,
3391 v->COMPBUF_RESERVED_SPACE_ZS,
3392 v->SRExitTime,
3393 v->SRExitZ8Time,
3394 v->SynchronizedVBlank,
3395 v->StutterEnterPlusExitWatermark,
3396 v->Z8StutterEnterPlusExitWatermark,
3397 v->ProgressiveToInterlaceUnitInOPP,
3398 v->Interlace,
3399 v->MinTTUVBlank,
3400 v->DPPPerPlane,
3401 v->DETBufferSizeY,
3402 v->BytePerPixelY,
3403 v->BytePerPixelDETY,
3404 v->SwathWidthY,
3405 v->SwathHeightY,
3406 v->SwathHeightC,
3407 v->DCCRateLuma,
3408 v->DCCRateChroma,
3409 v->DCCFractionOfZeroSizeRequestsLuma,
3410 v->DCCFractionOfZeroSizeRequestsChroma,
3411 v->HTotal,
3412 v->VTotal,
3413 v->PixelClock,
3414 v->VRatio,
3415 v->SourceScan,
3416 v->BlockHeight256BytesY,
3417 v->BlockWidth256BytesY,
3418 v->BlockHeight256BytesC,
3419 v->BlockWidth256BytesC,
3420 v->DCCYMaxUncompressedBlock,
3421 v->DCCCMaxUncompressedBlock,
3422 v->VActive,
3423 v->DCCEnable,
3424 v->WritebackEnable,
3425 v->ReadBandwidthPlaneLuma,
3426 v->ReadBandwidthPlaneChroma,
3427 v->meta_row_bw,
3428 v->dpte_row_bw,
3429 &v->StutterEfficiencyNotIncludingVBlank,
3430 &v->StutterEfficiency,
3431 &v->NumberOfStutterBurstsPerFrame,
3432 &v->Z8StutterEfficiencyNotIncludingVBlank,
3433 &v->Z8StutterEfficiency,
3434 &v->Z8NumberOfStutterBurstsPerFrame,
3435 &v->StutterPeriod);
3436}
3437
3438static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3439{
3440 struct vba_vars_st *v = &mode_lib->vba;
3441
3442 double BytePerPixDETY[DC__NUM_DPP__MAX];
3443 double BytePerPixDETC[DC__NUM_DPP__MAX];
3444 int BytePerPixY[DC__NUM_DPP__MAX];
3445 int BytePerPixC[DC__NUM_DPP__MAX];
3446 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3447 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3448 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3449 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3450 double dummy1[DC__NUM_DPP__MAX];
3451 double dummy2[DC__NUM_DPP__MAX];
3452 double dummy3[DC__NUM_DPP__MAX];
3453 double dummy4[DC__NUM_DPP__MAX];
3454 int dummy5[DC__NUM_DPP__MAX];
3455 int dummy6[DC__NUM_DPP__MAX];
3456 bool dummy7[DC__NUM_DPP__MAX];
3457 bool dummysinglestring;
3458
3459 unsigned int k;
3460
3461 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3462
3463 CalculateBytePerPixelAnd256BBlockSizes(
3464 v->SourcePixelFormat[k],
3465 v->SurfaceTiling[k],
3466 &BytePerPixY[k],
3467 &BytePerPixC[k],
3468 &BytePerPixDETY[k],
3469 &BytePerPixDETC[k],
3470 &Read256BytesBlockHeightY[k],
3471 &Read256BytesBlockHeightC[k],
3472 &Read256BytesBlockWidthY[k],
3473 &Read256BytesBlockWidthC[k]);
3474 }
3475
3476 CalculateSwathAndDETConfiguration(
3477 false,
3478 v->NumberOfActivePlanes,
3479 v->DETBufferSizeInKByte[0],
3480 dummy1,
3481 dummy2,
3482 v->SourceScan,
3483 v->SourcePixelFormat,
3484 v->SurfaceTiling,
3485 v->ViewportWidth,
3486 v->ViewportHeight,
3487 v->SurfaceWidthY,
3488 v->SurfaceWidthC,
3489 v->SurfaceHeightY,
3490 v->SurfaceHeightC,
3491 Read256BytesBlockHeightY,
3492 Read256BytesBlockHeightC,
3493 Read256BytesBlockWidthY,
3494 Read256BytesBlockWidthC,
3495 v->ODMCombineEnabled,
3496 v->BlendingAndTiming,
3497 BytePerPixY,
3498 BytePerPixC,
3499 BytePerPixDETY,
3500 BytePerPixDETC,
3501 v->HActive,
3502 v->HRatio,
3503 v->HRatioChroma,
3504 v->DPPPerPlane,
3505 dummy5,
3506 dummy6,
3507 dummy3,
3508 dummy4,
3509 v->SwathHeightY,
3510 v->SwathHeightC,
3511 v->DETBufferSizeY,
3512 v->DETBufferSizeC,
3513 dummy7,
3514 &dummysinglestring);
3515}
3516
3517static bool CalculateBytePerPixelAnd256BBlockSizes(
3518 enum source_format_class SourcePixelFormat,
3519 enum dm_swizzle_mode SurfaceTiling,
3520 unsigned int *BytePerPixelY,
3521 unsigned int *BytePerPixelC,
3522 double *BytePerPixelDETY,
3523 double *BytePerPixelDETC,
3524 unsigned int *BlockHeight256BytesY,
3525 unsigned int *BlockHeight256BytesC,
3526 unsigned int *BlockWidth256BytesY,
3527 unsigned int *BlockWidth256BytesC)
3528{
3529 if (SourcePixelFormat == dm_444_64) {
3530 *BytePerPixelDETY = 8;
3531 *BytePerPixelDETC = 0;
3532 *BytePerPixelY = 8;
3533 *BytePerPixelC = 0;
3534 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3535 *BytePerPixelDETY = 4;
3536 *BytePerPixelDETC = 0;
3537 *BytePerPixelY = 4;
3538 *BytePerPixelC = 0;
3539 } else if (SourcePixelFormat == dm_444_16) {
3540 *BytePerPixelDETY = 2;
3541 *BytePerPixelDETC = 0;
3542 *BytePerPixelY = 2;
3543 *BytePerPixelC = 0;
3544 } else if (SourcePixelFormat == dm_444_8) {
3545 *BytePerPixelDETY = 1;
3546 *BytePerPixelDETC = 0;
3547 *BytePerPixelY = 1;
3548 *BytePerPixelC = 0;
3549 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3550 *BytePerPixelDETY = 4;
3551 *BytePerPixelDETC = 1;
3552 *BytePerPixelY = 4;
3553 *BytePerPixelC = 1;
3554 } else if (SourcePixelFormat == dm_420_8) {
3555 *BytePerPixelDETY = 1;
3556 *BytePerPixelDETC = 2;
3557 *BytePerPixelY = 1;
3558 *BytePerPixelC = 2;
3559 } else if (SourcePixelFormat == dm_420_12) {
3560 *BytePerPixelDETY = 2;
3561 *BytePerPixelDETC = 4;
3562 *BytePerPixelY = 2;
3563 *BytePerPixelC = 4;
3564 } else {
3565 *BytePerPixelDETY = 4.0 / 3;
3566 *BytePerPixelDETC = 8.0 / 3;
3567 *BytePerPixelY = 2;
3568 *BytePerPixelC = 4;
3569 }
3570
3571 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3572 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3573 if (SurfaceTiling == dm_sw_linear) {
3574 *BlockHeight256BytesY = 1;
3575 } else if (SourcePixelFormat == dm_444_64) {
3576 *BlockHeight256BytesY = 4;
3577 } else if (SourcePixelFormat == dm_444_8) {
3578 *BlockHeight256BytesY = 16;
3579 } else {
3580 *BlockHeight256BytesY = 8;
3581 }
3582 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3583 *BlockHeight256BytesC = 0;
3584 *BlockWidth256BytesC = 0;
3585 } else {
3586 if (SurfaceTiling == dm_sw_linear) {
3587 *BlockHeight256BytesY = 1;
3588 *BlockHeight256BytesC = 1;
3589 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3590 *BlockHeight256BytesY = 8;
3591 *BlockHeight256BytesC = 16;
3592 } else if (SourcePixelFormat == dm_420_8) {
3593 *BlockHeight256BytesY = 16;
3594 *BlockHeight256BytesC = 8;
3595 } else {
3596 *BlockHeight256BytesY = 8;
3597 *BlockHeight256BytesC = 8;
3598 }
3599 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3600 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3601 }
3602 return true;
3603}
3604
/*
 * Pick the wait time that corresponds to the given prefetch mode.
 *
 *   mode 0     : max(DRAMClockChangeLatency + UrgentLatency,
 *                    SREnterPlusExitTime, UrgentLatency)
 *   mode 1     : max(SREnterPlusExitTime, UrgentLatency)
 *   otherwise  : UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3615
3616double dml31_CalculateWriteBackDISPCLK(
3617 enum source_format_class WritebackPixelFormat,
3618 double PixelClock,
3619 double WritebackHRatio,
3620 double WritebackVRatio,
3621 unsigned int WritebackHTaps,
3622 unsigned int WritebackVTaps,
3623 long WritebackSourceWidth,
3624 long WritebackDestinationWidth,
3625 unsigned int HTotal,
3626 unsigned int WritebackLineBufferSize)
3627{
3628 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3629
3630 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3631 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3632 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3633 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3634}
3635
3636static double CalculateWriteBackDelay(
3637 enum source_format_class WritebackPixelFormat,
3638 double WritebackHRatio,
3639 double WritebackVRatio,
3640 unsigned int WritebackVTaps,
3641 int WritebackDestinationWidth,
3642 int WritebackDestinationHeight,
3643 int WritebackSourceHeight,
3644 unsigned int HTotal)
3645{
3646 double CalculateWriteBackDelay;
3647 double Line_length;
3648 double Output_lines_last_notclamped;
3649 double WritebackVInit;
3650
3651 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3652 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3653 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3654 if (Output_lines_last_notclamped < 0) {
3655 CalculateWriteBackDelay = 0;
3656 } else {
3657 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3658 }
3659 return CalculateWriteBackDelay;
3660}
3661
/*
 * Derive the VUPDATE/VREADY pixel offsets and the dynamic-metadata timing
 * terms for one pipe.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay)
 *                      * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150/DPPCLK, repeater delay
 *                      + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - sum of the three values above divided by PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   Tdmec            - HTotal / PixelClock (one line time)
 *   Tdmsks           - half of the vblank time when no "lines before active"
 *                      requirement is given, otherwise that many line times;
 *                      halved again for interlaced timing when the
 *                      progressive-to-interlace unit is not in OPP.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they are printed
	 * with %f (as the other debug prints of doubles in this file do);
	 * only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3705
3706static void CalculateRowBandwidth(
3707 bool GPUVMEnable,
3708 enum source_format_class SourcePixelFormat,
3709 double VRatio,
3710 double VRatioChroma,
3711 bool DCCEnable,
3712 double LineTime,
3713 unsigned int MetaRowByteLuma,
3714 unsigned int MetaRowByteChroma,
3715 unsigned int meta_row_height_luma,
3716 unsigned int meta_row_height_chroma,
3717 unsigned int PixelPTEBytesPerRowLuma,
3718 unsigned int PixelPTEBytesPerRowChroma,
3719 unsigned int dpte_row_height_luma,
3720 unsigned int dpte_row_height_chroma,
3721 double *meta_row_bw,
3722 double *dpte_row_bw)
3723{
3724 if (DCCEnable != true) {
3725 *meta_row_bw = 0;
3726 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3727 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3728 } else {
3729 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3730 }
3731
3732 if (GPUVMEnable != true) {
3733 *dpte_row_bw = 0;
3734 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3735 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3736 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3737 } else {
3738 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3739 }
3740}
3741
3742static void CalculateFlipSchedule(
3743 struct display_mode_lib *mode_lib,
3744 double HostVMInefficiencyFactor,
3745 double UrgentExtraLatency,
3746 double UrgentLatency,
3747 unsigned int GPUVMMaxPageTableLevels,
3748 bool HostVMEnable,
3749 unsigned int HostVMMaxNonCachedPageTableLevels,
3750 bool GPUVMEnable,
3751 double HostVMMinPageSize,
3752 double PDEAndMetaPTEBytesPerFrame,
3753 double MetaRowBytes,
3754 double DPTEBytesPerRow,
3755 double BandwidthAvailableForImmediateFlip,
3756 unsigned int TotImmediateFlipBytes,
3757 enum source_format_class SourcePixelFormat,
3758 double LineTime,
3759 double VRatio,
3760 double VRatioChroma,
3761 double Tno_bw,
3762 bool DCCEnable,
3763 unsigned int dpte_row_height,
3764 unsigned int meta_row_height,
3765 unsigned int dpte_row_height_chroma,
3766 unsigned int meta_row_height_chroma,
3767 double *DestinationLinesToRequestVMInImmediateFlip,
3768 double *DestinationLinesToRequestRowInImmediateFlip,
3769 double *final_flip_bw,
3770 bool *ImmediateFlipSupportedForPipe)
3771{
3772 double min_row_time = 0.0;
3773 unsigned int HostVMDynamicLevelsTrips;
3774 double TimeForFetchingMetaPTEImmediateFlip;
3775 double TimeForFetchingRowInVBlankImmediateFlip;
3776 double ImmediateFlipBW;
3777
3778 if (GPUVMEnable == true && HostVMEnable == true) {
3779 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3780 } else {
3781 HostVMDynamicLevelsTrips = 0;
3782 }
3783
3784 if (GPUVMEnable == true || DCCEnable == true) {
3785 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3786 }
3787
3788 if (GPUVMEnable == true) {
3789 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3790 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3791 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3792 LineTime / 4.0);
3793 } else {
3794 TimeForFetchingMetaPTEImmediateFlip = 0;
3795 }
3796
3797 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3798 if ((GPUVMEnable == true || DCCEnable == true)) {
3799 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3800 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3801 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3802 LineTime / 4);
3803 } else {
3804 TimeForFetchingRowInVBlankImmediateFlip = 0;
3805 }
3806
3807 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3808
3809 if (GPUVMEnable == true) {
3810 *final_flip_bw = dml_max(
3811 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3812 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3813 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3814 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3815 } else {
3816 *final_flip_bw = 0;
3817 }
3818
3819 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3820 if (GPUVMEnable == true && DCCEnable != true) {
3821 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3822 } else if (GPUVMEnable != true && DCCEnable == true) {
3823 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3824 } else {
3825 min_row_time = dml_min4(
3826 dpte_row_height * LineTime / VRatio,
3827 meta_row_height * LineTime / VRatio,
3828 dpte_row_height_chroma * LineTime / VRatioChroma,
3829 meta_row_height_chroma * LineTime / VRatioChroma);
3830 }
3831 } else {
3832 if (GPUVMEnable == true && DCCEnable != true) {
3833 min_row_time = dpte_row_height * LineTime / VRatio;
3834 } else if (GPUVMEnable != true && DCCEnable == true) {
3835 min_row_time = meta_row_height * LineTime / VRatio;
3836 } else {
3837 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3838 }
3839 }
3840
3841 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3842 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3843 *ImmediateFlipSupportedForPipe = false;
3844 } else {
3845 *ImmediateFlipSupportedForPipe = true;
3846 }
3847
3848#ifdef __DML_VBA_DEBUG__
3849 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3850 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3851 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3852 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3853 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3854 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
3855#endif
3856
3857}
3858
3859static double TruncToValidBPP(
3860 double LinkBitRate,
3861 int Lanes,
3862 int HTotal,
3863 int HActive,
3864 double PixelClock,
3865 double DesiredBPP,
3866 bool DSCEnable,
3867 enum output_encoder_class Output,
3868 enum output_format_class Format,
3869 unsigned int DSCInputBitPerComponent,
3870 int DSCSlices,
3871 int AudioRate,
3872 int AudioLayout,
3873 enum odm_combine_mode ODMCombine)
3874{
3875 double MaxLinkBPP;
3876 int MinDSCBPP;
3877 double MaxDSCBPP;
3878 int NonDSCBPP0;
3879 int NonDSCBPP1;
3880 int NonDSCBPP2;
3881
3882 if (Format == dm_420) {
3883 NonDSCBPP0 = 12;
3884 NonDSCBPP1 = 15;
3885 NonDSCBPP2 = 18;
3886 MinDSCBPP = 6;
3887 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3888 } else if (Format == dm_444) {
3889 NonDSCBPP0 = 24;
3890 NonDSCBPP1 = 30;
3891 NonDSCBPP2 = 36;
3892 MinDSCBPP = 8;
3893 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3894 } else {
3895 if (Output == dm_hdmi) {
3896 NonDSCBPP0 = 24;
3897 NonDSCBPP1 = 24;
3898 NonDSCBPP2 = 24;
3899 } else {
3900 NonDSCBPP0 = 16;
3901 NonDSCBPP1 = 20;
3902 NonDSCBPP2 = 24;
3903 }
3904 if (Format == dm_n422) {
3905 MinDSCBPP = 7;
3906 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3907 } else {
3908 MinDSCBPP = 8;
3909 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3910 }
3911 }
3912
3913 if (DSCEnable && Output == dm_dp) {
3914 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3915 } else {
3916 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3917 }
3918
3919 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3920 MaxLinkBPP = 16;
3921 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3922 MaxLinkBPP = 32;
3923 }
3924
3925 if (DesiredBPP == 0) {
3926 if (DSCEnable) {
3927 if (MaxLinkBPP < MinDSCBPP) {
3928 return BPP_INVALID;
3929 } else if (MaxLinkBPP >= MaxDSCBPP) {
3930 return MaxDSCBPP;
3931 } else {
3932 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3933 }
3934 } else {
3935 if (MaxLinkBPP >= NonDSCBPP2) {
3936 return NonDSCBPP2;
3937 } else if (MaxLinkBPP >= NonDSCBPP1) {
3938 return NonDSCBPP1;
3939 } else if (MaxLinkBPP >= NonDSCBPP0) {
3940 return 16.0;
3941 } else {
3942 return BPP_INVALID;
3943 }
3944 }
3945 } else {
3946 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3947 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3948 return BPP_INVALID;
3949 } else {
3950 return DesiredBPP;
3951 }
3952 }
3953 return BPP_INVALID;
3954}
3955
3956void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3957{
3958 struct vba_vars_st *v = &mode_lib->vba;
3959
3960 int i, j;
3961 unsigned int k, m;
3962 int ReorderingBytes;
3963 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3964 bool NoChroma = true;
3965 bool EnoughWritebackUnits = true;
3966 bool P2IWith420 = false;
3967 bool DSCOnlyIfNecessaryWithBPP = false;
3968 bool DSC422NativeNotSupported = false;
3969 double MaxTotalVActiveRDBandwidth;
3970 bool ViewportExceedsSurface = false;
3971 bool FMTBufferExceeded = false;
3972
3973
3974
3975 CalculateMinAndMaxPrefetchMode(
3976 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3977 &MinPrefetchMode, &MaxPrefetchMode);
3978
3979
3980
3981 v->ScaleRatioAndTapsSupport = true;
3982 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3983 if (v->ScalerEnabled[k] == false
3984 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3985 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3986 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3987 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3988 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3989 v->ScaleRatioAndTapsSupport = false;
3990 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3991 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3992 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3993 || v->VRatio[k] > v->vtaps[k]
3994 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3995 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3996 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3997 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3998 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3999 || v->HRatioChroma[k] > v->MaxHSCLRatio
4000 || v->VRatioChroma[k] > v->MaxVSCLRatio
4001 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4002 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4003 v->ScaleRatioAndTapsSupport = false;
4004 }
4005 }
4006
4007
4008 v->SourceFormatPixelAndScanSupport = true;
4009 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4010 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4011 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4012 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4013 v->SourceFormatPixelAndScanSupport = false;
4014 }
4015 }
4016
4017
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 CalculateBytePerPixelAnd256BBlockSizes(
4020 v->SourcePixelFormat[k],
4021 v->SurfaceTiling[k],
4022 &v->BytePerPixelY[k],
4023 &v->BytePerPixelC[k],
4024 &v->BytePerPixelInDETY[k],
4025 &v->BytePerPixelInDETC[k],
4026 &v->Read256BlockHeightY[k],
4027 &v->Read256BlockHeightC[k],
4028 &v->Read256BlockWidthY[k],
4029 &v->Read256BlockWidthC[k]);
4030 }
4031 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4032 if (v->SourceScan[k] != dm_vert) {
4033 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4034 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4035 } else {
4036 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4037 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4038 }
4039 }
4040 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4041 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4042 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4043 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4044 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4045 }
4046 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4047 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4048 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4049 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4050 } else if (v->WritebackEnable[k] == true) {
4051 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4052 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4053 } else {
4054 v->WriteBandwidth[k] = 0.0;
4055 }
4056 }
4057
4058
4059
4060 v->WritebackLatencySupport = true;
4061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4062 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4063 v->WritebackLatencySupport = false;
4064 }
4065 }
4066
4067
4068
4069 v->TotalNumberOfActiveWriteback = 0;
4070 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4071 if (v->WritebackEnable[k] == true) {
4072 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4073 }
4074 }
4075
4076 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4077 EnoughWritebackUnits = false;
4078 }
4079
4080
4081
4082 v->WritebackScaleRatioAndTapsSupport = true;
4083 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4084 if (v->WritebackEnable[k] == true) {
4085 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4086 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4087 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4088 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4089 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4090 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4091 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4092 v->WritebackScaleRatioAndTapsSupport = false;
4093 }
4094 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4095 v->WritebackScaleRatioAndTapsSupport = false;
4096 }
4097 }
4098 }
4099
4100
4101 v->WritebackRequiredDISPCLK = 0.0;
4102 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4103 if (v->WritebackEnable[k] == true) {
4104 v->WritebackRequiredDISPCLK = dml_max(
4105 v->WritebackRequiredDISPCLK,
4106 dml31_CalculateWriteBackDISPCLK(
4107 v->WritebackPixelFormat[k],
4108 v->PixelClock[k],
4109 v->WritebackHRatio[k],
4110 v->WritebackVRatio[k],
4111 v->WritebackHTaps[k],
4112 v->WritebackVTaps[k],
4113 v->WritebackSourceWidth[k],
4114 v->WritebackDestinationWidth[k],
4115 v->HTotal[k],
4116 v->WritebackLineBufferSize));
4117 }
4118 }
4119 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4120 if (v->HRatio[k] > 1.0) {
4121 v->PSCL_FACTOR[k] = dml_min(
4122 v->MaxDCHUBToPSCLThroughput,
4123 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4124 } else {
4125 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4126 }
4127 if (v->BytePerPixelC[k] == 0.0) {
4128 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4129 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4130 * dml_max3(
4131 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4132 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4133 1.0);
4134 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4135 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4136 }
4137 } else {
4138 if (v->HRatioChroma[k] > 1.0) {
4139 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4140 v->MaxDCHUBToPSCLThroughput,
4141 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4142 } else {
4143 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4144 }
4145 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4146 * dml_max5(
4147 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4148 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4149 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4150 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4151 1.0);
4152 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4153 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4154 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4155 }
4156 }
4157 }
4158 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4159 int MaximumSwathWidthSupportLuma;
4160 int MaximumSwathWidthSupportChroma;
4161
4162 if (v->SurfaceTiling[k] == dm_sw_linear) {
4163 MaximumSwathWidthSupportLuma = 8192.0;
4164 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4165 MaximumSwathWidthSupportLuma = 2880.0;
4166 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4167 MaximumSwathWidthSupportLuma = 3840.0;
4168 } else {
4169 MaximumSwathWidthSupportLuma = 5760.0;
4170 }
4171
4172 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4173 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4174 } else {
4175 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4176 }
4177 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4178 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4179 if (v->BytePerPixelC[k] == 0.0) {
4180 v->MaximumSwathWidthInLineBufferChroma = 0;
4181 } else {
4182 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4183 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4184 }
4185 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4186 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4187 }
4188
4189 CalculateSwathAndDETConfiguration(
4190 true,
4191 v->NumberOfActivePlanes,
4192 v->DETBufferSizeInKByte[0],
4193 v->MaximumSwathWidthLuma,
4194 v->MaximumSwathWidthChroma,
4195 v->SourceScan,
4196 v->SourcePixelFormat,
4197 v->SurfaceTiling,
4198 v->ViewportWidth,
4199 v->ViewportHeight,
4200 v->SurfaceWidthY,
4201 v->SurfaceWidthC,
4202 v->SurfaceHeightY,
4203 v->SurfaceHeightC,
4204 v->Read256BlockHeightY,
4205 v->Read256BlockHeightC,
4206 v->Read256BlockWidthY,
4207 v->Read256BlockWidthC,
4208 v->odm_combine_dummy,
4209 v->BlendingAndTiming,
4210 v->BytePerPixelY,
4211 v->BytePerPixelC,
4212 v->BytePerPixelInDETY,
4213 v->BytePerPixelInDETC,
4214 v->HActive,
4215 v->HRatio,
4216 v->HRatioChroma,
4217 v->NoOfDPPThisState,
4218 v->swath_width_luma_ub_this_state,
4219 v->swath_width_chroma_ub_this_state,
4220 v->SwathWidthYThisState,
4221 v->SwathWidthCThisState,
4222 v->SwathHeightYThisState,
4223 v->SwathHeightCThisState,
4224 v->DETBufferSizeYThisState,
4225 v->DETBufferSizeCThisState,
4226 v->SingleDPPViewportSizeSupportPerPlane,
4227 &v->ViewportSizeSupport[0][0]);
4228
4229 for (i = 0; i < v->soc.num_states; i++) {
4230 for (j = 0; j < 2; j++) {
4231 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4232 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4233 v->RequiredDISPCLK[i][j] = 0.0;
4234 v->DISPCLK_DPPCLK_Support[i][j] = true;
4235 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4236 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4237 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4238 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4239 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4240 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4241 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4242 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4243 }
4244 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4245 * (1 + v->DISPCLKRampingMargin / 100.0);
4246 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4247 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4248 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4249 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4250 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4251 }
4252 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4253 * (1 + v->DISPCLKRampingMargin / 100.0);
4254 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4255 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4256 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4257 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4258 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4259 }
4260
4261 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4262 || !(v->Output[k] == dm_dp ||
4263 v->Output[k] == dm_edp)) {
4264 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4265 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4266
4267 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4268 FMTBufferExceeded = true;
4269 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4270 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4271 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4272 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4273 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4274 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4275 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4276 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4277 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4278 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4279 } else {
4280 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4281 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4282 }
4283 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4284 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4285 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4286 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4287 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4288 } else {
4289 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4290 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4291 }
4292 }
4293 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4294 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4295 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4296 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4297 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4298
4299 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4300 FMTBufferExceeded = true;
4301 } else {
4302 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4303 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4304 }
4305 }
4306 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4307 v->MPCCombine[i][j][k] = false;
4308 v->NoOfDPP[i][j][k] = 4;
4309 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4310 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4311 v->MPCCombine[i][j][k] = false;
4312 v->NoOfDPP[i][j][k] = 2;
4313 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4314 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4315 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4316 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4320 } else {
4321 v->MPCCombine[i][j][k] = true;
4322 v->NoOfDPP[i][j][k] = 2;
4323 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4324 }
4325 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4326 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4327 > v->MaxDppclkRoundedDownToDFSGranularity)
4328 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4329 v->DISPCLK_DPPCLK_Support[i][j] = false;
4330 }
4331 }
4332 v->TotalNumberOfActiveDPP[i][j] = 0;
4333 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4334 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4336 if (v->NoOfDPP[i][j][k] == 1)
4337 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4338 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4339 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4340 NoChroma = false;
4341 }
4342
4343
4344 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4345 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4346 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4347 double BWOfNonSplitPlaneOfMaximumBandwidth;
4348 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4349 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4350 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4351 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4352 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4353 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4354 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4355 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4356 }
4357 }
4358 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4359 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4360 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4361 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4362 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4363 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4364 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4365 }
4366 }
4367 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4368 v->RequiredDISPCLK[i][j] = 0.0;
4369 v->DISPCLK_DPPCLK_Support[i][j] = true;
4370 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4371 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4372 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4373 v->MPCCombine[i][j][k] = true;
4374 v->NoOfDPP[i][j][k] = 2;
4375 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4376 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4377 } else {
4378 v->MPCCombine[i][j][k] = false;
4379 v->NoOfDPP[i][j][k] = 1;
4380 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4381 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4382 }
4383 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4384 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4385 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4386 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4387 } else {
4388 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4389 }
4390 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4391 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4392 > v->MaxDppclkRoundedDownToDFSGranularity)
4393 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4394 v->DISPCLK_DPPCLK_Support[i][j] = false;
4395 }
4396 }
4397 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4398 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4399 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4400 }
4401 }
4402 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4403 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4404 v->DISPCLK_DPPCLK_Support[i][j] = false;
4405 }
4406 }
4407 }
4408
4409
4410
4411 for (i = 0; i < v->soc.num_states; i++) {
4412 for (j = 0; j < 2; j++) {
4413 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4414 v->TotalAvailablePipesSupport[i][j] = true;
4415 } else {
4416 v->TotalAvailablePipesSupport[i][j] = false;
4417 }
4418 }
4419 }
4420
4421
4422 v->NonsupportedDSCInputBPC = false;
4423 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4424 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4425 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4426 v->NonsupportedDSCInputBPC = true;
4427 }
4428 }
4429
4430
4431 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4432 if (v->BlendingAndTiming[k] == k) {
4433 if (v->PixelClockBackEnd[k] > 3200) {
4434 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4435 } else if (v->PixelClockBackEnd[k] > 1360) {
4436 v->NumberOfDSCSlices[k] = 8;
4437 } else if (v->PixelClockBackEnd[k] > 680) {
4438 v->NumberOfDSCSlices[k] = 4;
4439 } else if (v->PixelClockBackEnd[k] > 340) {
4440 v->NumberOfDSCSlices[k] = 2;
4441 } else {
4442 v->NumberOfDSCSlices[k] = 1;
4443 }
4444 } else {
4445 v->NumberOfDSCSlices[k] = 0;
4446 }
4447 }
4448
4449 for (i = 0; i < v->soc.num_states; i++) {
4450 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4451 v->RequiresDSC[i][k] = false;
4452 v->RequiresFEC[i][k] = false;
4453 if (v->BlendingAndTiming[k] == k) {
4454 if (v->Output[k] == dm_hdmi) {
4455 v->RequiresDSC[i][k] = false;
4456 v->RequiresFEC[i][k] = false;
4457 v->OutputBppPerState[i][k] = TruncToValidBPP(
4458 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4459 3,
4460 v->HTotal[k],
4461 v->HActive[k],
4462 v->PixelClockBackEnd[k],
4463 v->ForcedOutputLinkBPP[k],
4464 false,
4465 v->Output[k],
4466 v->OutputFormat[k],
4467 v->DSCInputBitPerComponent[k],
4468 v->NumberOfDSCSlices[k],
4469 v->AudioSampleRate[k],
4470 v->AudioSampleLayout[k],
4471 v->ODMCombineEnablePerState[i][k]);
4472 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4473 if (v->DSCEnable[k] == true) {
4474 v->RequiresDSC[i][k] = true;
4475 v->LinkDSCEnable = true;
4476 if (v->Output[k] == dm_dp) {
4477 v->RequiresFEC[i][k] = true;
4478 } else {
4479 v->RequiresFEC[i][k] = false;
4480 }
4481 } else {
4482 v->RequiresDSC[i][k] = false;
4483 v->LinkDSCEnable = false;
4484 v->RequiresFEC[i][k] = false;
4485 }
4486
4487 v->Outbpp = BPP_INVALID;
4488 if (v->PHYCLKPerState[i] >= 270.0) {
4489 v->Outbpp = TruncToValidBPP(
4490 (1.0 - v->Downspreading / 100.0) * 2700,
4491 v->OutputLinkDPLanes[k],
4492 v->HTotal[k],
4493 v->HActive[k],
4494 v->PixelClockBackEnd[k],
4495 v->ForcedOutputLinkBPP[k],
4496 v->LinkDSCEnable,
4497 v->Output[k],
4498 v->OutputFormat[k],
4499 v->DSCInputBitPerComponent[k],
4500 v->NumberOfDSCSlices[k],
4501 v->AudioSampleRate[k],
4502 v->AudioSampleLayout[k],
4503 v->ODMCombineEnablePerState[i][k]);
4504 v->OutputBppPerState[i][k] = v->Outbpp;
4505
4506
4507 }
4508 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4509 v->Outbpp = TruncToValidBPP(
4510 (1.0 - v->Downspreading / 100.0) * 5400,
4511 v->OutputLinkDPLanes[k],
4512 v->HTotal[k],
4513 v->HActive[k],
4514 v->PixelClockBackEnd[k],
4515 v->ForcedOutputLinkBPP[k],
4516 v->LinkDSCEnable,
4517 v->Output[k],
4518 v->OutputFormat[k],
4519 v->DSCInputBitPerComponent[k],
4520 v->NumberOfDSCSlices[k],
4521 v->AudioSampleRate[k],
4522 v->AudioSampleLayout[k],
4523 v->ODMCombineEnablePerState[i][k]);
4524 v->OutputBppPerState[i][k] = v->Outbpp;
4525
4526
4527 }
4528 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4529 v->Outbpp = TruncToValidBPP(
4530 (1.0 - v->Downspreading / 100.0) * 8100,
4531 v->OutputLinkDPLanes[k],
4532 v->HTotal[k],
4533 v->HActive[k],
4534 v->PixelClockBackEnd[k],
4535 v->ForcedOutputLinkBPP[k],
4536 v->LinkDSCEnable,
4537 v->Output[k],
4538 v->OutputFormat[k],
4539 v->DSCInputBitPerComponent[k],
4540 v->NumberOfDSCSlices[k],
4541 v->AudioSampleRate[k],
4542 v->AudioSampleLayout[k],
4543 v->ODMCombineEnablePerState[i][k]);
4544 v->OutputBppPerState[i][k] = v->Outbpp;
4545
4546
4547 }
4548 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4549 v->Outbpp = TruncToValidBPP(
4550 (1.0 - v->Downspreading / 100.0) * 10000,
4551 4,
4552 v->HTotal[k],
4553 v->HActive[k],
4554 v->PixelClockBackEnd[k],
4555 v->ForcedOutputLinkBPP[k],
4556 v->LinkDSCEnable,
4557 v->Output[k],
4558 v->OutputFormat[k],
4559 v->DSCInputBitPerComponent[k],
4560 v->NumberOfDSCSlices[k],
4561 v->AudioSampleRate[k],
4562 v->AudioSampleLayout[k],
4563 v->ODMCombineEnablePerState[i][k]);
4564 v->OutputBppPerState[i][k] = v->Outbpp;
4565
4566 }
4567 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4568 v->Outbpp = TruncToValidBPP(
4569 12000,
4570 4,
4571 v->HTotal[k],
4572 v->HActive[k],
4573 v->PixelClockBackEnd[k],
4574 v->ForcedOutputLinkBPP[k],
4575 v->LinkDSCEnable,
4576 v->Output[k],
4577 v->OutputFormat[k],
4578 v->DSCInputBitPerComponent[k],
4579 v->NumberOfDSCSlices[k],
4580 v->AudioSampleRate[k],
4581 v->AudioSampleLayout[k],
4582 v->ODMCombineEnablePerState[i][k]);
4583 v->OutputBppPerState[i][k] = v->Outbpp;
4584
4585 }
4586 }
4587 } else {
4588 v->OutputBppPerState[i][k] = 0;
4589 }
4590 }
4591 }
4592
4593 for (i = 0; i < v->soc.num_states; i++) {
4594 v->LinkCapacitySupport[i] = true;
4595 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4596 if (v->BlendingAndTiming[k] == k
4597 && (v->Output[k] == dm_dp ||
4598 v->Output[k] == dm_edp ||
4599 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4600 v->LinkCapacitySupport[i] = false;
4601 }
4602 }
4603 }
4604
4605
4606 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4607 if (v->BlendingAndTiming[k] == k
4608 && (v->Output[k] == dm_dp ||
4609 v->Output[k] == dm_edp ||
4610 v->Output[k] == dm_hdmi)) {
4611 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4612 P2IWith420 = true;
4613 }
4614 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4615 && !v->DSC422NativeSupport) {
4616 DSC422NativeNotSupported = true;
4617 }
4618 }
4619 }
4620
4621 for (i = 0; i < v->soc.num_states; ++i) {
4622 v->ODMCombine4To1SupportCheckOK[i] = true;
4623 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4624 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4625 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4626 || v->Output[k] == dm_hdmi)) {
4627 v->ODMCombine4To1SupportCheckOK[i] = false;
4628 }
4629 }
4630 }
4631
4632
4633
4634 for (i = 0; i < v->soc.num_states; i++) {
4635 v->NotEnoughDSCUnits[i] = false;
4636 v->TotalDSCUnitsRequired = 0.0;
4637 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4638 if (v->RequiresDSC[i][k] == true) {
4639 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4640 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4641 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4642 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4643 } else {
4644 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4645 }
4646 }
4647 }
4648 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4649 v->NotEnoughDSCUnits[i] = true;
4650 }
4651 }
4652
4653
4654 for (i = 0; i < v->soc.num_states; i++) {
4655 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4656 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4657 v->BPP = 0.0;
4658 } else {
4659 v->BPP = v->OutputBppPerState[i][k];
4660 }
4661 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4662 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4663 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4664 v->DSCInputBitPerComponent[k],
4665 v->BPP,
4666 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4667 v->NumberOfDSCSlices[k],
4668 v->OutputFormat[k],
4669 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4670 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4671 v->DSCDelayPerState[i][k] = 2.0
4672 * (dscceComputeDelay(
4673 v->DSCInputBitPerComponent[k],
4674 v->BPP,
4675 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4676 v->NumberOfDSCSlices[k] / 2,
4677 v->OutputFormat[k],
4678 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4679 } else {
4680 v->DSCDelayPerState[i][k] = 4.0
4681 * (dscceComputeDelay(
4682 v->DSCInputBitPerComponent[k],
4683 v->BPP,
4684 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4685 v->NumberOfDSCSlices[k] / 4,
4686 v->OutputFormat[k],
4687 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4688 }
4689 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4690 } else {
4691 v->DSCDelayPerState[i][k] = 0.0;
4692 }
4693 }
4694 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4695 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4696 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4697 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4698 }
4699 }
4700 }
4701 }
4702
4703
4704
4705 for (i = 0; i < v->soc.num_states; ++i) {
4706 for (j = 0; j <= 1; ++j) {
4707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4708 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4709 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4710 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4711 }
4712
4713 CalculateSwathAndDETConfiguration(
4714 false,
4715 v->NumberOfActivePlanes,
4716 v->DETBufferSizeInKByte[0],
4717 v->MaximumSwathWidthLuma,
4718 v->MaximumSwathWidthChroma,
4719 v->SourceScan,
4720 v->SourcePixelFormat,
4721 v->SurfaceTiling,
4722 v->ViewportWidth,
4723 v->ViewportHeight,
4724 v->SurfaceWidthY,
4725 v->SurfaceWidthC,
4726 v->SurfaceHeightY,
4727 v->SurfaceHeightC,
4728 v->Read256BlockHeightY,
4729 v->Read256BlockHeightC,
4730 v->Read256BlockWidthY,
4731 v->Read256BlockWidthC,
4732 v->ODMCombineEnableThisState,
4733 v->BlendingAndTiming,
4734 v->BytePerPixelY,
4735 v->BytePerPixelC,
4736 v->BytePerPixelInDETY,
4737 v->BytePerPixelInDETC,
4738 v->HActive,
4739 v->HRatio,
4740 v->HRatioChroma,
4741 v->NoOfDPPThisState,
4742 v->swath_width_luma_ub_this_state,
4743 v->swath_width_chroma_ub_this_state,
4744 v->SwathWidthYThisState,
4745 v->SwathWidthCThisState,
4746 v->SwathHeightYThisState,
4747 v->SwathHeightCThisState,
4748 v->DETBufferSizeYThisState,
4749 v->DETBufferSizeCThisState,
4750 v->dummystring,
4751 &v->ViewportSizeSupport[i][j]);
4752
4753 CalculateDCFCLKDeepSleep(
4754 mode_lib,
4755 v->NumberOfActivePlanes,
4756 v->BytePerPixelY,
4757 v->BytePerPixelC,
4758 v->VRatio,
4759 v->VRatioChroma,
4760 v->SwathWidthYThisState,
4761 v->SwathWidthCThisState,
4762 v->NoOfDPPThisState,
4763 v->HRatio,
4764 v->HRatioChroma,
4765 v->PixelClock,
4766 v->PSCL_FACTOR,
4767 v->PSCL_FACTOR_CHROMA,
4768 v->RequiredDPPCLKThisState,
4769 v->ReadBandwidthLuma,
4770 v->ReadBandwidthChroma,
4771 v->ReturnBusWidth,
4772 &v->ProjectedDCFCLKDeepSleep[i][j]);
4773
4774 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4775 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4776 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4777 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4778 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4779 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4780 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4781 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4782 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4783 }
4784 }
4785 }
4786
4787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4788 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4789 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4790 }
4791
4792 for (i = 0; i < v->soc.num_states; i++) {
4793 for (j = 0; j < 2; j++) {
4794 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4795
4796 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4797 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4798 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4799 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4800 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4801 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4802 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4803 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4804 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4805 }
4806
4807 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4809 if (v->DCCEnable[k] == true) {
4810 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4811 }
4812 }
4813
4814 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4815 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4816 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4817
4818 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4819 && v->SourceScan[k] != dm_vert) {
4820 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4821 / 2;
4822 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4823 } else {
4824 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4825 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4826 }
4827
4828 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4829 mode_lib,
4830 v->DCCEnable[k],
4831 v->Read256BlockHeightC[k],
4832 v->Read256BlockWidthC[k],
4833 v->SourcePixelFormat[k],
4834 v->SurfaceTiling[k],
4835 v->BytePerPixelC[k],
4836 v->SourceScan[k],
4837 v->SwathWidthCThisState[k],
4838 v->ViewportHeightChroma[k],
4839 v->GPUVMEnable,
4840 v->HostVMEnable,
4841 v->HostVMMaxNonCachedPageTableLevels,
4842 v->GPUVMMinPageSize,
4843 v->HostVMMinPageSize,
4844 v->PTEBufferSizeInRequestsForChroma,
4845 v->PitchC[k],
4846 0.0,
4847 &v->MacroTileWidthC[k],
4848 &v->MetaRowBytesC,
4849 &v->DPTEBytesPerRowC,
4850 &v->PTEBufferSizeNotExceededC[i][j][k],
4851 &v->dummyinteger7,
4852 &v->dpte_row_height_chroma[k],
4853 &v->dummyinteger28,
4854 &v->dummyinteger26,
4855 &v->dummyinteger23,
4856 &v->meta_row_height_chroma[k],
4857 &v->dummyinteger8,
4858 &v->dummyinteger9,
4859 &v->dummyinteger19,
4860 &v->dummyinteger20,
4861 &v->dummyinteger17,
4862 &v->dummyinteger10,
4863 &v->dummyinteger11);
4864
4865 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4866 mode_lib,
4867 v->VRatioChroma[k],
4868 v->VTAPsChroma[k],
4869 v->Interlace[k],
4870 v->ProgressiveToInterlaceUnitInOPP,
4871 v->SwathHeightCThisState[k],
4872 v->ViewportYStartC[k],
4873 &v->PrefillC[k],
4874 &v->MaxNumSwC[k]);
4875 } else {
4876 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4877 v->PTEBufferSizeInRequestsForChroma = 0;
4878 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4879 v->MetaRowBytesC = 0.0;
4880 v->DPTEBytesPerRowC = 0.0;
4881 v->PrefetchLinesC[i][j][k] = 0.0;
4882 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4883 }
4884 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4885 mode_lib,
4886 v->DCCEnable[k],
4887 v->Read256BlockHeightY[k],
4888 v->Read256BlockWidthY[k],
4889 v->SourcePixelFormat[k],
4890 v->SurfaceTiling[k],
4891 v->BytePerPixelY[k],
4892 v->SourceScan[k],
4893 v->SwathWidthYThisState[k],
4894 v->ViewportHeight[k],
4895 v->GPUVMEnable,
4896 v->HostVMEnable,
4897 v->HostVMMaxNonCachedPageTableLevels,
4898 v->GPUVMMinPageSize,
4899 v->HostVMMinPageSize,
4900 v->PTEBufferSizeInRequestsForLuma,
4901 v->PitchY[k],
4902 v->DCCMetaPitchY[k],
4903 &v->MacroTileWidthY[k],
4904 &v->MetaRowBytesY,
4905 &v->DPTEBytesPerRowY,
4906 &v->PTEBufferSizeNotExceededY[i][j][k],
4907 &v->dummyinteger7,
4908 &v->dpte_row_height[k],
4909 &v->dummyinteger29,
4910 &v->dummyinteger27,
4911 &v->dummyinteger24,
4912 &v->meta_row_height[k],
4913 &v->dummyinteger25,
4914 &v->dpte_group_bytes[k],
4915 &v->dummyinteger21,
4916 &v->dummyinteger22,
4917 &v->dummyinteger18,
4918 &v->dummyinteger5,
4919 &v->dummyinteger6);
4920 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4921 mode_lib,
4922 v->VRatio[k],
4923 v->vtaps[k],
4924 v->Interlace[k],
4925 v->ProgressiveToInterlaceUnitInOPP,
4926 v->SwathHeightYThisState[k],
4927 v->ViewportYStartY[k],
4928 &v->PrefillY[k],
4929 &v->MaxNumSwY[k]);
4930 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4931 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4932 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4933
4934 CalculateRowBandwidth(
4935 v->GPUVMEnable,
4936 v->SourcePixelFormat[k],
4937 v->VRatio[k],
4938 v->VRatioChroma[k],
4939 v->DCCEnable[k],
4940 v->HTotal[k] / v->PixelClock[k],
4941 v->MetaRowBytesY,
4942 v->MetaRowBytesC,
4943 v->meta_row_height[k],
4944 v->meta_row_height_chroma[k],
4945 v->DPTEBytesPerRowY,
4946 v->DPTEBytesPerRowC,
4947 v->dpte_row_height[k],
4948 v->dpte_row_height_chroma[k],
4949 &v->meta_row_bandwidth[i][j][k],
4950 &v->dpte_row_bandwidth[i][j][k]);
4951 }
4952 v->UrgLatency[i] = CalculateUrgentLatency(
4953 v->UrgentLatencyPixelDataOnly,
4954 v->UrgentLatencyPixelMixedWithVMData,
4955 v->UrgentLatencyVMDataOnly,
4956 v->DoUrgentLatencyAdjustment,
4957 v->UrgentLatencyAdjustmentFabricClockComponent,
4958 v->UrgentLatencyAdjustmentFabricClockReference,
4959 v->FabricClockPerState[i]);
4960
4961 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4962 CalculateUrgentBurstFactor(
4963 v->swath_width_luma_ub_this_state[k],
4964 v->swath_width_chroma_ub_this_state[k],
4965 v->SwathHeightYThisState[k],
4966 v->SwathHeightCThisState[k],
4967 v->HTotal[k] / v->PixelClock[k],
4968 v->UrgLatency[i],
4969 v->CursorBufferSize,
4970 v->CursorWidth[k][0],
4971 v->CursorBPP[k][0],
4972 v->VRatio[k],
4973 v->VRatioChroma[k],
4974 v->BytePerPixelInDETY[k],
4975 v->BytePerPixelInDETC[k],
4976 v->DETBufferSizeYThisState[k],
4977 v->DETBufferSizeCThisState[k],
4978 &v->UrgentBurstFactorCursor[k],
4979 &v->UrgentBurstFactorLuma[k],
4980 &v->UrgentBurstFactorChroma[k],
4981 &NotUrgentLatencyHiding[k]);
4982 }
4983
4984 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4985 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4986 if (NotUrgentLatencyHiding[k]) {
4987 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4988 }
4989 }
4990
4991 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4992 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4993 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4994 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4995 }
4996
4997 v->TotalVActivePixelBandwidth[i][j] = 0;
4998 v->TotalVActiveCursorBandwidth[i][j] = 0;
4999 v->TotalMetaRowBandwidth[i][j] = 0;
5000 v->TotalDPTERowBandwidth[i][j] = 0;
5001 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5002 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5003 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5004 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5005 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5006 }
5007 }
5008 }
5009
5010
5011 for (i = 0; i < v->soc.num_states; ++i) {
5012 for (j = 0; j <= 1; ++j) {
5013 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5014 if (v->BlendingAndTiming[k] == k) {
5015 if (v->WritebackEnable[k] == true) {
5016 v->WritebackDelayTime[k] = v->WritebackLatency
5017 + CalculateWriteBackDelay(
5018 v->WritebackPixelFormat[k],
5019 v->WritebackHRatio[k],
5020 v->WritebackVRatio[k],
5021 v->WritebackVTaps[k],
5022 v->WritebackDestinationWidth[k],
5023 v->WritebackDestinationHeight[k],
5024 v->WritebackSourceHeight[k],
5025 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5026 } else {
5027 v->WritebackDelayTime[k] = 0.0;
5028 }
5029 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5030 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5031 v->WritebackDelayTime[k] = dml_max(
5032 v->WritebackDelayTime[k],
5033 v->WritebackLatency
5034 + CalculateWriteBackDelay(
5035 v->WritebackPixelFormat[m],
5036 v->WritebackHRatio[m],
5037 v->WritebackVRatio[m],
5038 v->WritebackVTaps[m],
5039 v->WritebackDestinationWidth[m],
5040 v->WritebackDestinationHeight[m],
5041 v->WritebackSourceHeight[m],
5042 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5043 }
5044 }
5045 }
5046 }
5047 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5048 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5049 if (v->BlendingAndTiming[k] == m) {
5050 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5051 }
5052 }
5053 }
5054 v->MaxMaxVStartup[i][j] = 0;
5055 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5056 v->MaximumVStartup[i][j][k] =
5057 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5058 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5059 v->VTotal[k] - v->VActive[k]
5060 - dml_max(
5061 1.0,
5062 dml_ceil(
5063 1.0 * v->WritebackDelayTime[k]
5064 / (v->HTotal[k]
5065 / v->PixelClock[k]),
5066 1.0));
5067 if (v->MaximumVStartup[i][j][k] > 1023)
5068 v->MaximumVStartup[i][j][k] = 1023;
5069 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5070 }
5071 }
5072 }
5073
5074 ReorderingBytes = v->NumberOfChannels
5075 * dml_max3(
5076 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5077 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5078 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5079
5080 for (i = 0; i < v->soc.num_states; ++i) {
5081 for (j = 0; j <= 1; ++j) {
5082 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5083 }
5084 }
5085
5086 if (v->UseMinimumRequiredDCFCLK == true) {
5087 UseMinimumDCFCLK(
5088 mode_lib,
5089 v->MaxInterDCNTileRepeaters,
5090 MaxPrefetchMode,
5091 v->DRAMClockChangeLatency,
5092 v->SREnterPlusExitTime,
5093 v->ReturnBusWidth,
5094 v->RoundTripPingLatencyCycles,
5095 ReorderingBytes,
5096 v->PixelChunkSizeInKByte,
5097 v->MetaChunkSize,
5098 v->GPUVMEnable,
5099 v->GPUVMMaxPageTableLevels,
5100 v->HostVMEnable,
5101 v->NumberOfActivePlanes,
5102 v->HostVMMinPageSize,
5103 v->HostVMMaxNonCachedPageTableLevels,
5104 v->DynamicMetadataVMEnabled,
5105 v->ImmediateFlipRequirement[0],
5106 v->ProgressiveToInterlaceUnitInOPP,
5107 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
5108 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
5109 v->VTotal,
5110 v->VActive,
5111 v->DynamicMetadataTransmittedBytes,
5112 v->DynamicMetadataLinesBeforeActiveRequired,
5113 v->Interlace,
5114 v->RequiredDPPCLK,
5115 v->RequiredDISPCLK,
5116 v->UrgLatency,
5117 v->NoOfDPP,
5118 v->ProjectedDCFCLKDeepSleep,
5119 v->MaximumVStartup,
5120 v->TotalVActivePixelBandwidth,
5121 v->TotalVActiveCursorBandwidth,
5122 v->TotalMetaRowBandwidth,
5123 v->TotalDPTERowBandwidth,
5124 v->TotalNumberOfActiveDPP,
5125 v->TotalNumberOfDCCActiveDPP,
5126 v->dpte_group_bytes,
5127 v->PrefetchLinesY,
5128 v->PrefetchLinesC,
5129 v->swath_width_luma_ub_all_states,
5130 v->swath_width_chroma_ub_all_states,
5131 v->BytePerPixelY,
5132 v->BytePerPixelC,
5133 v->HTotal,
5134 v->PixelClock,
5135 v->PDEAndMetaPTEBytesPerFrame,
5136 v->DPTEBytesPerRow,
5137 v->MetaRowBytes,
5138 v->DynamicMetadataEnable,
5139 v->VActivePixelBandwidth,
5140 v->VActiveCursorBandwidth,
5141 v->ReadBandwidthLuma,
5142 v->ReadBandwidthChroma,
5143 v->DCFCLKPerState,
5144 v->DCFCLKState);
5145 }
5146
5147 for (i = 0; i < v->soc.num_states; ++i) {
5148 for (j = 0; j <= 1; ++j) {
5149 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5150 v->ReturnBusWidth * v->DCFCLKState[i][j],
5151 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5152 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5153 double PixelDataOnlyReturnBWPerState = dml_min(
5154 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5155 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5156 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5157 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5158 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5159
5160 if (v->HostVMEnable != true) {
5161 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5162 } else {
5163 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5164 }
5165 }
5166 }
5167
5168
5169 for (i = 0; i < v->soc.num_states; ++i) {
5170 for (j = 0; j <= 1; ++j) {
5171 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5172 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5173 v->ROBSupport[i][j] = true;
5174 } else {
5175 v->ROBSupport[i][j] = false;
5176 }
5177 }
5178 }
5179
5180
5181
5182 MaxTotalVActiveRDBandwidth = 0;
5183 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5184 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5185 }
5186
5187 for (i = 0; i < v->soc.num_states; ++i) {
5188 for (j = 0; j <= 1; ++j) {
5189 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5190 dml_min(
5191 v->ReturnBusWidth * v->DCFCLKState[i][j],
5192 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5193 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5194 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5195 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5196
5197 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5198 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5199 } else {
5200 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5201 }
5202 }
5203 }
5204
5205 v->UrgentLatency = CalculateUrgentLatency(
5206 v->UrgentLatencyPixelDataOnly,
5207 v->UrgentLatencyPixelMixedWithVMData,
5208 v->UrgentLatencyVMDataOnly,
5209 v->DoUrgentLatencyAdjustment,
5210 v->UrgentLatencyAdjustmentFabricClockComponent,
5211 v->UrgentLatencyAdjustmentFabricClockReference,
5212 v->FabricClock);
5213
5214 for (i = 0; i < v->soc.num_states; ++i) {
5215 for (j = 0; j <= 1; ++j) {
5216 double VMDataOnlyReturnBWPerState;
5217 double HostVMInefficiencyFactor = 1;
5218 int NextPrefetchModeState = MinPrefetchMode;
5219 bool UnboundedRequestEnabledThisState = false;
5220 int CompressedBufferSizeInkByteThisState = 0;
5221 double dummy;
5222
5223 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5224
5225 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5226 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5227 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5228 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5229 }
5230
5231 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5232 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5233 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5234 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5235 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5236 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5237 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5238 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5239 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5240 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5241 }
5242
5243 VMDataOnlyReturnBWPerState = dml_min(
5244 dml_min(
5245 v->ReturnBusWidth * v->DCFCLKState[i][j],
5246 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5247 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5248 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5249 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5250 if (v->GPUVMEnable && v->HostVMEnable)
5251 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5252
5253 v->ExtraLatency = CalculateExtraLatency(
5254 v->RoundTripPingLatencyCycles,
5255 ReorderingBytes,
5256 v->DCFCLKState[i][j],
5257 v->TotalNumberOfActiveDPP[i][j],
5258 v->PixelChunkSizeInKByte,
5259 v->TotalNumberOfDCCActiveDPP[i][j],
5260 v->MetaChunkSize,
5261 v->ReturnBWPerState[i][j],
5262 v->GPUVMEnable,
5263 v->HostVMEnable,
5264 v->NumberOfActivePlanes,
5265 v->NoOfDPPThisState,
5266 v->dpte_group_bytes,
5267 HostVMInefficiencyFactor,
5268 v->HostVMMinPageSize,
5269 v->HostVMMaxNonCachedPageTableLevels);
5270
5271 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5272 do {
5273 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5274 v->MaxVStartup = v->NextMaxVStartup;
5275
5276 v->TWait = CalculateTWait(
5277 v->PrefetchModePerState[i][j],
5278 v->DRAMClockChangeLatency,
5279 v->UrgLatency[i],
5280 v->SREnterPlusExitTime);
5281
5282 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5283 Pipe myPipe;
5284
5285 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
5286 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
5287 myPipe.PixelClock = v->PixelClock[k];
5288 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
5289 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
5290 myPipe.ScalerEnabled = v->ScalerEnabled[k];
5291 myPipe.SourceScan = v->SourceScan[k];
5292 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
5293 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
5294 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
5295 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
5296 myPipe.InterlaceEnable = v->Interlace[k];
5297 myPipe.NumberOfCursors = v->NumberOfCursors[k];
5298 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
5299 myPipe.HTotal = v->HTotal[k];
5300 myPipe.DCCEnable = v->DCCEnable[k];
5301 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
5302 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
5303 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
5304 myPipe.BytePerPixelY = v->BytePerPixelY[k];
5305 myPipe.BytePerPixelC = v->BytePerPixelC[k];
5306 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
5307 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
5308 mode_lib,
5309 HostVMInefficiencyFactor,
5310 &myPipe,
5311 v->DSCDelayPerState[i][k],
5312 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
5313 v->DPPCLKDelaySCL,
5314 v->DPPCLKDelaySCLLBOnly,
5315 v->DPPCLKDelayCNVCCursor,
5316 v->DISPCLKDelaySubtotal,
5317 v->SwathWidthYThisState[k] / v->HRatio[k],
5318 v->OutputFormat[k],
5319 v->MaxInterDCNTileRepeaters,
5320 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
5321 v->MaximumVStartup[i][j][k],
5322 v->GPUVMMaxPageTableLevels,
5323 v->GPUVMEnable,
5324 v->HostVMEnable,
5325 v->HostVMMaxNonCachedPageTableLevels,
5326 v->HostVMMinPageSize,
5327 v->DynamicMetadataEnable[k],
5328 v->DynamicMetadataVMEnabled,
5329 v->DynamicMetadataLinesBeforeActiveRequired[k],
5330 v->DynamicMetadataTransmittedBytes[k],
5331 v->UrgLatency[i],
5332 v->ExtraLatency,
5333 v->TimeCalc,
5334 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5335 v->MetaRowBytes[i][j][k],
5336 v->DPTEBytesPerRow[i][j][k],
5337 v->PrefetchLinesY[i][j][k],
5338 v->SwathWidthYThisState[k],
5339 v->PrefillY[k],
5340 v->MaxNumSwY[k],
5341 v->PrefetchLinesC[i][j][k],
5342 v->SwathWidthCThisState[k],
5343 v->PrefillC[k],
5344 v->MaxNumSwC[k],
5345 v->swath_width_luma_ub_this_state[k],
5346 v->swath_width_chroma_ub_this_state[k],
5347 v->SwathHeightYThisState[k],
5348 v->SwathHeightCThisState[k],
5349 v->TWait,
5350 &v->DSTXAfterScaler[k],
5351 &v->DSTYAfterScaler[k],
5352 &v->LineTimesForPrefetch[k],
5353 &v->PrefetchBW[k],
5354 &v->LinesForMetaPTE[k],
5355 &v->LinesForMetaAndDPTERow[k],
5356 &v->VRatioPreY[i][j][k],
5357 &v->VRatioPreC[i][j][k],
5358 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5359 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5360 &v->NoTimeForDynamicMetadata[i][j][k],
5361 &v->Tno_bw[k],
5362 &v->prefetch_vmrow_bw[k],
5363 &v->dummy7[k],
5364 &v->dummy8[k],
5365 &v->dummy13[k],
5366 &v->VUpdateOffsetPix[k],
5367 &v->VUpdateWidthPix[k],
5368 &v->VReadyOffsetPix[k]);
5369 }
5370
5371 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5372 CalculateUrgentBurstFactor(
5373 v->swath_width_luma_ub_this_state[k],
5374 v->swath_width_chroma_ub_this_state[k],
5375 v->SwathHeightYThisState[k],
5376 v->SwathHeightCThisState[k],
5377 v->HTotal[k] / v->PixelClock[k],
5378 v->UrgentLatency,
5379 v->CursorBufferSize,
5380 v->CursorWidth[k][0],
5381 v->CursorBPP[k][0],
5382 v->VRatioPreY[i][j][k],
5383 v->VRatioPreC[i][j][k],
5384 v->BytePerPixelInDETY[k],
5385 v->BytePerPixelInDETC[k],
5386 v->DETBufferSizeYThisState[k],
5387 v->DETBufferSizeCThisState[k],
5388 &v->UrgentBurstFactorCursorPre[k],
5389 &v->UrgentBurstFactorLumaPre[k],
5390 &v->UrgentBurstFactorChroma[k],
5391 &v->NotUrgentLatencyHidingPre[k]);
5392 }
5393
5394 v->MaximumReadBandwidthWithPrefetch = 0.0;
5395 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5396 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5397 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5398
5399 v->MaximumReadBandwidthWithPrefetch =
5400 v->MaximumReadBandwidthWithPrefetch
5401 + dml_max3(
5402 v->VActivePixelBandwidth[i][j][k]
5403 + v->VActiveCursorBandwidth[i][j][k]
5404 + v->NoOfDPP[i][j][k]
5405 * (v->meta_row_bandwidth[i][j][k]
5406 + v->dpte_row_bandwidth[i][j][k]),
5407 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5408 v->NoOfDPP[i][j][k]
5409 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5410 * v->UrgentBurstFactorLumaPre[k]
5411 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5412 * v->UrgentBurstFactorChromaPre[k])
5413 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5414 }
5415
5416 v->NotEnoughUrgentLatencyHidingPre = false;
5417 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5418 if (v->NotUrgentLatencyHidingPre[k] == true) {
5419 v->NotEnoughUrgentLatencyHidingPre = true;
5420 }
5421 }
5422
5423 v->PrefetchSupported[i][j] = true;
5424 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5425 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5426 v->PrefetchSupported[i][j] = false;
5427 }
5428 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5429 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5430 || v->NoTimeForPrefetch[i][j][k] == true) {
5431 v->PrefetchSupported[i][j] = false;
5432 }
5433 }
5434
5435 v->DynamicMetadataSupported[i][j] = true;
5436 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5437 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5438 v->DynamicMetadataSupported[i][j] = false;
5439 }
5440 }
5441
5442 v->VRatioInPrefetchSupported[i][j] = true;
5443 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5444 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5445 v->VRatioInPrefetchSupported[i][j] = false;
5446 }
5447 }
5448 v->AnyLinesForVMOrRowTooLarge = false;
5449 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5450 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5451 v->AnyLinesForVMOrRowTooLarge = true;
5452 }
5453 }
5454
5455 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5456
5457 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5458 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5459 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5460 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5461 - dml_max(
5462 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5463 v->NoOfDPP[i][j][k]
5464 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5465 * v->UrgentBurstFactorLumaPre[k]
5466 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5467 * v->UrgentBurstFactorChromaPre[k])
5468 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5469 }
5470 v->TotImmediateFlipBytes = 0.0;
5471 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5472 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5473 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5474 + v->DPTEBytesPerRow[i][j][k];
5475 }
5476
5477 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5478 CalculateFlipSchedule(
5479 mode_lib,
5480 HostVMInefficiencyFactor,
5481 v->ExtraLatency,
5482 v->UrgLatency[i],
5483 v->GPUVMMaxPageTableLevels,
5484 v->HostVMEnable,
5485 v->HostVMMaxNonCachedPageTableLevels,
5486 v->GPUVMEnable,
5487 v->HostVMMinPageSize,
5488 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5489 v->MetaRowBytes[i][j][k],
5490 v->DPTEBytesPerRow[i][j][k],
5491 v->BandwidthAvailableForImmediateFlip,
5492 v->TotImmediateFlipBytes,
5493 v->SourcePixelFormat[k],
5494 v->HTotal[k] / v->PixelClock[k],
5495 v->VRatio[k],
5496 v->VRatioChroma[k],
5497 v->Tno_bw[k],
5498 v->DCCEnable[k],
5499 v->dpte_row_height[k],
5500 v->meta_row_height[k],
5501 v->dpte_row_height_chroma[k],
5502 v->meta_row_height_chroma[k],
5503 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5504 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5505 &v->final_flip_bw[k],
5506 &v->ImmediateFlipSupportedForPipe[k]);
5507 }
5508 v->total_dcn_read_bw_with_flip = 0.0;
5509 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5510 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5511 + dml_max3(
5512 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5513 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5514 + v->VActiveCursorBandwidth[i][j][k],
5515 v->NoOfDPP[i][j][k]
5516 * (v->final_flip_bw[k]
5517 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5518 * v->UrgentBurstFactorLumaPre[k]
5519 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5520 * v->UrgentBurstFactorChromaPre[k])
5521 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5522 }
5523 v->ImmediateFlipSupportedForState[i][j] = true;
5524 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5525 v->ImmediateFlipSupportedForState[i][j] = false;
5526 }
5527 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5528 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5529 v->ImmediateFlipSupportedForState[i][j] = false;
5530 }
5531 }
5532 } else {
5533 v->ImmediateFlipSupportedForState[i][j] = false;
5534 }
5535
5536 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5537 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5538 NextPrefetchModeState = NextPrefetchModeState + 1;
5539 } else {
5540 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5541 }
5542 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5543 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5544 && ((v->HostVMEnable == false &&
5545 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5546 || v->ImmediateFlipSupportedForState[i][j] == true))
5547 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5548
5549 CalculateUnboundedRequestAndCompressedBufferSize(
5550 v->DETBufferSizeInKByte[0],
5551 v->ConfigReturnBufferSizeInKByte,
5552 v->UseUnboundedRequesting,
5553 v->TotalNumberOfActiveDPP[i][j],
5554 NoChroma,
5555 v->MaxNumDPP,
5556 v->CompressedBufferSegmentSizeInkByte,
5557 v->Output,
5558 &UnboundedRequestEnabledThisState,
5559 &CompressedBufferSizeInkByteThisState);
5560
5561 CalculateWatermarksAndDRAMSpeedChangeSupport(
5562 mode_lib,
5563 v->PrefetchModePerState[i][j],
5564 v->NumberOfActivePlanes,
5565 v->MaxLineBufferLines,
5566 v->LineBufferSize,
5567 v->WritebackInterfaceBufferSize,
5568 v->DCFCLKState[i][j],
5569 v->ReturnBWPerState[i][j],
5570 v->SynchronizedVBlank,
5571 v->dpte_group_bytes,
5572 v->MetaChunkSize,
5573 v->UrgLatency[i],
5574 v->ExtraLatency,
5575 v->WritebackLatency,
5576 v->WritebackChunkSize,
5577 v->SOCCLKPerState[i],
5578 v->DRAMClockChangeLatency,
5579 v->SRExitTime,
5580 v->SREnterPlusExitTime,
5581 v->SRExitZ8Time,
5582 v->SREnterPlusExitZ8Time,
5583 v->ProjectedDCFCLKDeepSleep[i][j],
5584 v->DETBufferSizeYThisState,
5585 v->DETBufferSizeCThisState,
5586 v->SwathHeightYThisState,
5587 v->SwathHeightCThisState,
5588 v->LBBitPerPixel,
5589 v->SwathWidthYThisState,
5590 v->SwathWidthCThisState,
5591 v->HRatio,
5592 v->HRatioChroma,
5593 v->vtaps,
5594 v->VTAPsChroma,
5595 v->VRatio,
5596 v->VRatioChroma,
5597 v->HTotal,
5598 v->PixelClock,
5599 v->BlendingAndTiming,
5600 v->NoOfDPPThisState,
5601 v->BytePerPixelInDETY,
5602 v->BytePerPixelInDETC,
5603 v->DSTXAfterScaler,
5604 v->DSTYAfterScaler,
5605 v->WritebackEnable,
5606 v->WritebackPixelFormat,
5607 v->WritebackDestinationWidth,
5608 v->WritebackDestinationHeight,
5609 v->WritebackSourceHeight,
5610 UnboundedRequestEnabledThisState,
5611 CompressedBufferSizeInkByteThisState,
5612 &v->DRAMClockChangeSupport[i][j],
5613 &v->UrgentWatermark,
5614 &v->WritebackUrgentWatermark,
5615 &v->DRAMClockChangeWatermark,
5616 &v->WritebackDRAMClockChangeWatermark,
5617 &dummy,
5618 &dummy,
5619 &dummy,
5620 &dummy,
5621 &v->MinActiveDRAMClockChangeLatencySupported);
5622 }
5623 }
5624
5625
5626 for (i = 0; i < v->soc.num_states; i++) {
5627 for (j = 0; j < 2; j++) {
5628 v->PTEBufferSizeNotExceeded[i][j] = true;
5629 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5630 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5631 v->PTEBufferSizeNotExceeded[i][j] = false;
5632 }
5633 }
5634 }
5635 }
5636
5637
5638 v->CursorSupport = true;
5639 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5640 if (v->CursorWidth[k][0] > 0.0) {
5641 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5642 v->CursorSupport = false;
5643 }
5644 }
5645 }
5646
5647
5648 v->PitchSupport = true;
5649 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5650 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5651 if (v->DCCEnable[k] == true) {
5652 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5653 } else {
5654 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5655 }
5656 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5657 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5658 && v->SourcePixelFormat[k] != dm_mono_8) {
5659 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5660 if (v->DCCEnable[k] == true) {
5661 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5662 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5663 64.0 * v->Read256BlockWidthC[k]);
5664 } else {
5665 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5666 }
5667 } else {
5668 v->AlignedCPitch[k] = v->PitchC[k];
5669 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5670 }
5671 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5672 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5673 v->PitchSupport = false;
5674 }
5675 }
5676
5677 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5678 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5679 ViewportExceedsSurface = true;
5680 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5681 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5682 && v->SourcePixelFormat[k] != dm_rgbe) {
5683 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5684 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5685 ViewportExceedsSurface = true;
5686 }
5687 }
5688 }
5689 }
5690
5691
5692 for (i = v->soc.num_states - 1; i >= 0; i--) {
5693 for (j = 0; j < 2; j++) {
5694 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5695 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5696 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5697 && v->DTBCLKRequiredMoreThanSupported[i] == false
5698 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5699 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5700 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5701 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5702 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5703 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5704 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5705 && ((v->HostVMEnable == false
5706 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5707 || v->ImmediateFlipSupportedForState[i][j] == true)
5708 && FMTBufferExceeded == false) {
5709 v->ModeSupport[i][j] = true;
5710 } else {
5711 v->ModeSupport[i][j] = false;
5712 }
5713 }
5714 }
5715
5716 {
5717 unsigned int MaximumMPCCombine = 0;
5718 for (i = v->soc.num_states; i >= 0; i--) {
5719 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5720 v->VoltageLevel = i;
5721 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5722 if (v->ModeSupport[i][0] == true) {
5723 MaximumMPCCombine = 0;
5724 } else {
5725 MaximumMPCCombine = 1;
5726 }
5727 }
5728 }
5729 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5730 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5731 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5732 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5733 }
5734 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5735 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5736 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5737 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5738 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5739 v->maxMpcComb = MaximumMPCCombine;
5740 }
5741}
5742
5743static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5744 struct display_mode_lib *mode_lib,
5745 unsigned int PrefetchMode,
5746 unsigned int NumberOfActivePlanes,
5747 unsigned int MaxLineBufferLines,
5748 unsigned int LineBufferSize,
5749 unsigned int WritebackInterfaceBufferSize,
5750 double DCFCLK,
5751 double ReturnBW,
5752 bool SynchronizedVBlank,
5753 unsigned int dpte_group_bytes[],
5754 unsigned int MetaChunkSize,
5755 double UrgentLatency,
5756 double ExtraLatency,
5757 double WritebackLatency,
5758 double WritebackChunkSize,
5759 double SOCCLK,
5760 double DRAMClockChangeLatency,
5761 double SRExitTime,
5762 double SREnterPlusExitTime,
5763 double SRExitZ8Time,
5764 double SREnterPlusExitZ8Time,
5765 double DCFCLKDeepSleep,
5766 unsigned int DETBufferSizeY[],
5767 unsigned int DETBufferSizeC[],
5768 unsigned int SwathHeightY[],
5769 unsigned int SwathHeightC[],
5770 unsigned int LBBitPerPixel[],
5771 double SwathWidthY[],
5772 double SwathWidthC[],
5773 double HRatio[],
5774 double HRatioChroma[],
5775 unsigned int vtaps[],
5776 unsigned int VTAPsChroma[],
5777 double VRatio[],
5778 double VRatioChroma[],
5779 unsigned int HTotal[],
5780 double PixelClock[],
5781 unsigned int BlendingAndTiming[],
5782 unsigned int DPPPerPlane[],
5783 double BytePerPixelDETY[],
5784 double BytePerPixelDETC[],
5785 double DSTXAfterScaler[],
5786 double DSTYAfterScaler[],
5787 bool WritebackEnable[],
5788 enum source_format_class WritebackPixelFormat[],
5789 double WritebackDestinationWidth[],
5790 double WritebackDestinationHeight[],
5791 double WritebackSourceHeight[],
5792 bool UnboundedRequestEnabled,
5793 int unsigned CompressedBufferSizeInkByte,
5794 enum clock_change_support *DRAMClockChangeSupport,
5795 double *UrgentWatermark,
5796 double *WritebackUrgentWatermark,
5797 double *DRAMClockChangeWatermark,
5798 double *WritebackDRAMClockChangeWatermark,
5799 double *StutterExitWatermark,
5800 double *StutterEnterPlusExitWatermark,
5801 double *Z8StutterExitWatermark,
5802 double *Z8StutterEnterPlusExitWatermark,
5803 double *MinActiveDRAMClockChangeLatencySupported)
5804{
5805 struct vba_vars_st *v = &mode_lib->vba;
5806 double EffectiveLBLatencyHidingY;
5807 double EffectiveLBLatencyHidingC;
5808 double LinesInDETY[DC__NUM_DPP__MAX];
5809 double LinesInDETC;
5810 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5811 unsigned int LinesInDETCRoundedDownToSwath;
5812 double FullDETBufferingTimeY;
5813 double FullDETBufferingTimeC;
5814 double ActiveDRAMClockChangeLatencyMarginY;
5815 double ActiveDRAMClockChangeLatencyMarginC;
5816 double WritebackDRAMClockChangeLatencyMargin;
5817 double PlaneWithMinActiveDRAMClockChangeMargin;
5818 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5819 double WritebackDRAMClockChangeLatencyHiding;
5820 double TotalPixelBW = 0.0;
5821 int k, j;
5822
5823 *UrgentWatermark = UrgentLatency + ExtraLatency;
5824
5825#ifdef __DML_VBA_DEBUG__
5826 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5827 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5828 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
5829#endif
5830
5831 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5832
5833#ifdef __DML_VBA_DEBUG__
5834 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5835 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
5836#endif
5837
5838 v->TotalActiveWriteback = 0;
5839 for (k = 0; k < NumberOfActivePlanes; ++k) {
5840 if (WritebackEnable[k] == true) {
5841 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5842 }
5843 }
5844
5845 if (v->TotalActiveWriteback <= 1) {
5846 *WritebackUrgentWatermark = WritebackLatency;
5847 } else {
5848 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5849 }
5850
5851 if (v->TotalActiveWriteback <= 1) {
5852 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5853 } else {
5854 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5855 }
5856
5857 for (k = 0; k < NumberOfActivePlanes; ++k) {
5858 TotalPixelBW = TotalPixelBW
5859 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5860 / (HTotal[k] / PixelClock[k]);
5861 }
5862
5863 for (k = 0; k < NumberOfActivePlanes; ++k) {
5864 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5865
5866 v->LBLatencyHidingSourceLinesY = dml_min(
5867 (double) MaxLineBufferLines,
5868 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5869
5870 v->LBLatencyHidingSourceLinesC = dml_min(
5871 (double) MaxLineBufferLines,
5872 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5873
5874 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5875
5876 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5877
5878 if (UnboundedRequestEnabled) {
5879 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5880 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
5881 }
5882
5883 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5884 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5885 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5886 if (BytePerPixelDETC[k] > 0) {
5887 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5888 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5889 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5890 } else {
5891 LinesInDETC = 0;
5892 FullDETBufferingTimeC = 999999;
5893 }
5894
5895 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5896 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5897
5898 if (NumberOfActivePlanes > 1) {
5899 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5900 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5901 }
5902
5903 if (BytePerPixelDETC[k] > 0) {
5904 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5905 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5906
5907 if (NumberOfActivePlanes > 1) {
5908 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5909 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5910 }
5911 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5912 } else {
5913 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5914 }
5915
5916 if (WritebackEnable[k] == true) {
5917 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5918 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5919 if (WritebackPixelFormat[k] == dm_444_64) {
5920 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5921 }
5922 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5923 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5924 }
5925 }
5926
5927 v->MinActiveDRAMClockChangeMargin = 999999;
5928 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5929 for (k = 0; k < NumberOfActivePlanes; ++k) {
5930 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5931 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5932 if (BlendingAndTiming[k] == k) {
5933 PlaneWithMinActiveDRAMClockChangeMargin = k;
5934 } else {
5935 for (j = 0; j < NumberOfActivePlanes; ++j) {
5936 if (BlendingAndTiming[k] == j) {
5937 PlaneWithMinActiveDRAMClockChangeMargin = j;
5938 }
5939 }
5940 }
5941 }
5942 }
5943
5944 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5945
5946 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5947 for (k = 0; k < NumberOfActivePlanes; ++k) {
5948 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5949 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5950 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5951 }
5952 }
5953
5954 v->TotalNumberOfActiveOTG = 0;
5955
5956 for (k = 0; k < NumberOfActivePlanes; ++k) {
5957 if (BlendingAndTiming[k] == k) {
5958 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5959 }
5960 }
5961
5962 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5963 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5964 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5965 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5966 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5967 } else {
5968 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5969 }
5970
5971 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5972 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5973 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5974 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5975
5976#ifdef __DML_VBA_DEBUG__
5977 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5978 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5979 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5980 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5981#endif
5982}
5983
5984static void CalculateDCFCLKDeepSleep(
5985 struct display_mode_lib *mode_lib,
5986 unsigned int NumberOfActivePlanes,
5987 int BytePerPixelY[],
5988 int BytePerPixelC[],
5989 double VRatio[],
5990 double VRatioChroma[],
5991 double SwathWidthY[],
5992 double SwathWidthC[],
5993 unsigned int DPPPerPlane[],
5994 double HRatio[],
5995 double HRatioChroma[],
5996 double PixelClock[],
5997 double PSCL_THROUGHPUT[],
5998 double PSCL_THROUGHPUT_CHROMA[],
5999 double DPPCLK[],
6000 double ReadBandwidthLuma[],
6001 double ReadBandwidthChroma[],
6002 int ReturnBusWidth,
6003 double *DCFCLKDeepSleep)
6004{
6005 struct vba_vars_st *v = &mode_lib->vba;
6006 double DisplayPipeLineDeliveryTimeLuma;
6007 double DisplayPipeLineDeliveryTimeChroma;
6008 double ReadBandwidth = 0.0;
6009 int k;
6010
6011 for (k = 0; k < NumberOfActivePlanes; ++k) {
6012
6013 if (VRatio[k] <= 1) {
6014 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6015 } else {
6016 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6017 }
6018 if (BytePerPixelC[k] == 0) {
6019 DisplayPipeLineDeliveryTimeChroma = 0;
6020 } else {
6021 if (VRatioChroma[k] <= 1) {
6022 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6023 } else {
6024 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6025 }
6026 }
6027
6028 if (BytePerPixelC[k] > 0) {
6029 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
6030 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
6031 } else {
6032 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
6033 }
6034 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
6035
6036 }
6037
6038 for (k = 0; k < NumberOfActivePlanes; ++k) {
6039 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
6040 }
6041
6042 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
6043
6044 for (k = 0; k < NumberOfActivePlanes; ++k) {
6045 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
6046 }
6047}
6048
/*
 * CalculateUrgentBurstFactor - urgent burst factors for one plane.
 *
 * For the cursor buffer and for the luma and chroma DET buffers, the time
 * covered by the buffered lines is compared with UrgentLatency:
 *   - buffer time <= UrgentLatency: the factor is set to 0 and
 *     *NotEnoughUrgentLatencyHiding is set to true;
 *   - otherwise: factor = buffer time / (buffer time - UrgentLatency);
 *   - the vertical ratio is not greater than 0: the factor is set to 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding  always written (false unless a buffer is
 *                                  too small, see above).
 *   *UrgentBurstFactorLuma         always written.
 *   *UrgentBurstFactorCursor       written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma       written only when BytePerPixelInDETC > 0.
 * Outputs that are not written keep whatever value the caller had stored.
 *
 * The cursor and luma buffers use VRatio. The chroma buffer uses VRatioC;
 * earlier this function applied the luma VRatio to the chroma buffer as well
 * and never read VRatioC.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/*
		 * 1 << floor(log2(buffer bytes / bytes per cursor line)):
		 * presumably the largest power-of-two line count that fits in
		 * the cursor buffer (depends on dml_log2 -- TODO confirm).
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count: round the line count down to SwathHeightY. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/* Chroma lines are scaled by the chroma ratio, not the luma one. */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
6122
6123static void CalculatePixelDeliveryTimes(
6124 unsigned int NumberOfActivePlanes,
6125 double VRatio[],
6126 double VRatioChroma[],
6127 double VRatioPrefetchY[],
6128 double VRatioPrefetchC[],
6129 unsigned int swath_width_luma_ub[],
6130 unsigned int swath_width_chroma_ub[],
6131 unsigned int DPPPerPlane[],
6132 double HRatio[],
6133 double HRatioChroma[],
6134 double PixelClock[],
6135 double PSCL_THROUGHPUT[],
6136 double PSCL_THROUGHPUT_CHROMA[],
6137 double DPPCLK[],
6138 int BytePerPixelC[],
6139 enum scan_direction_class SourceScan[],
6140 unsigned int NumberOfCursors[],
6141 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6142 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6143 unsigned int BlockWidth256BytesY[],
6144 unsigned int BlockHeight256BytesY[],
6145 unsigned int BlockWidth256BytesC[],
6146 unsigned int BlockHeight256BytesC[],
6147 double DisplayPipeLineDeliveryTimeLuma[],
6148 double DisplayPipeLineDeliveryTimeChroma[],
6149 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6150 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6151 double DisplayPipeRequestDeliveryTimeLuma[],
6152 double DisplayPipeRequestDeliveryTimeChroma[],
6153 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6154 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6155 double CursorRequestDeliveryTime[],
6156 double CursorRequestDeliveryTimePrefetch[])
6157{
6158 double req_per_swath_ub;
6159 int k;
6160
6161 for (k = 0; k < NumberOfActivePlanes; ++k) {
6162 if (VRatio[k] <= 1) {
6163 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6164 } else {
6165 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6166 }
6167
6168 if (BytePerPixelC[k] == 0) {
6169 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6170 } else {
6171 if (VRatioChroma[k] <= 1) {
6172 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6173 } else {
6174 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6175 }
6176 }
6177
6178 if (VRatioPrefetchY[k] <= 1) {
6179 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6180 } else {
6181 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6182 }
6183
6184 if (BytePerPixelC[k] == 0) {
6185 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6186 } else {
6187 if (VRatioPrefetchC[k] <= 1) {
6188 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6189 } else {
6190 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6191 }
6192 }
6193 }
6194
6195 for (k = 0; k < NumberOfActivePlanes; ++k) {
6196 if (SourceScan[k] != dm_vert) {
6197 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6198 } else {
6199 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6200 }
6201 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6202 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6203 if (BytePerPixelC[k] == 0) {
6204 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6205 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6206 } else {
6207 if (SourceScan[k] != dm_vert) {
6208 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6209 } else {
6210 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6211 }
6212 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6213 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6214 }
6215#ifdef __DML_VBA_DEBUG__
6216 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6217 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6218 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6219 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6220 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6221 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6222 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6223 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6224 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6225 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6226 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6227 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6228#endif
6229 }
6230
6231 for (k = 0; k < NumberOfActivePlanes; ++k) {
6232 int cursor_req_per_width;
6233 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6234 if (NumberOfCursors[k] > 0) {
6235 if (VRatio[k] <= 1) {
6236 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6237 } else {
6238 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6239 }
6240 if (VRatioPrefetchY[k] <= 1) {
6241 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6242 } else {
6243 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6244 }
6245 } else {
6246 CursorRequestDeliveryTime[k] = 0;
6247 CursorRequestDeliveryTimePrefetch[k] = 0;
6248 }
6249#ifdef __DML_VBA_DEBUG__
6250 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6251 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6252 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6253#endif
6254 }
6255}
6256
6257static void CalculateMetaAndPTETimes(
6258 int NumberOfActivePlanes,
6259 bool GPUVMEnable,
6260 int MetaChunkSize,
6261 int MinMetaChunkSizeBytes,
6262 int HTotal[],
6263 double VRatio[],
6264 double VRatioChroma[],
6265 double DestinationLinesToRequestRowInVBlank[],
6266 double DestinationLinesToRequestRowInImmediateFlip[],
6267 bool DCCEnable[],
6268 double PixelClock[],
6269 int BytePerPixelY[],
6270 int BytePerPixelC[],
6271 enum scan_direction_class SourceScan[],
6272 int dpte_row_height[],
6273 int dpte_row_height_chroma[],
6274 int meta_row_width[],
6275 int meta_row_width_chroma[],
6276 int meta_row_height[],
6277 int meta_row_height_chroma[],
6278 int meta_req_width[],
6279 int meta_req_width_chroma[],
6280 int meta_req_height[],
6281 int meta_req_height_chroma[],
6282 int dpte_group_bytes[],
6283 int PTERequestSizeY[],
6284 int PTERequestSizeC[],
6285 int PixelPTEReqWidthY[],
6286 int PixelPTEReqHeightY[],
6287 int PixelPTEReqWidthC[],
6288 int PixelPTEReqHeightC[],
6289 int dpte_row_width_luma_ub[],
6290 int dpte_row_width_chroma_ub[],
6291 double DST_Y_PER_PTE_ROW_NOM_L[],
6292 double DST_Y_PER_PTE_ROW_NOM_C[],
6293 double DST_Y_PER_META_ROW_NOM_L[],
6294 double DST_Y_PER_META_ROW_NOM_C[],
6295 double TimePerMetaChunkNominal[],
6296 double TimePerChromaMetaChunkNominal[],
6297 double TimePerMetaChunkVBlank[],
6298 double TimePerChromaMetaChunkVBlank[],
6299 double TimePerMetaChunkFlip[],
6300 double TimePerChromaMetaChunkFlip[],
6301 double time_per_pte_group_nom_luma[],
6302 double time_per_pte_group_vblank_luma[],
6303 double time_per_pte_group_flip_luma[],
6304 double time_per_pte_group_nom_chroma[],
6305 double time_per_pte_group_vblank_chroma[],
6306 double time_per_pte_group_flip_chroma[])
6307{
6308 unsigned int meta_chunk_width;
6309 unsigned int min_meta_chunk_width;
6310 unsigned int meta_chunk_per_row_int;
6311 unsigned int meta_row_remainder;
6312 unsigned int meta_chunk_threshold;
6313 unsigned int meta_chunks_per_row_ub;
6314 unsigned int meta_chunk_width_chroma;
6315 unsigned int min_meta_chunk_width_chroma;
6316 unsigned int meta_chunk_per_row_int_chroma;
6317 unsigned int meta_row_remainder_chroma;
6318 unsigned int meta_chunk_threshold_chroma;
6319 unsigned int meta_chunks_per_row_ub_chroma;
6320 unsigned int dpte_group_width_luma;
6321 unsigned int dpte_groups_per_row_luma_ub;
6322 unsigned int dpte_group_width_chroma;
6323 unsigned int dpte_groups_per_row_chroma_ub;
6324 int k;
6325
6326 for (k = 0; k < NumberOfActivePlanes; ++k) {
6327 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6328 if (BytePerPixelC[k] == 0) {
6329 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6330 } else {
6331 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6332 }
6333 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6334 if (BytePerPixelC[k] == 0) {
6335 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6336 } else {
6337 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6338 }
6339 }
6340
6341 for (k = 0; k < NumberOfActivePlanes; ++k) {
6342 if (DCCEnable[k] == true) {
6343 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6344 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6345 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6346 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6347 if (SourceScan[k] != dm_vert) {
6348 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6349 } else {
6350 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6351 }
6352 if (meta_row_remainder <= meta_chunk_threshold) {
6353 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6354 } else {
6355 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6356 }
6357 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6358 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6359 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6360 if (BytePerPixelC[k] == 0) {
6361 TimePerChromaMetaChunkNominal[k] = 0;
6362 TimePerChromaMetaChunkVBlank[k] = 0;
6363 TimePerChromaMetaChunkFlip[k] = 0;
6364 } else {
6365 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6366 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6367 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6368 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6369 if (SourceScan[k] != dm_vert) {
6370 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6371 } else {
6372 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6373 }
6374 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6375 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6376 } else {
6377 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6378 }
6379 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6380 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6381 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6382 }
6383 } else {
6384 TimePerMetaChunkNominal[k] = 0;
6385 TimePerMetaChunkVBlank[k] = 0;
6386 TimePerMetaChunkFlip[k] = 0;
6387 TimePerChromaMetaChunkNominal[k] = 0;
6388 TimePerChromaMetaChunkVBlank[k] = 0;
6389 TimePerChromaMetaChunkFlip[k] = 0;
6390 }
6391 }
6392
6393 for (k = 0; k < NumberOfActivePlanes; ++k) {
6394 if (GPUVMEnable == true) {
6395 if (SourceScan[k] != dm_vert) {
6396 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6397 } else {
6398 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6399 }
6400 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6401 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6402 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6403 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6404 if (BytePerPixelC[k] == 0) {
6405 time_per_pte_group_nom_chroma[k] = 0;
6406 time_per_pte_group_vblank_chroma[k] = 0;
6407 time_per_pte_group_flip_chroma[k] = 0;
6408 } else {
6409 if (SourceScan[k] != dm_vert) {
6410 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6411 } else {
6412 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6413 }
6414 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6415 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6416 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6417 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6418 }
6419 } else {
6420 time_per_pte_group_nom_luma[k] = 0;
6421 time_per_pte_group_vblank_luma[k] = 0;
6422 time_per_pte_group_flip_luma[k] = 0;
6423 time_per_pte_group_nom_chroma[k] = 0;
6424 time_per_pte_group_vblank_chroma[k] = 0;
6425 time_per_pte_group_flip_chroma[k] = 0;
6426 }
6427 }
6428}
6429
/*
 * CalculateVMGroupAndRequestTimes - per-plane time per VM group / VM request.
 *
 * For every plane k < NumberOfActivePlanes the four outputs are the VBlank or
 * immediate-flip VM request line count times HTotal / PixelClock, divided by
 * the number of VM groups (TimePerVMGroup*) or of 64-byte requests
 * (TimePerVMRequest*) of the lower VM stage.
 *
 * What is counted, luma always and chroma only when BytePerPixelC[k] > 0:
 *   - DCC disabled:                 dpde0 bytes only;
 *   - DCC enabled, 1 page level:    meta PTE bytes only;
 *   - DCC enabled, other levels:    dpde0 and meta PTE bytes, and the group
 *                                   count gets an extra 2 (with chroma) or 1
 *                                   (without chroma).
 * All four results are halved when GPUVMMaxPageTableLevels > 2.
 *
 * All four outputs are 0 when GPUVMEnable is false, or when DCC is disabled
 * for the plane and GPUVMMaxPageTableLevels <= 1.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool HasChroma = BytePerPixelC[k] > 0;
		bool CountDPDE0;
		bool CountMetaPTE;
		double GroupCount = 0;
		unsigned int RequestCount = 0;
		int num_group_per_lower_vm_stage;
		int num_req_per_lower_vm_stage;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		/* dpde0 is skipped only for DCC with one page level; meta PTE needs DCC. */
		CountDPDE0 = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
		CountMetaPTE = DCCEnable[k];

		if (CountDPDE0) {
			GroupCount += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestCount += dpde0_bytes_per_frame_ub_l[k] / 64;
			if (HasChroma) {
				GroupCount += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				RequestCount += dpde0_bytes_per_frame_ub_c[k] / 64;
			}
		}

		if (CountMetaPTE) {
			GroupCount += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestCount += meta_pte_bytes_per_frame_ub_l[k] / 64;
			if (HasChroma) {
				GroupCount += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				RequestCount += meta_pte_bytes_per_frame_ub_c[k] / 64;
			}
		}

		/* Extra groups when both kinds are fetched: one per surface. */
		if (CountDPDE0 && CountMetaPTE)
			GroupCount += HasChroma ? 2 : 1;

		num_group_per_lower_vm_stage = GroupCount;
		num_req_per_lower_vm_stage = RequestCount;

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
6529
6530static void CalculateStutterEfficiency(
6531 struct display_mode_lib *mode_lib,
6532 int CompressedBufferSizeInkByte,
6533 bool UnboundedRequestEnabled,
6534 int ConfigReturnBufferSizeInKByte,
6535 int MetaFIFOSizeInKEntries,
6536 int ZeroSizeBufferEntries,
6537 int NumberOfActivePlanes,
6538 int ROBBufferSizeInKByte,
6539 double TotalDataReadBandwidth,
6540 double DCFCLK,
6541 double ReturnBW,
6542 double COMPBUF_RESERVED_SPACE_64B,
6543 double COMPBUF_RESERVED_SPACE_ZS,
6544 double SRExitTime,
6545 double SRExitZ8Time,
6546 bool SynchronizedVBlank,
6547 double Z8StutterEnterPlusExitWatermark,
6548 double StutterEnterPlusExitWatermark,
6549 bool ProgressiveToInterlaceUnitInOPP,
6550 bool Interlace[],
6551 double MinTTUVBlank[],
6552 int DPPPerPlane[],
6553 unsigned int DETBufferSizeY[],
6554 int BytePerPixelY[],
6555 double BytePerPixelDETY[],
6556 double SwathWidthY[],
6557 int SwathHeightY[],
6558 int SwathHeightC[],
6559 double NetDCCRateLuma[],
6560 double NetDCCRateChroma[],
6561 double DCCFractionOfZeroSizeRequestsLuma[],
6562 double DCCFractionOfZeroSizeRequestsChroma[],
6563 int HTotal[],
6564 int VTotal[],
6565 double PixelClock[],
6566 double VRatio[],
6567 enum scan_direction_class SourceScan[],
6568 int BlockHeight256BytesY[],
6569 int BlockWidth256BytesY[],
6570 int BlockHeight256BytesC[],
6571 int BlockWidth256BytesC[],
6572 int DCCYMaxUncompressedBlock[],
6573 int DCCCMaxUncompressedBlock[],
6574 int VActive[],
6575 bool DCCEnable[],
6576 bool WritebackEnable[],
6577 double ReadBandwidthPlaneLuma[],
6578 double ReadBandwidthPlaneChroma[],
6579 double meta_row_bw[],
6580 double dpte_row_bw[],
6581 double *StutterEfficiencyNotIncludingVBlank,
6582 double *StutterEfficiency,
6583 int *NumberOfStutterBurstsPerFrame,
6584 double *Z8StutterEfficiencyNotIncludingVBlank,
6585 double *Z8StutterEfficiency,
6586 int *Z8NumberOfStutterBurstsPerFrame,
6587 double *StutterPeriod)
6588{
6589 struct vba_vars_st *v = &mode_lib->vba;
6590
6591 double DETBufferingTimeY;
6592 double SwathWidthYCriticalPlane = 0;
6593 double VActiveTimeCriticalPlane = 0;
6594 double FrameTimeCriticalPlane = 0;
6595 int BytePerPixelYCriticalPlane = 0;
6596 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6597 double MinTTUVBlankCriticalPlane = 0;
6598 double TotalCompressedReadBandwidth;
6599 double TotalRowReadBandwidth;
6600 double AverageDCCCompressionRate;
6601 double EffectiveCompressedBufferSize;
6602 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6603 double StutterBurstTime;
6604 int TotalActiveWriteback;
6605 double LinesInDETY;
6606 double LinesInDETYRoundedDownToSwath;
6607 double MaximumEffectiveCompressionLuma;
6608 double MaximumEffectiveCompressionChroma;
6609 double TotalZeroSizeRequestReadBandwidth;
6610 double TotalZeroSizeCompressedReadBandwidth;
6611 double AverageDCCZeroSizeFraction;
6612 double AverageZeroSizeCompressionRate;
6613 int TotalNumberOfActiveOTG = 0;
6614 double LastStutterPeriod = 0.0;
6615 double LastZ8StutterPeriod = 0.0;
6616 int k;
6617
6618 TotalZeroSizeRequestReadBandwidth = 0;
6619 TotalZeroSizeCompressedReadBandwidth = 0;
6620 TotalRowReadBandwidth = 0;
6621 TotalCompressedReadBandwidth = 0;
6622
6623 for (k = 0; k < NumberOfActivePlanes; ++k) {
6624 if (DCCEnable[k] == true) {
6625 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6626 || DCCYMaxUncompressedBlock[k] < 256) {
6627 MaximumEffectiveCompressionLuma = 2;
6628 } else {
6629 MaximumEffectiveCompressionLuma = 4;
6630 }
6631 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6632 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6633 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6634 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6635 if (ReadBandwidthPlaneChroma[k] > 0) {
6636 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6637 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6638 MaximumEffectiveCompressionChroma = 2;
6639 } else {
6640 MaximumEffectiveCompressionChroma = 4;
6641 }
6642 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6643 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6644 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6645 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6646 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6647 }
6648 } else {
6649 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6650 }
6651 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6652 }
6653
6654 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6655 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6656
6657#ifdef __DML_VBA_DEBUG__
6658 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6659 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6660 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6661 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6662 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6663 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6664 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6665 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6666#endif
6667
6668 if (AverageDCCZeroSizeFraction == 1) {
6669 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6670 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6671 } else if (AverageDCCZeroSizeFraction > 0) {
6672 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6673 EffectiveCompressedBufferSize = dml_min(
6674 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6675 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6676 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6677 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6678 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6679 dml_print(
6680 "DML::%s: min 2 = %f\n",
6681 __func__,
6682 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6683 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6684 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6685 } else {
6686 EffectiveCompressedBufferSize = dml_min(
6687 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6688 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6689 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6690 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6691 }
6692
6693#ifdef __DML_VBA_DEBUG__
6694 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6695 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6696 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6697#endif
6698
6699 *StutterPeriod = 0;
6700 for (k = 0; k < NumberOfActivePlanes; ++k) {
6701 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6702 / BytePerPixelDETY[k] / SwathWidthY[k];
6703 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6704 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6705#ifdef __DML_VBA_DEBUG__
6706 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6707 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6708 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6709 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6710 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6711 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6712 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6713 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6714 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6715 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6716 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6717 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6718#endif
6719
6720 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6721 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6722
6723 *StutterPeriod = DETBufferingTimeY;
6724 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6725 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6726 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6727 SwathWidthYCriticalPlane = SwathWidthY[k];
6728 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6729 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6730
6731#ifdef __DML_VBA_DEBUG__
6732 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6733 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6734 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6735 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6736 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6737 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6738 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6739#endif
6740 }
6741 }
6742
6743 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6744#ifdef __DML_VBA_DEBUG__
6745 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6746 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6747 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6748 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6749 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6750 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6751 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6752 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6753 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6754 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6755#endif
6756
6757 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6758 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6759 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6760#ifdef __DML_VBA_DEBUG__
6761 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6762 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6763 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6764 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6765 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6766#endif
6767 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6768
6769 dml_print(
6770 "DML::%s: Time to finish residue swath=%f\n",
6771 __func__,
6772 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6773
6774 TotalActiveWriteback = 0;
6775 for (k = 0; k < NumberOfActivePlanes; ++k) {
6776 if (WritebackEnable[k]) {
6777 TotalActiveWriteback = TotalActiveWriteback + 1;
6778 }
6779 }
6780
6781 if (TotalActiveWriteback == 0) {
6782#ifdef __DML_VBA_DEBUG__
6783 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6784 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6785 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6786 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6787#endif
6788 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6789 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6790 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6791 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6792 } else {
6793 *StutterEfficiencyNotIncludingVBlank = 0.;
6794 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6795 *NumberOfStutterBurstsPerFrame = 0;
6796 *Z8NumberOfStutterBurstsPerFrame = 0;
6797 }
6798#ifdef __DML_VBA_DEBUG__
6799 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6800 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6801 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6802 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6803 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6804#endif
6805
6806 for (k = 0; k < NumberOfActivePlanes; ++k) {
6807 if (v->BlendingAndTiming[k] == k) {
6808 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6809 }
6810 }
6811
6812 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6813 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6814
6815 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6816 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6817 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6818 } else {
6819 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6820 }
6821 } else {
6822 *StutterEfficiency = 0;
6823 }
6824
6825 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6826 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6827 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6828 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6829 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6830 } else {
6831 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6832 }
6833 } else {
6834 *Z8StutterEfficiency = 0.;
6835 }
6836
6837 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6838 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6839 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6840 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6841 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6842 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6843 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6844 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6845}
6846
6847static void CalculateSwathAndDETConfiguration(
6848 bool ForceSingleDPP,
6849 int NumberOfActivePlanes,
6850 unsigned int DETBufferSizeInKByte,
6851 double MaximumSwathWidthLuma[],
6852 double MaximumSwathWidthChroma[],
6853 enum scan_direction_class SourceScan[],
6854 enum source_format_class SourcePixelFormat[],
6855 enum dm_swizzle_mode SurfaceTiling[],
6856 int ViewportWidth[],
6857 int ViewportHeight[],
6858 int SurfaceWidthY[],
6859 int SurfaceWidthC[],
6860 int SurfaceHeightY[],
6861 int SurfaceHeightC[],
6862 int Read256BytesBlockHeightY[],
6863 int Read256BytesBlockHeightC[],
6864 int Read256BytesBlockWidthY[],
6865 int Read256BytesBlockWidthC[],
6866 enum odm_combine_mode ODMCombineEnabled[],
6867 int BlendingAndTiming[],
6868 int BytePerPixY[],
6869 int BytePerPixC[],
6870 double BytePerPixDETY[],
6871 double BytePerPixDETC[],
6872 int HActive[],
6873 double HRatio[],
6874 double HRatioChroma[],
6875 int DPPPerPlane[],
6876 int swath_width_luma_ub[],
6877 int swath_width_chroma_ub[],
6878 double SwathWidth[],
6879 double SwathWidthChroma[],
6880 int SwathHeightY[],
6881 int SwathHeightC[],
6882 unsigned int DETBufferSizeY[],
6883 unsigned int DETBufferSizeC[],
6884 bool ViewportSizeSupportPerPlane[],
6885 bool *ViewportSizeSupport)
6886{
6887 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6888 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6889 int MinimumSwathHeightY;
6890 int MinimumSwathHeightC;
6891 int RoundedUpMaxSwathSizeBytesY;
6892 int RoundedUpMaxSwathSizeBytesC;
6893 int RoundedUpMinSwathSizeBytesY;
6894 int RoundedUpMinSwathSizeBytesC;
6895 int RoundedUpSwathSizeBytesY;
6896 int RoundedUpSwathSizeBytesC;
6897 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6898 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6899 int k;
6900
6901 CalculateSwathWidth(
6902 ForceSingleDPP,
6903 NumberOfActivePlanes,
6904 SourcePixelFormat,
6905 SourceScan,
6906 ViewportWidth,
6907 ViewportHeight,
6908 SurfaceWidthY,
6909 SurfaceWidthC,
6910 SurfaceHeightY,
6911 SurfaceHeightC,
6912 ODMCombineEnabled,
6913 BytePerPixY,
6914 BytePerPixC,
6915 Read256BytesBlockHeightY,
6916 Read256BytesBlockHeightC,
6917 Read256BytesBlockWidthY,
6918 Read256BytesBlockWidthC,
6919 BlendingAndTiming,
6920 HActive,
6921 HRatio,
6922 DPPPerPlane,
6923 SwathWidthSingleDPP,
6924 SwathWidthSingleDPPChroma,
6925 SwathWidth,
6926 SwathWidthChroma,
6927 MaximumSwathHeightY,
6928 MaximumSwathHeightC,
6929 swath_width_luma_ub,
6930 swath_width_chroma_ub);
6931
6932 *ViewportSizeSupport = true;
6933 for (k = 0; k < NumberOfActivePlanes; ++k) {
6934 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6935 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6936 if (SurfaceTiling[k] == dm_sw_linear
6937 || (SourcePixelFormat[k] == dm_444_64
6938 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6939 && SourceScan[k] != dm_vert)) {
6940 MinimumSwathHeightY = MaximumSwathHeightY[k];
6941 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6942 MinimumSwathHeightY = MaximumSwathHeightY[k];
6943 } else {
6944 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6945 }
6946 MinimumSwathHeightC = MaximumSwathHeightC[k];
6947 } else {
6948 if (SurfaceTiling[k] == dm_sw_linear) {
6949 MinimumSwathHeightY = MaximumSwathHeightY[k];
6950 MinimumSwathHeightC = MaximumSwathHeightC[k];
6951 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6952 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6953 MinimumSwathHeightC = MaximumSwathHeightC[k];
6954 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6955 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6956 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6957 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6958 MinimumSwathHeightY = MaximumSwathHeightY[k];
6959 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6960 } else {
6961 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6962 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6963 }
6964 }
6965
6966 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6967 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6968 if (SourcePixelFormat[k] == dm_420_10) {
6969 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6970 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6971 }
6972 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6973 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6974 if (SourcePixelFormat[k] == dm_420_10) {
6975 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6976 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6977 }
6978
6979 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6980 SwathHeightY[k] = MaximumSwathHeightY[k];
6981 SwathHeightC[k] = MaximumSwathHeightC[k];
6982 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6983 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6984 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6985 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6986 SwathHeightY[k] = MinimumSwathHeightY;
6987 SwathHeightC[k] = MaximumSwathHeightC[k];
6988 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6989 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6990 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6991 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6992 SwathHeightY[k] = MaximumSwathHeightY[k];
6993 SwathHeightC[k] = MinimumSwathHeightC;
6994 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6995 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6996 } else {
6997 SwathHeightY[k] = MinimumSwathHeightY;
6998 SwathHeightC[k] = MinimumSwathHeightC;
6999 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
7000 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
7001 }
7002 {
7003 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7004 if (SwathHeightC[k] == 0) {
7005 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
7006 DETBufferSizeC[k] = 0;
7007 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
7008 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
7009 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
7010 } else {
7011 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
7012 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
7013 }
7014
7015 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
7016 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
7017 *ViewportSizeSupport = false;
7018 ViewportSizeSupportPerPlane[k] = false;
7019 } else {
7020 ViewportSizeSupportPerPlane[k] = true;
7021 }
7022 }
7023 }
7024}
7025
7026static void CalculateSwathWidth(
7027 bool ForceSingleDPP,
7028 int NumberOfActivePlanes,
7029 enum source_format_class SourcePixelFormat[],
7030 enum scan_direction_class SourceScan[],
7031 int ViewportWidth[],
7032 int ViewportHeight[],
7033 int SurfaceWidthY[],
7034 int SurfaceWidthC[],
7035 int SurfaceHeightY[],
7036 int SurfaceHeightC[],
7037 enum odm_combine_mode ODMCombineEnabled[],
7038 int BytePerPixY[],
7039 int BytePerPixC[],
7040 int Read256BytesBlockHeightY[],
7041 int Read256BytesBlockHeightC[],
7042 int Read256BytesBlockWidthY[],
7043 int Read256BytesBlockWidthC[],
7044 int BlendingAndTiming[],
7045 int HActive[],
7046 double HRatio[],
7047 int DPPPerPlane[],
7048 double SwathWidthSingleDPPY[],
7049 double SwathWidthSingleDPPC[],
7050 double SwathWidthY[],
7051 double SwathWidthC[],
7052 int MaximumSwathHeightY[],
7053 int MaximumSwathHeightC[],
7054 int swath_width_luma_ub[],
7055 int swath_width_chroma_ub[])
7056{
7057 enum odm_combine_mode MainPlaneODMCombine;
7058 int j, k;
7059
7060#ifdef __DML_VBA_DEBUG__
7061 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
7062#endif
7063
7064 for (k = 0; k < NumberOfActivePlanes; ++k) {
7065 if (SourceScan[k] != dm_vert) {
7066 SwathWidthSingleDPPY[k] = ViewportWidth[k];
7067 } else {
7068 SwathWidthSingleDPPY[k] = ViewportHeight[k];
7069 }
7070
7071#ifdef __DML_VBA_DEBUG__
7072 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
7073 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
7074#endif
7075
7076 MainPlaneODMCombine = ODMCombineEnabled[k];
7077 for (j = 0; j < NumberOfActivePlanes; ++j) {
7078 if (BlendingAndTiming[k] == j) {
7079 MainPlaneODMCombine = ODMCombineEnabled[j];
7080 }
7081 }
7082
7083 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7084 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7085 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7086 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7087 } else if (DPPPerPlane[k] == 2) {
7088 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7089 } else {
7090 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7091 }
7092
7093#ifdef __DML_VBA_DEBUG__
7094 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7095 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
7096#endif
7097
7098 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7099 SwathWidthC[k] = SwathWidthY[k] / 2;
7100 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7101 } else {
7102 SwathWidthC[k] = SwathWidthY[k];
7103 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
7104 }
7105
7106 if (ForceSingleDPP == true) {
7107 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7108 SwathWidthC[k] = SwathWidthSingleDPPC[k];
7109 }
7110 {
7111 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7112 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7113 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7114 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7115
7116#ifdef __DML_VBA_DEBUG__
7117 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
7118#endif
7119
7120 if (SourceScan[k] != dm_vert) {
7121 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7122 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7123 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7124 if (BytePerPixC[k] > 0) {
7125 swath_width_chroma_ub[k] = dml_min(
7126 surface_width_ub_c,
7127 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7128 } else {
7129 swath_width_chroma_ub[k] = 0;
7130 }
7131 } else {
7132 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7133 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7134 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7135 if (BytePerPixC[k] > 0) {
7136 swath_width_chroma_ub[k] = dml_min(
7137 surface_height_ub_c,
7138 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7139 } else {
7140 swath_width_chroma_ub[k] = 0;
7141 }
7142 }
7143 }
7144 }
7145}
7146
7147static double CalculateExtraLatency(
7148 int RoundTripPingLatencyCycles,
7149 int ReorderingBytes,
7150 double DCFCLK,
7151 int TotalNumberOfActiveDPP,
7152 int PixelChunkSizeInKByte,
7153 int TotalNumberOfDCCActiveDPP,
7154 int MetaChunkSize,
7155 double ReturnBW,
7156 bool GPUVMEnable,
7157 bool HostVMEnable,
7158 int NumberOfActivePlanes,
7159 int NumberOfDPP[],
7160 int dpte_group_bytes[],
7161 double HostVMInefficiencyFactor,
7162 double HostVMMinPageSize,
7163 int HostVMMaxNonCachedPageTableLevels)
7164{
7165 double ExtraLatencyBytes;
7166 double ExtraLatency;
7167
7168 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7169 ReorderingBytes,
7170 TotalNumberOfActiveDPP,
7171 PixelChunkSizeInKByte,
7172 TotalNumberOfDCCActiveDPP,
7173 MetaChunkSize,
7174 GPUVMEnable,
7175 HostVMEnable,
7176 NumberOfActivePlanes,
7177 NumberOfDPP,
7178 dpte_group_bytes,
7179 HostVMInefficiencyFactor,
7180 HostVMMinPageSize,
7181 HostVMMaxNonCachedPageTableLevels);
7182
7183 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7184
7185#ifdef __DML_VBA_DEBUG__
7186 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7187 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7188 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7189 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7190 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7191#endif
7192
7193 return ExtraLatency;
7194}
7195
/*
 * Return the number of bytes that contribute to extra latency:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each active plane:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 *       * HostVMInefficiencyFactor
 *
 * "levels" is 0 unless both GPUVMEnable and HostVMEnable are set. In that
 * case it is HostVMMaxNonCachedPageTableLevels for a HostVMMinPageSize below
 * 2048, and otherwise that value reduced by 1 (page size below 1048576) or
 * by 2, clamped at 0.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int levels = 0;
	int fetches_per_group;
	int plane;
	double total;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048) {
			/* Smallest pages: the level count is used as given, unclamped. */
			levels = HostVMMaxNonCachedPageTableLevels;
		} else {
			const int dropped = (HostVMMinPageSize < 1048576) ? 1 : 2;

			levels = dml_max(0, HostVMMaxNonCachedPageTableLevels - dropped);
		}
	}

	total = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable) {
		return total;
	}

	fetches_per_group = 1 + 8 * levels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		total += NumberOfDPP[plane] * dpte_group_bytes[plane] * fetches_per_group * HostVMInefficiencyFactor;
	}

	return total;
}
7235
/*
 * Return the urgent latency: the dml_max3() of the three supplied latencies.
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment) {
		return worst_case;
	}

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7253
7254static void UseMinimumDCFCLK(
7255 struct display_mode_lib *mode_lib,
7256 int MaxInterDCNTileRepeaters,
7257 int MaxPrefetchMode,
7258 double FinalDRAMClockChangeLatency,
7259 double SREnterPlusExitTime,
7260 int ReturnBusWidth,
7261 int RoundTripPingLatencyCycles,
7262 int ReorderingBytes,
7263 int PixelChunkSizeInKByte,
7264 int MetaChunkSize,
7265 bool GPUVMEnable,
7266 int GPUVMMaxPageTableLevels,
7267 bool HostVMEnable,
7268 int NumberOfActivePlanes,
7269 double HostVMMinPageSize,
7270 int HostVMMaxNonCachedPageTableLevels,
7271 bool DynamicMetadataVMEnabled,
7272 enum immediate_flip_requirement ImmediateFlipRequirement,
7273 bool ProgressiveToInterlaceUnitInOPP,
7274 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
7275 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
7276 int VTotal[],
7277 int VActive[],
7278 int DynamicMetadataTransmittedBytes[],
7279 int DynamicMetadataLinesBeforeActiveRequired[],
7280 bool Interlace[],
7281 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
7282 double RequiredDISPCLK[][2],
7283 double UrgLatency[],
7284 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
7285 double ProjectedDCFCLKDeepSleep[][2],
7286 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
7287 double TotalVActivePixelBandwidth[][2],
7288 double TotalVActiveCursorBandwidth[][2],
7289 double TotalMetaRowBandwidth[][2],
7290 double TotalDPTERowBandwidth[][2],
7291 unsigned int TotalNumberOfActiveDPP[][2],
7292 unsigned int TotalNumberOfDCCActiveDPP[][2],
7293 int dpte_group_bytes[],
7294 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
7295 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
7296 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
7297 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
7298 int BytePerPixelY[],
7299 int BytePerPixelC[],
7300 int HTotal[],
7301 double PixelClock[],
7302 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
7303 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
7304 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
7305 bool DynamicMetadataEnable[],
7306 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
7307 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
7308 double ReadBandwidthLuma[],
7309 double ReadBandwidthChroma[],
7310 double DCFCLKPerState[],
7311 double DCFCLKState[][2])
7312{
7313 struct vba_vars_st *v = &mode_lib->vba;
7314 int dummy1, i, j, k;
7315 double NormalEfficiency, dummy2, dummy3;
7316 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7317
7318 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7319 for (i = 0; i < v->soc.num_states; ++i) {
7320 for (j = 0; j <= 1; ++j) {
7321 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7322 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7323 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7324 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7325 double MinimumTWait;
7326 double NonDPTEBandwidth;
7327 double DPTEBandwidth;
7328 double DCFCLKRequiredForAverageBandwidth;
7329 double ExtraLatencyBytes;
7330 double ExtraLatencyCycles;
7331 double DCFCLKRequiredForPeakBandwidth;
7332 int NoOfDPPState[DC__NUM_DPP__MAX];
7333 double MinimumTvmPlus2Tr0;
7334
7335 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7336 for (k = 0; k < NumberOfActivePlanes; ++k) {
7337 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7338 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
7339 }
7340
7341 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7342 NoOfDPPState[k] = NoOfDPP[i][j][k];
7343 }
7344
7345 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
7346 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
7347 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
7348 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
7349 DCFCLKRequiredForAverageBandwidth = dml_max3(
7350 ProjectedDCFCLKDeepSleep[i][j],
7351 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
7352 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7353 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
7354
7355 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7356 ReorderingBytes,
7357 TotalNumberOfActiveDPP[i][j],
7358 PixelChunkSizeInKByte,
7359 TotalNumberOfDCCActiveDPP[i][j],
7360 MetaChunkSize,
7361 GPUVMEnable,
7362 HostVMEnable,
7363 NumberOfActivePlanes,
7364 NoOfDPPState,
7365 dpte_group_bytes,
7366 1,
7367 HostVMMinPageSize,
7368 HostVMMaxNonCachedPageTableLevels);
7369 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
7370 for (k = 0; k < NumberOfActivePlanes; ++k) {
7371 double DCFCLKCyclesRequiredInPrefetch;
7372 double ExpectedPrefetchBWAcceleration;
7373 double PrefetchTime;
7374
7375 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
7376 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
7377 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7378 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7379 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
7380 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7381 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
7382 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
7383 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
7384 DynamicMetadataVMExtraLatency[k] =
7385 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
7386 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7387 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
7388 - UrgLatency[i]
7389 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
7390 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7391 - DynamicMetadataVMExtraLatency[k];
7392
7393 if (PrefetchTime > 0) {
7394 double ExpectedVRatioPrefetch;
7395 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7396 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7397 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7398 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7399 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
7400 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7401 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
7402 }
7403 } else {
7404 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
7405 }
7406 if (DynamicMetadataEnable[k] == true) {
7407 double TSetupPipe;
7408 double TdmbfPipe;
7409 double TdmsksPipe;
7410 double TdmecPipe;
7411 double AllowedTimeForUrgentExtraLatency;
7412
7413 CalculateVupdateAndDynamicMetadataParameters(
7414 MaxInterDCNTileRepeaters,
7415 RequiredDPPCLK[i][j][k],
7416 RequiredDISPCLK[i][j],
7417 ProjectedDCFCLKDeepSleep[i][j],
7418 PixelClock[k],
7419 HTotal[k],
7420 VTotal[k] - VActive[k],
7421 DynamicMetadataTransmittedBytes[k],
7422 DynamicMetadataLinesBeforeActiveRequired[k],
7423 Interlace[k],
7424 ProgressiveToInterlaceUnitInOPP,
7425 &TSetupPipe,
7426 &TdmbfPipe,
7427 &TdmecPipe,
7428 &TdmsksPipe,
7429 &dummy1,
7430 &dummy2,
7431 &dummy3);
7432 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7433 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7434 if (AllowedTimeForUrgentExtraLatency > 0) {
7435 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7436 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7437 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7438 } else {
7439 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
7440 }
7441 }
7442 }
7443 DCFCLKRequiredForPeakBandwidth = 0;
7444 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7445 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7446 }
7447 MinimumTvmPlus2Tr0 = UrgLatency[i]
7448 * (GPUVMEnable == true ?
7449 (HostVMEnable == true ?
7450 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
7451 0);
7452 for (k = 0; k < NumberOfActivePlanes; ++k) {
7453 double MaximumTvmPlus2Tr0PlusTsw;
7454 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7455 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7456 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
7457 } else {
7458 DCFCLKRequiredForPeakBandwidth = dml_max3(
7459 DCFCLKRequiredForPeakBandwidth,
7460 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7461 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7462 }
7463 }
7464 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7465 }
7466 }
7467}
7468
7469static void CalculateUnboundedRequestAndCompressedBufferSize(
7470 unsigned int DETBufferSizeInKByte,
7471 int ConfigReturnBufferSizeInKByte,
7472 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7473 int TotalActiveDPP,
7474 bool NoChromaPlanes,
7475 int MaxNumDPP,
7476 int CompressedBufferSegmentSizeInkByteFinal,
7477 enum output_encoder_class *Output,
7478 bool *UnboundedRequestEnabled,
7479 int *CompressedBufferSizeInkByte)
7480{
7481 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7482
7483 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7484 *CompressedBufferSizeInkByte = (
7485 *UnboundedRequestEnabled == true ?
7486 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7487 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7488 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7489
7490#ifdef __DML_VBA_DEBUG__
7491 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7492 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7493 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7494 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7495 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7496 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7497 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7498#endif
7499}
7500
7501static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7502{
7503 bool ret_val = false;
7504
7505 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7506 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7507 ret_val = false;
7508 }
7509 return (ret_val);
7510}
7511
7512