@@ -986,38 +986,15 @@ void xe_device_wmb(struct xe_device *xe)
986986 xe_mmio_write32 (xe_root_tile_mmio (xe ), VF_CAP_REG , 0 );
987987}
988988
989- /**
990- * xe_device_td_flush() - Flush transient L3 cache entries
991- * @xe: The device
992- *
993- * Display engine has direct access to memory and is never coherent with L3/L4
994- * caches (or CPU caches), however KMD is responsible for specifically flushing
995- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
996- * can happen from such a surface without seeing corruption.
997- *
998- * Display surfaces can be tagged as transient by mapping it using one of the
999- * various L3:XD PAT index modes on Xe2.
1000- *
1001- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
1002- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
1003- * Media is not coherent with L3 and we want to support render-vs-media
1004- * usescases. For other engines like copy/blt the HW internally forces uncached
1005- * behaviour, hence why we can skip the TDF on such platforms.
989+ /*
990+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each non-media gt.
1006991 */
1007- void xe_device_td_flush (struct xe_device * xe )
992+ static void tdf_request_sync (struct xe_device * xe )
1008993{
1009- struct xe_gt * gt ;
1010994 unsigned int fw_ref ;
995+ struct xe_gt * gt ;
1011996 u8 id ;
1012997
1013- if (!IS_DGFX (xe ) || GRAPHICS_VER (xe ) < 20 )
1014- return ;
1015-
1016- if (XE_WA (xe_root_mmio_gt (xe ), 16023588340 )) {
1017- xe_device_l2_flush (xe );
1018- return ;
1019- }
1020-
1021998 for_each_gt (gt , xe , id ) {
1022999 if (xe_gt_is_media_type (gt ))
10231000 continue ;
@@ -1027,6 +1004,7 @@ void xe_device_td_flush(struct xe_device *xe)
10271004 return ;
10281005
10291006 xe_mmio_write32 (& gt -> mmio , XE2_TDF_CTRL , TRANSIENT_FLUSH_REQUEST );
1007+
10301008 /*
10311009 * FIXME: We can likely do better here with our choice of
10321010 * timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1057,15 +1035,49 @@ void xe_device_l2_flush(struct xe_device *xe)
10571035 return ;
10581036
10591037 spin_lock (& gt -> global_invl_lock );
1060- xe_mmio_write32 (& gt -> mmio , XE2_GLOBAL_INVAL , 0x1 );
10611038
1039+ xe_mmio_write32 (& gt -> mmio , XE2_GLOBAL_INVAL , 0x1 );
10621040 if (xe_mmio_wait32 (& gt -> mmio , XE2_GLOBAL_INVAL , 0x1 , 0x0 , 500 , NULL , true))
10631041 xe_gt_err_once (gt , "Global invalidation timeout\n" );
1042+
10641043 spin_unlock (& gt -> global_invl_lock );
10651044
10661045 xe_force_wake_put (gt_to_fw (gt ), fw_ref );
10671046}
10681047
/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * The display engine accesses memory directly and is never coherent with the
 * L3/L4 caches (or the CPU caches). The KMD is therefore responsible for
 * flushing transient L3 GPU cache entries before the flip sequence, so that
 * scanout from such a surface does not see corruption.
 *
 * A display surface is tagged as transient by mapping it with one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is
 * flushed at the end of each submission via PIPE_CONTROL for compute/render,
 * since SA Media is not coherent with L3 and we want to support
 * render-vs-media use cases. For other engines like copy/blt the HW
 * internally forces uncached behaviour, which is why the TDF can be skipped
 * on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
	struct xe_gt *gt;

	/* Nothing to do unless this is a discrete device, graphics version 20+ */
	if (!(IS_DGFX(xe) && GRAPHICS_VER(xe) >= 20))
		return;

	gt = xe_root_mmio_gt(xe);
	if (XE_WA(gt, 16023588340)) {
		/* A transient flush is not sufficient: flush the L2 */
		xe_device_l2_flush(xe);
		return;
	}

	tdf_request_sync(xe);
}
1080+
10691081u32 xe_device_ccs_bytes (struct xe_device * xe , u64 size )
10701082{
10711083 return xe_device_has_flat_ccs (xe ) ?
0 commit comments