From c9bb69cc04d6db2a42402d461b2b703b7089bb24 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Thu, 11 Dec 2025 16:16:17 +0200 Subject: [PATCH 01/20] Fix: Thunderbolt eGPU hot-plug/unplug kernel crash support --- kernel-open/common/inc/nvkms-kapi.h | 11 + kernel-open/nvidia-drm/nvidia-drm-drv.c | 78 ++ kernel-open/nvidia-drm/nvidia-drm-priv.h | 15 + .../nvidia-modeset/nvidia-modeset-linux.c | 21 + .../nvidia-modeset-os-interface.h | 6 + kernel-open/nvidia-modeset/nvkms.h | 2 + kernel-open/nvidia/nv-acpi.c | 18 +- kernel-open/nvidia/nv-i2c.c | 27 + kernel-open/nvidia/nv-pci.c | 51 +- kernel-open/nvidia/nv-rsync.c | 27 +- kernel-open/nvidia/nv-rsync.h | 2 + kernel-open/nvidia/nv.c | 92 +- src/nvidia-modeset/include/nvkms-private.h | 2 + src/nvidia-modeset/include/nvkms-types.h | 7 + .../kapi/interface/nvkms-kapi.h | 11 + src/nvidia-modeset/kapi/src/nvkms-kapi.c | 46 + .../include/nvidia-modeset-os-interface.h | 6 + .../os-interface/include/nvkms.h | 2 + .../src/nvkms-console-restore.c | 5 + src/nvidia-modeset/src/nvkms-dma.c | 216 +++- src/nvidia-modeset/src/nvkms-event.c | 5 + src/nvidia-modeset/src/nvkms-evo.c | 51 +- src/nvidia-modeset/src/nvkms-evo1.c | 492 ++++++++ src/nvidia-modeset/src/nvkms-evo3.c | 1106 ++++++++++++----- src/nvidia-modeset/src/nvkms-hw-flip.c | 10 + src/nvidia-modeset/src/nvkms-rm.c | 14 + src/nvidia-modeset/src/nvkms.c | 97 ++ src/nvidia/arch/nvalloc/unix/src/osapi.c | 59 + src/nvidia/arch/nvalloc/unix/src/osinit.c | 49 +- src/nvidia/src/kernel/core/thread_state.c | 5 +- .../src/kernel/diagnostics/nv_debug_dump.c | 22 + src/nvidia/src/kernel/gpu/disp/kern_disp.c | 2 +- .../falcon/arch/turing/kernel_falcon_tu102.c | 9 +- src/nvidia/src/kernel/gpu/gpu.c | 11 + .../src/kernel/gpu/gpu_user_shared_data.c | 19 +- .../src/kernel/gpu/gr/fecs_event_list.c | 6 +- .../src/kernel/gpu/gr/kernel_graphics.c | 2 +- .../gsp/arch/ampere/kernel_gsp_falcon_ga102.c | 4 + .../gsp/arch/turing/kernel_gsp_booter_tu102.c | 15 +- 
.../gpu/gsp/arch/turing/kernel_gsp_tu102.c | 17 +- src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c | 46 +- src/nvidia/src/kernel/gpu/intr/intr.c | 88 +- src/nvidia/src/kernel/gpu/mem_mgr/ce_utils.c | 11 +- .../src/kernel/gpu/mem_mgr/vaspace_api.c | 2 +- src/nvidia/src/kernel/mem_mgr/mem.c | 2 +- src/nvidia/src/kernel/vgpu/rpc.c | 10 + .../src/libraries/resserv/src/rs_client.c | 2 +- .../src/libraries/resserv/src/rs_server.c | 4 +- 48 files changed, 2402 insertions(+), 403 deletions(-) diff --git a/kernel-open/common/inc/nvkms-kapi.h b/kernel-open/common/inc/nvkms-kapi.h index 4a65977031..4f9c421b3f 100644 --- a/kernel-open/common/inc/nvkms-kapi.h +++ b/kernel-open/common/inc/nvkms-kapi.h @@ -656,6 +656,17 @@ struct NvKmsKapiFunctionsTable { */ void (*freeDevice)(struct NvKmsKapiDevice *device); + /*! + * Frees a device during surprise removal (e.g., Thunderbolt eGPU unplug). + * This skips all hardware access and only releases kernel resources. + * Use this instead of freeDevice() when the GPU hardware is no longer + * accessible to avoid page faults and hangs. + * + * \param [in] device A device returned by allocateDevice(). + * This function is a no-op if device is not valid. + */ + void (*freeDeviceForSurpriseRemoval)(struct NvKmsKapiDevice *device); + /*! * Grab ownership of device, ownership is required to do modeset. * diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c index e9ef77c948..3190fda519 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-drv.c +++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c @@ -852,6 +852,43 @@ static void nv_drm_dev_unload(struct drm_device *dev) return; } + /* + * During surprise removal (e.g., Thunderbolt eGPU hot-unplug), + * the GPU hardware is no longer accessible. Skip NVKMS calls that + * would access hardware to prevent page faults and crashes. + * Use freeDeviceForSurpriseRemoval which only releases kernel resources + * without attempting any hardware access. 
+ */ + if (nv_dev->inSurpriseRemoval) { + NV_DRM_DEV_LOG_INFO(nv_dev, + "Surprise removal detected, skipping hardware access"); + + cancel_delayed_work_sync(&nv_dev->hotplug_event_work); + mutex_lock(&nv_dev->lock); + + atomic_set(&nv_dev->enable_event_handling, false); + drm_kms_helper_poll_fini(dev); + drm_mode_config_cleanup(dev); + + pDevice = nv_dev->pDevice; + nv_dev->pDevice = NULL; + + mutex_unlock(&nv_dev->lock); + + /* + * Use freeDeviceForSurpriseRemoval instead of freeDevice. + * This skips KmsFreeDevice() and RmFreeDevice() which would try + * to access GPU hardware via ioctls/RM API calls and cause + * page faults since the GPU memory is unmapped. + * It only calls nvkms_close_gpu() to release the GPU reference + * count, allowing the eGPU to be re-initialized when reconnected. + */ + if (pDevice != NULL) { + nvKms->freeDeviceForSurpriseRemoval(pDevice); + } + return; + } + /* Release modeset ownership if fbdev is enabled */ #if defined(NV_DRM_FBDEV_AVAILABLE) @@ -2167,6 +2204,28 @@ static void nv_drm_dev_destroy(struct nv_drm_device *nv_dev) nv_drm_free(nv_dev); } +/* + * Helper to get PCI device from DRM device, handling both old and new kernels. + * Returns NULL if not a PCI device or device not available. + */ +static struct pci_dev *nv_drm_get_pci_dev(struct drm_device *dev) +{ + if (dev == NULL) { + return NULL; + } + +#if defined(NV_DRM_DEVICE_HAS_PDEV) + return dev->pdev; +#else + /* On newer kernels (5.14+), drm_device.pdev was removed. + * Get PCI device from the parent device. */ + if (dev->dev != NULL && dev->dev->bus == &pci_bus_type) { + return to_pci_dev(dev->dev); + } + return NULL; +#endif +} + /* * Unregister a single NVIDIA DRM device. 
*/ @@ -2175,7 +2234,26 @@ void nv_drm_remove(NvU32 gpuId) struct nv_drm_device *nv_dev = nv_drm_find_and_remove_device(gpuId); if (nv_dev) { + struct pci_dev *pdev; + NV_DRM_DEV_LOG_INFO(nv_dev, "Removing device"); + + /* + * Check if this is a surprise removal (hot-unplug) by testing + * if the PCI channel is offline. This happens when: + * - Thunderbolt eGPU is physically disconnected + * - GPU falls off the bus unexpectedly + * + * For normal driver unload (rmmod), the PCI channel remains online. + * We only skip NVKMS hardware access during surprise removal. + */ + pdev = nv_drm_get_pci_dev(nv_dev->dev); + if (pdev != NULL && pci_channel_offline(pdev)) { + NV_DRM_DEV_LOG_INFO(nv_dev, + "PCI channel offline - surprise removal detected"); + nv_dev->inSurpriseRemoval = NV_TRUE; + } + drm_dev_unplug(nv_dev->dev); nv_drm_dev_destroy(nv_dev); } diff --git a/kernel-open/nvidia-drm/nvidia-drm-priv.h b/kernel-open/nvidia-drm/nvidia-drm-priv.h index 88c74a069d..7b44e326df 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-priv.h +++ b/kernel-open/nvidia-drm/nvidia-drm-priv.h @@ -148,6 +148,21 @@ struct nv_drm_device { NvBool subOwnershipGranted; NvBool hasFramebufferConsole; + /* + * Set to NV_TRUE for external GPUs (e.g., Thunderbolt/USB4 eGPU). + * External GPUs use the fast removal path to avoid hangs during + * both surprise removal and "safe" software-initiated disconnect. + */ + NvBool isExternalGpu; + + /* + * Set to NV_TRUE when the device is being removed due to + * surprise removal (e.g., Thunderbolt eGPU hot-unplug). + * When set, NVKMS operations that would access GPU hardware + * are skipped to prevent crashes from accessing unmapped memory. 
+ */ + NvBool inSurpriseRemoval; + struct drm_property *nv_out_fence_property; struct drm_property *nv_input_colorspace_property; diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c index e81ac023ad..88b6be31d7 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-linux.c +++ b/kernel-open/nvidia-modeset/nvidia-modeset-linux.c @@ -1208,6 +1208,27 @@ void nvkms_close_gpu(NvU32 gpuId) __rm_ops.free_stack(stack); } +void nvkms_gpu_lost(NvU32 gpuId) +{ + /* + * Mark the GPU as lost in NVKMS. This prevents hardware access + * and cancels pending timers that might try to access the removed GPU. + * + * NOTE: We intentionally do NOT take nvkms_lock here because this function + * may be called from contexts that already hold the lock (e.g., during + * module unload). The gpuLost flag is a simple boolean that can be safely + * written without a lock - any racing operation will either: + * 1. See gpuLost=TRUE and bail out early + * 2. See gpuLost=FALSE but hit the 0xFFFFFFFF check when reading hardware + * + * A memory barrier ensures the write is visible to other CPUs promptly. + */ + nvKmsGpuLost(gpuId); + + /* Ensure gpuLost write is visible to other CPUs */ + smp_wmb(); +} + NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info) { return __rm_ops.enumerate_gpus(gpu_info); diff --git a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h index d4d656e766..8c7294e6c2 100644 --- a/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h +++ b/kernel-open/nvidia-modeset/nvidia-modeset-os-interface.h @@ -310,6 +310,12 @@ void* nvkms_get_per_open_data(int fd); NvBool nvkms_open_gpu(NvU32 gpuId); void nvkms_close_gpu(NvU32 gpuId); +/*! + * Mark a GPU as lost (surprise removal, e.g., Thunderbolt eGPU unplug). + * This prevents hardware access and cancels pending timers. + */ +void nvkms_gpu_lost(NvU32 gpuId); + /*! * Enumerate nvidia gpus. 
diff --git a/kernel-open/nvidia-modeset/nvkms.h b/kernel-open/nvidia-modeset/nvkms.h index d350ef7564..668fa8c27b 100644 --- a/kernel-open/nvidia-modeset/nvkms.h +++ b/kernel-open/nvidia-modeset/nvkms.h @@ -88,6 +88,8 @@ void nvKmsModuleUnload(void); void nvKmsSuspend(NvU32 gpuId); void nvKmsResume(NvU32 gpuId); +void nvKmsGpuLost(NvU32 gpuId); + void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles); NvBool nvKmsReadConf(const char *buff, size_t size, diff --git a/kernel-open/nvidia/nv-acpi.c b/kernel-open/nvidia/nv-acpi.c index d0fe94a039..ee0ae99506 100644 --- a/kernel-open/nvidia/nv-acpi.c +++ b/kernel-open/nvidia/nv-acpi.c @@ -252,12 +252,28 @@ static void nv_acpi_notify_event(acpi_handle handle, u32 event_type, void *data) { nv_acpi_t *pNvAcpiObject = data; nv_state_t *nvl = pNvAcpiObject->notifier_data; + nv_state_t *nv; + + if (nvl == NULL) + return; + + nv = NV_STATE_PTR(nvl); + if (nv == NULL) + return; + + /* + * Check if we're in surprise removal before processing ACPI events. + * This can happen during Thunderbolt eGPU hot-unplug where the device + * is being removed but ACPI events are still being delivered. + */ + if (nv->flags & NV_FLAG_IN_SURPRISE_REMOVAL) + return; /* * Function to handle device specific ACPI events such as display hotplug, * GPS and D-notifier events. */ - rm_acpi_notify(pNvAcpiObject->sp, NV_STATE_PTR(nvl), event_type); + rm_acpi_notify(pNvAcpiObject->sp, nv, event_type); } void nv_acpi_register_notifier(nv_linux_state_t *nvl) diff --git a/kernel-open/nvidia/nv-i2c.c b/kernel-open/nvidia/nv-i2c.c index a0f61ed22d..4da6bfca11 100644 --- a/kernel-open/nvidia/nv-i2c.c +++ b/kernel-open/nvidia/nv-i2c.c @@ -44,6 +44,15 @@ static int nv_i2c_algo_master_xfer(struct i2c_adapter *adapter, struct i2c_msg m #endif ; + /* + * Check if the GPU is in surprise removal (e.g., Thunderbolt unplug). + * If so, return immediately to avoid hanging on RPC calls to GSP. 
+ */ + if (nv_check_gpu_state(nv) != NV_OK) + { + return -ENODEV; + } + rc = nv_kmem_cache_alloc_stack(&sp); if (rc != 0) { @@ -93,6 +102,15 @@ static int nv_i2c_algo_smbus_xfer( NV_STATUS rmStatus = NV_OK; nvidia_stack_t *sp = NULL; + /* + * Check if the GPU is in surprise removal (e.g., Thunderbolt unplug). + * If so, return immediately to avoid hanging on RPC calls to GSP. + */ + if (nv_check_gpu_state(nv) != NV_OK) + { + return -ENODEV; + } + rc = nv_kmem_cache_alloc_stack(&sp); if (rc != 0) { @@ -196,6 +214,15 @@ static u32 nv_i2c_algo_functionality(struct i2c_adapter *adapter) u32 ret = I2C_FUNC_I2C; nvidia_stack_t *sp = NULL; + /* + * Check if the GPU is in surprise removal (e.g., Thunderbolt unplug). + * If so, return 0 to indicate no functionality available. + */ + if (nv_check_gpu_state(nv) != NV_OK) + { + return 0; + } + if (nv_kmem_cache_alloc_stack(&sp) != 0) { return 0; diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index 2767134e8f..fd4d529101 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -27,6 +27,7 @@ #include "nv-msi.h" #include "nv-hypervisor.h" #include "nv-reg.h" +#include "nv-rsync.h" #if defined(NV_VGPU_KVM_BUILD) #include "nv-vgpu-vfio-interface.h" @@ -2142,6 +2143,13 @@ nv_pci_remove(struct pci_dev *pci_dev) nv = NV_STATE_PTR(nvl); + /* + * Note: For external GPUs (eGPU via Thunderbolt), the NV_FLAG_IN_SURPRISE_REMOVAL + * flag is set later in the removal process - either when waiting for usage count + * times out, or when actual surprise removal is detected. Setting it too early + * can interfere with normal cleanup operations that need to acquire GPU locks. + */ + #if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature #if defined(CONFIG_IOMMU_SVA) && \ (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT)) @@ -2179,22 +2187,24 @@ nv_pci_remove(struct pci_dev *pci_dev) /* * Sanity check: A removed device shouldn't have a non-zero usage_count. 
* For eGPU, fall off the bus along with clients active is a valid scenario. - * Hence skipping the sanity check for eGPU. + * We still wait for a short time to allow in-progress close operations + * to complete, but with a timeout to prevent hangs. */ if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu)) { nv_printf(NV_DBG_ERRORS, - "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count!\n", + "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%d)%s\n", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), - NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); + NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn), + NV_ATOMIC_READ(nvl->usage_count), + nv->is_external_gpu ? " (external GPU)" : ""); /* * We can't return from this function without corrupting state, so we wait for - * the usage count to go to zero. + * the usage count to go to zero, but with a timeout. */ while (atomic64_read(&nvl->usage_count) != 0) { - /* * While waiting, release the locks so that other threads can make * forward progress. @@ -2203,6 +2213,7 @@ nv_pci_remove(struct pci_dev *pci_dev) UNLOCK_NV_LINUX_DEVICES(); os_delay(500); + wait_iterations++; /* Re-acquire the locks before checking again */ LOCK_NV_LINUX_DEVICES(); @@ -2221,10 +2232,32 @@ nv_pci_remove(struct pci_dev *pci_dev) down(&nvl->ldata_lock); } - nv_printf(NV_DBG_ERRORS, - "NVRM: Continuing with GPU removal for device %04x:%02x:%02x.%x\n", - NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), - NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); + if (NV_ATOMIC_READ(nvl->usage_count) != 0) + { + nv_printf(NV_DBG_ERRORS, + "NVRM: Timeout waiting for usage count on device %04x:%02x:%02x.%x (remaining: %d). 
Forcing removal.\n", + NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), + NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn), + NV_ATOMIC_READ(nvl->usage_count)); + /* + * Force the surprise removal flag so that any remaining + * close operations will take the fast-path. + */ + nv->flags |= NV_FLAG_IN_SURPRISE_REMOVAL; + + /* + * Mark that we had a surprise removal so rsync cleanup + * warnings are suppressed during module unload. + */ + nv_set_rsync_had_surprise_removal(); + } + else + { + nv_printf(NV_DBG_ERRORS, + "NVRM: Continuing with GPU removal for device %04x:%02x:%02x.%x\n", + NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), + NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn)); + } } rm_check_for_gpu_surprise_removal(sp, nv); diff --git a/kernel-open/nvidia/nv-rsync.c b/kernel-open/nvidia/nv-rsync.c index 88863dab68..f98c52c8e6 100644 --- a/kernel-open/nvidia/nv-rsync.c +++ b/kernel-open/nvidia/nv-rsync.c @@ -31,6 +31,7 @@ void nv_init_rsync_info( ) { g_rsync_info.relaxed_ordering_mode = NV_FALSE; + g_rsync_info.had_surprise_removal = NV_FALSE; g_rsync_info.usage_count = 0; g_rsync_info.data = NULL; NV_INIT_MUTEX(&g_rsync_info.lock); @@ -40,9 +41,17 @@ void nv_destroy_rsync_info( void ) { - WARN_ON(g_rsync_info.data); - WARN_ON(g_rsync_info.usage_count); - WARN_ON(g_rsync_info.relaxed_ordering_mode); + /* + * After GPU surprise removal (e.g., Thunderbolt eGPU hot-unplug), + * these may not have been properly cleaned up. Skip warnings in + * that case since the cleanup failure is expected. + */ + if (!g_rsync_info.had_surprise_removal) + { + WARN_ON(g_rsync_info.data); + WARN_ON(g_rsync_info.usage_count); + WARN_ON(g_rsync_info.relaxed_ordering_mode); + } } int nv_get_rsync_info( @@ -100,6 +109,18 @@ void nv_put_rsync_info( up(&g_rsync_info.lock); } +/* + * Mark that a GPU surprise removal occurred. 
This is used to suppress + * warnings about unclean rsync state during module unload, since the + * cleanup may be incomplete after forced removal. + */ +void nv_set_rsync_had_surprise_removal( + void +) +{ + g_rsync_info.had_surprise_removal = NV_TRUE; +} + int nv_register_rsync_driver( int (*get_relaxed_ordering_mode)(int *mode, void *data), void (*put_relaxed_ordering_mode)(int mode, void *data), diff --git a/kernel-open/nvidia/nv-rsync.h b/kernel-open/nvidia/nv-rsync.h index cc0e1a2e51..c1cadefb75 100644 --- a/kernel-open/nvidia/nv-rsync.h +++ b/kernel-open/nvidia/nv-rsync.h @@ -31,6 +31,7 @@ typedef struct nv_rsync_info struct semaphore lock; uint32_t usage_count; NvBool relaxed_ordering_mode; + NvBool had_surprise_removal; int (*get_relaxed_ordering_mode)(int *mode, void *data); void (*put_relaxed_ordering_mode)(int mode, void *data); void (*wait_for_rsync)(struct pci_dev *gpu, void *data); @@ -41,6 +42,7 @@ void nv_init_rsync_info(void); void nv_destroy_rsync_info(void); int nv_get_rsync_info(void); void nv_put_rsync_info(void); +void nv_set_rsync_had_surprise_removal(void); int nv_register_rsync_driver( int (*get_relaxed_ordering_mode)(int *mode, void *data), void (*put_relaxed_ordering_mode)(int mode, void *data), diff --git a/kernel-open/nvidia/nv.c b/kernel-open/nvidia/nv.c index 9ad14f1d91..3abef3c9b3 100644 --- a/kernel-open/nvidia/nv.c +++ b/kernel-open/nvidia/nv.c @@ -2191,14 +2191,49 @@ nvidia_close_callback( static void nvidia_close_deferred(void *data) { nv_linux_file_private_t *nvlfp = data; + nv_linux_state_t *nvl = nvlfp->nvptr; + nv_state_t *nv = nvl ? NV_STATE_PTR(nvl) : NULL; + NvBool got_lock = NV_FALSE; + NvBool in_surprise_removal = NV_FALSE; nv_wait_open_complete(nvlfp); - down_read(&nv_system_pm_lock); + /* + * Check if we're in surprise removal before trying to acquire the lock. + * If the device is being removed (e.g., Thunderbolt unplug), we should + * not block on the PM lock as it may be held by the removal path. 
+ */ + if (nv != NULL) + { + in_surprise_removal = NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv); + } + + if (in_surprise_removal) + { + /* + * For surprise removal, try to acquire the lock but don't block. + * If we can't get it, proceed without it - cleanup will be minimal + * anyway since the hardware is gone. + */ + got_lock = down_read_trylock(&nv_system_pm_lock); + if (!got_lock) + { + nv_printf(NV_DBG_INFO, + "NVRM: Surprise removal - proceeding with close without PM lock\n"); + } + } + else + { + down_read(&nv_system_pm_lock); + got_lock = NV_TRUE; + } nvidia_close_callback(nvlfp); - up_read(&nv_system_pm_lock); + if (got_lock) + { + up_read(&nv_system_pm_lock); + } } int @@ -2209,6 +2244,9 @@ nvidia_close( { int rc; nv_linux_file_private_t *nvlfp = NV_GET_LINUX_FILE_PRIVATE(file); + nv_linux_state_t *nvl; + nv_state_t *nv; + NvBool in_surprise_removal = NV_FALSE; nv_printf(NV_DBG_INFO, "NVRM: nvidia_close on GPU with minor number %d\n", @@ -2221,10 +2259,44 @@ nvidia_close( NV_SET_FILE_PRIVATE(file, NULL); + /* + * Check if the device is in surprise removal (e.g., Thunderbolt unplug). + * If so, we should not block waiting for the PM lock as it may be held + * by the removal path, causing a deadlock. + */ + nvl = nvlfp->nvptr; + if (nvl != NULL) + { + nv = NV_STATE_PTR(nvl); + in_surprise_removal = NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv); + } + rc = nv_wait_open_complete_interruptible(nvlfp); if (rc == 0) { - rc = nv_down_read_interruptible(&nv_system_pm_lock); + if (in_surprise_removal) + { + /* + * For surprise removal, try to acquire the lock but don't block. + * If we can't get it, defer the close to a worker thread that + * will handle it properly. + */ + if (down_read_trylock(&nv_system_pm_lock)) + { + nvidia_close_callback(nvlfp); + up_read(&nv_system_pm_lock); + return 0; + } + /* + * Couldn't get the lock - fall through to defer the close. + * Set rc to indicate we need to defer. 
+ */ + rc = -EAGAIN; + } + else + { + rc = nv_down_read_interruptible(&nv_system_pm_lock); + } } if (rc == 0) @@ -5202,15 +5274,25 @@ int nvidia_dev_get(NvU32 gpu_id, nvidia_stack_t *sp) void nvidia_dev_put(NvU32 gpu_id, nvidia_stack_t *sp) { nv_linux_state_t *nvl; + nv_state_t *nv; + NV_STATUS status; /* Takes nvl->ldata_lock */ nvl = find_gpu_id(gpu_id); if (!nvl) return; - nv_close_device(NV_STATE_PTR(nvl), sp); + nv = NV_STATE_PTR(nvl); - WARN_ON(rm_set_external_kernel_client_count(sp, NV_STATE_PTR(nvl), NV_FALSE) != NV_OK); + nv_close_device(nv, sp); + + /* + * During surprise removal (e.g., Thunderbolt eGPU hot-unplug), + * this may fail because the GPU is already gone. Don't warn in + * that case - it's expected. + */ + status = rm_set_external_kernel_client_count(sp, nv, NV_FALSE); + WARN_ON((status != NV_OK) && !NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv)); up(&nvl->ldata_lock); } diff --git a/src/nvidia-modeset/include/nvkms-private.h b/src/nvidia-modeset/include/nvkms-private.h index 31c76081cd..b43b54e1e9 100644 --- a/src/nvidia-modeset/include/nvkms-private.h +++ b/src/nvidia-modeset/include/nvkms-private.h @@ -73,6 +73,8 @@ const NVEvoApiHandlesRec *nvGetSurfaceHandlesFromOpenDevConst( void nvKmsServiceNonStallInterrupt(void *dataPtr, NvU32 dataU32); +void nvKmsReinitializeGlobalClient(void); + #ifdef __cplusplus }; #endif diff --git a/src/nvidia-modeset/include/nvkms-types.h b/src/nvidia-modeset/include/nvkms-types.h index 1bc0d328f9..c6d5542274 100644 --- a/src/nvidia-modeset/include/nvkms-types.h +++ b/src/nvidia-modeset/include/nvkms-types.h @@ -1156,6 +1156,13 @@ typedef struct _NVEvoDevRec { */ NvBool supportsVblankSemControl : 1; + /* + * Indicates the GPU has been lost (e.g., Thunderbolt/eGPU hot-unplug). + * When set, any operations that would access GPU hardware should be + * skipped to avoid kernel crashes. 
+ */ + NvBool gpuLost : 1; + nvkms_timer_handle_t *postFlipIMPTimer; nvkms_timer_handle_t *consoleRestoreTimer; diff --git a/src/nvidia-modeset/kapi/interface/nvkms-kapi.h b/src/nvidia-modeset/kapi/interface/nvkms-kapi.h index 4a65977031..4f9c421b3f 100644 --- a/src/nvidia-modeset/kapi/interface/nvkms-kapi.h +++ b/src/nvidia-modeset/kapi/interface/nvkms-kapi.h @@ -656,6 +656,17 @@ struct NvKmsKapiFunctionsTable { */ void (*freeDevice)(struct NvKmsKapiDevice *device); + /*! + * Frees a device during surprise removal (e.g., Thunderbolt eGPU unplug). + * This skips all hardware access and only releases kernel resources. + * Use this instead of freeDevice() when the GPU hardware is no longer + * accessible to avoid page faults and hangs. + * + * \param [in] device A device returned by allocateDevice(). + * This function is a no-op if device is not valid. + */ + void (*freeDeviceForSurpriseRemoval)(struct NvKmsKapiDevice *device); + /*! * Grab ownership of device, ownership is required to do modeset. * diff --git a/src/nvidia-modeset/kapi/src/nvkms-kapi.c b/src/nvidia-modeset/kapi/src/nvkms-kapi.c index fdf921f1b2..7fc03f7585 100644 --- a/src/nvidia-modeset/kapi/src/nvkms-kapi.c +++ b/src/nvidia-modeset/kapi/src/nvkms-kapi.c @@ -635,6 +635,51 @@ static void FreeDevice(struct NvKmsKapiDevice *device) nvKmsKapiFree(device); } +/* + * FreeDeviceForSurpriseRemoval - Free device without hardware access. + * + * This is used for Thunderbolt eGPU hot-unplug or other surprise removal + * scenarios where the GPU hardware is no longer accessible. We skip all + * hardware operations (NVKMS ioctls, RM API calls) that would cause page + * faults or hangs when trying to access unmapped GPU memory. + * + * We only: + * 1. Release the GPU reference count (nvkms_close_gpu) + * 2. Free kernel memory resources (semaphore, device struct) + * + * The hardware resources will be cleaned up when the GPU is physically + * removed from the system. 
+ */ +static void FreeDeviceForSurpriseRemoval(struct NvKmsKapiDevice *device) +{ + if (device == NULL) { + return; + } + + /* + * Mark the GPU as lost in NVKMS. This sets the gpuLost flag to prevent + * any hardware access, and cancels pending timers that might try to + * access the removed GPU. + */ + nvkms_gpu_lost(device->gpuId); + + /* + * Skip KmsFreeDevice() and RmFreeDevice() - these try to access + * GPU hardware via ioctls and RM API calls, which will crash + * since the GPU memory is unmapped after surprise removal. + */ + + /* Lower the reference count of gpu - this is safe, no hardware access */ + nvkms_close_gpu(device->gpuId); + + /* Free kernel memory resources */ + if (device->pSema != NULL) { + nvkms_sema_free(device->pSema); + } + + nvKmsKapiFree(device); +} + NvBool nvKmsKapiAllocateSystemMemory(struct NvKmsKapiDevice *device, NvU32 hRmHandle, enum NvKmsSurfaceMemoryLayout layout, @@ -4013,6 +4058,7 @@ NvBool nvKmsKapiGetFunctionsTableInternal funcsTable->allocateDevice = AllocateDevice; funcsTable->freeDevice = FreeDevice; + funcsTable->freeDeviceForSurpriseRemoval = FreeDeviceForSurpriseRemoval; funcsTable->grabOwnership = GrabOwnership; funcsTable->releaseOwnership = ReleaseOwnership; diff --git a/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h b/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h index d4d656e766..8c7294e6c2 100644 --- a/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h +++ b/src/nvidia-modeset/os-interface/include/nvidia-modeset-os-interface.h @@ -310,6 +310,12 @@ void* nvkms_get_per_open_data(int fd); NvBool nvkms_open_gpu(NvU32 gpuId); void nvkms_close_gpu(NvU32 gpuId); +/*! + * Mark a GPU as lost (surprise removal, e.g., Thunderbolt eGPU unplug). + * This prevents hardware access and cancels pending timers. + */ +void nvkms_gpu_lost(NvU32 gpuId); + /*! * Enumerate nvidia gpus. 
diff --git a/src/nvidia-modeset/os-interface/include/nvkms.h b/src/nvidia-modeset/os-interface/include/nvkms.h index d350ef7564..668fa8c27b 100644 --- a/src/nvidia-modeset/os-interface/include/nvkms.h +++ b/src/nvidia-modeset/os-interface/include/nvkms.h @@ -88,6 +88,8 @@ void nvKmsModuleUnload(void); void nvKmsSuspend(NvU32 gpuId); void nvKmsResume(NvU32 gpuId); +void nvKmsGpuLost(NvU32 gpuId); + void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles); NvBool nvKmsReadConf(const char *buff, size_t size, diff --git a/src/nvidia-modeset/src/nvkms-console-restore.c b/src/nvidia-modeset/src/nvkms-console-restore.c index 0c6cc5b296..2cfb595889 100644 --- a/src/nvidia-modeset/src/nvkms-console-restore.c +++ b/src/nvidia-modeset/src/nvkms-console-restore.c @@ -765,6 +765,11 @@ NvBool nvEvoRestoreConsole(NVDevEvoPtr pDevEvo, const NvBool allowMST) pDevEvo->fbConsoleSurfaceHandle); struct NvKmsSetModeParams *params; + /* Skip if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + goto done; + } + /* * If this function fails to restore a console then NVKMS frees * and reallocates the core channel, to attempt the console diff --git a/src/nvidia-modeset/src/nvkms-dma.c b/src/nvidia-modeset/src/nvkms-dma.c index a6c0b57b62..409c8ea6f8 100644 --- a/src/nvidia-modeset/src/nvkms-dma.c +++ b/src/nvidia-modeset/src/nvkms-dma.c @@ -27,6 +27,7 @@ #include "nvkms-utils.h" #include "nvkms-rmapi.h" #include "class/cl917d.h" // NV917DDispControlDma, NV917D_DMA_* +#include // NV0080_CTRL_CMD_DMA_FLUSH #include "nvos.h" #define NV_DMA_PUSHER_CHASE_PAD 5 @@ -37,7 +38,18 @@ static void EvoCoreKickoff(NVDmaBufferEvoPtr push_buffer, NvU32 putOffset); void nvDmaKickoffEvo(NVEvoChannelPtr pChannel) { NVDmaBufferEvoPtr p = &pChannel->pb; - NvU32 putOffset = (NvU32)((char *)p->buffer - (char *)p->base); + NvU32 putOffset; + + /* + * Skip DMA kickoff if the GPU has been lost (e.g., Thunderbolt eGPU + * surprise removal). 
Attempting to access DMA control registers when + * the GPU is gone will crash the kernel. + */ + if (p->pDevEvo == NULL || p->pDevEvo->gpuLost) { + return; + } + + putOffset = (NvU32)((char *)p->buffer - (char *)p->base); if (p->put_offset == putOffset) { return; @@ -48,11 +60,70 @@ void nvDmaKickoffEvo(NVEvoChannelPtr pChannel) static void EvoCoreKickoff(NVDmaBufferEvoPtr push_buffer, NvU32 putOffset) { + NVEvoDmaPtr pDma = &push_buffer->dma; + NVDevEvoPtr pDevEvo = push_buffer->pDevEvo; int i; nvAssert(putOffset % 4 == 0); nvAssert(putOffset <= push_buffer->offset_max); + /* + * Defense-in-depth: check gpuLost again. The caller should have already + * checked this, but verify to avoid writing to invalid mapped memory. + */ + if (pDevEvo == NULL || pDevEvo->gpuLost) { + return; + } + + /* If needed, copy the chunk to be kicked off into each GPU's FB */ + if (pDma->isBar1Mapping) { + int sd; + + NV0080_CTRL_DMA_FLUSH_PARAMS flushParams = { 0 }; + NvU32 ret; + + NvU32 *endAddress; + + if (putOffset < push_buffer->put_offset) { + /* If we've wrapped, copy to the end of the pushbuffer */ + nvAssert(putOffset == 0); + endAddress = push_buffer->base + push_buffer->offset_max / + sizeof(NvU32); + } else { + endAddress = push_buffer->buffer; + } + + for (sd = 0; sd < pDevEvo->numSubDevices; sd++) { + NvU32 startOffset = push_buffer->put_offset / sizeof(NvU32); + + NvU32 *src = push_buffer->base; + NvU32 *dst = pDma->subDeviceAddress[sd]; + + nvAssert(dst != NULL); + + src += startOffset; + dst += startOffset; + while (src < endAddress) { + *dst++ = *src++; + } + } + + /* + * Finally, tell RM to flush so that the data actually lands in FB + * before telling the GPU to fetch it. 
+ */ + flushParams.targetUnit = DRF_DEF(0080_CTRL_DMA, _FLUSH_TARGET, + _UNIT_FB, _ENABLE); + + ret = nvRmApiControl(nvEvoGlobal.clientHandle, + pDevEvo->deviceHandle, + NV0080_CTRL_CMD_DMA_FLUSH, + &flushParams, sizeof(flushParams)); + if (ret != NVOS_STATUS_SUCCESS) { + nvAssert(!"NV0080_CTRL_CMD_DMA_FLUSH failed"); + } + } + #if NVCPU_IS_X86_64 __asm__ __volatile__ ("sfence\n\t" : : : "memory"); #elif NVCPU_IS_FAMILY_ARM @@ -110,8 +181,23 @@ NvBool nvEvoPollForEmptyChannel(NVEvoChannelPtr pChannel, NvU32 sd, { NVDmaBufferEvoPtr push_buffer = &pChannel->pb; + /* Return early if GPU is lost to avoid accessing invalid registers. */ + if (push_buffer->pDevEvo == NULL || push_buffer->pDevEvo->gpuLost) { + return FALSE; + } + do { - if (EvoCoreReadGet(push_buffer, sd) == push_buffer->put_offset) { + NvU32 getOffset = EvoCoreReadGet(push_buffer, sd); + + /* + * Check for GPU removal: reading 0xFFFFFFFF typically indicates + * the device has been removed from the bus. + */ + if (getOffset == 0xFFFFFFFF) { + return FALSE; + } + + if (getOffset == push_buffer->put_offset) { break; } @@ -132,6 +218,21 @@ void nvEvoMakeRoom(NVEvoChannelPtr pChannel, NvU32 count) NvU32 putOffset; NvU64 startTime = 0; const NvU64 timeout = 5000000; /* 5 seconds */ + /* + * Maximum number of consecutive timeouts before we give up. + * This prevents infinite hangs when the GPU is removed (e.g., Thunderbolt + * unplug). After 5 timeouts (25 seconds), we assume the GPU is gone. + */ + const NvU32 maxTimeoutCount = 5; + NvU32 timeoutCount = 0; + + /* + * Skip if the GPU has been lost. No point trying to make room in a + * push buffer for a GPU that's no longer there. 
+ */ + if (push_buffer->pDevEvo == NULL || push_buffer->pDevEvo->gpuLost) { + return; + } putOffset = (NvU32) ((char *)push_buffer->buffer - (char *)push_buffer->base); @@ -146,6 +247,16 @@ void nvEvoMakeRoom(NVEvoChannelPtr pChannel, NvU32 count) while (1) { getOffset = EvoReadGetOffset(push_buffer, TRUE); + /* + * Check for GPU removal: reading 0xFFFFFFFF from PCI config space + * typically indicates the device has been removed from the bus. + */ + if (getOffset == 0xFFFFFFFF) { + nvEvoLogDev(push_buffer->pDevEvo, EVO_LOG_ERROR, + "GPU appears to have been removed (read 0xFFFFFFFF)"); + break; + } + if (putOffset >= getOffset) { push_buffer->fifo_free_count = (push_buffer->offset_max - putOffset) >> 2; @@ -179,16 +290,25 @@ void nvEvoMakeRoom(NVEvoChannelPtr pChannel, NvU32 count) } /* - * If we have been waiting too long, print an error message. There - * isn't much we can do as currently structured, so just reset - * startTime. + * If we have been waiting too long, print an error message. + * After too many consecutive timeouts, give up to prevent + * infinite hangs during GPU surprise removal. 
*/ if (nvExceedsTimeoutUSec(push_buffer->pDevEvo, &startTime, timeout)) { + timeoutCount++; nvEvoLogDev(push_buffer->pDevEvo, EVO_LOG_ERROR, "Error while waiting for GPU progress: " - "0x%08x:%d %d:%d:%d:%d", + "0x%08x:%d %d:%d:%d:%d (timeout %d/%d)", pChannel->hwclass, pChannel->instance, - count, push_buffer->fifo_free_count, getOffset, putOffset); + count, push_buffer->fifo_free_count, getOffset, putOffset, + timeoutCount, maxTimeoutCount); + + if (timeoutCount >= maxTimeoutCount) { + nvEvoLogDev(push_buffer->pDevEvo, EVO_LOG_ERROR, + "GPU not responding after %d timeouts, assuming removed", + timeoutCount); + break; + } startTime = 0; } @@ -217,8 +337,16 @@ void nvWriteEvoCoreNotifier( { NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; const NvU32 sd = pDispEvo->displayOwner; - NVEvoDmaPtr pSubChannel = &pDevEvo->core->notifiersDma[sd]; - volatile NvU32 *pNotifiers = pSubChannel->cpuAddress; + NVEvoDmaPtr pSubChannel; + volatile NvU32 *pNotifiers; + + /* Skip if GPU is lost to avoid writing to invalid memory. */ + if (pDevEvo->gpuLost || pDevEvo->core == NULL) { + return; + } + + pSubChannel = &pDevEvo->core->notifiersDma[sd]; + pNotifiers = pSubChannel->subDeviceAddress[sd]; EvoWriteNotifier(pNotifiers + offset, value); } @@ -230,12 +358,26 @@ static NvBool EvoCheckNotifier(const NVDispEvoRec *pDispEvo, { const NvU32 sd = pDispEvo->displayOwner; NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; - NVEvoDmaPtr pSubChannel = &pDevEvo->core->notifiersDma[sd]; - NVDmaBufferEvoPtr p = &pDevEvo->core->pb; + NVEvoDmaPtr pSubChannel; + NVDmaBufferEvoPtr p; volatile NvU32 *pNotifier; NvU64 startTime = 0; + /* + * Maximum number of timeout cycles before giving up. + * Prevents infinite hangs during GPU surprise removal. + */ + const NvU32 maxTimeoutCount = 5; + NvU32 timeoutCount = 0; - pNotifier = pSubChannel->cpuAddress; + /* Return early if GPU is lost to avoid accessing invalid memory. 
*/ + if (pDevEvo->gpuLost || pDevEvo->core == NULL) { + return FALSE; + } + + pSubChannel = &pDevEvo->core->notifiersDma[sd]; + p = &pDevEvo->core->pb; + + pNotifier = pSubChannel->subDeviceAddress[sd]; nvAssert(pNotifier != NULL); pNotifier += offset; @@ -245,6 +387,17 @@ static NvBool EvoCheckNotifier(const NVDispEvoRec *pDispEvo, const NvU32 val = *pNotifier; const NvU32 done_mask = DRF_SHIFTMASK(done_extent_bit:done_base_bit); const NvU32 done_val = done_value << done_base_bit; + NvU32 getOffset; + + /* + * Check for GPU removal: reading 0xFFFFFFFF typically indicates + * the device has been removed from the bus. + */ + if (val == 0xFFFFFFFF) { + nvEvoLogDisp(pDispEvo, EVO_LOG_WARN, + "GPU appears removed (notifier read 0xFFFFFFFF)"); + return FALSE; + } if ((val & done_mask) == done_val) { return TRUE; @@ -257,14 +410,39 @@ static NvBool EvoCheckNotifier(const NVDispEvoRec *pDispEvo, if (nvExceedsTimeoutUSec( pDevEvo, &startTime, - NV_EVO_NOTIFIER_SHORT_TIMEOUT_USEC) && - (p->put_offset == EvoCoreReadGet(p, sd))) + NV_EVO_NOTIFIER_SHORT_TIMEOUT_USEC)) { - nvEvoLogDisp(pDispEvo, EVO_LOG_WARN, - "Lost display notification (%d:0x%08x); " - "continuing.", sd, val); - EvoWriteNotifier(pNotifier, done_value << done_base_bit); - return TRUE; + getOffset = EvoCoreReadGet(p, sd); + + /* + * Check for GPU removal in get offset as well. + */ + if (getOffset == 0xFFFFFFFF) { + nvEvoLogDisp(pDispEvo, EVO_LOG_WARN, + "GPU appears removed (GET read 0xFFFFFFFF)"); + return FALSE; + } + + if (p->put_offset == getOffset) + { + nvEvoLogDisp(pDispEvo, EVO_LOG_WARN, + "Lost display notification (%d:0x%08x); " + "continuing.", sd, val); + EvoWriteNotifier(pNotifier, done_value << done_base_bit); + return TRUE; + } + + /* + * Count timeouts. After too many, assume GPU is gone. 
+ */ + timeoutCount++; + if (timeoutCount >= maxTimeoutCount) { + nvEvoLogDisp(pDispEvo, EVO_LOG_ERROR, + "GPU not responding after %d timeouts (%d:0x%08x)", + timeoutCount, sd, val); + return FALSE; + } + startTime = 0; } nvkms_yield(); diff --git a/src/nvidia-modeset/src/nvkms-event.c b/src/nvidia-modeset/src/nvkms-event.c index 7b15bab850..414a2e1015 100644 --- a/src/nvidia-modeset/src/nvkms-event.c +++ b/src/nvidia-modeset/src/nvkms-event.c @@ -61,6 +61,11 @@ nvHandleHotplugEventDeferredWork(void *dataPtr, NvU32 dataU32) NVDpyEvoPtr pDpyEvo; NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + /* Skip hardware access if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + return; + } + // Get the hotplug state. hotplugParams.subDeviceInstance = pDispEvo->displayOwner; diff --git a/src/nvidia-modeset/src/nvkms-evo.c b/src/nvidia-modeset/src/nvkms-evo.c index fdd1df7ca5..a2b125c366 100644 --- a/src/nvidia-modeset/src/nvkms-evo.c +++ b/src/nvidia-modeset/src/nvkms-evo.c @@ -8835,6 +8835,21 @@ NvBool nvFreeDevEvo(NVDevEvoPtr pDevEvo) return FALSE; } + /* + * If the GPU was lost (surprise removal), skip all hardware-related + * cleanup. Just free software resources and remove from device list. 
+ */ + if (pDevEvo->gpuLost) { + nvEvoLogDev(pDevEvo, EVO_LOG_INFO, + "Freeing device after GPU lost, skipping hardware cleanup"); + + /* Still need to free the per-open data (software resources only) */ + nvFreePerOpenDev(nvEvoGlobal.nvKmsPerOpen, pDevEvo->pNvKmsOpenDev); + pDevEvo->pNvKmsOpenDev = NULL; + + goto free_software_resources; + } + if (pDevEvo->pDifrState) { nvRmUnregisterDIFREventHandler(pDevEvo); nvDIFRFree(pDevEvo->pDifrState); @@ -8874,19 +8889,39 @@ NvBool nvFreeDevEvo(NVDevEvoPtr pDevEvo) nvRmDestroyDisplays(pDevEvo); - nvkms_free_timer(pDevEvo->consoleRestoreTimer); - pDevEvo->consoleRestoreTimer = NULL; +free_software_resources: + { + NvBool wasGpuLost = pDevEvo->gpuLost; - nvPreallocFree(pDevEvo); + nvkms_free_timer(pDevEvo->consoleRestoreTimer); + pDevEvo->consoleRestoreTimer = NULL; - nvRmFreeDeviceEvo(pDevEvo); + nvPreallocFree(pDevEvo); - nvListDel(&pDevEvo->devListEntry); + /* + * Skip RM device cleanup if GPU is lost - handles are already invalid + * and RM API calls will fail. + */ + if (!pDevEvo->gpuLost) { + nvRmFreeDeviceEvo(pDevEvo); + } - nvkms_free_ref_ptr(pDevEvo->ref_ptr); + nvListDel(&pDevEvo->devListEntry); - nvFree(pDevEvo); - return TRUE; + nvkms_free_ref_ptr(pDevEvo->ref_ptr); + + nvFree(pDevEvo); + + /* + * If the GPU was lost and the device list is now empty, reinitialize + * the global RM client so that newly attached GPUs can be used. + */ + if (wasGpuLost && nvListIsEmpty(&nvEvoGlobal.devList)) { + nvKmsReinitializeGlobalClient(); + } + + return TRUE; + } } static void AssignNumberOfApiHeads(NVDevEvoRec *pDevEvo) diff --git a/src/nvidia-modeset/src/nvkms-evo1.c b/src/nvidia-modeset/src/nvkms-evo1.c index cf2bbac618..1347956901 100644 --- a/src/nvidia-modeset/src/nvkms-evo1.c +++ b/src/nvidia-modeset/src/nvkms-evo1.c @@ -32,9 +32,416 @@ #include "nvkms-evo1.h" #include "nvkms-prealloc.h" #include "nvkms-utils.h" +#include "nvos.h" #include "hdmi_spec.h" +#include // NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS + +/*! 
+ * Initialize head-independent IMP param fields.
+ *
+ * Initializes an NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS structure.
+ * IMP users should call this once, followed by per-head calls to
+ * AssignPerHeadImpParams().
+ *
+ * \param[in] pImp A pointer to a param structure.
+ */
+static void InitImpParams(NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS *pImp)
+{
+    int i;
+
+    nvkms_memset(pImp, 0, sizeof(*pImp));
+
+    /* Initialize to not possible. */
+    pImp->IsPossible = NV5070_CTRL_CMD_IS_MODE_POSSIBLE_IS_POSSIBLE_NO;
+
+    /* Set all heads to inactive. */
+    for (i = 0; i < NV5070_CTRL_CMD_MAX_HEADS; i++) {
+        pImp->Head[i].HeadActive =
+            NV5070_CTRL_CMD_IS_MODE_POSSIBLE_HEAD_ACTIVE_NO;
+    }
+
+    /* Set all ORs to no owner. */
+    for (i = 0; i < NV5070_CTRL_CMD_MAX_DACS; i++) {
+        pImp->Dac[i].owner = NV5070_CTRL_CMD_OR_OWNER_NONE;
+    }
+
+    pImp->bUseSorOwnerMask = TRUE;
+    for (i = 0; i < NV5070_CTRL_CMD_MAX_SORS; i++) {
+        pImp->Sor[i].ownerMask = NV5070_CTRL_CMD_SOR_OWNER_MASK_NONE;
+    }
+
+    for (i = 0; i < NV5070_CTRL_CMD_MAX_PIORS; i++) {
+        pImp->Pior[i].owner = NV5070_CTRL_CMD_OR_OWNER_NONE;
+    }
+}
+
+/*!
+ * Initialize head-specific IMP param fields.
+ *
+ * Initialize the portion of the NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS
+ * structure that applies to a specific head, and the OR driven by
+ * that head.
+ *
+ * The param structure should be initialized by InitImpParams()
+ * before calling this per-head function.
+ *
+ * \param[out] pImp The param structure to initialize.
+ * \param[in] pTimings The rastering timings and viewport configuration.
+ * \param[in] pUsage The usage bounds that will be used for this head.
+ * \param[in] head The number of the head that will be driven.
+ * \param[in] orNumber The number of the OR driven by the head.
+ * \param[in] orType The type of the OR driven by the head.
+ */ +static void AssignPerHeadImpParams(const NVDevEvoRec *pDevEvo, + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS *pImp, + const NVHwModeTimingsEvo *pTimings, + const enum nvKmsPixelDepth pixelDepth, + const struct NvKmsUsageBounds *pUsage, + const int head, + const int orNumber, + const int orType) +{ + const NVHwModeViewPortEvo *pViewPort = &pTimings->viewPort; + NvU64 overlayFormats = 0; + NvU32 protocol; + + nvkms_memset(&pImp->Head[head], 0, sizeof(pImp->Head[head])); + + nvAssert(head < NV5070_CTRL_CMD_MAX_HEADS); + pImp->Head[head].HeadActive = TRUE; + + nvAssert(orType == NV0073_CTRL_SPECIFIC_OR_TYPE_NONE || + orNumber != NV_INVALID_OR); + + /* raster timings */ + + pImp->Head[head].PixelClock.Frequency = pTimings->pixelClock; + + pImp->Head[head].PixelClock.Adj1000Div1001 = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PIXEL_CLOCK_ADJ1000DIV1001_NO; + + pImp->Head[head].RasterSize.Width = pTimings->rasterSize.x; + pImp->Head[head].RasterSize.Height = pTimings->rasterSize.y; + pImp->Head[head].RasterBlankStart.X = pTimings->rasterBlankStart.x; + pImp->Head[head].RasterBlankStart.Y = pTimings->rasterBlankStart.y; + pImp->Head[head].RasterBlankEnd.X = pTimings->rasterBlankEnd.x; + pImp->Head[head].RasterBlankEnd.Y = pTimings->rasterBlankEnd.y; + pImp->Head[head].RasterVertBlank2.YStart = pTimings->rasterVertBlank2Start; + pImp->Head[head].RasterVertBlank2.YEnd = pTimings->rasterVertBlank2End; + pImp->Head[head].Control.Structure = + pTimings->interlaced ? 
+ NV5070_CTRL_CMD_IS_MODE_POSSIBLE_CONTROL_STRUCTURE_INTERLACED : + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_CONTROL_STRUCTURE_PROGRESSIVE; + + if (orType == NV0073_CTRL_SPECIFIC_OR_TYPE_DAC) { + nvAssert(orNumber < ARRAY_LEN(pImp->Dac)); + nvAssert(pImp->Dac[orNumber].owner == NV5070_CTRL_CMD_OR_OWNER_NONE); + pImp->Dac[orNumber].owner = NV5070_CTRL_CMD_OR_OWNER_HEAD(head); + nvAssert(pTimings->protocol == NVKMS_PROTOCOL_DAC_RGB); + pImp->Dac[orNumber].protocol = NV5070_CTRL_CMD_DAC_PROTOCOL_RGB_CRT; + } else if (orType == NV0073_CTRL_SPECIFIC_OR_TYPE_SOR) { + nvAssert(orNumber < ARRAY_LEN(pImp->Sor)); + pImp->Sor[orNumber].ownerMask |= NV5070_CTRL_CMD_SOR_OWNER_MASK_HEAD(head); + switch (pTimings->protocol) { + default: + nvAssert(!"Unknown protocol"); + /* fall through */ + case NVKMS_PROTOCOL_SOR_LVDS_CUSTOM: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_LVDS_CUSTOM; + break; + case NVKMS_PROTOCOL_SOR_SINGLE_TMDS_A: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_SINGLE_TMDS_A; + break; + case NVKMS_PROTOCOL_SOR_SINGLE_TMDS_B: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_SINGLE_TMDS_B; + break; + case NVKMS_PROTOCOL_SOR_DUAL_TMDS: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_DUAL_TMDS; + break; + case NVKMS_PROTOCOL_SOR_DP_A: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_DP_A; + break; + case NVKMS_PROTOCOL_SOR_DP_B: + protocol = NV5070_CTRL_CMD_SOR_PROTOCOL_DP_B; + break; + } + pImp->Sor[orNumber].protocol = protocol; + pImp->Sor[orNumber].pixelReplicateMode = + NV5070_CTRL_IS_MODE_POSSIBLE_PIXEL_REPLICATE_MODE_OFF; + } else if (orType == NV0073_CTRL_SPECIFIC_OR_TYPE_PIOR) { + nvAssert(orNumber < ARRAY_LEN(pImp->Pior)); + nvAssert(pImp->Pior[orNumber].owner == NV5070_CTRL_CMD_OR_OWNER_NONE); + pImp->Pior[orNumber].owner = NV5070_CTRL_CMD_OR_OWNER_HEAD(head); + switch (pTimings->protocol) { + default: + nvAssert(!"Unknown protocol"); + /* fall through */ + case NVKMS_PROTOCOL_PIOR_EXT_TMDS_ENC: + protocol = NV5070_CTRL_CMD_PIOR_PROTOCOL_EXT_TMDS_ENC; + break; + } + 
pImp->Pior[orNumber].protocol = protocol; + } else { + nvAssert(orType == NV0073_CTRL_SPECIFIC_OR_TYPE_NONE); + } + + /* viewport out */ + + pImp->Head[head].OutputScaler.VerticalTaps = + NVEvoScalerTapsToNum(pViewPort->vTaps); + + pImp->Head[head].OutputScaler.HorizontalTaps = + NVEvoScalerTapsToNum(pViewPort->hTaps); + + pImp->Head[head].ViewportSizeOut.Width = pViewPort->out.width; + pImp->Head[head].ViewportSizeOut.Height = pViewPort->out.height; + + pImp->Head[head].ViewportSizeOutMin.Width = + pImp->Head[head].ViewportSizeOut.Width; + + pImp->Head[head].ViewportSizeOutMin.Height = + pImp->Head[head].ViewportSizeOut.Height; + + pImp->Head[head].ViewportSizeOutMax.Width = + pImp->Head[head].ViewportSizeOut.Width; + + pImp->Head[head].ViewportSizeOutMax.Height = + pImp->Head[head].ViewportSizeOut.Height; + + /* viewport in */ + + pImp->Head[head].ViewportSizeIn.Width = pViewPort->in.width; + pImp->Head[head].ViewportSizeIn.Height = pViewPort->in.height; + + /* + * The actual format doesn't really matter, since RM just + * converts it back to bits per pixel for its IMP calculation anyway. The + * hardware doesn't have a "usage bound" for core -- changing the format + * of the core surface will always incur a supervisor interrupt and rerun + * IMP (XXX if we change the core surface as part of a flip to one of a + * different depth, should we force the pre/post IMP update path?). + * + * EVO2 hal uses surfaces of the same format in the core and base channels, + * see needToReprogramCoreSurface() in nvkms-evo2.c. 
+ */ + if (pUsage->layer[NVKMS_MAIN_LAYER].usable) { + if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED8BPP) { + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_RF16_GF16_BF16_AF16; + } else if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED4BPP) { + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_A8R8G8B8; + } else if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED2BPP) { + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_R5G6B5; + } else if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED1BPP) { + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_I8; + } else { /* default to RGB 4BPP */ + nvAssert(!"Unknown core format"); + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_A8R8G8B8; + } + } else { + pImp->Head[head].Params.Format = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_FORMAT_A8R8G8B8; + } + + pImp->Head[head].Params.SuperSample = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS_SUPER_SAMPLE_X1AA; + + /* base usage bounds */ + + if (pUsage->layer[NVKMS_MAIN_LAYER].usable) { + pImp->Head[head].BaseUsageBounds.Usable = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_USABLE_YES; + + if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED8BPP) { + pImp->Head[head].BaseUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_PIXEL_DEPTH_64; + } else if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED4BPP) { + pImp->Head[head].BaseUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_PIXEL_DEPTH_32; + } else if 
(pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED2BPP) { + pImp->Head[head].BaseUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_PIXEL_DEPTH_16; + } else if (pUsage->layer[NVKMS_MAIN_LAYER].supportedSurfaceMemoryFormats & + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED1BPP) { + pImp->Head[head].BaseUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_PIXEL_DEPTH_8; + } else { /* default to RGB 8BPP */ + nvAssert(!"Unknown base channel usage bound format"); + pImp->Head[head].BaseUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_PIXEL_DEPTH_64; + } + + pImp->Head[head].BaseUsageBounds.SuperSample = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_SUPER_SAMPLE_X1AA; + } else { + pImp->Head[head].BaseUsageBounds.Usable = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_BASE_USAGE_BOUNDS_USABLE_NO; + } + + /* overlay usage bounds */ + + pImp->Head[head].OverlayUsageBounds.Usable = + pUsage->layer[NVKMS_OVERLAY_LAYER].usable + ? NV5070_CTRL_CMD_IS_MODE_POSSIBLE_OVERLAY_USAGE_BOUNDS_USABLE_YES + : NV5070_CTRL_CMD_IS_MODE_POSSIBLE_OVERLAY_USAGE_BOUNDS_USABLE_NO; + + overlayFormats = pUsage->layer[NVKMS_OVERLAY_LAYER].usable ? 
+ pUsage->layer[NVKMS_OVERLAY_LAYER].supportedSurfaceMemoryFormats : + NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED2BPP; + + if (overlayFormats & NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED4BPP) { + pImp->Head[head].OverlayUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_OVERLAY_USAGE_BOUNDS_PIXEL_DEPTH_32; + } else if (overlayFormats & NVKMS_SURFACE_MEMORY_FORMATS_RGB_PACKED2BPP) { + pImp->Head[head].OverlayUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_OVERLAY_USAGE_BOUNDS_PIXEL_DEPTH_16; + } else { + nvAssert(!"Unknown overlay channel usage bound format"); + pImp->Head[head].OverlayUsageBounds.PixelDepth = + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_OVERLAY_USAGE_BOUNDS_PIXEL_DEPTH_32; + } + + /* pixel depth */ + + switch (pixelDepth) { + case NVKMS_PIXEL_DEPTH_18_444: + pImp->Head[head].outputResourcePixelDepthBPP = + NV5070_CTRL_IS_MODE_POSSIBLE_OUTPUT_RESOURCE_PIXEL_DEPTH_BPP_18_444; + break; + case NVKMS_PIXEL_DEPTH_24_444: + pImp->Head[head].outputResourcePixelDepthBPP = + NV5070_CTRL_IS_MODE_POSSIBLE_OUTPUT_RESOURCE_PIXEL_DEPTH_BPP_24_444; + break; + case NVKMS_PIXEL_DEPTH_30_444: + pImp->Head[head].outputResourcePixelDepthBPP = + NV5070_CTRL_IS_MODE_POSSIBLE_OUTPUT_RESOURCE_PIXEL_DEPTH_BPP_30_444; + break; + case NVKMS_PIXEL_DEPTH_16_422: + pImp->Head[head].outputResourcePixelDepthBPP = + NV5070_CTRL_IS_MODE_POSSIBLE_OUTPUT_RESOURCE_PIXEL_DEPTH_BPP_16_422; + break; + case NVKMS_PIXEL_DEPTH_20_422: + pImp->Head[head].outputResourcePixelDepthBPP = + NV5070_CTRL_IS_MODE_POSSIBLE_OUTPUT_RESOURCE_PIXEL_DEPTH_BPP_20_422; + break; + } +} + +void nvEvo1IsModePossible(NVDispEvoPtr pDispEvo, + const NVEvoIsModePossibleDispInput *pInput, + NVEvoIsModePossibleDispOutput *pOutput) +{ + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS *pImp = + nvPreallocGet(pDevEvo, PREALLOC_TYPE_IMP_PARAMS, sizeof(*pImp)); + NvBool result = FALSE; + NvU32 head; + NvU32 ret; + + InitImpParams(pImp); + + pImp->RequestedOperation = + 
NV5070_CTRL_CMD_IS_MODE_POSSIBLE_REQUESTED_OPERATION_QUERY; + + for (head = 0; head < NVKMS_MAX_HEADS_PER_DISP; head++) { + if (pInput->head[head].pTimings == NULL) { + continue; + } + + AssignPerHeadImpParams(pDevEvo, pImp, + pInput->head[head].pTimings, + pInput->head[head].pixelDepth, + pInput->head[head].pUsage, + head, + pInput->head[head].orIndex, + pInput->head[head].orType); + } + + pImp->base.subdeviceIndex = pDispEvo->displayOwner; + + if (pInput->requireBootClocks) { + // XXX TODO: IMP requires lock pin information if pstate information is + // requested. For now, just assume no locking. + pImp->MinPState = NV5070_CTRL_IS_MODE_POSSIBLE_NEED_MIN_PSTATE; + } + + for (head = 0; head < NVKMS_MAX_HEADS_PER_DISP; head++) { + pImp->Head[head].displayId[0] = pInput->head[head].displayId; + } + + ret = nvRmApiControl(nvEvoGlobal.clientHandle, + pDevEvo->displayHandle, + NV5070_CTRL_CMD_IS_MODE_POSSIBLE, + pImp, sizeof(*pImp)); + + if (ret != NV_OK || !pImp->IsPossible || + (pInput->requireBootClocks && + // P8 = "boot clocks" + (pImp->MinPState < NV5070_CTRL_IS_MODE_POSSIBLE_PSTATES_P8 && + // XXX TODO: With PStates 3.0, only a "v-pstate" is returned in + // impParams.minPerfLevel. We need to correlate that with "boot + // clocks" somehow. 
+ pImp->MinPState != NV5070_CTRL_IS_MODE_POSSIBLE_PSTATES_UNDEFINED))) { + goto done; + } + + result = TRUE; + +done: + nvPreallocRelease(pDevEvo, PREALLOC_TYPE_IMP_PARAMS); + pOutput->possible = result; +} + +void nvEvo1PrePostIMP(NVDispEvoPtr pDispEvo, NvBool isPre) +{ + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_PARAMS *pImp = + nvPreallocGet(pDevEvo, PREALLOC_TYPE_IMP_PARAMS, sizeof(*pImp)); + NvU32 ret; + + if (isPre) { + /* + * Sync the core channel for pre-modeset IMP to ensure that the state + * cache reflects all of the methods we've pushed + */ + ret = nvRMSyncEvoChannel(pDevEvo, pDevEvo->core, __LINE__); + if (!ret) { + nvAssert(!"nvRMSyncEvoChannel failed during PreModesetIMP"); + } + } + + nvkms_memset(pImp, 0, sizeof(*pImp)); + + pImp->RequestedOperation = isPre ? + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_REQUESTED_OPERATION_PRE_MODESET_USE_SC : + NV5070_CTRL_CMD_IS_MODE_POSSIBLE_REQUESTED_OPERATION_POST_MODESET_USE_SC; + + pImp->base.subdeviceIndex = pDispEvo->displayOwner; + + ret = nvRmApiControl(nvEvoGlobal.clientHandle, + pDevEvo->displayHandle, + NV5070_CTRL_CMD_IS_MODE_POSSIBLE, + pImp, sizeof(*pImp)); + if ((ret != NVOS_STATUS_SUCCESS) || !pImp->IsPossible) { + nvAssert(!"NV5070_CTRL_CMD_IS_MODE_POSSIBLE failed"); + } + + nvPreallocRelease(pDevEvo, PREALLOC_TYPE_IMP_PARAMS); +} + /*! * Return the value to use for HEAD_SET_STORAGE_PITCH. 
* @@ -67,6 +474,91 @@ NvU32 nvEvoGetHeadSetStoragePitchValue(const NVDevEvoRec *pDevEvo, return pitch; } +static NvBool GetChannelState(NVDevEvoPtr pDevEvo, + NVEvoChannelPtr pChan, + NvU32 sd, + NvU32 *result) +{ + NV5070_CTRL_CMD_GET_CHANNEL_INFO_PARAMS info = { }; + NvU32 ret; + + info.base.subdeviceIndex = sd; + info.channelClass = pChan->hwclass; + info.channelInstance = pChan->instance; + + ret = nvRmApiControl(nvEvoGlobal.clientHandle, + pDevEvo->displayHandle, + NV5070_CTRL_CMD_GET_CHANNEL_INFO, + &info, sizeof(info)); + if (ret != NVOS_STATUS_SUCCESS) { + /* + * When the GPU is lost (e.g., Thunderbolt/eGPU hot-unplug), + * suppress the error log to avoid flooding dmesg. The callers + * will handle the failure appropriately. + */ + if (ret != NVOS_STATUS_ERROR_GPU_IS_LOST) { + nvEvoLogDev(pDevEvo, EVO_LOG_ERROR, + "Failed to query display engine channel state: 0x%08x:%d:%d:0x%08x", + pChan->hwclass, pChan->instance, sd, ret); + } + return FALSE; + } + + *result = info.channelState; + + return TRUE; +} + +NvBool nvEvo1IsChannelIdle(NVDevEvoPtr pDevEvo, + NVEvoChannelPtr pChan, + NvU32 sd, + NvBool *result) +{ + NvU32 channelState; + + if (!GetChannelState(pDevEvo, pChan, sd, &channelState)) { + return FALSE; + } + + *result = (channelState == NV5070_CTRL_GET_CHANNEL_INFO_STATE_IDLE); + + return TRUE; +} + +/* + * Result is false if an EVO channel is either one of NO_METHOD_PENDING or + * UNCONNECTED, true o.w. + * + * NO_METHOD_PENDING is a mask for EMPTY | WRTIDLE | IDLE. + * + * If NVKMS hasn't grabbed the channel, it can be seen as UNCONNECTED. 
+ */
+NvBool nvEvo1IsChannelMethodPending(NVDevEvoPtr pDevEvo,
+                                    NVEvoChannelPtr pChan,
+                                    NvU32 sd,
+                                    NvBool *result)
+{
+    NvU32 channelState;
+
+    if (!GetChannelState(pDevEvo, pChan, sd, &channelState)) {
+        return FALSE;
+    }
+
+    *result = !(channelState &
+                (NV5070_CTRL_GET_CHANNEL_INFO_STATE_NO_METHOD_PENDING |
+                 NV5070_CTRL_GET_CHANNEL_INFO_STATE_UNCONNECTED));
+
+    return TRUE;
+}
+
+void nvEvo1SetDscParams(const NVDispEvoRec *pDispEvo,
+                        const NvU32 head,
+                        const NVDscInfoEvoRec *pDscInfo,
+                        const enum nvKmsPixelDepth pixelDepth)
+{
+    nvAssert(pDscInfo->type == NV_DSC_INFO_EVO_TYPE_DISABLED);
+}
+
 /*
  * The 'type' the timing library writes into the NVT_INFOFRAME_HEADER
  * structure is not the same as the protocol values that hardware
diff --git a/src/nvidia-modeset/src/nvkms-evo3.c b/src/nvidia-modeset/src/nvkms-evo3.c
index a82a2bdfd4..dae600af67 100644
--- a/src/nvidia-modeset/src/nvkms-evo3.c
+++ b/src/nvidia-modeset/src/nvkms-evo3.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2010-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2010-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -39,7 +39,7 @@ #include "nvkms-dpy.h" #include "nvkms-vrr.h" #include "nvkms-ctxdma.h" -#include "displayport/displayport.h" +#include "nvos.h" #include @@ -60,6 +60,10 @@ #include #include +#include + +#define NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C3 \ + (NVBIT64(NvKmsSurfaceMemoryFormatRF16GF16BF16XF16)) #define NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C5 \ (NVBIT64(NvKmsSurfaceMemoryFormatRF16GF16BF16XF16)) @@ -1067,6 +1071,20 @@ static void ConfigureCsc1C5(NVDevEvoPtr pDevEvo, SetCsc11MatrixC5(pChannel, &csc11Matrix); } +static void InitDesktopColorC3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) +{ + NvU32 head; + + for (head = 0; head < pDevEvo->numHeads; head++) { + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_DESKTOP_COLOR(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _RED, 0) | + DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _GREEN, 0) | + DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _BLUE, 0) | + DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _ALPHA, 255)); + } +} + static void InitDesktopColorC5(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) { NvU32 head; @@ -1089,6 +1107,19 @@ void nvEvoInitChannel3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) InitChannelCapsC3(pDevEvo, pChannel); } +static void EvoInitChannelC3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) +{ + const NvBool isCore = + FLD_TEST_DRF64(_EVO, _CHANNEL_MASK, _CORE, _ENABLE, + pChannel->channelMask); + + nvEvoInitChannel3(pDevEvo, pChannel); + + if (isCore) { + InitDesktopColorC3(pDevEvo, pChannel); + } +} + static void EvoInitChannelC5(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) { const NvBool isCore = @@ -1302,6 +1333,30 @@ static void EvoInitWindowMapping3(NVDevEvoPtr pDevEvo, } } +static void EvoInitWindowMappingC3(const NVDispEvoRec *pDispEvo, + NVEvoModesetUpdateState *pModesetUpdateState) +{ + NVDevEvoRec *pDevEvo = pDispEvo->pDevEvo; + 
NVEvoUpdateState *updateState = &pModesetUpdateState->updateState; + NVEvoChannelPtr pChannel = pDevEvo->core; + NvU32 win; + + nvPushEvoSubDevMaskDisp(pDispEvo); + + nvUpdateUpdateState(pDevEvo, updateState, pChannel); + + EvoInitWindowMapping3(pDevEvo, + pModesetUpdateState); + + // Set window usage bounds + for (win = 0; win < pDevEvo->numWindows; win++) { + nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); + /* XXXnvdisplay: window scaling */ + nvDmaSetEvoMethodData(pChannel, NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); + } + nvPopEvoSubDevMask(pDevEvo); +} + void nvEvoInitWindowMappingC5(const NVDispEvoRec *pDispEvo, NVEvoModesetUpdateState *pModesetUpdateState) { @@ -1487,6 +1542,17 @@ static void EvoSetRasterParams3(NVDevEvoPtr pDevEvo, int head, nvDmaSetEvoMethodData(pChannel, hdmiStereoCtrl); } +static void EvoSetRasterParamsC3(NVDevEvoPtr pDevEvo, int head, + const NVHwModeTimingsEvo *pTimings, + const NvU8 tilePosition, + const NVDscInfoEvoRec *pDscInfo, + const NVEvoColorRec *pOverscanColor, + NVEvoUpdateState *updateState) +{ + nvAssert(tilePosition == 0); + EvoSetRasterParams3(pDevEvo, head, pTimings, pOverscanColor, updateState); +} + static void EvoSetRasterParams5(NVDevEvoPtr pDevEvo, int head, const NVHwModeTimingsEvo *pTimings, const NvU8 tilePosition, @@ -1588,6 +1654,55 @@ static void EvoSetRasterParamsC6(NVDevEvoPtr pDevEvo, int head, nvDmaSetEvoMethodData(pChannel, rasterHBlankDelay); } +static void EvoSetProcAmpC3(NVDispEvoPtr pDispEvo, const NvU32 head, + NVEvoUpdateState *updateState) +{ + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NVEvoChannelPtr pChannel = pDevEvo->core; + const NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head]; + NvU8 colorSpace; + NvU32 dynRange; + + /* These methods should only apply to a single pDpyEvo */ + nvAssert(pDevEvo->subDevMaskStackDepth > 0); + + nvUpdateUpdateState(pDevEvo, updateState, pChannel); + + // These NVT defines match the HEAD_SET_PROCAMP ones. 
+ ct_assert(NVT_COLORIMETRY_RGB == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_RGB); + ct_assert(NVT_COLORIMETRY_YUV_601 == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_601); + ct_assert(NVT_COLORIMETRY_YUV_709 == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_709); + ct_assert(NVT_COLOR_RANGE_FULL == NVC37D_HEAD_SET_PROCAMP_RANGE_COMPRESSION_DISABLE); + ct_assert(NVT_COLOR_RANGE_LIMITED == NVC37D_HEAD_SET_PROCAMP_RANGE_COMPRESSION_ENABLE); + + if (pHeadState->procAmp.colorimetry == NVT_COLORIMETRY_BT2020RGB) { + colorSpace = NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_RGB; + } else if (pHeadState->procAmp.colorimetry == NVT_COLORIMETRY_BT2020YCC) { + colorSpace = NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_2020; + } else { + colorSpace = pHeadState->procAmp.colorimetry; + } + + if (pHeadState->procAmp.colorRange == NVT_COLOR_RANGE_FULL) { + dynRange = DRF_DEF(C37D, _HEAD_SET_PROCAMP, _DYNAMIC_RANGE, _VESA); + } else { + nvAssert(pHeadState->procAmp.colorRange == NVT_COLOR_RANGE_LIMITED); + dynRange = DRF_DEF(C37D, _HEAD_SET_PROCAMP, _DYNAMIC_RANGE, _CEA); + } + + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_PROCAMP(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37D, _HEAD_SET_PROCAMP, _COLOR_SPACE, colorSpace) | + DRF_DEF(C37D, _HEAD_SET_PROCAMP, _CHROMA_LPF, _DISABLE) | + DRF_NUM(C37D, _HEAD_SET_PROCAMP, _SAT_COS, + pHeadState->procAmp.satCos) | + DRF_NUM(C37D, _HEAD_SET_PROCAMP, _SAT_SINE, 0) | + dynRange | + DRF_NUM(C37D, _HEAD_SET_PROCAMP, _RANGE_COMPRESSION, + pHeadState->procAmp.colorRange) | + DRF_DEF(C37D, _HEAD_SET_PROCAMP, _BLACK_LEVEL, _GRAPHICS)); +} + static const struct NvKmsCscMatrix RGBToLimitedRangeYCbCrRec2020Matrix = {{ { 0x7000, 0x1f9900, 0x1ff700, 0x8000 }, { 0x3988, 0x947c, 0xcfc, 0x1000 }, @@ -1986,6 +2101,31 @@ void nvEvoSetControlC3(NVDevEvoPtr pDevEvo, int sd) } } + /* + * GV100 HW bug 2062029 WAR + * + * GV100 always holds the external fliplock line low as if + * NVC37D_SET_CONTROL_FLIP_LOCK_PIN was enabled. 
To work around this, + * the GV100 VBIOS initializes the fliplock GPIOs to be software + * controlled (forced off). The following rmctrl needs to be called to + * switch HW control of the fliplock GPIOs back on whenever external + * fliplock is enabled. + */ + { + NVC370_CTRL_SET_SWAPRDY_GPIO_WAR_PARAMS params = { }; + + params.base.subdeviceIndex = pEvoSubDev->subDeviceInstance; + params.bEnable = (data != 0); + + if (nvRmApiControl( + nvEvoGlobal.clientHandle, + pDevEvo->displayHandle, + NVC370_CTRL_CMD_SET_SWAPRDY_GPIO_WAR, + ¶ms, sizeof(params)) != NVOS_STATUS_SUCCESS) { + nvEvoLogDevDebug(pDevEvo, EVO_LOG_ERROR, "Failed to override fliplock GPIO"); + } + } + nvDmaSetStartEvoMethod(pChannel, NVC37D_SET_CONTROL, 1); nvDmaSetEvoMethodData(pChannel, data); } @@ -2353,6 +2493,34 @@ static void EvoORSetControlC6(NVDevEvoPtr pDevEvo, } } +static void EvoHeadSetControlORC3(NVDevEvoPtr pDevEvo, + const int head, + const NVHwModeTimingsEvo *pTimings, + const enum nvKmsPixelDepth pixelDepth, + const NvBool colorSpaceOverride, + NVEvoUpdateState *updateState) +{ + NVEvoChannelPtr pChannel = pDevEvo->core; + const NvU32 hwPixelDepth = nvEvoGetPixelDepthC3(pixelDepth); + const NvU16 colorSpaceFlag = nvEvo1GetColorSpaceFlag(pDevEvo, + colorSpaceOverride); + + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTROL_OUTPUT_RESOURCE(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _CRC_MODE, _COMPLETE_RASTER) | + (pTimings->hSyncPol ? + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _HSYNC_POLARITY, _NEGATIVE_TRUE) : + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _HSYNC_POLARITY, _POSITIVE_TRUE)) | + (pTimings->vSyncPol ? + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _VSYNC_POLARITY, _NEGATIVE_TRUE) : + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _VSYNC_POLARITY, _POSITIVE_TRUE)) | + DRF_NUM(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _PIXEL_DEPTH, hwPixelDepth) | + (colorSpaceOverride ? 
+ (DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_OVERRIDE, _ENABLE) | + DRF_NUM(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_FLAG, colorSpaceFlag)) : + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_OVERRIDE, _DISABLE))); +} + static void EvoHeadSetControlORC5(NVDevEvoPtr pDevEvo, const int head, const NVHwModeTimingsEvo *pTimings, @@ -2549,6 +2717,13 @@ static NvBool EvoSetUsageBounds3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, return needCoreUpdate; } +static NvBool EvoSetUsageBoundsC3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, + const struct NvKmsUsageBounds *pUsage, + NVEvoUpdateState *updateState) +{ + return EvoSetUsageBounds3(pDevEvo, sd, head, pUsage, updateState); +} + NvBool nvEvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, const struct NvKmsUsageBounds *pUsage, NVEvoUpdateState *updateState) @@ -2705,9 +2880,9 @@ static void UpdateWindowIMM(NVEvoChannelPtr pChannel, } updateImm |= releaseElv ? DRF_DEF(C37B, _UPDATE, _RELEASE_ELV, _TRUE) : 0; - nvDmaSetStartEvoMethod(pChannel->imm.dma, NVC37B_UPDATE, 1); - nvDmaSetEvoMethodData(pChannel->imm.dma, updateImm); - nvDmaKickoffEvo(pChannel->imm.dma); + nvDmaSetStartEvoMethod(pChannel->imm.u.dma, NVC37B_UPDATE, 1); + nvDmaSetEvoMethodData(pChannel->imm.u.dma, updateImm); + nvDmaKickoffEvo(pChannel->imm.u.dma); } } @@ -3313,6 +3488,51 @@ nvEvoIsModePossibleC3(NVDispEvoPtr pDispEvo, nvPreallocRelease(pDevEvo, PREALLOC_TYPE_IMP_PARAMS); } +void nvEvoPrePostIMPC3(NVDispEvoPtr pDispEvo, NvBool isPre) +{ + /* Nothing to do on nvdisplay -- pre/post IMP calls are not required. */ +} + +static void +EvoFlipC3(NVDevEvoPtr pDevEvo, + NVEvoChannelPtr pChannel, + const NVFlipChannelEvoHwState *pHwState, + NVEvoUpdateState *updateState, + NvBool bypassComposition); + +/* + * Returns TRUE iff the CSC should be enabled (i.e., the matrix is not the + * identity matrix). 
+ */ +static NvBool SetCscMatrixC3(NVEvoChannelPtr pChannel, + const struct NvKmsCscMatrix *matrix) +{ + NvU32 method = NVC37E_SET_CSC_RED2RED; + int y; + + if (nvIsCscMatrixIdentity(matrix)) { + return FALSE; + } + + for (y = 0; y < 3; y++) { + int x; + + for (x = 0; x < 4; x++) { + // Use DRF_NUM to truncate client-supplied values that are out of + // range. + NvU32 val = DRF_NUM(C37E, _SET_CSC_RED2RED, _COEFF, + matrix->m[y][x]); + + nvDmaSetStartEvoMethod(pChannel, method, 1); + nvDmaSetEvoMethodData(pChannel, val); + + method += 4; + } + } + + return TRUE; +} + static void SetCscMatrixC5Wrapper(NVEvoChannelPtr pChannel, const struct NvKmsCscMatrix *matrix, NvU32 coeffMethod, NvU32 controlMethod, @@ -3390,6 +3610,87 @@ static void SetCsc11MatrixC5(NVEvoChannelPtr pChannel, DRF_DEF(C57E, _SET_CSC11CONTROL, _ENABLE, _DISABLE)); } +/* + * WAR for GV100 HW bug 1978592: + * + * Timestamped flips allow SW to specify the earliest time that the next UPDATE + * will complete. Due to a HW bug, GV100 waits for the timestamp in the ARMED + * state (i.e. the timestamps that were pushed in the previous UPDATE) instead + * of the timestamp in the ASSEMBLY state (the time we want to postpone this + * flip until). + * + * This WAR inserts an additional UPDATE to push the timestamp from ASSEMBLY to + * ARMED while changing no other state, so the following normal UPDATE can + * wait for the correct timestamp. 
+ * + * This update needs to have the following characteristics: + * + * - MIN_PRESENT_INTERVAL 0 + * - TIMESTAMP_MODE _ENABLE + * - All other SET_PRESENT_CONTROL fields unmodified from previous UPDATE + * - SET_UPDATE_TIMESTAMP (target timestamp) + * - RELEASE_ELV _FALSE + * - Non-interlocked + * - Non-fliplocked + */ +static void +InsertAdditionalTimestampFlip(NVDevEvoPtr pDevEvo, + NVEvoChannelPtr pChannel, + const NVFlipChannelEvoHwState *pHwState, + NVEvoUpdateState *updateState) +{ + NvU32 presentControl = pChannel->oldPresentControl; + + /* This hardware bug is only present on GV100 which uses window + * class C37E. */ + nvAssert(pChannel->hwclass == NVC37E_WINDOW_CHANNEL_DMA); + + nvAssert(pHwState->timeStamp != 0); + + /* + * Update the necessary fields in SET_PRESENT_CONTROL without modifying + * the existing values by using the cached SET_PRESENT_CONTROL values + * from the previous update. + * + * Note that BEGIN_MODE must not be changed here; even though BEGIN_MODE + * may currently be NON_TEARING, a NON_TEARING + MIN_PRESENT_INTERVAL 0 + * flip will be correctly collapsed with the surrounding + * MIN_PRESENT_INTERVAL 1 flips. If we were to change BEGIN_MODE to + * IMMEDIATE, this would cause an additional delay due to the transition + * from NON_TEARING to IMMEDIATE. 
+ */ + presentControl = FLD_SET_DRF_NUM(C37E, _SET_PRESENT_CONTROL, + _MIN_PRESENT_INTERVAL, + 0, presentControl); + presentControl = FLD_SET_DRF(C37E, _SET_PRESENT_CONTROL, + _TIMESTAMP_MODE, + _ENABLE, presentControl); + + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PRESENT_CONTROL, 1); + nvDmaSetEvoMethodData(pChannel, presentControl); + + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_UPDATE_TIMESTAMP_LO, 2); + nvDmaSetEvoMethodData(pChannel, NvU64_LO32(pHwState->timeStamp)); + nvDmaSetEvoMethodData(pChannel, NvU64_HI32(pHwState->timeStamp)); + + // Issue non-interlocked, non-fliplocked, non-ReleaseElv UPDATE + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_INTERLOCK_FLAGS, 1); + nvDmaSetEvoMethodData(pChannel, 0); + + nvDmaSetStartEvoMethod(pChannel, + NVC37E_SET_WINDOW_INTERLOCK_FLAGS, + 1); + nvDmaSetEvoMethodData(pChannel, 0); + + nvDmaSetStartEvoMethod(pChannel, NVC37E_UPDATE, 1); + nvDmaSetEvoMethodData(pChannel, + DRF_DEF(C37E, _UPDATE, _RELEASE_ELV, _FALSE) | + DRF_NUM(C37E, _UPDATE, _FLIP_LOCK_PIN, + NVC37E_UPDATE_FLIP_LOCK_PIN_LOCK_PIN_NONE) | + DRF_DEF(C37E, _UPDATE, _INTERLOCK_WITH_WIN_IMM, + _DISABLE)); +} + static void EvoProgramSemaphore3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel, @@ -3675,6 +3976,15 @@ EvoFlipC3Common(NVDevEvoPtr pDevEvo, nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PRESENT_CONTROL, 1); nvDmaSetEvoMethodData(pChannel, presentControl); + /* + * GV100 timestamped flips need a duplicate update which only changes + * TIMESTAMP_MODE and MIN_PRESENT_INTERVAL fields in SET_PRESENT_CONTROL; + * to allow updating these fields without changing anything else in + * SET_PRESENT_CONTROL, cache the values we sent in previous flips here. + * (bug 1990958) + */ + pChannel->oldPresentControl = presentControl; + /* Set the surface parameters. 
*/ FOR_ALL_EYES(eye) { const NVSurfaceEvoRec *pSurfaceEvoPerEye = pHwState->pSurfaceEvo[eye]; @@ -4100,6 +4410,80 @@ NVSurfaceEvoPtr EvoGetLutSurface3(NVDevEvoPtr pDevEvo, return pDispEvo->headState[head].lut.pCurrSurface; } +static void +EvoFlipC3(NVDevEvoPtr pDevEvo, + NVEvoChannelPtr pChannel, + const NVFlipChannelEvoHwState *pHwState, + NVEvoUpdateState *updateState, + NvBool bypassComposition) +{ + NvBool enableCSC, swapUV, flip3Return; + enum NvKmsSurfaceMemoryFormat format; + /* + * lutSize and isLutModeVss are unused, since we only support 1025 and + * non-VSS on Volta, but we declare them to pass to EvoGetLutSurface3. + * + * TODO: Maybe validate the resulting values? + */ + NvU32 lutSize = NV_NUM_EVO_LUT_ENTRIES; + NvU64 offset = offsetof(NVEvoLutDataRec, base); + NvBool isLutModeVss = FALSE; + NVSurfaceEvoPtr pLutSurfaceEvo = EvoGetLutSurface3(pDevEvo, pChannel, pHwState, + &lutSize, &offset, &isLutModeVss); + + if (pHwState->timeStamp != 0) { + InsertAdditionalTimestampFlip(pDevEvo, pChannel, pHwState, + updateState); + } + + flip3Return = EvoFlipC3Common(pDevEvo, pChannel, pHwState, updateState); + + /* program semaphore */ + EvoProgramSemaphore3(pDevEvo, pChannel, pHwState); + + if (!flip3Return) { + return; + } + + format = pHwState->pSurfaceEvo[NVKMS_LEFT]->format; + + enableCSC = SetCscMatrixC3(pChannel, &pHwState->cscMatrix); + swapUV = IsSurfaceFormatUVSwapped(format); + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PARAMS, 1); + nvDmaSetEvoMethodData(pChannel, + (enableCSC ? DRF_DEF(C37E, _SET_PARAMS, _CSC, _ENABLE) : + DRF_DEF(C37E, _SET_PARAMS, _CSC, _DISABLE)) | + DRF_NUM(C37E, _SET_PARAMS, _FORMAT, nvHwFormatFromKmsFormatC3(format)) | + (swapUV ? 
DRF_DEF(C37E, _SET_PARAMS, _SWAP_UV, _ENABLE) : + DRF_DEF(C37E, _SET_PARAMS, _SWAP_UV, _DISABLE)) | + DRF_DEF(C37E, _SET_PARAMS, _UNDERREPLICATE, _DISABLE)); + + if (pLutSurfaceEvo) { + const NvU32 ctxDma = pLutSurfaceEvo->planes[0].surfaceDesc.ctxDmaHandle; + + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTROL_INPUT_LUT, 1); + nvDmaSetEvoMethodData(pChannel, + DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _SIZE, _SIZE_1025) | + DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _RANGE, _UNITY) | + DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _OUTPUT_MODE, _INDEX)); + + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_OFFSET_INPUT_LUT, 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37E, _SET_OFFSET_INPUT_LUT, _ORIGIN, offset)); + + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTEXT_DMA_INPUT_LUT, 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37E, _SET_CONTEXT_DMA_INPUT_LUT, _HANDLE, ctxDma)); + } else { + nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTEXT_DMA_INPUT_LUT, 1); + nvDmaSetEvoMethodData(pChannel, 0); + } + + UpdateCompositionC3(pDevEvo, pChannel, + &pHwState->composition, updateState, + format); +} + static void EvoSetupPQEotfBaseLutC5(NVEvoLutDataRec *pData, enum NvKmsLUTState *lutState, NvU32 *lutSize, @@ -4473,6 +4857,14 @@ static void UpdateComposition(NVDevEvoPtr pDevEvo, #undef UPDATE_COMPONENT } +static void EvoFlipTransitionWARC3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, + const NVEvoSubDevHeadStateRec *pSdHeadState, + const NVFlipEvoHwState *pFlipState, + NVEvoUpdateState *updateState) +{ + /* Nothing to do for Volta */ +} + /* * Hardware bug 2193096 requires that we send special software methods around * a window channel update that transitions from NULL ctxdma to non-NULL or @@ -4701,6 +5093,21 @@ UpdateCompositionC5(NVDevEvoPtr pDevEvo, } } +/* + * The LUT entries in INDEX_1025_UNITY_RANGE have 16 bits, with the + * black value at 24576, and the white at 49151. Since the effective + * range is 16384, we treat this as a 14-bit LUT. 
However, we need to + * clear the low 3 bits to WAR hardware bug 813188. This gives us + * 14-bit LUT values, but only 11 bits of precision. + * XXXnvdisplay: Bug 813188 is supposed to be fixed on NVDisplay; can we expose + * more precision? + */ +static inline NvU16 ColorToLUTEntry(NvU16 val) +{ + const NvU16 val14bit = val >> 2; + return (val14bit & ~7) + 24576; +} + /* * Unlike earlier EVO implementations, the INDEX mode of the input LUT on * NVDisplay is straightforward: the value of the input component is expanded @@ -4713,13 +5120,8 @@ static inline NvU32 GetLUTIndex(int i, int componentSize) return i << (10 - componentSize); } -static inline float16_t ColorToFp16(NvU16 val, float32_t maxf) -{ - return nvUnormToFp16(val, maxf); -} - -void -nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, +static void +EvoFillLUTSurfaceC3(NVEvoLutEntryRec *pLUTBuffer, const NvU16 *red, const NvU16 *green, const NvU16 *blue, @@ -4727,7 +5129,6 @@ nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, { int i; NvU32 rSize, gSize, bSize; - const float32_t maxf = ui32_to_f32(0xffff); switch (depth) { case 15: @@ -4749,38 +5150,126 @@ nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, return; } - // Skip the VSS header - pLUTBuffer += NV_LUT_VSS_HEADER_SIZE; - for (i = 0; i < nColorMapEntries; i++) { if (i < (1 << rSize)) { - pLUTBuffer[GetLUTIndex(i, rSize)].Red = - ColorToFp16(red[i], maxf).v; + pLUTBuffer[GetLUTIndex(i, rSize)].Red = ColorToLUTEntry(red[i]); } if (i < (1 << gSize)) { - pLUTBuffer[GetLUTIndex(i, gSize)].Green = - ColorToFp16(green[i], maxf).v; + pLUTBuffer[GetLUTIndex(i, gSize)].Green = ColorToLUTEntry(green[i]); } if (i < (1 << bSize)) { - pLUTBuffer[GetLUTIndex(i, bSize)].Blue = - ColorToFp16(blue[i], maxf).v; + pLUTBuffer[GetLUTIndex(i, bSize)].Blue = ColorToLUTEntry(blue[i]); } } } -static void EvoSetupPQOetfOutputLutC5(NVEvoLutDataRec *pData, - enum NvKmsLUTState *lutState, - NvU32 *lutSize, - NvBool *isLutModeVss) +static inline float16_t ColorToFp16(NvU16 
val, float32_t maxf) { - NvU32 lutDataStartingIndex = NV_LUT_VSS_HEADER_SIZE; - NvU32 numOetfPQ512Entries = ARRAY_LEN(OetfPQ512Entries); - NvU32 oetfTableIdx; - NvU64 vssHead = 0; - NvU32 lutEntryCounter = 0, i; + return nvUnormToFp16(val, maxf); +} - // Skip LUT data init if already done - if (*lutState == NvKmsLUTStatePQ) { +void +nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, + const NvU16 *red, + const NvU16 *green, + const NvU16 *blue, + int nColorMapEntries, int depth) +{ + int i; + NvU32 rSize, gSize, bSize; + const float32_t maxf = ui32_to_f32(0xffff); + + switch (depth) { + case 15: + rSize = gSize = bSize = 5; + break; + case 16: + rSize = bSize = 5; + gSize = 6; + break; + case 8: + case 24: + rSize = gSize = bSize = 8; + break; + case 30: + rSize = gSize = bSize = 10; + break; + default: + nvAssert(!"invalid depth"); + return; + } + + // Skip the VSS header + pLUTBuffer += NV_LUT_VSS_HEADER_SIZE; + + for (i = 0; i < nColorMapEntries; i++) { + if (i < (1 << rSize)) { + pLUTBuffer[GetLUTIndex(i, rSize)].Red = + ColorToFp16(red[i], maxf).v; + } + if (i < (1 << gSize)) { + pLUTBuffer[GetLUTIndex(i, gSize)].Green = + ColorToFp16(green[i], maxf).v; + } + if (i < (1 << bSize)) { + pLUTBuffer[GetLUTIndex(i, bSize)].Blue = + ColorToFp16(blue[i], maxf).v; + } + } +} + +static void EvoSetOutputLutC3(NVDevEvoPtr pDevEvo, + NvU32 sd, + NvU32 head, + const NVFlipLutHwState *pOutputLut, + NvU32 fpNormScale, + NVEvoUpdateState *updateState, + NvBool bypassComposition) +{ + NVEvoChannelPtr pChannel = pDevEvo->core; + NvBool enableLut = (pOutputLut->pLutSurfaceEvo != NULL); + NvU64 offset = enableLut ? pOutputLut->offset : offsetof(NVEvoLutDataRec, output); + NvU32 ctxdma = enableLut ? 
+ pOutputLut->pLutSurfaceEvo->planes[0].surfaceDesc.ctxDmaHandle : 0; + + nvUpdateUpdateState(pDevEvo, updateState, pChannel); + + nvAssert((offset & 0xff) == 0); + + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTROL_OUTPUT_LUT(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _SIZE, _SIZE_1025) | + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _RANGE, _UNITY) | + DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _OUTPUT_MODE, _INTERPOLATE)); + + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_OFFSET_OUTPUT_LUT(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37D, _HEAD_SET_OFFSET_OUTPUT_LUT, _ORIGIN, offset >> 8)); + + /* Set the ctxdma for the output LUT */ + + if (!enableLut) { + /* Class C37D has no separate enable flag. */ + ctxdma = 0; + } + nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTEXT_DMA_OUTPUT_LUT(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_NUM(C37D, _HEAD_SET_CONTEXT_DMA_OUTPUT_LUT, _HANDLE, ctxdma)); +} + +static void EvoSetupPQOetfOutputLutC5(NVEvoLutDataRec *pData, + enum NvKmsLUTState *lutState, + NvU32 *lutSize, + NvBool *isLutModeVss) +{ + NvU32 lutDataStartingIndex = NV_LUT_VSS_HEADER_SIZE; + NvU32 numOetfPQ512Entries = ARRAY_LEN(OetfPQ512Entries); + NvU32 oetfTableIdx; + NvU64 vssHead = 0; + NvU32 lutEntryCounter = 0, i; + + // Skip LUT data init if already done + if (*lutState == NvKmsLUTStatePQ) { goto skipInit; } @@ -5192,6 +5681,70 @@ static void EvoParseCapabilityNotifier3(NVDevEvoPtr pDevEvo, } } +static void EvoParseCapabilityNotifierC3(NVDevEvoPtr pDevEvo, + NVEvoSubDevPtr pEvoSubDev, + volatile const NvU32 *pCaps) +{ + NVEvoCapabilitiesPtr pEvoCaps = &pEvoSubDev->capabilities; + NvU32 i; + + // Miscellaneous capabilities + pEvoCaps->misc.supportsSemiPlanar = FALSE; + pEvoCaps->misc.supportsPlanar = FALSE; + pEvoCaps->misc.supportsDSI = FALSE; + + // Heads + ct_assert(ARRAY_LEN(pEvoCaps->head) >= NVC373_HEAD_CAPA__SIZE_1); + for (i = 0; i < NVC373_HEAD_CAPA__SIZE_1; i++) { 
+ NVEvoHeadCaps *pHeadCaps = &pEvoCaps->head[i]; + + /* XXXnvdisplay: add caps for hsat, ocsc, lut */ + if (pHeadCaps->usable) { + NVEvoScalerCaps *pScalerCaps = &pHeadCaps->scalerCaps; + + pScalerCaps->present = + FLD_TEST_DRF(C373, _HEAD_CAPA, _SCALER, _TRUE, + nvEvoReadCapReg3(pCaps, NVC373_HEAD_CAPA(i))); + if (pScalerCaps->present) { + NVEvoScalerTapsCaps *pTapsCaps; + NvU32 tmp; + + /* + * Note that some of these may be zero (e.g., only 2-tap 444 + * mode is supported on GV100, so the rest are all zero. + * + * Downscaling by more than 2x in either direction is not + * allowed by state error check for both horizontal and + * vertical 2-tap scaling. + * + * Downscaling by more than 4x in either direction is not + * allowed by argument error check (and state error check) for + * 5-tap scaling. + * + * 5-tap scaling is not implemented on GV100, though, so we + * should never see numTaps == 5 on GV100, and we can just use a + * max of 2 here all the time. + */ + + /* 2-tap capabilities */ + tmp = nvEvoReadCapReg3(pCaps, NVC373_HEAD_CAPD(i)); + pTapsCaps = &pScalerCaps->taps[NV_EVO_SCALER_2TAPS]; + pTapsCaps->maxVDownscaleFactor = NV_EVO_SCALE_FACTOR_2X; + pTapsCaps->maxHDownscaleFactor = NV_EVO_SCALE_FACTOR_2X; + pTapsCaps->maxPixelsVTaps = + NV_MAX(DRF_VAL(C373, _HEAD_CAPD, _MAX_PIXELS_2TAP422, tmp), + DRF_VAL(C373, _HEAD_CAPD, _MAX_PIXELS_2TAP444, tmp)); + + /* + * Note that there is a capability register for 1TAP, but there + * doesn't appear to be a way to select 1-tap scaling in the + * channel methods, so don't bother reading it for now. 
+ */ + } + } + } +} + static void EvoParsePrecompScalerCaps5(NVEvoCapabilitiesPtr pEvoCaps, volatile const NvU32 *pCaps) { @@ -5496,10 +6049,17 @@ static void SetHDRLayerCaps(NVDevEvoPtr pDevEvo) pDevEvo->caps.layerCaps[numLayers[head]].supportsICtCp = pWinCaps->tmoPresent; - /* Turing+ uses an FP16, linear 64-segment VSS supported ILUT */ - FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, - NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LINEAR, - 64, 1025, NVKMS_LUT_FORMAT_FP16); + if (pDevEvo->hal->caps.needDefaultLutSurface) { + /* Turing+ uses an FP16, linear 64-segment VSS supported ILUT */ + FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, + NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LINEAR, + 64, 1025, NVKMS_LUT_FORMAT_FP16); + } else { + /* Volta uses a UNORM14_WAR_813188, non-VSS ILUT */ + FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, + NVKMS_LUT_VSS_NOT_SUPPORTED, NVKMS_LUT_VSS_TYPE_NONE, + 0, 1025, NVKMS_LUT_FORMAT_UNORM14_WAR_813188); + } if (pWinCaps->tmoPresent) { FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].tmo, TRUE, @@ -5519,10 +6079,17 @@ static void SetHDRLayerCaps(NVDevEvoPtr pDevEvo) numLayers[head]++; } - /* Turing+ uses a UNORM16, logarithmic 33-segment VSS supported OLUT */ - FillLUTCaps(&pDevEvo->caps.olut, TRUE, - NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LOGARITHMIC, - 33, 1025, NVKMS_LUT_FORMAT_UNORM16); + if (pDevEvo->hal->caps.hasUnorm16OLUT) { + /* Turing+ uses a UNORM16, logarithmic 33-segment VSS supported OLUT */ + FillLUTCaps(&pDevEvo->caps.olut, TRUE, + NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LOGARITHMIC, + 33, 1025, NVKMS_LUT_FORMAT_UNORM16); + } else { + /* Volta uses a UNORM14_WAR_813188, non-VSS OLUT */ + FillLUTCaps(&pDevEvo->caps.olut, TRUE, + NVKMS_LUT_VSS_NOT_SUPPORTED, NVKMS_LUT_VSS_TYPE_NONE, + 0, 1025, NVKMS_LUT_FORMAT_UNORM14_WAR_813188); + } } NvBool nvEvoGetCapabilities3(NVDevEvoPtr pDevEvo, @@ -5702,6 +6269,14 @@ NvBool 
nvEvoGetCapabilities3(NVDevEvoPtr pDevEvo, return ret; } +static NvBool EvoGetCapabilitiesC3(NVDevEvoPtr pDevEvo) +{ + return nvEvoGetCapabilities3(pDevEvo, EvoParseCapabilityNotifierC3, + nvHwFormatFromKmsFormatC3, + NVC373_DISP_CAPABILITIES, + sizeof(_NvC373DispCapabilities)); +} + static NvBool EvoGetCapabilitiesC5(NVDevEvoPtr pDevEvo) { return nvEvoGetCapabilities3(pDevEvo, EvoParseCapabilityNotifierC5, @@ -5738,6 +6313,7 @@ static void EvoSetViewportPointInC3(NVDevEvoPtr pDevEvo, const int head, } static void EvoSetOutputScalerC3(const NVDispEvoRec *pDispEvo, const NvU32 head, + const NvU32 imageSharpeningValue, NVEvoUpdateState *updateState) { NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; @@ -5851,6 +6427,28 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, return scalingUsageBounds.vUpscalingAllowed; } +static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, + const NVHwModeViewPortEvo *pViewPortMin, + const NVHwModeViewPortEvo *pViewPort, + const NVHwModeViewPortEvo *pViewPortMax, + NVEvoUpdateState *updateState) +{ + NVEvoChannelPtr pChannel = pDevEvo->core; + NvBool verticalUpscalingAllowed = + EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, + pViewPortMax, updateState, + NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); + + nvDmaSetStartEvoMethod(pChannel, + NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); + nvDmaSetEvoMethodData(pChannel, + DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _CURSOR, _USAGE_W256_H256) | + DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _OUTPUT_LUT, _USAGE_1025) | + (verticalUpscalingAllowed ? 
+ DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) : + DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE))); +} + static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, const NVHwModeViewPortEvo *pViewPortMin, const NVHwModeViewPortEvo *pViewPort, @@ -6179,6 +6777,19 @@ static NvBool EvoValidateWindowFormatWrapper( return pValSrcRect(sourceFetchRect, format); } +static NvBool EvoValidateWindowFormatC3( + const enum NvKmsSurfaceMemoryFormat format, + const struct NvKmsRect *sourceFetchRect, + NvU32 *hwFormatOut) +{ + return EvoValidateWindowFormatWrapper( + format, + nvHwFormatFromKmsFormatC3, + sourceFetchRect, + ValidateWindowFormatSourceRectC3, + hwFormatOut); +} + static NvBool EvoValidateWindowFormatC5( const enum NvKmsSurfaceMemoryFormat format, const struct NvKmsRect *sourceFetchRect, @@ -6257,10 +6868,7 @@ static void EvoSetDitherC3(NVDispEvoPtr pDispEvo, const int head, ditherControl |= DRF_DEF(C37D, _HEAD_SET_DITHER_CONTROL, _BITS, _TO_8_BITS); break; - case NV0073_CTRL_SPECIFIC_OR_DITHER_TYPE_10_BITS: - ditherControl |= - DRF_DEF(C37D, _HEAD_SET_DITHER_CONTROL, _BITS, _TO_10_BITS); - break; + /* XXXnvdisplay: Support DITHER_TO_{10,12}_BITS (see also bug 1729668). */ default: nvAssert(!"Unknown ditherType"); // Fall through @@ -6391,9 +6999,16 @@ static NvBool GetChannelState(NVDevEvoPtr pDevEvo, NVC370_CTRL_CMD_GET_CHANNEL_INFO, &info, sizeof(info)); if (ret != NVOS_STATUS_SUCCESS) { - nvEvoLogDev(pDevEvo, EVO_LOG_ERROR, - "Failed to query display engine channel state: 0x%08x:%d:%d:0x%08x", - pChan->hwclass, pChan->instance, sd, ret); + /* + * When the GPU is lost (e.g., Thunderbolt/eGPU hot-unplug), + * suppress the error log to avoid flooding dmesg. The callers + * will handle the failure appropriately. 
+ */ + if (ret != NVOS_STATUS_ERROR_GPU_IS_LOST) { + nvEvoLogDev(pDevEvo, EVO_LOG_ERROR, + "Failed to query display engine channel state: 0x%08x:%d:%d:0x%08x", + pChan->hwclass, pChan->instance, sd, ret); + } return FALSE; } @@ -6826,10 +7441,10 @@ void nvEvoSetImmPointOutC3(NVDevEvoPtr pDevEvo, NVEvoUpdateState *updateState, NvU16 x, NvU16 y) { - NVEvoChannelPtr pImmChannel = pChannel->imm.dma; + NVEvoChannelPtr pImmChannel = pChannel->imm.u.dma; nvAssert((pChannel->channelMask & NV_EVO_CHANNEL_MASK_WINDOW_ALL) != 0); - nvAssert(pImmChannel != NULL); + nvAssert(pChannel->imm.type == NV_EVO_IMM_CHANNEL_DMA); /* This should only be called for one GPU at a time, since the * pre-nvdisplay version uses PIO and cannot broadcast. */ @@ -6863,6 +7478,7 @@ static void EvoStartHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, const enum nvKmsTimingsProtocol protocol, const NvU32 orIndex, NvU32 head, + NvU32 sd, NVEvoUpdateState *updateState) { const NvU32 winChannel = head << 1; @@ -6910,7 +7526,7 @@ static void EvoStartHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, DRF_DEF(C37D, _HEAD_SET_CRC_CONTROL, _CRC_DURING_SNOOZE, _DISABLE)); /* Reset the CRC notifier */ - nvEvoResetCRC32Notifier(pDma->cpuAddress, + nvEvoResetCRC32Notifier(pDma->subDeviceAddress[sd], NVC37D_NOTIFIER_CRC_STATUS_0, DRF_BASE(NVC37D_NOTIFIER_CRC_STATUS_0_DONE), NVC37D_NOTIFIER_CRC_STATUS_0_DONE_FALSE); @@ -6948,6 +7564,7 @@ static void EvoStopHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, * * \param[in] pDevEvo NVKMS device pointer * \param[in] pDma Pointer to DMA-mapped memory + * \param[in] sd Subdevice index * \param[in] entry_count Number of independent frames to read CRCs from * \param[out] crc32 Contains pointers to CRC output arrays * \param[out] numCRC32 Number of CRC frames successfully read from DMA @@ -6957,11 +7574,12 @@ static void EvoStopHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, */ NvBool nvEvoQueryHeadCRC32_C3(NVDevEvoPtr pDevEvo, NVEvoDmaPtr pDma, + NvU32 sd, NvU32 entry_count, CRC32NotifierCrcOut *crc32, NvU32 
*numCRC32) { - volatile NvU32 *pCRC32Notifier = pDma->cpuAddress; + volatile NvU32 *pCRC32Notifier = pDma->subDeviceAddress[sd]; const NvU32 entry_stride = NVC37D_NOTIFIER_CRC_CRC_ENTRY1_21 - NVC37D_NOTIFIER_CRC_CRC_ENTRY0_13; // Define how many/which variables to read from each CRCNotifierEntry struct @@ -7418,256 +8036,6 @@ static void EvoSetMergeModeC5(const NVDispEvoRec *pDispEvo, nvPopEvoSubDevMask(pDevEvo); } -/* - * The 'type' the timing library writes into the NVT_INFOFRAME_HEADER - * structure is not the type that the HDMI library expects to see in its - * NvHdmiPkt_SetupAdvancedInfoframe call; those are NVHDMIPKT_TYPE_*. - * Map the timing library infoframe type to the - * NVHDMIPKT_TYPE_SHARED_GENERIC*. - */ -static NvBool NvtToHdmiLibGenericInfoFramePktType(const NvU32 srcType, - NVHDMIPKT_TYPE *pDstType) -{ - NVHDMIPKT_TYPE hdmiLibType; - - switch (srcType) { - default: - return FALSE; - case NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET: - hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC1; - break; - case NVT_INFOFRAME_TYPE_VENDOR_SPECIFIC: - hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC2; - break; - case NVT_INFOFRAME_TYPE_DYNAMIC_RANGE_MASTERING: - hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC3; - break; - } - - *pDstType = hdmiLibType; - - return TRUE; -} - -static NvBool ConstructAdvancedInfoFramePacket( - const NVT_INFOFRAME_HEADER *pInfoFrameHeader, - const NvU32 infoframeSize, - const NvBool needChecksum, - const NvBool swChecksum, - NvU8 *pPacket, - const NvU32 packetLen) -{ - NvU8 hdmiPacketType; - const NvU8 *pPayload; - NvU32 payloadLen; - - if (!nvEvo1NvtToHdmiInfoFramePacketType(pInfoFrameHeader->type, - &hdmiPacketType)) { - return FALSE; - } - - /* - * XXX If required, add support for the large infoframe with - * multiple infoframes grouped together. - */ - nvAssert((infoframeSize + 1 /* + HB3 */ + (needChecksum ? 
1 : 0)) <= - packetLen); - - pPacket[0] = hdmiPacketType; /* HB0 */ - - /* - * The fields and size of NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER - * match with those of NVT_INFOFRAME_HEADER at the time of writing, but - * nvtiming.h declares them separately. To be safe, special case - * NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET. - */ - if (pInfoFrameHeader->type == NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET) { - const NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER *pExtMetadataHeader = - (const NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER *) - pInfoFrameHeader; - - pPacket[1] = pExtMetadataHeader->firstLast; /* HB1 */ - pPacket[2] = pExtMetadataHeader->sequenceIndex; /* HB2 */ - - pPayload = (const NvU8 *)(pExtMetadataHeader + 1); - payloadLen = infoframeSize - - sizeof(NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER); - } else { - pPacket[1] = pInfoFrameHeader->version; /* HB1 */ - pPacket[2] = pInfoFrameHeader->length; /* HB2 */ - - pPayload = (const NvU8 *)(pInfoFrameHeader + 1); - payloadLen = infoframeSize - sizeof(NVT_INFOFRAME_HEADER); - } - pPacket[3] = 0; /* HB3, reserved */ - - if (needChecksum) { - pPacket[4] = 0; /* PB0: checksum */ - - nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */ - - if (swChecksum) { - NvU8 checksum = 0; - - for (NvU32 i = 0; i < packetLen; i++) { - checksum += pPacket[i]; - } - pPacket[4] = ~checksum + 1; - } - } else { - nvAssert(!swChecksum); - nvkms_memcpy(&pPacket[4], pPayload, payloadLen); /* PB0~ */ - } - - return TRUE; -} - -void nvEvoSendHdmiInfoFrameC8(const NVDispEvoRec *pDispEvo, - const NvU32 head, - const NvEvoInfoFrameTransmitControl transmitCtrl, - const NVT_INFOFRAME_HEADER *pInfoFrameHeader, - const NvU32 infoFrameSize, - NvBool needChecksum) -{ - NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; - NVHDMIPKT_TYPE hdmiLibType; - NVHDMIPKT_RESULT ret; - ADVANCED_INFOFRAME advancedInfoFrame = { }; - NvBool swChecksum; - - /* - * These structures are weird. 
The NVT_VIDEO_INFOFRAME, - * NVT_VENDOR_SPECIFIC_INFOFRAME, - * NVT_EXTENDED_METADATA_PACKET_INFOFRAME, etc structures are *kind - * of* what we want to send to the hdmipkt library, except the type - * in the header is different, and a single checksum byte may need - * to be inserted *between* the header and the payload (requiring us - * to allocate a buffer one byte larger). - */ - NvU8 packet[36] = { }; - - if (!NvtToHdmiLibGenericInfoFramePktType(pInfoFrameHeader->type, - &hdmiLibType)) { - nvEvo1SendHdmiInfoFrame(pDispEvo, head, transmitCtrl, pInfoFrameHeader, - infoFrameSize, needChecksum); - return; - } - - switch (transmitCtrl) { - case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_EVERY_FRAME: - advancedInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ALWAYS; - break; - case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_SINGLE_FRAME: - advancedInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ONCE; - break; - } - advancedInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK; - advancedInfoFrame.hwChecksum = needChecksum; - - // Large infoframes are incompatible with hwChecksum - nvAssert(!(advancedInfoFrame.isLargeInfoframe && - advancedInfoFrame.hwChecksum)); - - // XXX WAR bug 5124145 by always computing checksum in software if needed. - swChecksum = needChecksum; - - // If we need a checksum: hwChecksum, swChecksum, or both must be enabled. 
- nvAssert(!needChecksum || - (advancedInfoFrame.hwChecksum || swChecksum)); - - if (!ConstructAdvancedInfoFramePacket(pInfoFrameHeader, - infoFrameSize, - needChecksum, - swChecksum, - packet, - sizeof(packet))) { - return; - } - - advancedInfoFrame.packetLen = sizeof(packet); - advancedInfoFrame.pPacket = packet; - - ret = NvHdmiPkt_SetupAdvancedInfoframe(pDevEvo->hdmiLib.handle, - pDispEvo->displayOwner, - head, - hdmiLibType, - &advancedInfoFrame); - if (ret != NVHDMIPKT_SUCCESS) { - nvAssert(ret == NVHDMIPKT_SUCCESS); - } -} - -void nvEvoDisableHdmiInfoFrameC8(const NVDispEvoRec *pDispEvo, - const NvU32 head, - const NvU8 nvtInfoFrameType) -{ - const NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head]; - NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; - NVHDMIPKT_TYPE hdmiLibType; - NVHDMIPKT_RESULT ret; - - if (!NvtToHdmiLibGenericInfoFramePktType(nvtInfoFrameType, - &hdmiLibType)) { - return; - } - - ret = NvHdmiPkt_PacketCtrl(pDevEvo->hdmiLib.handle, - pDispEvo->displayOwner, - pHeadState->activeRmId, - head, - hdmiLibType, - NVHDMIPKT_TRANSMIT_CONTROL_DISABLE); - if (ret != NVHDMIPKT_SUCCESS) { - nvAssert(!"Failed to disable vendor specific infoframe"); - } -} - -void nvEvoSendDpInfoFrameSdpC8(const NVDispEvoRec *pDispEvo, - const NvU32 head, - const NvEvoInfoFrameTransmitControl transmitCtrl, - const DPSDP_DESCRIPTOR *sdp) -{ - NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; - NVHDMIPKT_RESULT ret; - ADVANCED_INFOFRAME advanceInfoFrame = { }; - NvU8 packet[36] = { }; - - /* - * XXX Using NVHDMIPKT_TYPE_SHARED_GENERIC1 for DP HDR SDP, add - * support for other infoframe as needed. 
- */ - if (sdp->hb.hb1 != dp_pktType_DynamicRangeMasteringInfoFrame) { - nvAssert(!"Unsupported infoframe"); - return; - } - - nvAssert((sizeof(sdp->hb) + sdp->dataSize) <= sizeof(packet)); - - nvkms_memcpy(packet, &sdp->hb, - NV_MIN((sizeof(sdp->hb) + sdp->dataSize), sizeof(packet))); - - switch (transmitCtrl) { - case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_EVERY_FRAME: - advanceInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ALWAYS; - break; - case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_SINGLE_FRAME: - advanceInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ONCE; - break; - } - advanceInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK; - advanceInfoFrame.packetLen = sizeof(packet); - advanceInfoFrame.pPacket = packet; - - ret = NvHdmiPkt_SetupAdvancedInfoframe(pDevEvo->hdmiLib.handle, - pDispEvo->displayOwner, - head, - NVHDMIPKT_TYPE_SHARED_GENERIC1, - &advanceInfoFrame); - if (ret != NVHDMIPKT_SUCCESS) { - nvAssert(ret == NVHDMIPKT_SUCCESS); - } -} - static NvU32 EvoAllocSurfaceDescriptorC3( NVDevEvoPtr pDevEvo, NVSurfaceDescriptor *pSurfaceDesc, NvU32 memoryHandle, NvU32 localCtxDmaFlags, @@ -7695,6 +8063,105 @@ static NvU32 EvoBindSurfaceDescriptorC3( return nvCtxDmaBind(pDevEvo, pChannel, pSurfaceDesc->ctxDmaHandle); } +NVEvoHAL nvEvoC3 = { + EvoSetRasterParamsC3, /* SetRasterParams */ + EvoSetProcAmpC3, /* SetProcAmp */ + EvoSetHeadControlC3, /* SetHeadControl */ + EvoSetHeadRefClkC3, /* SetHeadRefClk */ + EvoHeadSetControlORC3, /* HeadSetControlOR */ + nvEvoORSetControlC3, /* ORSetControl */ + EvoHeadSetDisplayIdC3, /* HeadSetDisplayId */ + EvoSetUsageBoundsC3, /* SetUsageBounds */ + nvEvoUpdateC3, /* Update */ + nvEvoIsModePossibleC3, /* IsModePossible */ + nvEvoPrePostIMPC3, /* PrePostIMP */ + nvEvoSetNotifierC3, /* SetNotifier */ + EvoGetCapabilitiesC3, /* GetCapabilities */ + EvoFlipC3, /* Flip */ + EvoFlipTransitionWARC3, /* FlipTransitionWAR */ + EvoFillLUTSurfaceC3, /* FillLUTSurface */ + EvoSetOutputLutC3, /* SetOutputLut */ + EvoSetOutputScalerC3, /* SetOutputScaler */ 
+ EvoSetViewportPointInC3, /* SetViewportPointIn */ + EvoSetViewportInOutC3, /* SetViewportInOut */ + EvoSetCursorImageC3, /* SetCursorImage */ + nvEvoValidateCursorSurfaceC3, /* ValidateCursorSurface */ + EvoValidateWindowFormatC3, /* ValidateWindowFormat */ + nvEvoInitCompNotifierC3, /* InitCompNotifier */ + nvEvoIsCompNotifierCompleteC3, /* IsCompNotifierComplete */ + nvEvoWaitForCompNotifierC3, /* WaitForCompNotifier */ + EvoSetDitherC3, /* SetDither */ + EvoSetStallLockC3, /* SetStallLock */ + EvoSetDisplayRateC3, /* SetDisplayRate */ + EvoInitChannelC3, /* InitChannel */ + NULL, /* InitDefaultLut */ + EvoInitWindowMappingC3, /* InitWindowMapping */ + nvEvoIsChannelIdleC3, /* IsChannelIdle */ + nvEvoIsChannelMethodPendingC3, /* IsChannelMethodPending */ + nvEvoForceIdleSatelliteChannelC3, /* ForceIdleSatelliteChannel */ + nvEvoForceIdleSatelliteChannelIgnoreLockC3, /* ForceIdleSatelliteChannelIgnoreLock */ + nvEvoAccelerateChannelC3, /* AccelerateChannel */ + nvEvoResetChannelAcceleratorsC3, /* ResetChannelAccelerators */ + nvEvoAllocRmCtrlObjectC3, /* AllocRmCtrlObject */ + nvEvoFreeRmCtrlObjectC3, /* FreeRmCtrlObject */ + nvEvoSetImmPointOutC3, /* SetImmPointOut */ + EvoStartHeadCRC32CaptureC3, /* StartCRC32Capture */ + EvoStopHeadCRC32CaptureC3, /* StopCRC32Capture */ + nvEvoQueryHeadCRC32_C3, /* QueryCRC32 */ + nvEvoGetScanLineC3, /* GetScanLine */ + NULL, /* ConfigureVblankSyncObject */ + nvEvo1SetDscParams, /* SetDscParams */ + NULL, /* EnableMidFrameAndDWCFWatermark */ + nvEvoGetActiveViewportOffsetC3, /* GetActiveViewportOffset */ + NULL, /* ClearSurfaceUsage */ + EvoComputeWindowScalingTapsC3, /* ComputeWindowScalingTaps */ + nvEvoGetWindowScalingCapsC3, /* GetWindowScalingCaps */ + NULL, /* SetMergeMode */ + nvEvo1SendHdmiInfoFrame, /* SendHdmiInfoFrame */ + nvEvo1DisableHdmiInfoFrame, /* DisableHdmiInfoFrame */ + nvEvo1SendDpInfoFrameSdp, /* SendDpInfoFrameSdp */ + NULL, /* SetDpVscSdp */ + NULL, /* InitHwHeadMultiTileConfig */ + NULL, /* 
SetMultiTileConfig */ + EvoAllocSurfaceDescriptorC3, /* AllocSurfaceDescriptor */ + EvoFreeSurfaceDescriptorC3, /* FreeSurfaceDescriptor */ + EvoBindSurfaceDescriptorC3, /* BindSurfaceDescriptor */ + NULL, /* SetTmoLutSurfaceAddress */ + NULL, /* SetILUTSurfaceAddress */ + EvoSetISOSurfaceAddressC3, /* SetISOSurfaceAddress */ + EvoSetCoreNotifierSurfaceAddressAndControlC3, /* SetCoreNotifierSurfaceAddressAndControl */ + EvoSetWinNotifierSurfaceAddressAndControlC3, /* SetWinNotifierSurfaceAddressAndControl */ + NULL, /* SetSemaphoreSurfaceAddressAndControl */ + NULL, /* SetAcqSemaphoreSurfaceAddressAndControl */ + { /* caps */ + TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ + TRUE, /* supportsDisplayRate */ + FALSE, /* supportsFlipLockRGStatus */ + FALSE, /* needDefaultLutSurface */ + FALSE, /* hasUnorm10OLUT */ + FALSE, /* supportsImageSharpening */ + FALSE, /* supportsHDMIVRR */ + FALSE, /* supportsCoreChannelSurface */ + FALSE, /* supportsHDMIFRL */ + TRUE, /* supportsSetStorageMemoryLayout */ + FALSE, /* supportsIndependentAcqRelSemaphore */ + FALSE, /* supportsCoreLut */ + TRUE, /* supportsSynchronizedOverlayPositionUpdate */ + FALSE, /* supportsVblankSyncObjects */ + FALSE, /* requiresScalingTapsInBothDimensions */ + FALSE, /* supportsMergeMode */ + FALSE, /* supportsHDMI10BPC */ + FALSE, /* supportsDPAudio192KHz */ + FALSE, /* supportsInputColorSpace */ + FALSE, /* supportsInputColorRange */ + FALSE, /* supportsYCbCr422OverHDMIFRL */ + NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ + sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ + NV_EVO_SCALER_2TAPS, /* minScalerTaps */ + NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C3, /* xEmulatedSurfaceMemoryFormats */ + }, +}; + NVEvoHAL nvEvoC5 = { EvoSetRasterParamsC5, /* SetRasterParams */ EvoSetProcAmpC5, /* SetProcAmp */ @@ -7706,6 +8173,7 @@ NVEvoHAL nvEvoC5 = { nvEvoSetUsageBoundsC5, /* SetUsageBounds */ nvEvoUpdateC3, /* Update */ nvEvoIsModePossibleC3, /* IsModePossible */ 
+ nvEvoPrePostIMPC3, /* PrePostIMP */ nvEvoSetNotifierC3, /* SetNotifier */ EvoGetCapabilitiesC5, /* GetCapabilities */ EvoFlipC5, /* Flip */ @@ -7765,14 +8233,30 @@ NVEvoHAL nvEvoC5 = { NULL, /* SetSemaphoreSurfaceAddressAndControl */ NULL, /* SetAcqSemaphoreSurfaceAddressAndControl */ { /* caps */ + TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ + TRUE, /* supportsDisplayRate */ + FALSE, /* supportsFlipLockRGStatus */ + TRUE, /* needDefaultLutSurface */ + TRUE, /* hasUnorm10OLUT */ + FALSE, /* supportsImageSharpening */ + TRUE, /* supportsHDMIVRR */ + FALSE, /* supportsCoreChannelSurface */ FALSE, /* supportsHDMIFRL */ TRUE, /* supportsSetStorageMemoryLayout */ FALSE, /* supportsIndependentAcqRelSemaphore */ + FALSE, /* supportsCoreLut */ + TRUE, /* supportsSynchronizedOverlayPositionUpdate */ FALSE, /* supportsVblankSyncObjects */ + FALSE, /* requiresScalingTapsInBothDimensions */ TRUE, /* supportsMergeMode */ FALSE, /* supportsHDMI10BPC */ FALSE, /* supportsDPAudio192KHz */ + TRUE, /* supportsInputColorSpace */ + TRUE, /* supportsInputColorRange */ FALSE, /* supportsYCbCr422OverHDMIFRL */ + NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ + sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ + NV_EVO_SCALER_2TAPS, /* minScalerTaps */ NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C5, /* xEmulatedSurfaceMemoryFormats */ }, }; @@ -7788,6 +8272,7 @@ NVEvoHAL nvEvoC6 = { nvEvoSetUsageBoundsC5, /* SetUsageBounds */ nvEvoUpdateC3, /* Update */ nvEvoIsModePossibleC3, /* IsModePossible */ + nvEvoPrePostIMPC3, /* PrePostIMP */ nvEvoSetNotifierC3, /* SetNotifier */ nvEvoGetCapabilitiesC6, /* GetCapabilities */ nvEvoFlipC6, /* Flip */ @@ -7847,15 +8332,30 @@ NVEvoHAL nvEvoC6 = { EvoSetSemaphoreSurfaceAddressAndControlC6, /* SetSemaphoreSurfaceAddressAndControl */ EvoSetAcqSemaphoreSurfaceAddressAndControlC6, /* SetAcqSemaphoreSurfaceAddressAndControl */ { /* caps */ + TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ + TRUE, /* 
supportsDisplayRate */ + FALSE, /* supportsFlipLockRGStatus */ + TRUE, /* needDefaultLutSurface */ + TRUE, /* hasUnorm10OLUT */ + FALSE, /* supportsImageSharpening */ + TRUE, /* supportsHDMIVRR */ + FALSE, /* supportsCoreChannelSurface */ TRUE, /* supportsHDMIFRL */ FALSE, /* supportsSetStorageMemoryLayout */ TRUE, /* supportsIndependentAcqRelSemaphore */ + FALSE, /* supportsCoreLut */ + TRUE, /* supportsSynchronizedOverlayPositionUpdate */ TRUE, /* supportsVblankSyncObjects */ + FALSE, /* requiresScalingTapsInBothDimensions */ TRUE, /* supportsMergeMode */ TRUE, /* supportsHDMI10BPC */ FALSE, /* supportsDPAudio192KHz */ + TRUE, /* supportsInputColorSpace */ + TRUE, /* supportsInputColorRange */ FALSE, /* supportsYCbCr422OverHDMIFRL */ + NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ + sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ + NV_EVO_SCALER_2TAPS, /* minScalerTaps */ NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C6, /* xEmulatedSurfaceMemoryFormats */ }, }; - diff --git a/src/nvidia-modeset/src/nvkms-hw-flip.c b/src/nvidia-modeset/src/nvkms-hw-flip.c index 8163d67cd7..d433bf1b8d 100644 --- a/src/nvidia-modeset/src/nvkms-hw-flip.c +++ b/src/nvidia-modeset/src/nvkms-hw-flip.c @@ -2561,6 +2561,11 @@ static void LowerDispBandwidth(void *dataPtr, NvU32 dataU32) NvU32 head; NvBool ret; + /* Skip if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + return; + } + guaranteedAndCurrent = nvCalloc(1, sizeof(*guaranteedAndCurrent) * NVKMS_MAX_HEADS_PER_DISP); if (guaranteedAndCurrent == NULL) { @@ -2748,6 +2753,11 @@ TryToDoPostFlipIMP(void *dataPtr, NvU32 dataU32) pDevEvo->postFlipIMPTimer = NULL; + /* Skip if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + return; + } + FOR_ALL_EVO_DISPLAYS(pDispEvo, sd, pDevEvo) { NVEvoUpdateState updateState = { }; NvBool update = FALSE; diff --git a/src/nvidia-modeset/src/nvkms-rm.c b/src/nvidia-modeset/src/nvkms-rm.c index 3f1297bd98..d257cbfb54 
100644 --- a/src/nvidia-modeset/src/nvkms-rm.c +++ b/src/nvidia-modeset/src/nvkms-rm.c @@ -2207,6 +2207,11 @@ NVDpyIdList nvRmGetConnectedDpys(const NVDispEvoRec *pDispEvo, NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; NvU32 ret; + /* Skip hardware access if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + return nvEmptyDpyIdList(); + } + params.subDeviceInstance = pDispEvo->displayOwner; params.displayMask = nvDpyIdListToNvU32(dpyIdList); params.flags = @@ -3215,6 +3220,15 @@ NvBool nvRMSyncEvoChannel( { NvBool ret = TRUE; + /* + * Skip channel sync if the GPU has been lost (e.g., Thunderbolt eGPU + * surprise removal). The DMA control registers are invalid and would + * cause a crash. + */ + if (pDevEvo->gpuLost) { + return FALSE; + } + if (pChannel) { NvU32 sd; diff --git a/src/nvidia-modeset/src/nvkms.c b/src/nvidia-modeset/src/nvkms.c index ddb5eb1430..561ada241a 100644 --- a/src/nvidia-modeset/src/nvkms.c +++ b/src/nvidia-modeset/src/nvkms.c @@ -1386,6 +1386,23 @@ static NvBool AllocDevice(struct NvKmsPerOpen *pOpen, pDevEvo = nvFindDevEvoByDeviceId(pParams->request.deviceId); + /* + * If we found an existing device that was marked as lost (e.g., from a + * previous Thunderbolt surprise removal), we need to clean it up before + * allocating a new device for the reconnected GPU. + */ + if (pDevEvo != NULL && pDevEvo->gpuLost) { + nvEvoLogDev(pDevEvo, EVO_LOG_INFO, + "Cleaning up stale device from previous surprise removal"); + /* + * Force cleanup of the stale device. Set allocRefCnt to 1 so that + * nvFreeDevEvo will actually free it. + */ + pDevEvo->allocRefCnt = 1; + nvFreeDevEvo(pDevEvo); + pDevEvo = NULL; + } + if (pDevEvo == NULL) { pDevEvo = nvAllocDevEvo(&pParams->request, &pParams->reply.status); if (pDevEvo == NULL) { @@ -6248,6 +6265,44 @@ static void FreeGlobalState(void) nvClearDpyOverrides(); } +/* + * Reinitialize the global RM client after a GPU surprise removal. 
+ * When a GPU is removed, the RM client handle may become invalid. + * This function re-creates the client handle so that newly attached + * GPUs can be used. + */ +void nvKmsReinitializeGlobalClient(void) +{ + NvU32 ret; + + /* + * First, try to free the old client handle. This may fail if RM + * already invalidated it, but that's OK. + */ + if (nvEvoGlobal.clientHandle != 0) { + nvRmApiFree(nvEvoGlobal.clientHandle, nvEvoGlobal.clientHandle, + nvEvoGlobal.clientHandle); + nvEvoGlobal.clientHandle = 0; + } + + /* Allocate a new root client */ + ret = nvRmApiAlloc(NV01_NULL_OBJECT, + NV01_NULL_OBJECT, + NV01_NULL_OBJECT, + NV01_ROOT, + &nvEvoGlobal.clientHandle); + + if (ret != NVOS_STATUS_SUCCESS) { + nvEvoLog(EVO_LOG_ERROR, "Failed to reinitialize client after GPU removal"); + return; + } + + /* Update the RM context */ + nvEvoGlobal.rmSmgContext.clientHandle = nvEvoGlobal.clientHandle; + + nvEvoLog(EVO_LOG_INFO, "Reinitialized global client after GPU surprise removal"); +} + /* * Wrappers to help SMG access NvKmsKAPI's RM context. */ @@ -6343,6 +6398,11 @@ static void ConsoleRestoreTimerFired(void *dataPtr, NvU32 dataU32) { NVDevEvoPtr pDevEvo = dataPtr; + /* Skip if GPU has been lost (e.g., Thunderbolt unplug) */ + if (pDevEvo->gpuLost) { + return; + } + if (pDevEvo->modesetOwner == NULL && pDevEvo->handleConsoleHotplugs) { pDevEvo->skipConsoleRestore = FALSE; nvEvoRestoreConsole(pDevEvo, TRUE /* allowMST */); @@ -6836,6 +6896,43 @@ void nvKmsResume(NvU32 gpuId) } } +/*! + * Mark a GPU as lost (e.g., Thunderbolt/eGPU hot-unplug). + * + * This prevents any hardware access attempts that would cause kernel crashes. + * The device's timers are cancelled and the gpuLost flag is set so that + * subsequent operations bail out early. 
+ */ +void nvKmsGpuLost(NvU32 gpuId) +{ + NVDevEvoPtr pDevEvo; + NvU32 i; + + FOR_ALL_EVO_DEVS(pDevEvo) { + for (i = 0; i < ARRAY_LEN(pDevEvo->openedGpuIds); i++) { + if (pDevEvo->openedGpuIds[i] == gpuId) { + nvEvoLogDev(pDevEvo, EVO_LOG_INFO, + "GPU lost (surprise removal), disabling hardware access"); + + /* Mark device as lost to prevent hardware access */ + pDevEvo->gpuLost = TRUE; + + /* Cancel timers that might try to access hardware */ + nvkms_free_timer(pDevEvo->consoleRestoreTimer); + pDevEvo->consoleRestoreTimer = NULL; + + nvkms_free_timer(pDevEvo->postFlipIMPTimer); + pDevEvo->postFlipIMPTimer = NULL; + + nvkms_free_timer(pDevEvo->lowerDispBandwidthTimer); + pDevEvo->lowerDispBandwidthTimer = NULL; + + return; + } + } + } +} + static void ServiceOneDeferredRequestFifo( NVDevEvoPtr pDevEvo, NVDeferredRequestFifoRec *pDeferredRequestFifo) diff --git a/src/nvidia/arch/nvalloc/unix/src/osapi.c b/src/nvidia/arch/nvalloc/unix/src/osapi.c index f5db7a0e90..1da6be2e54 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osapi.c +++ b/src/nvidia/arch/nvalloc/unix/src/osapi.c @@ -308,6 +308,17 @@ void RmLogGpuCrash(OBJGPU *pGpu) { NvBool bGpuIsLost, bGpuIsConnected; + NvBool bIsExternalGpu = pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU); + + // + // For external GPUs (eGPUs) that have been disconnected, skip the crash + // dump entirely. The GPU is simply gone and attempting to save crash data + // will just produce noise in the logs. + // + if (bIsExternalGpu && pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + return; + } // // Re-evaluate whether or not the GPU is accessible. This could be called @@ -4277,7 +4288,30 @@ void NV_API_CALL rm_power_source_change_event( OBJGPU *pGpu = gpumgrGetGpu(0); if (pGpu != NULL) { + // + // Check if the GPU is lost or inaccessible before proceeding. + // This can happen during hot-unplug (e.g., Thunderbolt eGPU removal) + // where ACPI events may still be delivered after the GPU is gone. 
+ // + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST) || + !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_CONNECTED)) + { + rmapiLockRelease(); + goto done; + } + nv = NV_GET_NV_STATE(pGpu); + + // + // For external GPUs (Thunderbolt eGPU), check if we're in surprise + // removal before proceeding with power state changes. + // + if (nv->flags & NV_FLAG_IN_SURPRISE_REMOVAL) + { + rmapiLockRelease(); + goto done; + } + if ((rmStatus = os_ref_dynamic_power(nv, NV_DYNAMIC_PM_FINE)) == NV_OK) { @@ -4297,6 +4331,7 @@ void NV_API_CALL rm_power_source_change_event( } } +done: if (rmStatus != NV_OK) { NV_PRINTF(LEVEL_ERROR, @@ -5858,7 +5893,30 @@ void NV_API_CALL rm_acpi_nvpcf_notify( OBJGPU *pGpu = gpumgrGetGpu(0); if (pGpu != NULL) { + // + // Check if the GPU is lost or inaccessible before proceeding. + // This can happen during hot-unplug (e.g., Thunderbolt eGPU removal) + // where ACPI events may still be delivered after the GPU is gone. + // + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST) || + !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_CONNECTED)) + { + rmapiLockRelease(); + goto done_nvpcf; + } + nv_state_t *nv = NV_GET_NV_STATE(pGpu); + + // + // For external GPUs (Thunderbolt eGPU), check if we're in surprise + // removal before proceeding with power state changes. 
+ // + if (nv->flags & NV_FLAG_IN_SURPRISE_REMOVAL) + { + rmapiLockRelease(); + goto done_nvpcf; + } + if ((rmStatus = os_ref_dynamic_power(nv, NV_DYNAMIC_PM_FINE)) == NV_OK) { @@ -5870,6 +5928,7 @@ void NV_API_CALL rm_acpi_nvpcf_notify( rmapiLockRelease(); } +done_nvpcf: threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE); NV_EXIT_RM_RUNTIME(sp,fp); } diff --git a/src/nvidia/arch/nvalloc/unix/src/osinit.c b/src/nvidia/arch/nvalloc/unix/src/osinit.c index 44185ad78c..09e12451ad 100644 --- a/src/nvidia/arch/nvalloc/unix/src/osinit.c +++ b/src/nvidia/arch/nvalloc/unix/src/osinit.c @@ -356,18 +356,33 @@ osHandleGpuLost pmc_boot_0 = NV_PRIV_REG_RD32(nv->regs->map_u, NV_PMC_BOOT_0); if (pmc_boot_0 != nvp->pmc_boot_0) { + NvBool bIsExternalGpu = pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU); + // // This doesn't support PEX Reset and Recovery yet. // This will help to prevent accessing registers of a GPU // which has fallen off the bus. // - nvErrorLog_va((void *)pGpu, ROBUST_CHANNEL_GPU_HAS_FALLEN_OFF_THE_BUS, - "GPU has fallen off the bus."); + // For external GPUs (eGPUs), this is an expected condition during + // hot-unplug, so we keep logging minimal to avoid noise. 
+ // + if (!bIsExternalGpu) + { + nvErrorLog_va((void *)pGpu, ROBUST_CHANNEL_GPU_HAS_FALLEN_OFF_THE_BUS, + "GPU has fallen off the bus."); + } gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_GPU_UNAVAILABLE, NULL, 0, ROBUST_CHANNEL_GPU_HAS_FALLEN_OFF_THE_BUS, 0); - NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "GPU has fallen off the bus.\n"); + if (bIsExternalGpu) + { + NV_DEV_PRINTF(NV_DBG_WARNINGS, nv, "External GPU disconnected.\n"); + } + else + { + NV_DEV_PRINTF(NV_DBG_ERRORS, nv, "GPU has fallen off the bus.\n"); + } if (pGpu->boardInfo != NULL && pGpu->boardInfo->serialNumber[0] != '\0') { @@ -2479,13 +2494,28 @@ void RmShutdownAdapter( if (nvp->flags & NV_INIT_FLAG_GPU_STATE_LOAD) { rmStatus = gpuStateUnload(pGpu, GPU_STATE_DEFAULT); - NV_ASSERT(rmStatus == NV_OK); + // + // During surprise removal (e.g., Thunderbolt eGPU hot-unplug), + // this may fail. Log but don't assert since we're tearing down anyway. + // + if (rmStatus != NV_OK) + { + NV_PRINTF(LEVEL_WARNING, + "gpuStateUnload failed during teardown: 0x%x\n", rmStatus); + } } if (nvp->flags & NV_INIT_FLAG_GPU_STATE) { rmStatus = gpuStateDestroy(pGpu); - NV_ASSERT(rmStatus == NV_OK); + // + // During surprise removal, this may fail. Log but don't assert. + // + if (rmStatus != NV_OK) + { + NV_PRINTF(LEVEL_WARNING, + "gpuStateDestroy failed during teardown: 0x%x\n", rmStatus); + } } if (IS_DCE_CLIENT(pGpu)) @@ -2639,7 +2669,14 @@ void RmDisableAdapter( if (nvp->flags & NV_INIT_FLAG_GPU_STATE_LOAD) { rmStatus = gpuStateUnload(pGpu, GPU_STATE_DEFAULT); - NV_ASSERT(rmStatus == NV_OK); + // + // During surprise removal, this may fail. Log but don't assert. 
+ // + if (rmStatus != NV_OK) + { + NV_PRINTF(LEVEL_WARNING, + "gpuStateUnload failed during eGPU teardown: 0x%x\n", rmStatus); + } nvp->flags &= ~NV_INIT_FLAG_GPU_STATE_LOAD; } diff --git a/src/nvidia/src/kernel/core/thread_state.c b/src/nvidia/src/kernel/core/thread_state.c index 10f73e3e48..e1fdabf22d 100644 --- a/src/nvidia/src/kernel/core/thread_state.c +++ b/src/nvidia/src/kernel/core/thread_state.c @@ -407,7 +407,10 @@ static NV_STATUS _threadNodeCheckTimeout(OBJGPU *pGpu, THREAD_STATE_NODE *pThrea { if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu)) { - NV_PRINTF(LEVEL_ERROR, "API_GPU_ATTACHED_SANITY_CHECK failed!\n"); + // + // Don't log error during surprise removal - this is expected + // when GPU is hot-unplugged (e.g., Thunderbolt eGPU). + // return NV_ERR_TIMEOUT; } } diff --git a/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c b/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c index d76f255a13..7d028da946 100644 --- a/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c +++ b/src/nvidia/src/kernel/diagnostics/nv_debug_dump.c @@ -220,6 +220,17 @@ nvdDoEngineDump_IMPL NVD_ENGINE_CALLBACK *pEngineCallback; NV_STATUS nvStatus = NV_OK; + // + // Skip engine dumps for expected external GPU surprise removal. + // Engine dump attempts will fail with GPU_IS_LOST errors which + // are expected and just add noise to the log. + // + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU) && + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + return NV_ERR_GPU_IS_LOST; + } + NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, prbEncNestedStart(pPrbEnc, NVDEBUG_NVDUMP_GPU_INFO)); @@ -263,6 +274,17 @@ nvdDumpAllEngines_IMPL NVD_ENGINE_CALLBACK *pEngineCallback; NV_STATUS nvStatus = NV_OK; + // + // Skip engine dumps for expected external GPU surprise removal. + // Engine dump attempts will fail with GPU_IS_LOST errors which + // are expected and just add noise to the log. 
+ // + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU) && + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + return NV_ERR_GPU_IS_LOST; + } + NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, prbEncNestedStart(pPrbEnc, NVDEBUG_NVDUMP_GPU_INFO)); diff --git a/src/nvidia/src/kernel/gpu/disp/kern_disp.c b/src/nvidia/src/kernel/gpu/disp/kern_disp.c index d061de8aec..d52293b999 100644 --- a/src/nvidia/src/kernel/gpu/disp/kern_disp.c +++ b/src/nvidia/src/kernel/gpu/disp/kern_disp.c @@ -308,7 +308,7 @@ kdispDestroyCommonHandle_IMPL rmStatus = pRmApi->FreeWithSecInfo(pRmApi, pKernelDisplay->hInternalClient, pKernelDisplay->hDispCommonHandle, RMAPI_ALLOC_FLAGS_NONE, &pRmApi->defaultSecInfo); - NV_ASSERT(rmStatus == NV_OK); + NV_ASSERT((rmStatus == NV_OK) || (rmStatus == NV_ERR_GPU_IN_FULLCHIP_RESET) || (rmStatus == NV_ERR_GPU_IS_LOST)); rmapiutilFreeClientAndDeviceHandles(pRmApi, &pKernelDisplay->hInternalClient, &pKernelDisplay->hInternalDevice, diff --git a/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c b/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c index 8b828fc69c..4cf70bef2c 100644 --- a/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c +++ b/src/nvidia/src/kernel/gpu/falcon/arch/turing/kernel_falcon_tu102.c @@ -184,8 +184,13 @@ kflcnReset_TU102 NV_ASSERT_OK_OR_RETURN(kflcnPreResetWait_HAL(pGpu, pKernelFlcn)); NV_ASSERT_OK(kflcnResetHw(pGpu, pKernelFlcn)); status = kflcnWaitForResetToFinish_HAL(pGpu, pKernelFlcn); - NV_ASSERT_OR_RETURN((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET), status); - if (status == NV_ERR_GPU_IN_FULLCHIP_RESET) + // + // During surprise removal, this may return NV_ERR_TIMEOUT in addition to + // NV_ERR_GPU_IS_LOST. Both are acceptable during teardown. 
+ // + NV_ASSERT_OR_RETURN((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || + (status == NV_ERR_GPU_IS_LOST) || (status == NV_ERR_TIMEOUT), status); + if (status != NV_OK) return status; kflcnSwitchToFalcon_HAL(pGpu, pKernelFlcn); kflcnRegWrite_HAL(pGpu, pKernelFlcn, NV_PFALCON_FALCON_RM, diff --git a/src/nvidia/src/kernel/gpu/gpu.c b/src/nvidia/src/kernel/gpu/gpu.c index 51a48bad08..2329e5f37a 100644 --- a/src/nvidia/src/kernel/gpu/gpu.c +++ b/src/nvidia/src/kernel/gpu/gpu.c @@ -5180,6 +5180,17 @@ gpuSetDisconnectedProperties_IMPL OBJGPU *pGpu ) { + // + // Log GPU disconnection once. This is expected during Thunderbolt eGPU + // hot-unplug but should be noted for debugging purposes. + // + if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + NV_PRINTF(LEVEL_NOTICE, + "GPU 0x%x marked as disconnected/lost\n", + pGpu->gpuInstance); + } + pGpu->setProperty(pGpu, PDB_PROP_GPU_IS_LOST, NV_TRUE); pGpu->setProperty(pGpu, PDB_PROP_GPU_IS_CONNECTED, NV_FALSE); pGpu->setProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH, NV_FALSE); diff --git a/src/nvidia/src/kernel/gpu/gpu_user_shared_data.c b/src/nvidia/src/kernel/gpu/gpu_user_shared_data.c index e5284448e1..f35f81e421 100644 --- a/src/nvidia/src/kernel/gpu/gpu_user_shared_data.c +++ b/src/nvidia/src/kernel/gpu/gpu_user_shared_data.c @@ -242,12 +242,15 @@ _gpushareddataDestroyGsp params.bInit = NV_FALSE; - // Free Memdesc on GSP-side - NV_CHECK_OK(status, LEVEL_ERROR, - pRmApi->Control(pRmApi, pGpu->hInternalClient, - pGpu->hInternalSubdevice, - NV2080_CTRL_CMD_INTERNAL_INIT_USER_SHARED_DATA, - ¶ms, sizeof(params))); + // Free Memdesc on GSP-side - ignore GPU_IS_LOST during surprise removal + status = pRmApi->Control(pRmApi, pGpu->hInternalClient, + pGpu->hInternalSubdevice, + NV2080_CTRL_CMD_INTERNAL_INIT_USER_SHARED_DATA, + ¶ms, sizeof(params)); + if ((status != NV_OK) && (status != NV_ERR_GPU_IS_LOST)) + { + NV_PRINTF(LEVEL_ERROR, "Failed to free user shared data on GSP: 0x%x\n", status); + } } static 
NV_STATUS @@ -460,9 +463,9 @@ _gpushareddataSendDataPollRpc NV2080_CTRL_CMD_INTERNAL_USER_SHARED_DATA_SET_DATA_POLL, ¶ms, sizeof(params)); NV_CHECK_OR_RETURN(LEVEL_ERROR, - (status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET), + (status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST), status); - if (status == NV_ERR_GPU_IN_FULLCHIP_RESET) + if ((status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)) return status; pGpu->userSharedData.lastPolledDataMask = polledDataMask; pGpu->userSharedData.pollingIntervalMs = pollingIntervalMs; diff --git a/src/nvidia/src/kernel/gpu/gr/fecs_event_list.c b/src/nvidia/src/kernel/gpu/gr/fecs_event_list.c index 566a656c9b..5aacfdca7e 100644 --- a/src/nvidia/src/kernel/gpu/gr/fecs_event_list.c +++ b/src/nvidia/src/kernel/gpu/gr/fecs_event_list.c @@ -1620,8 +1620,8 @@ fecsBufferDisableHw NV2080_CTRL_CMD_INTERNAL_GR_GET_FECS_TRACE_HW_ENABLE, &getHwEnableParams, sizeof(getHwEnableParams)); - NV_ASSERT_OR_RETURN_VOID((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); - if (status == NV_ERR_GPU_IN_FULLCHIP_RESET) + NV_ASSERT_OR_RETURN_VOID((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); + if ((status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)) return; if (getHwEnableParams.bEnable) @@ -1636,7 +1636,7 @@ fecsBufferDisableHw NV2080_CTRL_CMD_INTERNAL_GR_SET_FECS_TRACE_HW_ENABLE, &setHwEnableParams, sizeof(setHwEnableParams)); - NV_ASSERT_OR_RETURN_VOID((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT_OR_RETURN_VOID((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); } } diff --git a/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c b/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c index ceaae2c99c..81e4271a40 100644 --- a/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c +++ b/src/nvidia/src/kernel/gpu/gr/kernel_graphics.c 
@@ -2613,7 +2613,7 @@ void kgraphicsFreeGlobalCtxBuffers_IMPL { NV_STATUS status; status = kmemsysCacheOp_HAL(pGpu, pKernelMemorySystem, NULL, FB_CACHE_VIDEO_MEMORY, FB_CACHE_EVICT); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); } } diff --git a/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c b/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c index 19d415a775..e1b8d9d069 100644 --- a/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c +++ b/src/nvidia/src/kernel/gpu/gsp/arch/ampere/kernel_gsp_falcon_ga102.c @@ -179,6 +179,10 @@ kgspExecuteHsFalcon_GA102 NvU32 data = 0; NvU32 dmaCmd; + // Check for surprise removal (e.g., Thunderbolt eGPU hot-unplug) + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + return NV_ERR_GPU_IS_LOST; + NV_ASSERT_OR_RETURN(pFlcnUcode != NULL, NV_ERR_INVALID_ARGUMENT); NV_ASSERT_OR_RETURN(pKernelFlcn != NULL, NV_ERR_INVALID_STATE); diff --git a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_booter_tu102.c b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_booter_tu102.c index 20f84418a0..d1ddf81865 100644 --- a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_booter_tu102.c +++ b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_booter_tu102.c @@ -145,6 +145,10 @@ kgspExecuteBooterUnloadIfNeeded_TU102 if (API_GPU_IN_RESET_SANITY_CHECK(pGpu)) return NV_ERR_GPU_IN_FULLCHIP_RESET; + // Check for surprise removal (e.g., Thunderbolt eGPU hot-unplug) + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + return NV_ERR_GPU_IS_LOST; + // skip actually executing Booter Unload if WPR2 is not up if (!kgspIsWpr2Up_HAL(pGpu, pKernelGsp)) { @@ -155,7 +159,16 @@ kgspExecuteBooterUnloadIfNeeded_TU102 NV_PRINTF(LEVEL_INFO, "executing Booter Unload\n"); NV_ASSERT_OR_RETURN(pKernelGsp->pBooterUnloadUcode != NULL, NV_ERR_INVALID_STATE); - 
NV_ASSERT_OK(kflcnReset_HAL(pGpu, staticCast(pKernelSec2, KernelFalcon))); + // Falcon reset may time out during surprise removal - don't assert + status = kflcnReset_HAL(pGpu, staticCast(pKernelSec2, KernelFalcon)); + if ((status != NV_OK) && (status != NV_ERR_TIMEOUT) && (status != NV_ERR_GPU_IS_LOST)) + { + NV_ASSERT(0); + } + if (status != NV_OK) + { + return status; + } // SR code if (sysmemAddrOfSuspendResumeData != 0) diff --git a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c index 71d1de4e9a..fccfae7dca 100644 --- a/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c +++ b/src/nvidia/src/kernel/gpu/gsp/arch/turing/kernel_gsp_tu102.c @@ -636,7 +636,18 @@ kgspTeardown_TU102 // Reset GSP so we can load FWSEC-SB status = kflcnReset_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon)); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + // + // During surprise removal, this may return NV_ERR_TIMEOUT in addition to + // NV_ERR_GPU_IS_LOST. Both are acceptable during teardown. 
+ // + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || + (status == NV_ERR_GPU_IS_LOST) || (status == NV_ERR_TIMEOUT)); + + // Skip remaining hardware operations if GPU is lost/timeout - can't talk to it anyway + if (status != NV_OK) + { + goto skip_fwsec; + } // Invoke FWSEC-SB to put back PreOsApps during driver unload status = kgspPrepareForFwsecSb_HAL(pGpu, pKernelGsp, pKernelGsp->pFwsecUcode, &preparedCmd); @@ -648,7 +659,7 @@ kgspTeardown_TU102 else { status = kgspExecuteFwsec_HAL(pGpu, pKernelGsp, &preparedCmd); - if ((status != NV_OK) && (status != NV_ERR_GPU_IN_FULLCHIP_RESET)) + if ((status != NV_OK) && (status != NV_ERR_GPU_IN_FULLCHIP_RESET) && (status != NV_ERR_GPU_IS_LOST)) { NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC-SB for PreOsApps during driver unload: 0x%x\n", status); NV_ASSERT_FAILED("FWSEC-SB failed"); @@ -656,6 +667,8 @@ kgspTeardown_TU102 } } +skip_fwsec: + // Execute Booter Unload status = kgspExecuteBooterUnloadIfNeeded_HAL(pGpu, pKernelGsp, _kgspGetBooterUnloadArgs(pKernelGsp, unloadMode)); diff --git a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c index be52b51333..fc337d4429 100644 --- a/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c +++ b/src/nvidia/src/kernel/gpu/gsp/kernel_gsp.c @@ -149,6 +149,8 @@ static NV_STATUS _kgspRpcRecvPoll(OBJGPU *, OBJRPC *, NvU32, NvU32); static NV_STATUS _kgspRpcDrainEvents(OBJGPU *, KernelGsp *, NvU32, NvU32, KernelGspRpcEventHandlerContext); static void _kgspRpcIncrementTimeoutCountAndRateLimitPrints(OBJGPU *, OBJRPC *); +static NvBool _kgspIsExternalGpuSurpriseRemoval(OBJGPU *); + static NV_STATUS _kgspAllocSimAccessBuffer(OBJGPU *pGpu, KernelGsp *pKernelGsp); static void _kgspFreeSimAccessBuffer(OBJGPU *pGpu, KernelGsp *pKernelGsp); @@ -306,11 +308,13 @@ _kgspRpcSanityCheck(OBJGPU *pGpu, KernelGsp *pKernelGsp, OBJRPC *pRpc) pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) { NV_PRINTF(LEVEL_INFO, "GPU lost, skipping RPC\n"); + 
pRpc->bQuietPrints = NV_TRUE; return NV_ERR_GPU_IS_LOST; } if (osIsGpuShutdown(pGpu)) { NV_PRINTF(LEVEL_INFO, "GPU shutdown, skipping RPC\n"); + pRpc->bQuietPrints = NV_TRUE; return NV_ERR_GPU_IS_LOST; } if (!gpuIsGpuFullPowerForPmResume(pGpu)) @@ -2029,6 +2033,20 @@ kgspLogRpcDebugInfoToProtobuf prbEncNestedEnd(pProtobufData); } +/*! + * Check if this is an expected external GPU surprise removal. + * Used to suppress noisy debug output during normal eGPU hot-unplug. + */ +static NvBool +_kgspIsExternalGpuSurpriseRemoval +( + OBJGPU *pGpu +) +{ + return pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU) && + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST); +} + void kgspLogRpcDebugInfo ( @@ -2044,6 +2062,15 @@ kgspLogRpcDebugInfo NvU64 activeData[2]; const NvU32 rpcEntriesToLog = (RPC_HISTORY_DEPTH > 8) ? 8 : RPC_HISTORY_DEPTH; + // + // Suppress detailed RPC debug output for expected external GPU surprise removal. + // This keeps the log clean during normal Thunderbolt eGPU hot-unplug. + // + if (_kgspIsExternalGpuSurpriseRemoval(pGpu)) + { + return; + } + _kgspGetActiveRpcDebugData(pRpc, pMsgHdr->function, &activeData[0], &activeData[1]); NV_ERROR_LOG_DATA(pGpu, errorNum, @@ -2096,6 +2123,15 @@ _kgspCheckSlowRpc NV_ASSERT_OR_RETURN_VOID(tsFreqUs > 0); + // + // Suppress slow RPC warnings for expected external GPU surprise removal. + // During normal Thunderbolt eGPU hot-unplug, slow/stalled RPCs are expected. + // + if (_kgspIsExternalGpuSurpriseRemoval(pGpu)) + { + return; + } + duration = (pHistoryEntry->ts_end - pHistoryEntry->ts_start) / tsFreqUs; if (duration > SLOW_RPC_THRESHOLD_US) @@ -2145,7 +2181,15 @@ _kgspLogXid119 KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu); KernelFalcon *pKernelFlcn = staticCast(pKernelGsp, KernelFalcon); - if (pRpc->timeoutCount == 1) + // + // Suppress Xid 119 logging for expected external GPU surprise removal. + // During normal Thunderbolt eGPU hot-unplug, RPC timeouts are expected. 
+ // + if (_kgspIsExternalGpuSurpriseRemoval(pGpu)) + { + return; + } + { NV_PRINTF(LEVEL_ERROR, "********************************* GSP Timeout **********************************\n"); diff --git a/src/nvidia/src/kernel/gpu/intr/intr.c b/src/nvidia/src/kernel/gpu/intr/intr.c index 86e1991855..c7a2f89bb5 100644 --- a/src/nvidia/src/kernel/gpu/intr/intr.c +++ b/src/nvidia/src/kernel/gpu/intr/intr.c @@ -119,6 +119,18 @@ intrServiceStall_IMPL(OBJGPU *pGpu, Intr *pIntr) if (!RMCFG_FEATURE_PLATFORM_GSP) { + // + // Check if GPU is already known to be lost/detached before doing any + // register reads. This prevents log spam during surprise removal + // (e.g., Thunderbolt eGPU hot-unplug). + // + if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu) || + API_GPU_IN_RESET_SANITY_CHECK(pGpu) || + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + goto exit; + } + // // If the GPU is off the BUS or surprise removed during servicing DPC for ISRs // we wont know about GPU state until after we start processing DPCs for every @@ -134,18 +146,17 @@ intrServiceStall_IMPL(OBJGPU *pGpu, Intr *pIntr) if (regReadValue == GPU_REG_VALUE_INVALID) { - NV_PRINTF(LEVEL_ERROR, - "Failed GPU reg read : 0x%x. Check whether GPU is present on the bus\n", - regReadValue); - } - - if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu)) - { - goto exit; - } - - if (API_GPU_IN_RESET_SANITY_CHECK(pGpu)) - { + // + // GPU has been surprise removed. Mark it as lost and return early. + // Log once when first detected. + // + if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + NV_PRINTF(LEVEL_WARNING, + "GPU 0x%x surprise removed (reg read returned 0xFFFFFFFF)\n", + pGpu->gpuInstance); + pGpu->setProperty(pGpu, PDB_PROP_GPU_IS_LOST, NV_TRUE); + } goto exit; } } @@ -1555,6 +1566,18 @@ _intrServiceStallCommonCheckBegin if (!RMCFG_FEATURE_PLATFORM_GSP) { + // + // Check if GPU is already known to be lost/detached before doing any + // register reads. 
This prevents log spam during surprise removal + // (e.g., Thunderbolt eGPU hot-unplug). + // + if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu) || + API_GPU_IN_RESET_SANITY_CHECK(pGpu) || + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + return NV_ERR_GPU_IS_LOST; + } + // // If the GPU is off the BUS or surprise removed during servicing DPC for ISRs // we wont know about GPU state until after we start processing DPCs for every @@ -1570,14 +1593,17 @@ _intrServiceStallCommonCheckBegin if (regReadValue == GPU_REG_VALUE_INVALID) { - NV_PRINTF(LEVEL_ERROR, - "Failed GPU reg read : 0x%x. Check whether GPU is present on the bus\n", - regReadValue); - } - - // Dont service interrupts if GPU is surprise removed - if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu) || API_GPU_IN_RESET_SANITY_CHECK(pGpu)) - { + // + // GPU has been surprise removed. Mark it as lost and return early. + // Log once when first detected. + // + if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + NV_PRINTF(LEVEL_WARNING, + "GPU 0x%x surprise removed (reg read returned 0xFFFFFFFF)\n", + pGpu->gpuInstance); + pGpu->setProperty(pGpu, PDB_PROP_GPU_IS_LOST, NV_TRUE); + } return NV_ERR_GPU_IS_LOST; } } @@ -1635,7 +1661,16 @@ intrServiceStallList_IMPL NvBool bPending; CALL_CONTEXT *pOldContext = NULL; - NV_ASSERT_OK_OR_ELSE(status, _intrServiceStallCommonCheckBegin(pGpu, pIntr, &pOldContext), return); + // + // Don't use NV_ASSERT_OK_OR_ELSE here - NV_ERR_GPU_IS_LOST is expected + // during surprise removal (e.g., Thunderbolt eGPU hot-unplug) and + // should not spam the logs with assertion messages. 
+ // + status = _intrServiceStallCommonCheckBegin(pGpu, pIntr, &pOldContext); + if (status != NV_OK) + { + return; + } do { @@ -1688,7 +1723,16 @@ intrServiceStallSingle_IMPL bitVectorClrAll(&engines); bitVectorSet(&engines, engIdx); - NV_ASSERT_OK_OR_ELSE(status, _intrServiceStallCommonCheckBegin(pGpu, pIntr, &pOldContext), return); + // + // Don't use NV_ASSERT_OK_OR_ELSE here - NV_ERR_GPU_IS_LOST is expected + // during surprise removal (e.g., Thunderbolt eGPU hot-unplug) and + // should not spam the logs with assertion messages. + // + status = _intrServiceStallCommonCheckBegin(pGpu, pIntr, &pOldContext); + if (status != NV_OK) + { + return; + } do { diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/ce_utils.c b/src/nvidia/src/kernel/gpu/mem_mgr/ce_utils.c index 6f25f6bf53..20a8d62fb3 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/ce_utils.c +++ b/src/nvidia/src/kernel/gpu/mem_mgr/ce_utils.c @@ -343,10 +343,17 @@ ceutilsDestruct_IMPL // process all callbacks while CeUtils is fully functional _ceutilsProcessCompletionCallbacks(pCeUtils); portSyncSpinlockAcquire(pCeUtils->pCallbackLock); - NV_ASSERT(listCount(&pCeUtils->completionCallbacks) == 0); + // During surprise removal, callbacks may not complete cleanly - skip assertion if GPU is lost + if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + NV_ASSERT(listCount(&pCeUtils->completionCallbacks) == 0); + } portSyncSpinlockRelease(pCeUtils->pCallbackLock); // make sure no new work was queued from callbacks - NV_ASSERT(pCeUtils->lastCompletedPayload == lastSubmittedPayload); + if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + NV_ASSERT(pCeUtils->lastCompletedPayload == lastSubmittedPayload); + } if ((pChannel->bClientUserd) && (pChannel->pControlGPFifo != NULL)) { diff --git a/src/nvidia/src/kernel/gpu/mem_mgr/vaspace_api.c b/src/nvidia/src/kernel/gpu/mem_mgr/vaspace_api.c index 86b62f1b1e..32c96523e9 100644 --- a/src/nvidia/src/kernel/gpu/mem_mgr/vaspace_api.c +++ 
b/src/nvidia/src/kernel/gpu/mem_mgr/vaspace_api.c @@ -570,7 +570,7 @@ vaspaceapiDestruct_IMPL(VaSpaceApi *pVaspaceApi) if ((IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) && !bBar1VA && !bFlaVA) { NV_RM_RPC_FREE(pGpu, hClient, hParent, hVASpace, status); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); } NV_PRINTF(LEVEL_INFO, diff --git a/src/nvidia/src/kernel/mem_mgr/mem.c b/src/nvidia/src/kernel/mem_mgr/mem.c index 706faa9021..e0dc48016b 100644 --- a/src/nvidia/src/kernel/mem_mgr/mem.c +++ b/src/nvidia/src/kernel/mem_mgr/mem.c @@ -175,7 +175,7 @@ memDestruct_IMPL if (pMemory->bRpcAlloc && (IS_VIRTUAL(pGpu) || IS_FW_CLIENT(pGpu))) { NV_RM_RPC_FREE(pGpu, hClient, hParent, hMemory, status); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); } } diff --git a/src/nvidia/src/kernel/vgpu/rpc.c b/src/nvidia/src/kernel/vgpu/rpc.c index 35673a0e33..3ca3ab710b 100644 --- a/src/nvidia/src/kernel/vgpu/rpc.c +++ b/src/nvidia/src/kernel/vgpu/rpc.c @@ -1872,6 +1872,16 @@ static NV_STATUS _issueRpcAndWait(OBJGPU *pGpu, OBJRPC *pRpc) NvU32 expectedFunc = vgpu_rpc_message_header_v->function; NvU32 expectedSequence = 0; + // + // Suppress RPC error logging for expected external GPU surprise removal. + // During normal Thunderbolt eGPU hot-unplug, RPC failures are expected. 
+ // + if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_EXTERNAL_GPU) && + pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST)) + { + pRpc->bQuietPrints = NV_TRUE; + } + status = rpcSendMessage(pGpu, pRpc, &expectedSequence); if (status != NV_OK) { diff --git a/src/nvidia/src/libraries/resserv/src/rs_client.c b/src/nvidia/src/libraries/resserv/src/rs_client.c index 62d9738eef..f18afa0dec 100644 --- a/src/nvidia/src/libraries/resserv/src/rs_client.c +++ b/src/nvidia/src/libraries/resserv/src/rs_client.c @@ -841,7 +841,7 @@ clientFreeResource_IMPL _refRemoveAllDependencies(pResourceRef); status = serverFreeResourceRpcUnderLock(pServer, pParams); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); // NV_PRINTF(LEVEL_INFO, "hClient %x: Freeing hResource: %x\n", // pClient->hClient, pResourceRef->hResource); diff --git a/src/nvidia/src/libraries/resserv/src/rs_server.c b/src/nvidia/src/libraries/resserv/src/rs_server.c index fee31554ca..fbd3d0bb2f 100644 --- a/src/nvidia/src/libraries/resserv/src/rs_server.c +++ b/src/nvidia/src/libraries/resserv/src/rs_server.c @@ -256,7 +256,7 @@ NV_STATUS serverFreeResourceTreeUnderLock(RsServer *pServer, RS_RES_FREE_PARAMS goto done; status = clientFreeResource(pResourceRef->pClient, pServer, pFreeParams); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || (status == NV_ERR_GPU_IS_LOST)); serverResLock_Epilogue(pServer, LOCK_ACCESS_WRITE, pLockInfo, &releaseFlags); } @@ -1372,7 +1372,7 @@ serverFreeResourceTree freeParams.bInvalidateOnly = bInvalidateOnly; freeParams.pSecInfo = pParams->pSecInfo; status = serverFreeResourceTreeUnderLock(pServer, &freeParams); - NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET)); + NV_ASSERT((status == NV_OK) || (status == NV_ERR_GPU_IN_FULLCHIP_RESET) || 
(status == NV_ERR_GPU_IS_LOST)); if (pServer->bDebugFreeList) { From 91eae20d7360b744cae0588cc416e96b14e3b842 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Thu, 11 Dec 2025 18:15:11 +0200 Subject: [PATCH 02/20] fix(thunderbolt): add nvInvalidateDeviceReferences to fix module unload crash --- src/nvidia-modeset/include/nvkms-private.h | 4 +- src/nvidia-modeset/kapi/src/nvkms-kapi.c | 37 +++++++++-- src/nvidia-modeset/src/nvkms-evo.c | 27 +++++--- src/nvidia-modeset/src/nvkms.c | 73 +++++++++++----------- 4 files changed, 88 insertions(+), 53 deletions(-) diff --git a/src/nvidia-modeset/include/nvkms-private.h b/src/nvidia-modeset/include/nvkms-private.h index b43b54e1e9..fc667cb3e3 100644 --- a/src/nvidia-modeset/include/nvkms-private.h +++ b/src/nvidia-modeset/include/nvkms-private.h @@ -35,6 +35,8 @@ struct NvKmsPerOpenDev *nvAllocPerOpenDev(struct NvKmsPerOpen *pOpen, void nvRevokeDevice(NVDevEvoPtr pDevEvo); +void nvInvalidateDeviceReferences(NVDevEvoPtr pDevEvo); + void nvFreePerOpenDev(struct NvKmsPerOpen *pOpen, struct NvKmsPerOpenDev *pOpenDev); @@ -73,8 +75,6 @@ const NVEvoApiHandlesRec *nvGetSurfaceHandlesFromOpenDevConst( void nvKmsServiceNonStallInterrupt(void *dataPtr, NvU32 dataU32); -void nvKmsReinitializeGlobalClient(void); - #ifdef __cplusplus }; #endif diff --git a/src/nvidia-modeset/kapi/src/nvkms-kapi.c b/src/nvidia-modeset/kapi/src/nvkms-kapi.c index 7fc03f7585..bf7258ba9d 100644 --- a/src/nvidia-modeset/kapi/src/nvkms-kapi.c +++ b/src/nvidia-modeset/kapi/src/nvkms-kapi.c @@ -644,8 +644,13 @@ static void FreeDevice(struct NvKmsKapiDevice *device) * faults or hangs when trying to access unmapped GPU memory. * * We only: - * 1. Release the GPU reference count (nvkms_close_gpu) - * 2. Free kernel memory resources (semaphore, device struct) + * 1. Mark GPU as lost to prevent hardware access + * 2. Release the GPU reference count (nvkms_close_gpu) + * 3. 
Clean up kernel memory resources (handle allocator, semaphore, device struct) + * + * We skip: + * - KmsFreeDevice() - would call nvkms_ioctl_from_kapi() which accesses hardware + * - RmFreeDevice() - would call nvRmApiFree() which accesses hardware * * The hardware resources will be cleaned up when the GPU is physically * removed from the system. @@ -664,10 +669,31 @@ static void FreeDeviceForSurpriseRemoval(struct NvKmsKapiDevice *device) nvkms_gpu_lost(device->gpuId); /* - * Skip KmsFreeDevice() and RmFreeDevice() - these try to access - * GPU hardware via ioctls and RM API calls, which will crash - * since the GPU memory is unmapped after surprise removal. + * Clear device handles to prevent any stale references. + * Don't call nvRmApiFree() as that would access hardware. + */ + device->hKmsDevice = 0; + device->hKmsDisp = 0; + device->hRmSubDevice = 0; + device->hRmDevice = 0; + device->hRmClient = 0; + device->smgGpuInstSubscriptionHdl = 0; + device->smgComputeInstSubscriptionHdl = 0; + + /* + * Tear down the handle allocator - this only frees kernel memory + * (bitmaps), no hardware access. + */ + nvTearDownUnixRmHandleAllocator(&device->handleAllocator); + device->deviceInstance = 0; + + /* + * Clear pKmsOpen - we can't call nvkms_close_from_kapi() as that + * would try to access hardware through nvKmsClose(). The popen + * structure will be leaked, but this only happens during surprise + * removal which is an abnormal condition. 
*/ + device->pKmsOpen = NULL; /* Lower the reference count of gpu - this is safe, no hardware access */ nvkms_close_gpu(device->gpuId); @@ -675,6 +701,7 @@ static void FreeDeviceForSurpriseRemoval(struct NvKmsKapiDevice *device) /* Free kernel memory resources */ if (device->pSema != NULL) { nvkms_sema_free(device->pSema); + device->pSema = NULL; } nvKmsKapiFree(device); diff --git a/src/nvidia-modeset/src/nvkms-evo.c b/src/nvidia-modeset/src/nvkms-evo.c index a2b125c366..0d1f54d211 100644 --- a/src/nvidia-modeset/src/nvkms-evo.c +++ b/src/nvidia-modeset/src/nvkms-evo.c @@ -8838,14 +8838,21 @@ NvBool nvFreeDevEvo(NVDevEvoPtr pDevEvo) /* * If the GPU was lost (surprise removal), skip all hardware-related * cleanup. Just free software resources and remove from device list. + * + * NOTE: We do NOT call nvFreePerOpenDev() here because the pNvKmsOpenDev + * is still in the global open list. It will be properly cleaned up during + * module unload when nvKmsClose iterates through all open handles. + * Calling nvFreePerOpenDev here would cause a double-free crash. */ if (pDevEvo->gpuLost) { nvEvoLogDev(pDevEvo, EVO_LOG_INFO, "Freeing device after GPU lost, skipping hardware cleanup"); - /* Still need to free the per-open data (software resources only) */ - nvFreePerOpenDev(nvEvoGlobal.nvKmsPerOpen, pDevEvo->pNvKmsOpenDev); - pDevEvo->pNvKmsOpenDev = NULL; + /* + * Invalidate all pOpenDev references to this device before freeing it. + * This ensures nvKmsClose won't try to access the freed pDevEvo. + */ + nvInvalidateDeviceReferences(pDevEvo); goto free_software_resources; } @@ -8913,12 +8920,16 @@ NvBool nvFreeDevEvo(NVDevEvoPtr pDevEvo) nvFree(pDevEvo); /* - * If the GPU was lost and the device list is now empty, reinitialize - * the global RM client so that newly attached GPUs can be used. + * NOTE: We intentionally do NOT call nvKmsReinitializeGlobalClient() + * here even if the device list is empty. 
The global client handle + * is still referenced by open handles (pNvKmsOpenDev) that will be + * cleaned up during module unload by nvKmsClose(). Reinitializing + * the client here would corrupt those handles and cause a crash. + * + * If the user reconnects the GPU before unloading the module, it will + * work because AllocDevice checks for stale gpuLost devices and cleans + * them up. The global client doesn't need to be reinitialized for that. */ - if (wasGpuLost && nvListIsEmpty(&nvEvoGlobal.devList)) { - nvKmsReinitializeGlobalClient(); - } return TRUE; } diff --git a/src/nvidia-modeset/src/nvkms.c b/src/nvidia-modeset/src/nvkms.c index 561ada241a..1b925dc5eb 100644 --- a/src/nvidia-modeset/src/nvkms.c +++ b/src/nvidia-modeset/src/nvkms.c @@ -1621,6 +1621,16 @@ static void DisableRemainingVblankSemControls( static void FreeDeviceReference(struct NvKmsPerOpen *pOpen, struct NvKmsPerOpenDev *pOpenDev) { + /* + * If pDevEvo is NULL, the device was already freed due to GPU loss + * (surprise removal). In this case, skip all hardware-related cleanup + * and just free the software structures. + */ + if (pOpenDev->pDevEvo == NULL) { + nvFreePerOpenDev(pOpen, pOpenDev); + return; + } + /* Disable all client-owned vblank sync objects that still exist. */ DisableRemainingVblankSyncObjects(pOpen, pOpenDev); @@ -5277,6 +5287,31 @@ void nvRevokeDevice(NVDevEvoPtr pDevEvo) } } +/* + * Invalidate all pOpenDev references to a device. + * Called when GPU is lost to ensure nvKmsClose doesn't access freed pDevEvo. + * This sets pOpenDev->pDevEvo to NULL for all open handles. 
+ */ +void nvInvalidateDeviceReferences(NVDevEvoPtr pDevEvo) +{ + struct NvKmsPerOpen *pOpen; + struct NvKmsPerOpenDev *pOpenDev; + NvKmsGenericHandle dev; + + if (pDevEvo == NULL) { + return; + } + + nvListForEachEntry(pOpen, &perOpenIoctlList, perOpenIoctlListEntry) { + FOR_ALL_POINTERS_IN_EVO_API_HANDLES(&pOpen->ioctl.devHandles, + pOpenDev, dev) { + if (pOpenDev->pDevEvo == pDevEvo) { + pOpenDev->pDevEvo = NULL; + } + } + } +} + /*! * Open callback. * @@ -6265,44 +6300,6 @@ static void FreeGlobalState(void) nvClearDpyOverrides(); } -/* - * Reinitialize the global RM client after a GPU surprise removal. - * When a GPU is removed, the RM client handle may become invalid. - * This function re-creates the client handle so that newly attached - * GPUs can be used. - */ -void nvKmsReinitializeGlobalClient(void) -{ - NvU32 ret; - - /* - * First, try to free the old client handle. This may fail if RM - * already invalidated it, but that's OK. - */ - if (nvEvoGlobal.clientHandle != 0) { - nvRmApiFree(nvEvoGlobal.clientHandle, nvEvoGlobal.clientHandle, - nvEvoGlobal.clientHandle); - nvEvoGlobal.clientHandle = 0; - } - - /* Allocate a new root client */ - ret = nvRmApiAlloc(NV01_NULL_OBJECT, - NV01_NULL_OBJECT, - NV01_NULL_OBJECT, - NV01_ROOT, - &nvEvoGlobal.clientHandle); - - if (ret != NVOS_STATUS_SUCCESS) { - nvEvoLog(EVO_LOG_ERROR, "Failed to reinitialize client after GPU removal"); - return; - } - - /* Update the RM context */ - nvEvoGlobal.rmSmgContext.clientHandle = nvEvoGlobal.clientHandle; - - nvEvoLog(EVO_LOG_INFO, "Reinitialized global client after GPU surprise removal"); -} - /* * Wrappers to help SMG access NvKmsKAPI's RM context. 
*/ From 9392240d26accd0e44d64c69230571abecff1500 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 03:39:11 +0200 Subject: [PATCH 03/20] fix(thunderbolt): add GPU reconnect fixes for 590 --- kernel-open/nvidia-drm/nvidia-drm-drv.c | 6 +++ src/nvidia-modeset/include/nvkms-private.h | 2 + src/nvidia-modeset/src/nvkms.c | 45 ++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/kernel-open/nvidia-drm/nvidia-drm-drv.c b/kernel-open/nvidia-drm/nvidia-drm-drv.c index 3190fda519..3b3e39fec8 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-drv.c +++ b/kernel-open/nvidia-drm/nvidia-drm-drv.c @@ -863,6 +863,9 @@ static void nv_drm_dev_unload(struct drm_device *dev) NV_DRM_DEV_LOG_INFO(nv_dev, "Surprise removal detected, skipping hardware access"); + /* Wake up any processes waiting on flip events */ + wake_up_all(&nv_dev->flip_event_wq); + cancel_delayed_work_sync(&nv_dev->hotplug_event_work); mutex_lock(&nv_dev->lock); @@ -2252,6 +2255,9 @@ void nv_drm_remove(NvU32 gpuId) NV_DRM_DEV_LOG_INFO(nv_dev, "PCI channel offline - surprise removal detected"); nv_dev->inSurpriseRemoval = NV_TRUE; + + /* Wake up any processes waiting on flip events */ + wake_up_all(&nv_dev->flip_event_wq); } drm_dev_unplug(nv_dev->dev); diff --git a/src/nvidia-modeset/include/nvkms-private.h b/src/nvidia-modeset/include/nvkms-private.h index fc667cb3e3..df468bc90e 100644 --- a/src/nvidia-modeset/include/nvkms-private.h +++ b/src/nvidia-modeset/include/nvkms-private.h @@ -37,6 +37,8 @@ void nvRevokeDevice(NVDevEvoPtr pDevEvo); void nvInvalidateDeviceReferences(NVDevEvoPtr pDevEvo); +NvBool nvReinitializeGlobalClientAfterGpuLost(void); + void nvFreePerOpenDev(struct NvKmsPerOpen *pOpen, struct NvKmsPerOpenDev *pOpenDev); diff --git a/src/nvidia-modeset/src/nvkms.c b/src/nvidia-modeset/src/nvkms.c index 1b925dc5eb..1df7ed9db4 100644 --- a/src/nvidia-modeset/src/nvkms.c +++ b/src/nvidia-modeset/src/nvkms.c @@ -1401,6 +1401,17 @@ static NvBool AllocDevice(struct 
NvKmsPerOpen *pOpen, pDevEvo->allocRefCnt = 1; nvFreeDevEvo(pDevEvo); pDevEvo = NULL; + + /* + * After cleaning up a gpuLost device, reinitialize the global RM + * client handle. RM may have invalidated internal state when the + * GPU was lost, causing subsequent API calls to fail with + * NV_ERR_INVALID_OBJECT_HANDLE. + */ + if (!nvReinitializeGlobalClientAfterGpuLost()) { + pParams->reply.status = NVKMS_ALLOC_DEVICE_STATUS_FATAL_ERROR; + return FALSE; + } } if (pDevEvo == NULL) { @@ -6300,6 +6311,40 @@ static void FreeGlobalState(void) nvClearDpyOverrides(); } +NvBool nvReinitializeGlobalClientAfterGpuLost(void) +{ + NvU32 ret; + + /* Only reinitialize if we have a client handle */ + if (nvEvoGlobal.clientHandle == 0) { + return TRUE; + } + + nvEvoLog(EVO_LOG_INFO, "Reinitializing global client after GPU lost"); + + /* Free the old client handle */ + nvRmApiFree(nvEvoGlobal.clientHandle, nvEvoGlobal.clientHandle, + nvEvoGlobal.clientHandle); + nvEvoGlobal.clientHandle = 0; + + /* Allocate a new client handle */ + ret = nvRmApiAlloc(NV01_NULL_OBJECT, + NV01_NULL_OBJECT, + NV01_NULL_OBJECT, + NV01_ROOT, + &nvEvoGlobal.clientHandle); + + if (ret != NVOS_STATUS_SUCCESS) { + nvEvoLog(EVO_LOG_ERROR, "Failed to reinitialize global client"); + return FALSE; + } + + /* Update RM context */ + nvEvoGlobal.rmSmgContext.clientHandle = nvEvoGlobal.clientHandle; + + return TRUE; +} + /* * Wrappers to help SMG access NvKmsKAPI's RM context. 
*/ From aedb5e7846d04a355e52c7a767516a58c746d309 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 03:42:00 +0200 Subject: [PATCH 04/20] fix(thunderbolt): check gpuLost in FreeDeviceReference --- src/nvidia-modeset/src/nvkms.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/nvidia-modeset/src/nvkms.c b/src/nvidia-modeset/src/nvkms.c index 1df7ed9db4..3613acfb9f 100644 --- a/src/nvidia-modeset/src/nvkms.c +++ b/src/nvidia-modeset/src/nvkms.c @@ -1632,12 +1632,20 @@ static void DisableRemainingVblankSemControls( static void FreeDeviceReference(struct NvKmsPerOpen *pOpen, struct NvKmsPerOpenDev *pOpenDev) { + NVDevEvoPtr pDevEvo = pOpenDev->pDevEvo; + /* * If pDevEvo is NULL, the device was already freed due to GPU loss * (surprise removal). In this case, skip all hardware-related cleanup * and just free the software structures. + * + * Also check if the device is marked as gpuLost - this can happen if + * nvInvalidateDeviceReferences hasn't been called yet (e.g., during + * concurrent cleanup) or if there's a race between GPU loss detection + * and this close path. 
*/ - if (pOpenDev->pDevEvo == NULL) { + if (pDevEvo == NULL || pDevEvo->gpuLost) { + pOpenDev->pDevEvo = NULL; nvFreePerOpenDev(pOpen, pOpenDev); return; } From 816d70a77647952b120e3360b66532778b2c608f Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 04:29:22 +0200 Subject: [PATCH 05/20] fix(thunderbolt): check GPU accessibility before HAL calls in UVM ISR --- kernel-open/nvidia-uvm/uvm_gpu_isr.c | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_isr.c b/kernel-open/nvidia-uvm/uvm_gpu_isr.c index b2834b58c0..e9adf468b5 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_isr.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.c @@ -29,6 +29,7 @@ #include "uvm_gpu_access_counters.h" #include "uvm_gpu_non_replayable_faults.h" #include "uvm_thread_context.h" +#include // Level-based vs pulse-based interrupts // ===================================== @@ -63,6 +64,21 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args); // half, only. static void access_counters_isr_bottom_half_entry(void *args); +// Check if GPU hardware is accessible (not hot-unplugged). +// This must be called before any HAL function that accesses GPU registers. +static bool uvm_parent_gpu_is_accessible(uvm_parent_gpu_t *parent_gpu) +{ + // If pci_dev is NULL, the GPU has been unregistered + if (parent_gpu->pci_dev == NULL) + return false; + + // Check if PCI channel is offline (surprise removal/hot-unplug) + if (pci_channel_offline(parent_gpu->pci_dev)) + return false; + + return true; +} + // Increments the reference count tracking whether replayable page fault // interrupts should be disabled. The caller is guaranteed that replayable page // faults are disabled upon return. 
Interrupts might already be disabled prior @@ -881,7 +897,9 @@ static void uvm_parent_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *pare { uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock); - if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0) + if (parent_gpu->isr.replayable_faults.handling && + parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0 && + uvm_parent_gpu_is_accessible(parent_gpu)) parent_gpu->fault_buffer_hal->disable_replayable_faults(parent_gpu); ++parent_gpu->isr.replayable_faults.disable_intr_ref_count; @@ -893,7 +911,9 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren UVM_ASSERT(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0); --parent_gpu->isr.replayable_faults.disable_intr_ref_count; - if (parent_gpu->isr.replayable_faults.handling && parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0) + if (parent_gpu->isr.replayable_faults.handling && + parent_gpu->isr.replayable_faults.disable_intr_ref_count == 0 && + uvm_parent_gpu_is_accessible(parent_gpu)) parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu); } @@ -910,7 +930,8 @@ void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counte // (disable_intr_ref_count > 0), so the check always returns false when the // race occurs if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 && - parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) { + parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0 && + uvm_parent_gpu_is_accessible(parent_gpu)) { parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters); } @@ -929,7 +950,8 @@ void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counter --parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count; if 
(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 && - parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) { + parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0 && + uvm_parent_gpu_is_accessible(parent_gpu)) { parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters); } } From 5427a2ba71429f69ba3f7fe570f63d1daac485b5 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 04:45:20 +0200 Subject: [PATCH 06/20] fix(uvm): add GPU accessibility check in fault_buffer_flush --- kernel-open/nvidia-uvm/uvm_gpu_isr.c | 2 +- kernel-open/nvidia-uvm/uvm_gpu_isr.h | 5 +++++ kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c | 13 +++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_isr.c b/kernel-open/nvidia-uvm/uvm_gpu_isr.c index e9adf468b5..dc4e31cbfd 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_isr.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.c @@ -66,7 +66,7 @@ static void access_counters_isr_bottom_half_entry(void *args); // Check if GPU hardware is accessible (not hot-unplugged). // This must be called before any HAL function that accesses GPU registers. -static bool uvm_parent_gpu_is_accessible(uvm_parent_gpu_t *parent_gpu) +bool uvm_parent_gpu_is_accessible(uvm_parent_gpu_t *parent_gpu) { // If pci_dev is NULL, the GPU has been unregistered if (parent_gpu->pci_dev == NULL) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_isr.h b/kernel-open/nvidia-uvm/uvm_gpu_isr.h index 2a5f22bb53..a17884d3ee 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_isr.h +++ b/kernel-open/nvidia-uvm/uvm_gpu_isr.h @@ -198,4 +198,9 @@ void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counter // g_uvm_global.global_lock is held so that the returned pointer remains valid. uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu); +// Check if GPU hardware is accessible (not hot-unplugged). 
+// This must be called before any HAL function that accesses GPU registers. +// Returns false if pci_dev is NULL or PCI channel is offline. +bool uvm_parent_gpu_is_accessible(uvm_parent_gpu_t *parent_gpu); + #endif // __UVM_GPU_ISR_H__ diff --git a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c index 1c557cab6d..29cebe7e01 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c @@ -27,6 +27,7 @@ #include "uvm_linux.h" #include "uvm_global.h" #include "uvm_gpu_replayable_faults.h" +#include "uvm_gpu_isr.h" #include "uvm_hal.h" #include "uvm_kvmalloc.h" #include "uvm_tools.h" @@ -677,9 +678,21 @@ NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu) UVM_ASSERT(gpu->parent->replayable_faults_supported); + // Check if GPU hardware is still accessible before attempting to flush. + // After hot-unplug, the GPU registers are no longer mapped and accessing + // them would cause a page fault crash. 
+ if (!uvm_parent_gpu_is_accessible(gpu->parent)) + return NV_ERR_GPU_IS_LOST; + // Disables replayable fault interrupts and fault servicing uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent); + // Re-check after acquiring the lock in case GPU was removed concurrently + if (!uvm_parent_gpu_is_accessible(gpu->parent)) { + uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent); + return NV_ERR_GPU_IS_LOST; + } + status = fault_buffer_flush_locked(gpu->parent, gpu, UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT, From 06d0b2031d296b87783a07cbc7c69737086fc0ce Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 04:51:59 +0200 Subject: [PATCH 07/20] fix(uvm): add GPU accessibility checks to all fault/counter service handlers --- kernel-open/nvidia-uvm/uvm_gpu_access_counters.c | 6 ++++++ kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c | 6 ++++++ kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c | 5 +++++ 3 files changed, 17 insertions(+) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c index a906cb8c77..10510c7ff2 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c @@ -25,6 +25,7 @@ #include "uvm_gpu_access_counters.h" #include "uvm_global.h" #include "uvm_api.h" +#include "uvm_gpu_isr.h" #include "uvm_gpu.h" #include "uvm_hal.h" #include "uvm_kvmalloc.h" @@ -1766,6 +1767,11 @@ void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters) { NV_STATUS status = NV_OK; uvm_access_counter_service_batch_context_t *batch_context; + uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu; + + // Check if GPU is still accessible (e.g., not hot-unplugged) + if (!uvm_parent_gpu_is_accessible(parent_gpu)) + return; batch_context = &access_counters->batch_service_context; diff --git a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c index 
ddb32aa804..f0867689d7 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c @@ -24,6 +24,7 @@ #include "uvm_common.h" #include "uvm_api.h" #include "uvm_gpu_non_replayable_faults.h" +#include "uvm_gpu_isr.h" #include "uvm_gpu.h" #include "uvm_hal.h" #include "uvm_lock.h" @@ -778,6 +779,11 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent { NvU32 cached_faults; + // Check if GPU is still accessible before servicing faults. + // After hot-unplug, accessing GPU registers would cause a crash. + if (!uvm_parent_gpu_is_accessible(parent_gpu)) + return; + // If this handler is modified to handle fewer than all of the outstanding // faults, then special handling will need to be added to uvm_suspend() // to guarantee that fault processing has completed before control is diff --git a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c index 29cebe7e01..1a90cf483e 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c @@ -2927,6 +2927,11 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu) UVM_ASSERT(parent_gpu->replayable_faults_supported); + // Check if GPU is still accessible before servicing faults. + // After hot-unplug, accessing GPU registers would cause a crash. 
+ if (!uvm_parent_gpu_is_accessible(parent_gpu)) + return; + uvm_tracker_init(&batch_context->tracker); // Process all faults in the buffer From b62d7f94345e09feb03820d6e1a4cfd311f2305e Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 05:09:19 +0200 Subject: [PATCH 08/20] fix(uvm): skip PMA free when GPU is not accessible --- kernel-open/nvidia-uvm/uvm_pmm_gpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index 97ff13dcdd..005cc15978 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -166,6 +166,7 @@ #include "nv_uvm_interface.h" #include "uvm_api.h" #include "uvm_gpu.h" +#include "uvm_gpu_isr.h" #include "uvm_pmm_gpu.h" #include "uvm_mem.h" #include "uvm_mmu.h" @@ -2066,6 +2067,14 @@ void free_root_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chunk_t *root_chunk, free_ if (chunk->is_zero) flags |= UVM_PMA_FREE_IS_ZERO; + // Skip PMA free if GPU is not accessible (e.g., hot-unplugged). + // Calling into the nvidia module with a gone GPU causes hangs + // due to corrupted locks. 
+ if (!uvm_parent_gpu_is_accessible(gpu->parent)) { + uvm_up_read(&pmm->pma_lock); + return; + } + nvUvmInterfacePmaFreePages(pmm->pma, &chunk->address, 1, UVM_CHUNK_SIZE_MAX, flags); uvm_up_read(&pmm->pma_lock); From b8de6e7f8e594a28d1056182b9bafa2c2d52c716 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 05:22:00 +0200 Subject: [PATCH 09/20] fix(uvm): skip PMA callback unregistration when GPU not accessible --- kernel-open/nvidia-uvm/uvm_pmm_gpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index 005cc15978..7f44fd122a 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -3574,7 +3574,10 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm) UVM_ASSERT(uvm_pmm_gpu_check_orphan_pages(pmm)); release_free_root_chunks(pmm); - if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu)) + // Skip unregistering callbacks if GPU is not accessible (hot-unplugged). + // The nvidia module's internal state is corrupted when the GPU is gone. 
+ if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu) && + uvm_parent_gpu_is_accessible(gpu->parent)) nvUvmInterfacePmaUnregisterEvictionCallbacks(pmm->pma); // TODO: Bug 1766184: Handle ECC/RC From 3c33b0f788891da930e8e51d0f54c97012ccebd1 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 05:30:13 +0200 Subject: [PATCH 10/20] fix(uvm): skip RM calls in fault_buffer_deinit when GPU not accessible --- kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c index 1a90cf483e..8efadc6ae1 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c @@ -306,11 +306,15 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu) fault_buffer_deinit_replayable_faults(parent_gpu); if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) { - status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE)); - UVM_ASSERT(status == NV_OK); - - uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device, - &parent_gpu->fault_buffer.rm_info)); + // Skip RM calls if GPU is not accessible (e.g., hot-unplugged). + // The nvidia module's internal state is corrupted when the GPU is gone. 
+ if (uvm_parent_gpu_is_accessible(parent_gpu)) { + status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE)); + UVM_ASSERT(status == NV_OK); + + uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device, + &parent_gpu->fault_buffer.rm_info)); + } parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0; } From f4e9c5d50c25a5d7f0f8cefc2048a62c3074d416 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 05:40:04 +0200 Subject: [PATCH 11/20] fix(uvm): add GPU accessibility checks to high-risk deinit paths --- kernel-open/nvidia-uvm/uvm_gpu.c | 30 +++++++++++++------ .../nvidia-uvm/uvm_gpu_access_counters.c | 19 ++++++++---- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_gpu.c b/kernel-open/nvidia-uvm/uvm_gpu.c index 265a2c46c0..6f6a5ffee0 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_gpu.c @@ -45,6 +45,7 @@ #include "uvm_linux.h" #include "uvm_mmu.h" #include "uvm_kvmalloc.h" +#include "uvm_gpu_isr.h" #define UVM_PROC_GPUS_PEER_DIR_NAME "peers" @@ -1362,7 +1363,8 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu) static void deconfigure_address_space(uvm_gpu_t *gpu) { - if (gpu->rm_address_space_moved_to_page_tree) + // Skip RM call if GPU is not accessible (e.g., hot-unplugged). + if (gpu->rm_address_space_moved_to_page_tree && uvm_parent_gpu_is_accessible(gpu->parent)) uvm_rm_locked_call_void(nvUvmInterfaceUnsetPageDirectory(gpu->rm_address_space)); if (gpu->address_space_tree.root) @@ -1780,6 +1782,10 @@ static void remove_gpu_from_parent_gpu(uvm_gpu_t *gpu) static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu) { + // Check GPU accessibility before pci_dev is cleared. + // If the GPU was hot-unplugged, skip RM calls that would crash. 
+ bool gpu_accessible = uvm_parent_gpu_is_accessible(parent_gpu); + // All channels should have been removed before the retained count went to 0 UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table)); UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table)); @@ -1805,7 +1811,9 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu) if (parent_gpu->rm_info.isSimulated) --g_uvm_global.num_simulated_devices; - if (parent_gpu->rm_device != 0) + // Skip RM call if GPU was not accessible (e.g., hot-unplugged). + // The nvidia module's internal state is corrupted when the GPU is gone. + if (parent_gpu->rm_device != 0 && gpu_accessible) uvm_rm_locked_call_void(nvUvmInterfaceDeviceDestroy(parent_gpu->rm_device)); uvm_parent_gpu_kref_put(parent_gpu); @@ -1848,16 +1856,20 @@ static void deinit_gpu(uvm_gpu_t *gpu) uvm_pmm_gpu_deinit(&gpu->pmm); - if (gpu->rm_address_space != 0) - uvm_rm_locked_call_void(nvUvmInterfaceAddressSpaceDestroy(gpu->rm_address_space)); - - deinit_procfs_dirs(gpu); + // Skip RM calls if GPU is not accessible (e.g., hot-unplugged). + // The nvidia module's internal state is corrupted when the GPU is gone. 
+ if (uvm_parent_gpu_is_accessible(gpu->parent)) { + if (gpu->rm_address_space != 0) + uvm_rm_locked_call_void(nvUvmInterfaceAddressSpaceDestroy(gpu->rm_address_space)); - if (gpu->parent->smc.enabled) { - if (gpu->smc.rm_device != 0) - uvm_rm_locked_call_void(nvUvmInterfaceDeviceDestroy(gpu->smc.rm_device)); + if (gpu->parent->smc.enabled) { + if (gpu->smc.rm_device != 0) + uvm_rm_locked_call_void(nvUvmInterfaceDeviceDestroy(gpu->smc.rm_device)); + } } + deinit_procfs_dirs(gpu); + gpu->magic = 0; } diff --git a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c index 10510c7ff2..f4469cee99 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c @@ -506,11 +506,15 @@ void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 n } if (access_counters && access_counters->rm_info.accessCntrBufferHandle) { - NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device, - &access_counters->rm_info)); uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context; - UVM_ASSERT(status == NV_OK); + // Skip RM call if GPU is not accessible (e.g., hot-unplugged). + // The nvidia module's internal state is corrupted when the GPU is gone. 
+ if (uvm_parent_gpu_is_accessible(parent_gpu)) { + NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device, + &access_counters->rm_info)); + UVM_ASSERT(status == NV_OK); + } access_counters->rm_info.accessCntrBufferHandle = 0; uvm_kvfree(batch_context->notification_cache); @@ -594,9 +598,12 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32 if (status != NV_OK) UVM_ASSERT(status == uvm_global_get_status()); - status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device, - &access_counters->rm_info)); - UVM_ASSERT(status == NV_OK); + // Skip RM call if GPU is not accessible (e.g., hot-unplugged). + if (uvm_parent_gpu_is_accessible(parent_gpu)) { + status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device, + &access_counters->rm_info)); + UVM_ASSERT(status == NV_OK); + } } // Increment the refcount of access counter enablement. If this is the first From 6b0ac941f31d87d9d1dc72bcf19f5fa6fa40bae3 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 05:59:46 +0200 Subject: [PATCH 12/20] fix(nvidia-drm): skip nvKms calls during surprise removal in GEM/FB free --- kernel-open/nvidia-drm/nvidia-drm-fb.c | 12 +++++++++--- kernel-open/nvidia-drm/nvidia-drm-fence.c | 13 +++++++++++++ kernel-open/nvidia-drm/nvidia-drm-gem-dma-buf.c | 8 +++++++- .../nvidia-drm/nvidia-drm-gem-nvkms-memory.c | 11 +++++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/kernel-open/nvidia-drm/nvidia-drm-fb.c b/kernel-open/nvidia-drm/nvidia-drm-fb.c index 8c0a45757a..353fc70272 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fb.c +++ b/kernel-open/nvidia-drm/nvidia-drm-fb.c @@ -61,9 +61,15 @@ static void nv_drm_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); - /* Free NvKmsKapiSurface associated with this framebuffer object */ - - nvKms->destroySurface(nv_dev->pDevice, nv_fb->pSurface); + /* + * Only call 
nvKms->destroySurface if pDevice is valid and device is not + * in surprise removal. During hot-unplug, nvidia_modeset internal state + * may be corrupted before this destructor runs from delayed_fput. + */ + if (nv_dev->pDevice != NULL && !nv_dev->inSurpriseRemoval) { + /* Free NvKmsKapiSurface associated with this framebuffer object */ + nvKms->destroySurface(nv_dev->pDevice, nv_fb->pSurface); + } __nv_drm_framebuffer_free(nv_fb); } diff --git a/kernel-open/nvidia-drm/nvidia-drm-fence.c b/kernel-open/nvidia-drm/nvidia-drm-fence.c index 7af1ed7f13..d8fcf6edfb 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-fence.c +++ b/kernel-open/nvidia-drm/nvidia-drm-fence.c @@ -212,6 +212,19 @@ static void __nv_drm_prime_fence_context_destroy( struct nv_drm_prime_fence_context *nv_prime_fence_context = to_nv_prime_fence_context(nv_fence_context); + /* + * Skip nvKms calls if device is being surprise-removed. + * The nvidia_modeset internal state may be corrupted. + */ + if (nv_dev->pDevice == NULL || nv_dev->inSurpriseRemoval) { + /* Force signal pending fences and free */ + spin_lock(&nv_prime_fence_context->lock); + nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context); + spin_unlock(&nv_prime_fence_context->lock); + nv_drm_free(nv_fence_context); + return; + } + /* * Free channel event before destroying the fence context, otherwise event * callback continue to get called. diff --git a/kernel-open/nvidia-drm/nvidia-drm-gem-dma-buf.c b/kernel-open/nvidia-drm/nvidia-drm-gem-dma-buf.c index 163a8ecf63..fdddfd98f9 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-gem-dma-buf.c +++ b/kernel-open/nvidia-drm/nvidia-drm-gem-dma-buf.c @@ -43,7 +43,13 @@ void __nv_drm_gem_dma_buf_free(struct nv_drm_gem_object *nv_gem) struct nv_drm_device *nv_dev = nv_gem->nv_dev; struct nv_drm_gem_dma_buf *nv_dma_buf = to_nv_dma_buf(nv_gem); - if (nv_dma_buf->base.pMemory) { + /* + * Only call nvKms->freeMemory if pDevice is valid and device is not + * in surprise removal. 
During hot-unplug, nvidia_modeset internal state + * may be corrupted before this destructor runs from delayed_fput. + */ + if (nv_dma_buf->base.pMemory && nv_dev->pDevice != NULL && + !nv_dev->inSurpriseRemoval) { /* Free NvKmsKapiMemory handle associated with this gem object */ nvKms->freeMemory(nv_dev->pDevice, nv_dma_buf->base.pMemory); } diff --git a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c index 6b92c7532a..da2e8cbe5f 100644 --- a/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c +++ b/kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c @@ -42,6 +42,17 @@ static void __nv_drm_gem_nvkms_memory_free(struct nv_drm_gem_object *nv_gem) struct nv_drm_gem_nvkms_memory *nv_nvkms_memory = to_nv_nvkms_memory(nv_gem); + /* + * Skip nvKms calls if pDevice is NULL or inSurpriseRemoval is set. + * During hot-unplug, the nvidia_modeset internal state (semaphores, + * memory handles) may be corrupted or freed before this destructor + * runs from delayed_fput. The memory resources are gone with the GPU. 
+ */ + if (nv_dev->pDevice == NULL || nv_dev->inSurpriseRemoval) { + nv_drm_free(nv_nvkms_memory); + return; + } + if (nv_nvkms_memory->physically_mapped) { if (nv_nvkms_memory->pWriteCombinedIORemapAddress != NULL) { iounmap(nv_nvkms_memory->pWriteCombinedIORemapAddress); From 98c06f6b5c1ebdc5d2b27225a1ce67f86bc4aafe Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 06:09:27 +0200 Subject: [PATCH 13/20] fix(uvm): check device_p2p_initialised before accessing pci_dev in deinit --- kernel-open/nvidia-uvm/uvm_pmm_gpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c index 7f44fd122a..ab63e3f904 100644 --- a/kernel-open/nvidia-uvm/uvm_pmm_gpu.c +++ b/kernel-open/nvidia-uvm/uvm_pmm_gpu.c @@ -3369,7 +3369,9 @@ void uvm_pmm_gpu_device_p2p_init(uvm_parent_gpu_t *parent_gpu) void uvm_pmm_gpu_device_p2p_deinit(uvm_parent_gpu_t *parent_gpu) { - if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu)) { + // Check device_p2p_initialised first before accessing pci_dev. + // During partial GPU init/deinit, pci_dev may be NULL or P2P was never initialized. 
+ if (parent_gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(parent_gpu) && parent_gpu->pci_dev != NULL) { struct page *p2p_page = pfn_to_page(pci_resource_start(parent_gpu->pci_dev, uvm_device_p2p_static_bar(parent_gpu)) >> PAGE_SHIFT); From 3aab5cda77cae34c158b4629e6f825615c63f154 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 07:00:02 +0200 Subject: [PATCH 14/20] fix(uvm): skip RM calls during cleanup when GPU is surprise removed --- kernel-open/nvidia-uvm/uvm_channel.c | 20 +++++++++++++++----- kernel-open/nvidia-uvm/uvm_rm_mem.c | 15 ++++++++++++--- kernel-open/nvidia-uvm/uvm_user_channel.c | 13 ++++++++++++- kernel-open/nvidia-uvm/uvm_va_space.c | 12 +++++++++++- 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_channel.c b/kernel-open/nvidia-uvm/uvm_channel.c index 93eed89f01..e7e71e0556 100644 --- a/kernel-open/nvidia-uvm/uvm_channel.c +++ b/kernel-open/nvidia-uvm/uvm_channel.c @@ -27,6 +27,8 @@ #include "uvm_common.h" #include "uvm_global.h" #include "uvm_hal.h" +#include "uvm_gpu.h" +#include "uvm_gpu_isr.h" #include "uvm_procfs.h" #include "uvm_push.h" #include "uvm_gpu_semaphore.h" @@ -2310,10 +2312,14 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel) free_conf_computing_buffers(channel); } - if (uvm_channel_is_proxy(channel)) - uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle)); - else - uvm_rm_locked_call_void(nvUvmInterfaceChannelDestroy(channel->handle)); + // Skip RM calls if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. 
+ if (uvm_parent_gpu_is_accessible(pool->manager->gpu->parent)) { + if (uvm_channel_is_proxy(channel)) + uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle)); + else + uvm_rm_locked_call_void(nvUvmInterfaceChannelDestroy(channel->handle)); + } uvm_gpu_tracking_semaphore_free(&channel->tracking_sem); @@ -2657,7 +2663,11 @@ static void tsg_destroy(uvm_channel_pool_t *pool, uvmGpuTsgHandle tsg_handle) { UVM_ASSERT(pool->num_tsgs > 0); - uvm_rm_locked_call_void(nvUvmInterfaceTsgDestroy(tsg_handle)); + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. + if (uvm_parent_gpu_is_accessible(pool->manager->gpu->parent)) + uvm_rm_locked_call_void(nvUvmInterfaceTsgDestroy(tsg_handle)); + pool->num_tsgs--; } diff --git a/kernel-open/nvidia-uvm/uvm_rm_mem.c b/kernel-open/nvidia-uvm/uvm_rm_mem.c index 756080fb24..767f9e8d8a 100644 --- a/kernel-open/nvidia-uvm/uvm_rm_mem.c +++ b/kernel-open/nvidia-uvm/uvm_rm_mem.c @@ -23,6 +23,7 @@ #include "uvm_rm_mem.h" #include "uvm_gpu.h" +#include "uvm_gpu_isr.h" #include "uvm_global.h" #include "uvm_kvmalloc.h" #include "uvm_linux.h" @@ -298,8 +299,11 @@ void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem) if (!uvm_rm_mem_mapped_on_cpu(rm_mem)) return; - uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space, - uvm_rm_mem_get_cpu_va(rm_mem))); + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. 
+ if (uvm_parent_gpu_is_accessible(rm_mem->gpu_owner->parent)) + uvm_rm_locked_call_void(nvUvmInterfaceMemoryCpuUnMap(rm_mem->gpu_owner->rm_address_space, + uvm_rm_mem_get_cpu_va(rm_mem))); rm_mem_clear_cpu_va(rm_mem); } @@ -355,7 +359,12 @@ static void rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu) rm_mem_unmap_gpu_proxy(rm_mem, gpu); va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu); - uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va)); + + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. + if (uvm_parent_gpu_is_accessible(gpu->parent)) + uvm_rm_locked_call_void(nvUvmInterfaceMemoryFree(gpu->rm_address_space, va)); + rm_mem_clear_gpu_va(rm_mem, gpu); } diff --git a/kernel-open/nvidia-uvm/uvm_user_channel.c b/kernel-open/nvidia-uvm/uvm_user_channel.c index a85a645035..634c245ab6 100644 --- a/kernel-open/nvidia-uvm/uvm_user_channel.c +++ b/kernel-open/nvidia-uvm/uvm_user_channel.c @@ -32,6 +32,7 @@ #include "uvm_kvmalloc.h" #include "uvm_api.h" #include "uvm_gpu.h" +#include "uvm_gpu_isr.h" #include "uvm_tracker.h" #include "uvm_map_external.h" #include "nv_uvm_interface.h" @@ -782,6 +783,14 @@ void uvm_user_channel_stop(uvm_user_channel_t *user_channel) // write mode. uvm_assert_rwsem_locked_read(&va_space->lock); + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // client handles will result in repeated NV_ERR_INVALID_OBJECT_HANDLE + // errors during teardown. + if (!uvm_parent_gpu_is_accessible(user_channel->gpu->parent)) { + atomic_set(&user_channel->is_bound, 0); + return; + } + // TODO: Bug 1737765. This doesn't stop the user from putting the // channel back on the runlist, which could put stale instance // pointers back in the fault buffer. 
@@ -854,7 +863,9 @@ void uvm_user_channel_destroy_detached(uvm_user_channel_t *user_channel) uvm_kvfree(user_channel->resources); } - if (user_channel->rm_retained_channel) + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. + if (user_channel->rm_retained_channel && uvm_parent_gpu_is_accessible(user_channel->gpu->parent)) uvm_rm_locked_call_void(nvUvmInterfaceReleaseChannel(user_channel->rm_retained_channel)); uvm_user_channel_release(user_channel); diff --git a/kernel-open/nvidia-uvm/uvm_va_space.c b/kernel-open/nvidia-uvm/uvm_va_space.c index f5ff7b46c7..6abd2af9be 100644 --- a/kernel-open/nvidia-uvm/uvm_va_space.c +++ b/kernel-open/nvidia-uvm/uvm_va_space.c @@ -32,6 +32,7 @@ #include "uvm_tools.h" #include "uvm_thread_context.h" #include "uvm_hal.h" +#include "uvm_gpu_isr.h" #include "uvm_map_external.h" #include "uvm_ats.h" #include "uvm_gpu_access_counters.h" @@ -1436,6 +1437,13 @@ void uvm_gpu_va_space_unset_page_dir(uvm_gpu_va_space_t *gpu_va_space) if (gpu_va_space->did_set_page_directory) { NV_STATUS status; + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. + if (!uvm_parent_gpu_is_accessible(gpu_va_space->gpu->parent)) { + gpu_va_space->did_set_page_directory = false; + return; + } + status = uvm_rm_locked_call(nvUvmInterfaceUnsetPageDirectory(gpu_va_space->duped_gpu_va_space)); UVM_ASSERT_MSG(status == NV_OK, "nvUvmInterfaceUnsetPageDirectory() failed: %s, GPU %s\n", @@ -1487,7 +1495,9 @@ static void destroy_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space) if (gpu_va_space->page_tables.root) uvm_page_tree_deinit(&gpu_va_space->page_tables); - if (gpu_va_space->duped_gpu_va_space) + // Skip RM call if GPU has been surprise removed. Calling RM with stale + // handles will result in NV_ERR_INVALID_OBJECT_HANDLE errors. 
+ if (gpu_va_space->duped_gpu_va_space && uvm_parent_gpu_is_accessible(gpu_va_space->gpu->parent)) uvm_rm_locked_call_void(nvUvmInterfaceAddressSpaceDestroy(gpu_va_space->duped_gpu_va_space)); // If the state is DEAD, then this GPU VA space is tracked in From f2354378266840870648559e3b5cde68a5f7685d Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 12 Dec 2025 07:40:25 +0200 Subject: [PATCH 15/20] fix(uvm): skip GPU semaphore reads during surprise removal --- kernel-open/nvidia-uvm/uvm_gpu_semaphore.c | 15 +++++++++++++++ src/nvidia/src/kernel/rmapi/nv_gpu_ops.c | 17 +++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c index 478c1aafd9..dae09f31ce 100644 --- a/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c +++ b/kernel-open/nvidia-uvm/uvm_gpu_semaphore.c @@ -27,6 +27,7 @@ #include "uvm_kvmalloc.h" #include "uvm_channel.h" // For UVM_GPU_SEMAPHORE_MAX_JUMP #include "uvm_conf_computing.h" +#include "uvm_gpu_isr.h" #define UVM_SEMAPHORE_SIZE 4 #define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE @@ -822,10 +823,18 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_semaphore) { NvU64 completed; + uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu; // Check that the GPU which owns the semaphore is still present UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore)); + // If the GPU is not accessible (surprise removed), return the cached + // completed value without reading from GPU memory. Reading from GPU + // memory after surprise removal returns garbage values that cause + // assertion failures. 
+ if (!uvm_parent_gpu_is_accessible(gpu->parent)) + return atomic64_read(&tracking_semaphore->completed_value); + if (tracking_semaphore_uses_mutex(tracking_semaphore)) uvm_mutex_lock(&tracking_semaphore->m_lock); else @@ -844,10 +853,16 @@ NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semapho bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value) { NvU64 completed = atomic64_read(&tracking_sem->completed_value); + uvm_gpu_t *gpu = tracking_sem->semaphore.page->pool->gpu; // Check that the GPU which owns the semaphore is still present UVM_ASSERT(tracking_semaphore_check_gpu(tracking_sem)); + // If the GPU is not accessible, consider all values completed to avoid + // spinning forever waiting for a GPU that's gone. + if (!uvm_parent_gpu_is_accessible(gpu->parent)) + return true; + if (completed >= value) { // atomic64_read() doesn't imply any memory barriers and we need all // subsequent memory accesses in this thread to be ordered after the diff --git a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c index 6a155fd597..23a594b92a 100644 --- a/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c +++ b/src/nvidia/src/kernel/rmapi/nv_gpu_ops.c @@ -809,10 +809,19 @@ NV_STATUS nvGpuOpsDestroySession(struct gpuSession *session) if (!session) return NV_OK; - // Sanity Check: There should not be any attached devices with the session! - NV_ASSERT(!session->devices); - // Sanity Check: If there are no devices, there should also be no p2p Info! - NV_ASSERT(!session->p2pInfo); + // During surprise removal (GPU lost), devices may not have been properly + // detached. In normal operation, these assertions catch programming errors. + // When the GPU is lost, we log and continue to avoid blocking cleanup. 
+ if (session->devices) + { + NV_PRINTF(LEVEL_WARNING, + "Destroying session with devices still attached (GPU may be lost)\n"); + } + if (session->p2pInfo) + { + NV_PRINTF(LEVEL_WARNING, + "Destroying session with p2p info still present (GPU may be lost)\n"); + } // freeing session will free everything under it pRmApi->Free(pRmApi, session->handle, session->handle); From ebac14020186212c973e15bc06ad295818bf440e Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 19 Dec 2025 12:42:01 +0200 Subject: [PATCH 16/20] nvkms-dma: fix struct member references for 590.48.01 --- src/nvidia-modeset/src/nvkms-dma.c | 54 ++---------------------------- 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/src/nvidia-modeset/src/nvkms-dma.c b/src/nvidia-modeset/src/nvkms-dma.c index 409c8ea6f8..ef9e29195d 100644 --- a/src/nvidia-modeset/src/nvkms-dma.c +++ b/src/nvidia-modeset/src/nvkms-dma.c @@ -60,7 +60,6 @@ void nvDmaKickoffEvo(NVEvoChannelPtr pChannel) static void EvoCoreKickoff(NVDmaBufferEvoPtr push_buffer, NvU32 putOffset) { - NVEvoDmaPtr pDma = &push_buffer->dma; NVDevEvoPtr pDevEvo = push_buffer->pDevEvo; int i; @@ -75,55 +74,6 @@ static void EvoCoreKickoff(NVDmaBufferEvoPtr push_buffer, NvU32 putOffset) return; } - /* If needed, copy the chunk to be kicked off into each GPU's FB */ - if (pDma->isBar1Mapping) { - int sd; - - NV0080_CTRL_DMA_FLUSH_PARAMS flushParams = { 0 }; - NvU32 ret; - - NvU32 *endAddress; - - if (putOffset < push_buffer->put_offset) { - /* If we've wrapped, copy to the end of the pushbuffer */ - nvAssert(putOffset == 0); - endAddress = push_buffer->base + push_buffer->offset_max / - sizeof(NvU32); - } else { - endAddress = push_buffer->buffer; - } - - for (sd = 0; sd < pDevEvo->numSubDevices; sd++) { - NvU32 startOffset = push_buffer->put_offset / sizeof(NvU32); - - NvU32 *src = push_buffer->base; - NvU32 *dst = pDma->subDeviceAddress[sd]; - - nvAssert(dst != NULL); - - src += startOffset; - dst += startOffset; - while (src < 
endAddress) { - *dst++ = *src++; - } - } - - /* - * Finally, tell RM to flush so that the data actually lands in FB - * before telling the GPU to fetch it. - */ - flushParams.targetUnit = DRF_DEF(0080_CTRL_DMA, _FLUSH_TARGET, - _UNIT_FB, _ENABLE); - - ret = nvRmApiControl(nvEvoGlobal.clientHandle, - pDevEvo->deviceHandle, - NV0080_CTRL_CMD_DMA_FLUSH, - &flushParams, sizeof(flushParams)); - if (ret != NVOS_STATUS_SUCCESS) { - nvAssert(!"NV0080_CTRL_CMD_DMA_FLUSH failed"); - } - } - #if NVCPU_IS_X86_64 __asm__ __volatile__ ("sfence\n\t" : : : "memory"); #elif NVCPU_IS_FAMILY_ARM @@ -346,7 +296,7 @@ void nvWriteEvoCoreNotifier( } pSubChannel = &pDevEvo->core->notifiersDma[sd]; - pNotifiers = pSubChannel->subDeviceAddress[sd]; + pNotifiers = pSubChannel->cpuAddress; EvoWriteNotifier(pNotifiers + offset, value); } @@ -377,7 +327,7 @@ static NvBool EvoCheckNotifier(const NVDispEvoRec *pDispEvo, pSubChannel = &pDevEvo->core->notifiersDma[sd]; p = &pDevEvo->core->pb; - pNotifier = pSubChannel->subDeviceAddress[sd]; + pNotifier = pSubChannel->cpuAddress; nvAssert(pNotifier != NULL); pNotifier += offset; From 247d7555f913dc29d8ea6a3861d80a3bef3459ad Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 19 Dec 2025 12:46:23 +0200 Subject: [PATCH 17/20] nvkms-evo3: fix struct initialization and add minimal eGPU protection --- src/nvidia-modeset/src/nvkms-evo3.c | 1089 ++++++++------------------- 1 file changed, 298 insertions(+), 791 deletions(-) diff --git a/src/nvidia-modeset/src/nvkms-evo3.c b/src/nvidia-modeset/src/nvkms-evo3.c index dae600af67..e3826e2783 100644 --- a/src/nvidia-modeset/src/nvkms-evo3.c +++ b/src/nvidia-modeset/src/nvkms-evo3.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2010-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2010-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a @@ -39,7 +39,7 @@ #include "nvkms-dpy.h" #include "nvkms-vrr.h" #include "nvkms-ctxdma.h" -#include "nvos.h" +#include "displayport/displayport.h" #include @@ -60,10 +60,6 @@ #include #include -#include - -#define NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C3 \ - (NVBIT64(NvKmsSurfaceMemoryFormatRF16GF16BF16XF16)) #define NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C5 \ (NVBIT64(NvKmsSurfaceMemoryFormatRF16GF16BF16XF16)) @@ -1071,20 +1067,6 @@ static void ConfigureCsc1C5(NVDevEvoPtr pDevEvo, SetCsc11MatrixC5(pChannel, &csc11Matrix); } -static void InitDesktopColorC3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) -{ - NvU32 head; - - for (head = 0; head < pDevEvo->numHeads; head++) { - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_DESKTOP_COLOR(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _RED, 0) | - DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _GREEN, 0) | - DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _BLUE, 0) | - DRF_NUM(C37D, _HEAD_SET_DESKTOP_COLOR, _ALPHA, 255)); - } -} - static void InitDesktopColorC5(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) { NvU32 head; @@ -1107,19 +1089,6 @@ void nvEvoInitChannel3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) InitChannelCapsC3(pDevEvo, pChannel); } -static void EvoInitChannelC3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) -{ - const NvBool isCore = - FLD_TEST_DRF64(_EVO, _CHANNEL_MASK, _CORE, _ENABLE, - pChannel->channelMask); - - nvEvoInitChannel3(pDevEvo, pChannel); - - if (isCore) { - InitDesktopColorC3(pDevEvo, pChannel); - } -} - static void EvoInitChannelC5(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel) { const NvBool isCore = @@ -1333,30 +1302,6 @@ static void EvoInitWindowMapping3(NVDevEvoPtr pDevEvo, } } -static void EvoInitWindowMappingC3(const NVDispEvoRec *pDispEvo, - NVEvoModesetUpdateState *pModesetUpdateState) -{ - NVDevEvoRec *pDevEvo = pDispEvo->pDevEvo; - 
NVEvoUpdateState *updateState = &pModesetUpdateState->updateState; - NVEvoChannelPtr pChannel = pDevEvo->core; - NvU32 win; - - nvPushEvoSubDevMaskDisp(pDispEvo); - - nvUpdateUpdateState(pDevEvo, updateState, pChannel); - - EvoInitWindowMapping3(pDevEvo, - pModesetUpdateState); - - // Set window usage bounds - for (win = 0; win < pDevEvo->numWindows; win++) { - nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1); - /* XXXnvdisplay: window scaling */ - nvDmaSetEvoMethodData(pChannel, NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); - } - nvPopEvoSubDevMask(pDevEvo); -} - void nvEvoInitWindowMappingC5(const NVDispEvoRec *pDispEvo, NVEvoModesetUpdateState *pModesetUpdateState) { @@ -1542,17 +1487,6 @@ static void EvoSetRasterParams3(NVDevEvoPtr pDevEvo, int head, nvDmaSetEvoMethodData(pChannel, hdmiStereoCtrl); } -static void EvoSetRasterParamsC3(NVDevEvoPtr pDevEvo, int head, - const NVHwModeTimingsEvo *pTimings, - const NvU8 tilePosition, - const NVDscInfoEvoRec *pDscInfo, - const NVEvoColorRec *pOverscanColor, - NVEvoUpdateState *updateState) -{ - nvAssert(tilePosition == 0); - EvoSetRasterParams3(pDevEvo, head, pTimings, pOverscanColor, updateState); -} - static void EvoSetRasterParams5(NVDevEvoPtr pDevEvo, int head, const NVHwModeTimingsEvo *pTimings, const NvU8 tilePosition, @@ -1654,55 +1588,6 @@ static void EvoSetRasterParamsC6(NVDevEvoPtr pDevEvo, int head, nvDmaSetEvoMethodData(pChannel, rasterHBlankDelay); } -static void EvoSetProcAmpC3(NVDispEvoPtr pDispEvo, const NvU32 head, - NVEvoUpdateState *updateState) -{ - NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; - NVEvoChannelPtr pChannel = pDevEvo->core; - const NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head]; - NvU8 colorSpace; - NvU32 dynRange; - - /* These methods should only apply to a single pDpyEvo */ - nvAssert(pDevEvo->subDevMaskStackDepth > 0); - - nvUpdateUpdateState(pDevEvo, updateState, pChannel); - - // These NVT defines match the HEAD_SET_PROCAMP ones. 
- ct_assert(NVT_COLORIMETRY_RGB == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_RGB); - ct_assert(NVT_COLORIMETRY_YUV_601 == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_601); - ct_assert(NVT_COLORIMETRY_YUV_709 == NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_709); - ct_assert(NVT_COLOR_RANGE_FULL == NVC37D_HEAD_SET_PROCAMP_RANGE_COMPRESSION_DISABLE); - ct_assert(NVT_COLOR_RANGE_LIMITED == NVC37D_HEAD_SET_PROCAMP_RANGE_COMPRESSION_ENABLE); - - if (pHeadState->procAmp.colorimetry == NVT_COLORIMETRY_BT2020RGB) { - colorSpace = NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_RGB; - } else if (pHeadState->procAmp.colorimetry == NVT_COLORIMETRY_BT2020YCC) { - colorSpace = NVC37D_HEAD_SET_PROCAMP_COLOR_SPACE_YUV_2020; - } else { - colorSpace = pHeadState->procAmp.colorimetry; - } - - if (pHeadState->procAmp.colorRange == NVT_COLOR_RANGE_FULL) { - dynRange = DRF_DEF(C37D, _HEAD_SET_PROCAMP, _DYNAMIC_RANGE, _VESA); - } else { - nvAssert(pHeadState->procAmp.colorRange == NVT_COLOR_RANGE_LIMITED); - dynRange = DRF_DEF(C37D, _HEAD_SET_PROCAMP, _DYNAMIC_RANGE, _CEA); - } - - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_PROCAMP(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37D, _HEAD_SET_PROCAMP, _COLOR_SPACE, colorSpace) | - DRF_DEF(C37D, _HEAD_SET_PROCAMP, _CHROMA_LPF, _DISABLE) | - DRF_NUM(C37D, _HEAD_SET_PROCAMP, _SAT_COS, - pHeadState->procAmp.satCos) | - DRF_NUM(C37D, _HEAD_SET_PROCAMP, _SAT_SINE, 0) | - dynRange | - DRF_NUM(C37D, _HEAD_SET_PROCAMP, _RANGE_COMPRESSION, - pHeadState->procAmp.colorRange) | - DRF_DEF(C37D, _HEAD_SET_PROCAMP, _BLACK_LEVEL, _GRAPHICS)); -} - static const struct NvKmsCscMatrix RGBToLimitedRangeYCbCrRec2020Matrix = {{ { 0x7000, 0x1f9900, 0x1ff700, 0x8000 }, { 0x3988, 0x947c, 0xcfc, 0x1000 }, @@ -2101,31 +1986,6 @@ void nvEvoSetControlC3(NVDevEvoPtr pDevEvo, int sd) } } - /* - * GV100 HW bug 2062029 WAR - * - * GV100 always holds the external fliplock line low as if - * NVC37D_SET_CONTROL_FLIP_LOCK_PIN was enabled. 
To work around this, - * the GV100 VBIOS initializes the fliplock GPIOs to be software - * controlled (forced off). The following rmctrl needs to be called to - * switch HW control of the fliplock GPIOs back on whenever external - * fliplock is enabled. - */ - { - NVC370_CTRL_SET_SWAPRDY_GPIO_WAR_PARAMS params = { }; - - params.base.subdeviceIndex = pEvoSubDev->subDeviceInstance; - params.bEnable = (data != 0); - - if (nvRmApiControl( - nvEvoGlobal.clientHandle, - pDevEvo->displayHandle, - NVC370_CTRL_CMD_SET_SWAPRDY_GPIO_WAR, - ¶ms, sizeof(params)) != NVOS_STATUS_SUCCESS) { - nvEvoLogDevDebug(pDevEvo, EVO_LOG_ERROR, "Failed to override fliplock GPIO"); - } - } - nvDmaSetStartEvoMethod(pChannel, NVC37D_SET_CONTROL, 1); nvDmaSetEvoMethodData(pChannel, data); } @@ -2493,34 +2353,6 @@ static void EvoORSetControlC6(NVDevEvoPtr pDevEvo, } } -static void EvoHeadSetControlORC3(NVDevEvoPtr pDevEvo, - const int head, - const NVHwModeTimingsEvo *pTimings, - const enum nvKmsPixelDepth pixelDepth, - const NvBool colorSpaceOverride, - NVEvoUpdateState *updateState) -{ - NVEvoChannelPtr pChannel = pDevEvo->core; - const NvU32 hwPixelDepth = nvEvoGetPixelDepthC3(pixelDepth); - const NvU16 colorSpaceFlag = nvEvo1GetColorSpaceFlag(pDevEvo, - colorSpaceOverride); - - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTROL_OUTPUT_RESOURCE(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _CRC_MODE, _COMPLETE_RASTER) | - (pTimings->hSyncPol ? - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _HSYNC_POLARITY, _NEGATIVE_TRUE) : - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _HSYNC_POLARITY, _POSITIVE_TRUE)) | - (pTimings->vSyncPol ? - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _VSYNC_POLARITY, _NEGATIVE_TRUE) : - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _VSYNC_POLARITY, _POSITIVE_TRUE)) | - DRF_NUM(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _PIXEL_DEPTH, hwPixelDepth) | - (colorSpaceOverride ? 
- (DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_OVERRIDE, _ENABLE) | - DRF_NUM(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_FLAG, colorSpaceFlag)) : - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_RESOURCE, _COLOR_SPACE_OVERRIDE, _DISABLE))); -} - static void EvoHeadSetControlORC5(NVDevEvoPtr pDevEvo, const int head, const NVHwModeTimingsEvo *pTimings, @@ -2717,13 +2549,6 @@ static NvBool EvoSetUsageBounds3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, return needCoreUpdate; } -static NvBool EvoSetUsageBoundsC3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, - const struct NvKmsUsageBounds *pUsage, - NVEvoUpdateState *updateState) -{ - return EvoSetUsageBounds3(pDevEvo, sd, head, pUsage, updateState); -} - NvBool nvEvoSetUsageBoundsC5(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, const struct NvKmsUsageBounds *pUsage, NVEvoUpdateState *updateState) @@ -2880,9 +2705,9 @@ static void UpdateWindowIMM(NVEvoChannelPtr pChannel, } updateImm |= releaseElv ? DRF_DEF(C37B, _UPDATE, _RELEASE_ELV, _TRUE) : 0; - nvDmaSetStartEvoMethod(pChannel->imm.u.dma, NVC37B_UPDATE, 1); - nvDmaSetEvoMethodData(pChannel->imm.u.dma, updateImm); - nvDmaKickoffEvo(pChannel->imm.u.dma); + nvDmaSetStartEvoMethod(pChannel->imm.dma, NVC37B_UPDATE, 1); + nvDmaSetEvoMethodData(pChannel->imm.dma, updateImm); + nvDmaKickoffEvo(pChannel->imm.dma); } } @@ -3488,51 +3313,6 @@ nvEvoIsModePossibleC3(NVDispEvoPtr pDispEvo, nvPreallocRelease(pDevEvo, PREALLOC_TYPE_IMP_PARAMS); } -void nvEvoPrePostIMPC3(NVDispEvoPtr pDispEvo, NvBool isPre) -{ - /* Nothing to do on nvdisplay -- pre/post IMP calls are not required. */ -} - -static void -EvoFlipC3(NVDevEvoPtr pDevEvo, - NVEvoChannelPtr pChannel, - const NVFlipChannelEvoHwState *pHwState, - NVEvoUpdateState *updateState, - NvBool bypassComposition); - -/* - * Returns TRUE iff the CSC should be enabled (i.e., the matrix is not the - * identity matrix). 
- */ -static NvBool SetCscMatrixC3(NVEvoChannelPtr pChannel, - const struct NvKmsCscMatrix *matrix) -{ - NvU32 method = NVC37E_SET_CSC_RED2RED; - int y; - - if (nvIsCscMatrixIdentity(matrix)) { - return FALSE; - } - - for (y = 0; y < 3; y++) { - int x; - - for (x = 0; x < 4; x++) { - // Use DRF_NUM to truncate client-supplied values that are out of - // range. - NvU32 val = DRF_NUM(C37E, _SET_CSC_RED2RED, _COEFF, - matrix->m[y][x]); - - nvDmaSetStartEvoMethod(pChannel, method, 1); - nvDmaSetEvoMethodData(pChannel, val); - - method += 4; - } - } - - return TRUE; -} - static void SetCscMatrixC5Wrapper(NVEvoChannelPtr pChannel, const struct NvKmsCscMatrix *matrix, NvU32 coeffMethod, NvU32 controlMethod, @@ -3610,87 +3390,6 @@ static void SetCsc11MatrixC5(NVEvoChannelPtr pChannel, DRF_DEF(C57E, _SET_CSC11CONTROL, _ENABLE, _DISABLE)); } -/* - * WAR for GV100 HW bug 1978592: - * - * Timestamped flips allow SW to specify the earliest time that the next UPDATE - * will complete. Due to a HW bug, GV100 waits for the timestamp in the ARMED - * state (i.e. the timestamps that were pushed in the previous UPDATE) instead - * of the timestamp in the ASSEMBLY state (the time we want to postpone this - * flip until). - * - * This WAR inserts an additional UPDATE to push the timestamp from ASSEMBLY to - * ARMED while changing no other state, so the following normal UPDATE can - * wait for the correct timestamp. 
- * - * This update needs to have the following characteristics: - * - * - MIN_PRESENT_INTERVAL 0 - * - TIMESTAMP_MODE _ENABLE - * - All other SET_PRESENT_CONTROL fields unmodified from previous UPDATE - * - SET_UPDATE_TIMESTAMP (target timestamp) - * - RELEASE_ELV _FALSE - * - Non-interlocked - * - Non-fliplocked - */ -static void -InsertAdditionalTimestampFlip(NVDevEvoPtr pDevEvo, - NVEvoChannelPtr pChannel, - const NVFlipChannelEvoHwState *pHwState, - NVEvoUpdateState *updateState) -{ - NvU32 presentControl = pChannel->oldPresentControl; - - /* This hardware bug is only present on GV100 which uses window - * class C37E. */ - nvAssert(pChannel->hwclass == NVC37E_WINDOW_CHANNEL_DMA); - - nvAssert(pHwState->timeStamp != 0); - - /* - * Update the necessary fields in SET_PRESENT_CONTROL without modifying - * the existing values by using the cached SET_PRESENT_CONTROL values - * from the previous update. - * - * Note that BEGIN_MODE must not be changed here; even though BEGIN_MODE - * may currently be NON_TEARING, a NON_TEARING + MIN_PRESENT_INTERVAL 0 - * flip will be correctly collapsed with the surrounding - * MIN_PRESENT_INTERVAL 1 flips. If we were to change BEGIN_MODE to - * IMMEDIATE, this would cause an additional delay due to the transition - * from NON_TEARING to IMMEDIATE. 
- */ - presentControl = FLD_SET_DRF_NUM(C37E, _SET_PRESENT_CONTROL, - _MIN_PRESENT_INTERVAL, - 0, presentControl); - presentControl = FLD_SET_DRF(C37E, _SET_PRESENT_CONTROL, - _TIMESTAMP_MODE, - _ENABLE, presentControl); - - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PRESENT_CONTROL, 1); - nvDmaSetEvoMethodData(pChannel, presentControl); - - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_UPDATE_TIMESTAMP_LO, 2); - nvDmaSetEvoMethodData(pChannel, NvU64_LO32(pHwState->timeStamp)); - nvDmaSetEvoMethodData(pChannel, NvU64_HI32(pHwState->timeStamp)); - - // Issue non-interlocked, non-fliplocked, non-ReleaseElv UPDATE - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_INTERLOCK_FLAGS, 1); - nvDmaSetEvoMethodData(pChannel, 0); - - nvDmaSetStartEvoMethod(pChannel, - NVC37E_SET_WINDOW_INTERLOCK_FLAGS, - 1); - nvDmaSetEvoMethodData(pChannel, 0); - - nvDmaSetStartEvoMethod(pChannel, NVC37E_UPDATE, 1); - nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C37E, _UPDATE, _RELEASE_ELV, _FALSE) | - DRF_NUM(C37E, _UPDATE, _FLIP_LOCK_PIN, - NVC37E_UPDATE_FLIP_LOCK_PIN_LOCK_PIN_NONE) | - DRF_DEF(C37E, _UPDATE, _INTERLOCK_WITH_WIN_IMM, - _DISABLE)); -} - static void EvoProgramSemaphore3(NVDevEvoPtr pDevEvo, NVEvoChannelPtr pChannel, @@ -3976,15 +3675,6 @@ EvoFlipC3Common(NVDevEvoPtr pDevEvo, nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PRESENT_CONTROL, 1); nvDmaSetEvoMethodData(pChannel, presentControl); - /* - * GV100 timestamped flips need a duplicate update which only changes - * TIMESTAMP_MODE and MIN_PRESENT_INTERVAL fields in SET_PRESENT_CONTROL; - * to allow updating these fields without changing anything else in - * SET_PRESENT_CONTROL, cache the values we sent in previous flips here. - * (bug 1990958) - */ - pChannel->oldPresentControl = presentControl; - /* Set the surface parameters. 
*/ FOR_ALL_EYES(eye) { const NVSurfaceEvoRec *pSurfaceEvoPerEye = pHwState->pSurfaceEvo[eye]; @@ -4410,80 +4100,6 @@ NVSurfaceEvoPtr EvoGetLutSurface3(NVDevEvoPtr pDevEvo, return pDispEvo->headState[head].lut.pCurrSurface; } -static void -EvoFlipC3(NVDevEvoPtr pDevEvo, - NVEvoChannelPtr pChannel, - const NVFlipChannelEvoHwState *pHwState, - NVEvoUpdateState *updateState, - NvBool bypassComposition) -{ - NvBool enableCSC, swapUV, flip3Return; - enum NvKmsSurfaceMemoryFormat format; - /* - * lutSize and isLutModeVss are unused, since we only support 1025 and - * non-VSS on Volta, but we declare them to pass to EvoGetLutSurface3. - * - * TODO: Maybe validate the resulting values? - */ - NvU32 lutSize = NV_NUM_EVO_LUT_ENTRIES; - NvU64 offset = offsetof(NVEvoLutDataRec, base); - NvBool isLutModeVss = FALSE; - NVSurfaceEvoPtr pLutSurfaceEvo = EvoGetLutSurface3(pDevEvo, pChannel, pHwState, - &lutSize, &offset, &isLutModeVss); - - if (pHwState->timeStamp != 0) { - InsertAdditionalTimestampFlip(pDevEvo, pChannel, pHwState, - updateState); - } - - flip3Return = EvoFlipC3Common(pDevEvo, pChannel, pHwState, updateState); - - /* program semaphore */ - EvoProgramSemaphore3(pDevEvo, pChannel, pHwState); - - if (!flip3Return) { - return; - } - - format = pHwState->pSurfaceEvo[NVKMS_LEFT]->format; - - enableCSC = SetCscMatrixC3(pChannel, &pHwState->cscMatrix); - swapUV = IsSurfaceFormatUVSwapped(format); - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_PARAMS, 1); - nvDmaSetEvoMethodData(pChannel, - (enableCSC ? DRF_DEF(C37E, _SET_PARAMS, _CSC, _ENABLE) : - DRF_DEF(C37E, _SET_PARAMS, _CSC, _DISABLE)) | - DRF_NUM(C37E, _SET_PARAMS, _FORMAT, nvHwFormatFromKmsFormatC3(format)) | - (swapUV ? 
DRF_DEF(C37E, _SET_PARAMS, _SWAP_UV, _ENABLE) : - DRF_DEF(C37E, _SET_PARAMS, _SWAP_UV, _DISABLE)) | - DRF_DEF(C37E, _SET_PARAMS, _UNDERREPLICATE, _DISABLE)); - - if (pLutSurfaceEvo) { - const NvU32 ctxDma = pLutSurfaceEvo->planes[0].surfaceDesc.ctxDmaHandle; - - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTROL_INPUT_LUT, 1); - nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _SIZE, _SIZE_1025) | - DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _RANGE, _UNITY) | - DRF_DEF(C37E, _SET_CONTROL_INPUT_LUT, _OUTPUT_MODE, _INDEX)); - - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_OFFSET_INPUT_LUT, 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37E, _SET_OFFSET_INPUT_LUT, _ORIGIN, offset)); - - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTEXT_DMA_INPUT_LUT, 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37E, _SET_CONTEXT_DMA_INPUT_LUT, _HANDLE, ctxDma)); - } else { - nvDmaSetStartEvoMethod(pChannel, NVC37E_SET_CONTEXT_DMA_INPUT_LUT, 1); - nvDmaSetEvoMethodData(pChannel, 0); - } - - UpdateCompositionC3(pDevEvo, pChannel, - &pHwState->composition, updateState, - format); -} - static void EvoSetupPQEotfBaseLutC5(NVEvoLutDataRec *pData, enum NvKmsLUTState *lutState, NvU32 *lutSize, @@ -4857,14 +4473,6 @@ static void UpdateComposition(NVDevEvoPtr pDevEvo, #undef UPDATE_COMPONENT } -static void EvoFlipTransitionWARC3(NVDevEvoPtr pDevEvo, NvU32 sd, NvU32 head, - const NVEvoSubDevHeadStateRec *pSdHeadState, - const NVFlipEvoHwState *pFlipState, - NVEvoUpdateState *updateState) -{ - /* Nothing to do for Volta */ -} - /* * Hardware bug 2193096 requires that we send special software methods around * a window channel update that transitions from NULL ctxdma to non-NULL or @@ -5093,21 +4701,6 @@ UpdateCompositionC5(NVDevEvoPtr pDevEvo, } } -/* - * The LUT entries in INDEX_1025_UNITY_RANGE have 16 bits, with the - * black value at 24576, and the white at 49151. Since the effective - * range is 16384, we treat this as a 14-bit LUT. 
However, we need to - * clear the low 3 bits to WAR hardware bug 813188. This gives us - * 14-bit LUT values, but only 11 bits of precision. - * XXXnvdisplay: Bug 813188 is supposed to be fixed on NVDisplay; can we expose - * more precision? - */ -static inline NvU16 ColorToLUTEntry(NvU16 val) -{ - const NvU16 val14bit = val >> 2; - return (val14bit & ~7) + 24576; -} - /* * Unlike earlier EVO implementations, the INDEX mode of the input LUT on * NVDisplay is straightforward: the value of the input component is expanded @@ -5120,8 +4713,13 @@ static inline NvU32 GetLUTIndex(int i, int componentSize) return i << (10 - componentSize); } -static void -EvoFillLUTSurfaceC3(NVEvoLutEntryRec *pLUTBuffer, +static inline float16_t ColorToFp16(NvU16 val, float32_t maxf) +{ + return nvUnormToFp16(val, maxf); +} + +void +nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, const NvU16 *red, const NvU16 *green, const NvU16 *blue, @@ -5129,6 +4727,7 @@ EvoFillLUTSurfaceC3(NVEvoLutEntryRec *pLUTBuffer, { int i; NvU32 rSize, gSize, bSize; + const float32_t maxf = ui32_to_f32(0xffff); switch (depth) { case 15: @@ -5150,123 +4749,35 @@ EvoFillLUTSurfaceC3(NVEvoLutEntryRec *pLUTBuffer, return; } + // Skip the VSS header + pLUTBuffer += NV_LUT_VSS_HEADER_SIZE; + for (i = 0; i < nColorMapEntries; i++) { if (i < (1 << rSize)) { - pLUTBuffer[GetLUTIndex(i, rSize)].Red = ColorToLUTEntry(red[i]); + pLUTBuffer[GetLUTIndex(i, rSize)].Red = + ColorToFp16(red[i], maxf).v; } if (i < (1 << gSize)) { - pLUTBuffer[GetLUTIndex(i, gSize)].Green = ColorToLUTEntry(green[i]); + pLUTBuffer[GetLUTIndex(i, gSize)].Green = + ColorToFp16(green[i], maxf).v; } if (i < (1 << bSize)) { - pLUTBuffer[GetLUTIndex(i, bSize)].Blue = ColorToLUTEntry(blue[i]); + pLUTBuffer[GetLUTIndex(i, bSize)].Blue = + ColorToFp16(blue[i], maxf).v; } } } -static inline float16_t ColorToFp16(NvU16 val, float32_t maxf) +static void EvoSetupPQOetfOutputLutC5(NVEvoLutDataRec *pData, + enum NvKmsLUTState *lutState, + NvU32 *lutSize, + 
NvBool *isLutModeVss) { - return nvUnormToFp16(val, maxf); -} - -void -nvEvoFillLUTSurfaceC5(NVEvoLutEntryRec *pLUTBuffer, - const NvU16 *red, - const NvU16 *green, - const NvU16 *blue, - int nColorMapEntries, int depth) -{ - int i; - NvU32 rSize, gSize, bSize; - const float32_t maxf = ui32_to_f32(0xffff); - - switch (depth) { - case 15: - rSize = gSize = bSize = 5; - break; - case 16: - rSize = bSize = 5; - gSize = 6; - break; - case 8: - case 24: - rSize = gSize = bSize = 8; - break; - case 30: - rSize = gSize = bSize = 10; - break; - default: - nvAssert(!"invalid depth"); - return; - } - - // Skip the VSS header - pLUTBuffer += NV_LUT_VSS_HEADER_SIZE; - - for (i = 0; i < nColorMapEntries; i++) { - if (i < (1 << rSize)) { - pLUTBuffer[GetLUTIndex(i, rSize)].Red = - ColorToFp16(red[i], maxf).v; - } - if (i < (1 << gSize)) { - pLUTBuffer[GetLUTIndex(i, gSize)].Green = - ColorToFp16(green[i], maxf).v; - } - if (i < (1 << bSize)) { - pLUTBuffer[GetLUTIndex(i, bSize)].Blue = - ColorToFp16(blue[i], maxf).v; - } - } -} - -static void EvoSetOutputLutC3(NVDevEvoPtr pDevEvo, - NvU32 sd, - NvU32 head, - const NVFlipLutHwState *pOutputLut, - NvU32 fpNormScale, - NVEvoUpdateState *updateState, - NvBool bypassComposition) -{ - NVEvoChannelPtr pChannel = pDevEvo->core; - NvBool enableLut = (pOutputLut->pLutSurfaceEvo != NULL); - NvU64 offset = enableLut ? pOutputLut->offset : offsetof(NVEvoLutDataRec, output); - NvU32 ctxdma = enableLut ? 
- pOutputLut->pLutSurfaceEvo->planes[0].surfaceDesc.ctxDmaHandle : 0; - - nvUpdateUpdateState(pDevEvo, updateState, pChannel); - - nvAssert((offset & 0xff) == 0); - - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTROL_OUTPUT_LUT(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _SIZE, _SIZE_1025) | - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _RANGE, _UNITY) | - DRF_DEF(C37D, _HEAD_SET_CONTROL_OUTPUT_LUT, _OUTPUT_MODE, _INTERPOLATE)); - - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_OFFSET_OUTPUT_LUT(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37D, _HEAD_SET_OFFSET_OUTPUT_LUT, _ORIGIN, offset >> 8)); - - /* Set the ctxdma for the output LUT */ - - if (!enableLut) { - /* Class C37D has no separate enable flag. */ - ctxdma = 0; - } - nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_CONTEXT_DMA_OUTPUT_LUT(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_NUM(C37D, _HEAD_SET_CONTEXT_DMA_OUTPUT_LUT, _HANDLE, ctxdma)); -} - -static void EvoSetupPQOetfOutputLutC5(NVEvoLutDataRec *pData, - enum NvKmsLUTState *lutState, - NvU32 *lutSize, - NvBool *isLutModeVss) -{ - NvU32 lutDataStartingIndex = NV_LUT_VSS_HEADER_SIZE; - NvU32 numOetfPQ512Entries = ARRAY_LEN(OetfPQ512Entries); - NvU32 oetfTableIdx; - NvU64 vssHead = 0; - NvU32 lutEntryCounter = 0, i; + NvU32 lutDataStartingIndex = NV_LUT_VSS_HEADER_SIZE; + NvU32 numOetfPQ512Entries = ARRAY_LEN(OetfPQ512Entries); + NvU32 oetfTableIdx; + NvU64 vssHead = 0; + NvU32 lutEntryCounter = 0, i; // Skip LUT data init if already done if (*lutState == NvKmsLUTStatePQ) { @@ -5681,70 +5192,6 @@ static void EvoParseCapabilityNotifier3(NVDevEvoPtr pDevEvo, } } -static void EvoParseCapabilityNotifierC3(NVDevEvoPtr pDevEvo, - NVEvoSubDevPtr pEvoSubDev, - volatile const NvU32 *pCaps) -{ - NVEvoCapabilitiesPtr pEvoCaps = &pEvoSubDev->capabilities; - NvU32 i; - - // Miscellaneous capabilities - pEvoCaps->misc.supportsSemiPlanar = FALSE; - pEvoCaps->misc.supportsPlanar = FALSE; - 
pEvoCaps->misc.supportsDSI = FALSE; - - // Heads - ct_assert(ARRAY_LEN(pEvoCaps->head) >= NVC373_HEAD_CAPA__SIZE_1); - for (i = 0; i < NVC373_HEAD_CAPA__SIZE_1; i++) { - NVEvoHeadCaps *pHeadCaps = &pEvoCaps->head[i]; - - /* XXXnvdisplay: add caps for hsat, ocsc, lut */ - if (pHeadCaps->usable) { - NVEvoScalerCaps *pScalerCaps = &pHeadCaps->scalerCaps; - - pScalerCaps->present = - FLD_TEST_DRF(C373, _HEAD_CAPA, _SCALER, _TRUE, - nvEvoReadCapReg3(pCaps, NVC373_HEAD_CAPA(i))); - if (pScalerCaps->present) { - NVEvoScalerTapsCaps *pTapsCaps; - NvU32 tmp; - - /* - * Note that some of these may be zero (e.g., only 2-tap 444 - * mode is supported on GV100, so the rest are all zero. - * - * Downscaling by more than 2x in either direction is not - * allowed by state error check for both horizontal and - * vertical 2-tap scaling. - * - * Downscaling by more than 4x in either direction is not - * allowed by argument error check (and state error check) for - * 5-tap scaling. - * - * 5-tap scaling is not implemented on GV100, though, so we - * should never see numTaps == 5 on GV100, and we can just use a - * max of 2 here all the time. - */ - - /* 2-tap capabilities */ - tmp = nvEvoReadCapReg3(pCaps, NVC373_HEAD_CAPD(i)); - pTapsCaps = &pScalerCaps->taps[NV_EVO_SCALER_2TAPS]; - pTapsCaps->maxVDownscaleFactor = NV_EVO_SCALE_FACTOR_2X; - pTapsCaps->maxHDownscaleFactor = NV_EVO_SCALE_FACTOR_2X; - pTapsCaps->maxPixelsVTaps = - NV_MAX(DRF_VAL(C373, _HEAD_CAPD, _MAX_PIXELS_2TAP422, tmp), - DRF_VAL(C373, _HEAD_CAPD, _MAX_PIXELS_2TAP444, tmp)); - - /* - * Note that there is a capability register for 1TAP, but there - * doesn't appear to be a way to select 1-tap scaling in the - * channel methods, so don't bother reading it for now. 
- */ - } - } - } -} - static void EvoParsePrecompScalerCaps5(NVEvoCapabilitiesPtr pEvoCaps, volatile const NvU32 *pCaps) { @@ -6049,17 +5496,10 @@ static void SetHDRLayerCaps(NVDevEvoPtr pDevEvo) pDevEvo->caps.layerCaps[numLayers[head]].supportsICtCp = pWinCaps->tmoPresent; - if (pDevEvo->hal->caps.needDefaultLutSurface) { - /* Turing+ uses an FP16, linear 64-segment VSS supported ILUT */ - FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, - NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LINEAR, - 64, 1025, NVKMS_LUT_FORMAT_FP16); - } else { - /* Volta uses a UNORM14_WAR_813188, non-VSS ILUT */ - FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, - NVKMS_LUT_VSS_NOT_SUPPORTED, NVKMS_LUT_VSS_TYPE_NONE, - 0, 1025, NVKMS_LUT_FORMAT_UNORM14_WAR_813188); - } + /* Turing+ uses an FP16, linear 64-segment VSS supported ILUT */ + FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].ilut, TRUE, + NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LINEAR, + 64, 1025, NVKMS_LUT_FORMAT_FP16); if (pWinCaps->tmoPresent) { FillLUTCaps(&pDevEvo->caps.layerCaps[numLayers[head]].tmo, TRUE, @@ -6079,17 +5519,10 @@ static void SetHDRLayerCaps(NVDevEvoPtr pDevEvo) numLayers[head]++; } - if (pDevEvo->hal->caps.hasUnorm16OLUT) { - /* Turing+ uses a UNORM16, logarithmic 33-segment VSS supported OLUT */ - FillLUTCaps(&pDevEvo->caps.olut, TRUE, - NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LOGARITHMIC, - 33, 1025, NVKMS_LUT_FORMAT_UNORM16); - } else { - /* Volta uses a UNORM14_WAR_813188, non-VSS OLUT */ - FillLUTCaps(&pDevEvo->caps.olut, TRUE, - NVKMS_LUT_VSS_NOT_SUPPORTED, NVKMS_LUT_VSS_TYPE_NONE, - 0, 1025, NVKMS_LUT_FORMAT_UNORM14_WAR_813188); - } + /* Turing+ uses a UNORM16, logarithmic 33-segment VSS supported OLUT */ + FillLUTCaps(&pDevEvo->caps.olut, TRUE, + NVKMS_LUT_VSS_SUPPORTED, NVKMS_LUT_VSS_TYPE_LOGARITHMIC, + 33, 1025, NVKMS_LUT_FORMAT_UNORM16); } NvBool nvEvoGetCapabilities3(NVDevEvoPtr pDevEvo, @@ -6269,14 +5702,6 @@ NvBool 
nvEvoGetCapabilities3(NVDevEvoPtr pDevEvo, return ret; } -static NvBool EvoGetCapabilitiesC3(NVDevEvoPtr pDevEvo) -{ - return nvEvoGetCapabilities3(pDevEvo, EvoParseCapabilityNotifierC3, - nvHwFormatFromKmsFormatC3, - NVC373_DISP_CAPABILITIES, - sizeof(_NvC373DispCapabilities)); -} - static NvBool EvoGetCapabilitiesC5(NVDevEvoPtr pDevEvo) { return nvEvoGetCapabilities3(pDevEvo, EvoParseCapabilityNotifierC5, @@ -6313,7 +5738,6 @@ static void EvoSetViewportPointInC3(NVDevEvoPtr pDevEvo, const int head, } static void EvoSetOutputScalerC3(const NVDispEvoRec *pDispEvo, const NvU32 head, - const NvU32 imageSharpeningValue, NVEvoUpdateState *updateState) { NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; @@ -6427,28 +5851,6 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head, return scalingUsageBounds.vUpscalingAllowed; } -static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head, - const NVHwModeViewPortEvo *pViewPortMin, - const NVHwModeViewPortEvo *pViewPort, - const NVHwModeViewPortEvo *pViewPortMax, - NVEvoUpdateState *updateState) -{ - NVEvoChannelPtr pChannel = pDevEvo->core; - NvBool verticalUpscalingAllowed = - EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort, - pViewPortMax, updateState, - NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3); - - nvDmaSetStartEvoMethod(pChannel, - NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1); - nvDmaSetEvoMethodData(pChannel, - DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _CURSOR, _USAGE_W256_H256) | - DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _OUTPUT_LUT, _USAGE_1025) | - (verticalUpscalingAllowed ? 
- DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) : - DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE))); -} - static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head, const NVHwModeViewPortEvo *pViewPortMin, const NVHwModeViewPortEvo *pViewPort, @@ -6777,19 +6179,6 @@ static NvBool EvoValidateWindowFormatWrapper( return pValSrcRect(sourceFetchRect, format); } -static NvBool EvoValidateWindowFormatC3( - const enum NvKmsSurfaceMemoryFormat format, - const struct NvKmsRect *sourceFetchRect, - NvU32 *hwFormatOut) -{ - return EvoValidateWindowFormatWrapper( - format, - nvHwFormatFromKmsFormatC3, - sourceFetchRect, - ValidateWindowFormatSourceRectC3, - hwFormatOut); -} - static NvBool EvoValidateWindowFormatC5( const enum NvKmsSurfaceMemoryFormat format, const struct NvKmsRect *sourceFetchRect, @@ -6868,7 +6257,10 @@ static void EvoSetDitherC3(NVDispEvoPtr pDispEvo, const int head, ditherControl |= DRF_DEF(C37D, _HEAD_SET_DITHER_CONTROL, _BITS, _TO_8_BITS); break; - /* XXXnvdisplay: Support DITHER_TO_{10,12}_BITS (see also bug 1729668). */ + case NV0073_CTRL_SPECIFIC_OR_DITHER_TYPE_10_BITS: + ditherControl |= + DRF_DEF(C37D, _HEAD_SET_DITHER_CONTROL, _BITS, _TO_10_BITS); + break; default: nvAssert(!"Unknown ditherType"); // Fall through @@ -7441,10 +6833,10 @@ void nvEvoSetImmPointOutC3(NVDevEvoPtr pDevEvo, NVEvoUpdateState *updateState, NvU16 x, NvU16 y) { - NVEvoChannelPtr pImmChannel = pChannel->imm.u.dma; + NVEvoChannelPtr pImmChannel = pChannel->imm.dma; nvAssert((pChannel->channelMask & NV_EVO_CHANNEL_MASK_WINDOW_ALL) != 0); - nvAssert(pChannel->imm.type == NV_EVO_IMM_CHANNEL_DMA); + nvAssert(pImmChannel != NULL); /* This should only be called for one GPU at a time, since the * pre-nvdisplay version uses PIO and cannot broadcast. 
*/ @@ -7478,7 +6870,6 @@ static void EvoStartHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, const enum nvKmsTimingsProtocol protocol, const NvU32 orIndex, NvU32 head, - NvU32 sd, NVEvoUpdateState *updateState) { const NvU32 winChannel = head << 1; @@ -7526,7 +6917,7 @@ static void EvoStartHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, DRF_DEF(C37D, _HEAD_SET_CRC_CONTROL, _CRC_DURING_SNOOZE, _DISABLE)); /* Reset the CRC notifier */ - nvEvoResetCRC32Notifier(pDma->subDeviceAddress[sd], + nvEvoResetCRC32Notifier(pDma->cpuAddress, NVC37D_NOTIFIER_CRC_STATUS_0, DRF_BASE(NVC37D_NOTIFIER_CRC_STATUS_0_DONE), NVC37D_NOTIFIER_CRC_STATUS_0_DONE_FALSE); @@ -7564,7 +6955,6 @@ static void EvoStopHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, * * \param[in] pDevEvo NVKMS device pointer * \param[in] pDma Pointer to DMA-mapped memory - * \param[in] sd Subdevice index * \param[in] entry_count Number of independent frames to read CRCs from * \param[out] crc32 Contains pointers to CRC output arrays * \param[out] numCRC32 Number of CRC frames successfully read from DMA @@ -7574,12 +6964,11 @@ static void EvoStopHeadCRC32CaptureC3(NVDevEvoPtr pDevEvo, */ NvBool nvEvoQueryHeadCRC32_C3(NVDevEvoPtr pDevEvo, NVEvoDmaPtr pDma, - NvU32 sd, NvU32 entry_count, CRC32NotifierCrcOut *crc32, NvU32 *numCRC32) { - volatile NvU32 *pCRC32Notifier = pDma->subDeviceAddress[sd]; + volatile NvU32 *pCRC32Notifier = pDma->cpuAddress; const NvU32 entry_stride = NVC37D_NOTIFIER_CRC_CRC_ENTRY1_21 - NVC37D_NOTIFIER_CRC_CRC_ENTRY0_13; // Define how many/which variables to read from each CRCNotifierEntry struct @@ -8036,6 +7425,256 @@ static void EvoSetMergeModeC5(const NVDispEvoRec *pDispEvo, nvPopEvoSubDevMask(pDevEvo); } +/* + * The 'type' the timing library writes into the NVT_INFOFRAME_HEADER + * structure is not the type that the HDMI library expects to see in its + * NvHdmiPkt_SetupAdvancedInfoframe call; those are NVHDMIPKT_TYPE_*. + * Map the timing library infoframe type to the + * NVHDMIPKT_TYPE_SHARED_GENERIC*. 
+ */ +static NvBool NvtToHdmiLibGenericInfoFramePktType(const NvU32 srcType, + NVHDMIPKT_TYPE *pDstType) +{ + NVHDMIPKT_TYPE hdmiLibType; + + switch (srcType) { + default: + return FALSE; + case NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET: + hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC1; + break; + case NVT_INFOFRAME_TYPE_VENDOR_SPECIFIC: + hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC2; + break; + case NVT_INFOFRAME_TYPE_DYNAMIC_RANGE_MASTERING: + hdmiLibType = NVHDMIPKT_TYPE_SHARED_GENERIC3; + break; + } + + *pDstType = hdmiLibType; + + return TRUE; +} + +static NvBool ConstructAdvancedInfoFramePacket( + const NVT_INFOFRAME_HEADER *pInfoFrameHeader, + const NvU32 infoframeSize, + const NvBool needChecksum, + const NvBool swChecksum, + NvU8 *pPacket, + const NvU32 packetLen) +{ + NvU8 hdmiPacketType; + const NvU8 *pPayload; + NvU32 payloadLen; + + if (!nvEvo1NvtToHdmiInfoFramePacketType(pInfoFrameHeader->type, + &hdmiPacketType)) { + return FALSE; + } + + /* + * XXX If required, add support for the large infoframe with + * multiple infoframes grouped together. + */ + nvAssert((infoframeSize + 1 /* + HB3 */ + (needChecksum ? 1 : 0)) <= + packetLen); + + pPacket[0] = hdmiPacketType; /* HB0 */ + + /* + * The fields and size of NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER + * match with those of NVT_INFOFRAME_HEADER at the time of writing, but + * nvtiming.h declares them separately. To be safe, special case + * NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET. 
+ */ + if (pInfoFrameHeader->type == NVT_INFOFRAME_TYPE_EXTENDED_METADATA_PACKET) { + const NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER *pExtMetadataHeader = + (const NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER *) + pInfoFrameHeader; + + pPacket[1] = pExtMetadataHeader->firstLast; /* HB1 */ + pPacket[2] = pExtMetadataHeader->sequenceIndex; /* HB2 */ + + pPayload = (const NvU8 *)(pExtMetadataHeader + 1); + payloadLen = infoframeSize - + sizeof(NVT_EXTENDED_METADATA_PACKET_INFOFRAME_HEADER); + } else { + pPacket[1] = pInfoFrameHeader->version; /* HB1 */ + pPacket[2] = pInfoFrameHeader->length; /* HB2 */ + + pPayload = (const NvU8 *)(pInfoFrameHeader + 1); + payloadLen = infoframeSize - sizeof(NVT_INFOFRAME_HEADER); + } + pPacket[3] = 0; /* HB3, reserved */ + + if (needChecksum) { + pPacket[4] = 0; /* PB0: checksum */ + + nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */ + + if (swChecksum) { + NvU8 checksum = 0; + + for (NvU32 i = 0; i < packetLen; i++) { + checksum += pPacket[i]; + } + pPacket[4] = ~checksum + 1; + } + } else { + nvAssert(!swChecksum); + nvkms_memcpy(&pPacket[4], pPayload, payloadLen); /* PB0~ */ + } + + return TRUE; +} + +void nvEvoSendHdmiInfoFrameC8(const NVDispEvoRec *pDispEvo, + const NvU32 head, + const NvEvoInfoFrameTransmitControl transmitCtrl, + const NVT_INFOFRAME_HEADER *pInfoFrameHeader, + const NvU32 infoFrameSize, + NvBool needChecksum) +{ + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NVHDMIPKT_TYPE hdmiLibType; + NVHDMIPKT_RESULT ret; + ADVANCED_INFOFRAME advancedInfoFrame = { }; + NvBool swChecksum; + + /* + * These structures are weird. The NVT_VIDEO_INFOFRAME, + * NVT_VENDOR_SPECIFIC_INFOFRAME, + * NVT_EXTENDED_METADATA_PACKET_INFOFRAME, etc structures are *kind + * of* what we want to send to the hdmipkt library, except the type + * in the header is different, and a single checksum byte may need + * to be inserted *between* the header and the payload (requiring us + * to allocate a buffer one byte larger). 
+ */ + NvU8 packet[36] = { }; + + if (!NvtToHdmiLibGenericInfoFramePktType(pInfoFrameHeader->type, + &hdmiLibType)) { + nvEvo1SendHdmiInfoFrame(pDispEvo, head, transmitCtrl, pInfoFrameHeader, + infoFrameSize, needChecksum); + return; + } + + switch (transmitCtrl) { + case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_EVERY_FRAME: + advancedInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ALWAYS; + break; + case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_SINGLE_FRAME: + advancedInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ONCE; + break; + } + advancedInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK; + advancedInfoFrame.hwChecksum = needChecksum; + + // Large infoframes are incompatible with hwChecksum + nvAssert(!(advancedInfoFrame.isLargeInfoframe && + advancedInfoFrame.hwChecksum)); + + // XXX WAR bug 5124145 by always computing checksum in software if needed. + swChecksum = needChecksum; + + // If we need a checksum: hwChecksum, swChecksum, or both must be enabled. + nvAssert(!needChecksum || + (advancedInfoFrame.hwChecksum || swChecksum)); + + if (!ConstructAdvancedInfoFramePacket(pInfoFrameHeader, + infoFrameSize, + needChecksum, + swChecksum, + packet, + sizeof(packet))) { + return; + } + + advancedInfoFrame.packetLen = sizeof(packet); + advancedInfoFrame.pPacket = packet; + + ret = NvHdmiPkt_SetupAdvancedInfoframe(pDevEvo->hdmiLib.handle, + pDispEvo->displayOwner, + head, + hdmiLibType, + &advancedInfoFrame); + if (ret != NVHDMIPKT_SUCCESS) { + nvAssert(ret == NVHDMIPKT_SUCCESS); + } +} + +void nvEvoDisableHdmiInfoFrameC8(const NVDispEvoRec *pDispEvo, + const NvU32 head, + const NvU8 nvtInfoFrameType) +{ + const NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head]; + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NVHDMIPKT_TYPE hdmiLibType; + NVHDMIPKT_RESULT ret; + + if (!NvtToHdmiLibGenericInfoFramePktType(nvtInfoFrameType, + &hdmiLibType)) { + return; + } + + ret = NvHdmiPkt_PacketCtrl(pDevEvo->hdmiLib.handle, + pDispEvo->displayOwner, + pHeadState->activeRmId, + head, + 
hdmiLibType, + NVHDMIPKT_TRANSMIT_CONTROL_DISABLE); + if (ret != NVHDMIPKT_SUCCESS) { + nvAssert(!"Failed to disable vendor specific infoframe"); + } +} + +void nvEvoSendDpInfoFrameSdpC8(const NVDispEvoRec *pDispEvo, + const NvU32 head, + const NvEvoInfoFrameTransmitControl transmitCtrl, + const DPSDP_DESCRIPTOR *sdp) +{ + NVDevEvoPtr pDevEvo = pDispEvo->pDevEvo; + NVHDMIPKT_RESULT ret; + ADVANCED_INFOFRAME advanceInfoFrame = { }; + NvU8 packet[36] = { }; + + /* + * XXX Using NVHDMIPKT_TYPE_SHARED_GENERIC1 for DP HDR SDP, add + * support for other infoframe as needed. + */ + if (sdp->hb.hb1 != dp_pktType_DynamicRangeMasteringInfoFrame) { + nvAssert(!"Unsupported infoframe"); + return; + } + + nvAssert((sizeof(sdp->hb) + sdp->dataSize) <= sizeof(packet)); + + nvkms_memcpy(packet, &sdp->hb, + NV_MIN((sizeof(sdp->hb) + sdp->dataSize), sizeof(packet))); + + switch (transmitCtrl) { + case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_EVERY_FRAME: + advanceInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ALWAYS; + break; + case NV_EVO_INFOFRAME_TRANSMIT_CONTROL_SINGLE_FRAME: + advanceInfoFrame.runMode = INFOFRAME_CTRL_RUN_MODE_ONCE; + break; + } + advanceInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK; + advanceInfoFrame.packetLen = sizeof(packet); + advanceInfoFrame.pPacket = packet; + + ret = NvHdmiPkt_SetupAdvancedInfoframe(pDevEvo->hdmiLib.handle, + pDispEvo->displayOwner, + head, + NVHDMIPKT_TYPE_SHARED_GENERIC1, + &advanceInfoFrame); + if (ret != NVHDMIPKT_SUCCESS) { + nvAssert(ret == NVHDMIPKT_SUCCESS); + } +} + static NvU32 EvoAllocSurfaceDescriptorC3( NVDevEvoPtr pDevEvo, NVSurfaceDescriptor *pSurfaceDesc, NvU32 memoryHandle, NvU32 localCtxDmaFlags, @@ -8063,105 +7702,6 @@ static NvU32 EvoBindSurfaceDescriptorC3( return nvCtxDmaBind(pDevEvo, pChannel, pSurfaceDesc->ctxDmaHandle); } -NVEvoHAL nvEvoC3 = { - EvoSetRasterParamsC3, /* SetRasterParams */ - EvoSetProcAmpC3, /* SetProcAmp */ - EvoSetHeadControlC3, /* SetHeadControl */ - EvoSetHeadRefClkC3, /* SetHeadRefClk */ - 
EvoHeadSetControlORC3, /* HeadSetControlOR */ - nvEvoORSetControlC3, /* ORSetControl */ - EvoHeadSetDisplayIdC3, /* HeadSetDisplayId */ - EvoSetUsageBoundsC3, /* SetUsageBounds */ - nvEvoUpdateC3, /* Update */ - nvEvoIsModePossibleC3, /* IsModePossible */ - nvEvoPrePostIMPC3, /* PrePostIMP */ - nvEvoSetNotifierC3, /* SetNotifier */ - EvoGetCapabilitiesC3, /* GetCapabilities */ - EvoFlipC3, /* Flip */ - EvoFlipTransitionWARC3, /* FlipTransitionWAR */ - EvoFillLUTSurfaceC3, /* FillLUTSurface */ - EvoSetOutputLutC3, /* SetOutputLut */ - EvoSetOutputScalerC3, /* SetOutputScaler */ - EvoSetViewportPointInC3, /* SetViewportPointIn */ - EvoSetViewportInOutC3, /* SetViewportInOut */ - EvoSetCursorImageC3, /* SetCursorImage */ - nvEvoValidateCursorSurfaceC3, /* ValidateCursorSurface */ - EvoValidateWindowFormatC3, /* ValidateWindowFormat */ - nvEvoInitCompNotifierC3, /* InitCompNotifier */ - nvEvoIsCompNotifierCompleteC3, /* IsCompNotifierComplete */ - nvEvoWaitForCompNotifierC3, /* WaitForCompNotifier */ - EvoSetDitherC3, /* SetDither */ - EvoSetStallLockC3, /* SetStallLock */ - EvoSetDisplayRateC3, /* SetDisplayRate */ - EvoInitChannelC3, /* InitChannel */ - NULL, /* InitDefaultLut */ - EvoInitWindowMappingC3, /* InitWindowMapping */ - nvEvoIsChannelIdleC3, /* IsChannelIdle */ - nvEvoIsChannelMethodPendingC3, /* IsChannelMethodPending */ - nvEvoForceIdleSatelliteChannelC3, /* ForceIdleSatelliteChannel */ - nvEvoForceIdleSatelliteChannelIgnoreLockC3, /* ForceIdleSatelliteChannelIgnoreLock */ - nvEvoAccelerateChannelC3, /* AccelerateChannel */ - nvEvoResetChannelAcceleratorsC3, /* ResetChannelAccelerators */ - nvEvoAllocRmCtrlObjectC3, /* AllocRmCtrlObject */ - nvEvoFreeRmCtrlObjectC3, /* FreeRmCtrlObject */ - nvEvoSetImmPointOutC3, /* SetImmPointOut */ - EvoStartHeadCRC32CaptureC3, /* StartCRC32Capture */ - EvoStopHeadCRC32CaptureC3, /* StopCRC32Capture */ - nvEvoQueryHeadCRC32_C3, /* QueryCRC32 */ - nvEvoGetScanLineC3, /* GetScanLine */ - NULL, /* 
ConfigureVblankSyncObject */ - nvEvo1SetDscParams, /* SetDscParams */ - NULL, /* EnableMidFrameAndDWCFWatermark */ - nvEvoGetActiveViewportOffsetC3, /* GetActiveViewportOffset */ - NULL, /* ClearSurfaceUsage */ - EvoComputeWindowScalingTapsC3, /* ComputeWindowScalingTaps */ - nvEvoGetWindowScalingCapsC3, /* GetWindowScalingCaps */ - NULL, /* SetMergeMode */ - nvEvo1SendHdmiInfoFrame, /* SendHdmiInfoFrame */ - nvEvo1DisableHdmiInfoFrame, /* DisableHdmiInfoFrame */ - nvEvo1SendDpInfoFrameSdp, /* SendDpInfoFrameSdp */ - NULL, /* SetDpVscSdp */ - NULL, /* InitHwHeadMultiTileConfig */ - NULL, /* SetMultiTileConfig */ - EvoAllocSurfaceDescriptorC3, /* AllocSurfaceDescriptor */ - EvoFreeSurfaceDescriptorC3, /* FreeSurfaceDescriptor */ - EvoBindSurfaceDescriptorC3, /* BindSurfaceDescriptor */ - NULL, /* SetTmoLutSurfaceAddress */ - NULL, /* SetILUTSurfaceAddress */ - EvoSetISOSurfaceAddressC3, /* SetISOSurfaceAddress */ - EvoSetCoreNotifierSurfaceAddressAndControlC3, /* SetCoreNotifierSurfaceAddressAndControl */ - EvoSetWinNotifierSurfaceAddressAndControlC3, /* SetWinNotifierSurfaceAddressAndControl */ - NULL, /* SetSemaphoreSurfaceAddressAndControl */ - NULL, /* SetAcqSemaphoreSurfaceAddressAndControl */ - { /* caps */ - TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ - TRUE, /* supportsDisplayRate */ - FALSE, /* supportsFlipLockRGStatus */ - FALSE, /* needDefaultLutSurface */ - FALSE, /* hasUnorm10OLUT */ - FALSE, /* supportsImageSharpening */ - FALSE, /* supportsHDMIVRR */ - FALSE, /* supportsCoreChannelSurface */ - FALSE, /* supportsHDMIFRL */ - TRUE, /* supportsSetStorageMemoryLayout */ - FALSE, /* supportsIndependentAcqRelSemaphore */ - FALSE, /* supportsCoreLut */ - TRUE, /* supportsSynchronizedOverlayPositionUpdate */ - FALSE, /* supportsVblankSyncObjects */ - FALSE, /* requiresScalingTapsInBothDimensions */ - FALSE, /* supportsMergeMode */ - FALSE, /* supportsHDMI10BPC */ - FALSE, /* supportsDPAudio192KHz */ - FALSE, /* supportsInputColorSpace */ - FALSE, /* 
supportsInputColorRange */ - FALSE, /* supportsYCbCr422OverHDMIFRL */ - NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ - sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ - NV_EVO_SCALER_2TAPS, /* minScalerTaps */ - NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C3, /* xEmulatedSurfaceMemoryFormats */ - }, -}; - NVEvoHAL nvEvoC5 = { EvoSetRasterParamsC5, /* SetRasterParams */ EvoSetProcAmpC5, /* SetProcAmp */ @@ -8173,7 +7713,6 @@ NVEvoHAL nvEvoC5 = { nvEvoSetUsageBoundsC5, /* SetUsageBounds */ nvEvoUpdateC3, /* Update */ nvEvoIsModePossibleC3, /* IsModePossible */ - nvEvoPrePostIMPC3, /* PrePostIMP */ nvEvoSetNotifierC3, /* SetNotifier */ EvoGetCapabilitiesC5, /* GetCapabilities */ EvoFlipC5, /* Flip */ @@ -8233,30 +7772,14 @@ NVEvoHAL nvEvoC5 = { NULL, /* SetSemaphoreSurfaceAddressAndControl */ NULL, /* SetAcqSemaphoreSurfaceAddressAndControl */ { /* caps */ - TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ - TRUE, /* supportsDisplayRate */ - FALSE, /* supportsFlipLockRGStatus */ - TRUE, /* needDefaultLutSurface */ - TRUE, /* hasUnorm10OLUT */ - FALSE, /* supportsImageSharpening */ - TRUE, /* supportsHDMIVRR */ - FALSE, /* supportsCoreChannelSurface */ FALSE, /* supportsHDMIFRL */ TRUE, /* supportsSetStorageMemoryLayout */ FALSE, /* supportsIndependentAcqRelSemaphore */ - FALSE, /* supportsCoreLut */ - TRUE, /* supportsSynchronizedOverlayPositionUpdate */ FALSE, /* supportsVblankSyncObjects */ - FALSE, /* requiresScalingTapsInBothDimensions */ TRUE, /* supportsMergeMode */ FALSE, /* supportsHDMI10BPC */ FALSE, /* supportsDPAudio192KHz */ - TRUE, /* supportsInputColorSpace */ - TRUE, /* supportsInputColorRange */ FALSE, /* supportsYCbCr422OverHDMIFRL */ - NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ - sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ - NV_EVO_SCALER_2TAPS, /* minScalerTaps */ NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C5, /* xEmulatedSurfaceMemoryFormats */ }, }; @@ -8272,7 +7795,6 
@@ NVEvoHAL nvEvoC6 = { nvEvoSetUsageBoundsC5, /* SetUsageBounds */ nvEvoUpdateC3, /* Update */ nvEvoIsModePossibleC3, /* IsModePossible */ - nvEvoPrePostIMPC3, /* PrePostIMP */ nvEvoSetNotifierC3, /* SetNotifier */ nvEvoGetCapabilitiesC6, /* GetCapabilities */ nvEvoFlipC6, /* Flip */ @@ -8332,30 +7854,15 @@ NVEvoHAL nvEvoC6 = { EvoSetSemaphoreSurfaceAddressAndControlC6, /* SetSemaphoreSurfaceAddressAndControl */ EvoSetAcqSemaphoreSurfaceAddressAndControlC6, /* SetAcqSemaphoreSurfaceAddressAndControl */ { /* caps */ - TRUE, /* supportsNonInterlockedUsageBoundsUpdate */ - TRUE, /* supportsDisplayRate */ - FALSE, /* supportsFlipLockRGStatus */ - TRUE, /* needDefaultLutSurface */ - TRUE, /* hasUnorm10OLUT */ - FALSE, /* supportsImageSharpening */ - TRUE, /* supportsHDMIVRR */ - FALSE, /* supportsCoreChannelSurface */ TRUE, /* supportsHDMIFRL */ FALSE, /* supportsSetStorageMemoryLayout */ TRUE, /* supportsIndependentAcqRelSemaphore */ - FALSE, /* supportsCoreLut */ - TRUE, /* supportsSynchronizedOverlayPositionUpdate */ TRUE, /* supportsVblankSyncObjects */ - FALSE, /* requiresScalingTapsInBothDimensions */ TRUE, /* supportsMergeMode */ TRUE, /* supportsHDMI10BPC */ FALSE, /* supportsDPAudio192KHz */ - TRUE, /* supportsInputColorSpace */ - TRUE, /* supportsInputColorRange */ FALSE, /* supportsYCbCr422OverHDMIFRL */ - NV_EVO3_SUPPORTED_DITHERING_MODES, /* supportedDitheringModes */ - sizeof(NVC372_CTRL_IS_MODE_POSSIBLE_PARAMS), /* impStructSize */ - NV_EVO_SCALER_2TAPS, /* minScalerTaps */ NV_EVO3_X_EMULATED_SURFACE_MEMORY_FORMATS_C6, /* xEmulatedSurfaceMemoryFormats */ }, }; + From c44fe64e4eb6b1fc51c0dc0934523c150cda7576 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 19 Dec 2025 12:50:07 +0200 Subject: [PATCH 18/20] nv-pci: add missing wait_iterations variable declarations --- kernel-open/nvidia/nv-pci.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index 
fd4d529101..aad6b910ee 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -2192,6 +2192,15 @@ nv_pci_remove(struct pci_dev *pci_dev) */ if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu)) { + /* + * For external GPU: wait up to 5 seconds (10 iterations * 500ms) + * For internal GPU: wait up to 60 seconds (120 iterations * 500ms) + * This prevents indefinite hangs while still allowing time for + * graceful cleanup of in-progress operations. + */ + int max_wait_iterations = nv->is_external_gpu ? 10 : 120; + int wait_iterations = 0; + nv_printf(NV_DBG_ERRORS, "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%d)%s\n", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), @@ -2203,7 +2212,8 @@ nv_pci_remove(struct pci_dev *pci_dev) * We can't return from this function without corrupting state, so we wait for * the usage count to go to zero, but with a timeout. */ - while (atomic64_read(&nvl->usage_count) != 0) + while ((atomic64_read(&nvl->usage_count) != 0) && + (wait_iterations < max_wait_iterations)) { /* * While waiting, release the locks so that other threads can make From bc93285720584da6de456392834c0bf6e352d0db Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 19 Dec 2025 13:21:34 +0200 Subject: [PATCH 19/20] nv-pci: add timeout to usage_count wait for eGPU hotplug --- kernel-open/nvidia/nv-pci.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index aad6b910ee..3cfce66b9d 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -27,7 +27,6 @@ #include "nv-msi.h" #include "nv-hypervisor.h" #include "nv-reg.h" -#include "nv-rsync.h" #if defined(NV_VGPU_KVM_BUILD) #include "nv-vgpu-vfio-interface.h" @@ -2143,13 +2142,6 @@ nv_pci_remove(struct pci_dev *pci_dev) nv = NV_STATE_PTR(nvl); - /* - * Note: For external GPUs (eGPU via Thunderbolt), the 
NV_FLAG_IN_SURPRISE_REMOVAL - * flag is set later in the removal process - either when waiting for usage count - * times out, or when actual surprise removal is detected. Setting it too early - * can interfere with normal cleanup operations that need to acquire GPU locks. - */ - #if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature #if defined(CONFIG_IOMMU_SVA) && \ (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT)) @@ -2190,7 +2182,7 @@ nv_pci_remove(struct pci_dev *pci_dev) * We still wait for a short time to allow in-progress close operations * to complete, but with a timeout to prevent hangs. */ - if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu)) + if (atomic64_read(&nvl->usage_count) != 0) { /* * For external GPU: wait up to 5 seconds (10 iterations * 500ms) @@ -2202,10 +2194,10 @@ nv_pci_remove(struct pci_dev *pci_dev) int wait_iterations = 0; nv_printf(NV_DBG_ERRORS, - "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%d)%s\n", + "NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%lld)%s\n", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn), - NV_ATOMIC_READ(nvl->usage_count), + atomic64_read(&nvl->usage_count), nv->is_external_gpu ? " (external GPU)" : ""); /* @@ -2242,13 +2234,13 @@ nv_pci_remove(struct pci_dev *pci_dev) down(&nvl->ldata_lock); } - if (NV_ATOMIC_READ(nvl->usage_count) != 0) + if (atomic64_read(&nvl->usage_count) != 0) { nv_printf(NV_DBG_ERRORS, - "NVRM: Timeout waiting for usage count on device %04x:%02x:%02x.%x (remaining: %d). Forcing removal.\n", + "NVRM: Timeout waiting for usage count on device %04x:%02x:%02x.%x (remaining: %lld). 
Forcing removal.\n", NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev), NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn), - NV_ATOMIC_READ(nvl->usage_count)); + atomic64_read(&nvl->usage_count)); /* * Force the surprise removal flag so that any remaining * close operations will take the fast-path. From 5a12cfbe939651c66aa1d88a496bac3b9fe49748 Mon Sep 17 00:00:00 2001 From: Bohdan Dymchenko Date: Fri, 19 Dec 2025 13:31:50 +0200 Subject: [PATCH 20/20] nv-pci: add missing nv-rsync due to merge conflict --- kernel-open/nvidia/nv-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel-open/nvidia/nv-pci.c b/kernel-open/nvidia/nv-pci.c index 3cfce66b9d..1d54b2b963 100644 --- a/kernel-open/nvidia/nv-pci.c +++ b/kernel-open/nvidia/nv-pci.c @@ -27,6 +27,7 @@ #include "nv-msi.h" #include "nv-hypervisor.h" #include "nv-reg.h" +#include "nv-rsync.h" #if defined(NV_VGPU_KVM_BUILD) #include "nv-vgpu-vfio-interface.h"