diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 700926eb77dfa..e2e41f1bdb63c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4524,8 +4524,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (ret != RET_PF_CONTINUE) return ret; - if (unlikely(is_error_pfn(fault->pfn))) + if (unlikely(is_error_pfn(fault->pfn))) { + /* + * A passed-through PCI BAR is backed by a VM_IO/VM_PFNMAP + * mapping whose fault handler refuses to install a PTE while the + * device's memory space is disabled (e.g. the guest cleared + * PCI_COMMAND.MEM). The gup then fails even though the memslot + * is still valid. Treat such an access as MMIO and emulate it + * (the guest observes Unsupported Request semantics, matching + * real hardware) instead of killing the VM with -EFAULT. Other, + * non-pfnmap errors still take the fatal path. + */ + if (fault->pfn == KVM_PFN_ERR_PFNMAP) + return kvm_handle_noslot_fault(vcpu, fault, access); + return kvm_handle_error_pfn(vcpu, fault); + } if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn))) return kvm_handle_noslot_fault(vcpu, fault, access); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 595503fa9ca89..e297e2d34aed4 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -451,6 +451,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev) } #endif /* CONFIG_PM */ +/* + * Eager-request BAR resources, and iomap them. Soft failures are + * allowed, and consumers must check the barmap before use in order to + * give compatible user-visible behaviour with the previous on-demand + * allocation method. + */ +static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + int bar = i + PCI_STD_RESOURCES; + + vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV); + + if (!pci_resource_len(pdev, i)) + continue; + + if (pci_request_selected_regions(pdev, 1 << bar, "vfio")) { + pci_dbg(pdev, "Failed to reserve region %d\n", bar); + vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY); + continue; + } + + vdev->barmap[bar] = pci_iomap(pdev, bar, 0); + if (!vdev->barmap[bar]) { + pci_dbg(pdev, "Failed to iomap region %d\n", bar); + pci_release_selected_regions(pdev, 1 << bar); + vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM); + } + } +} + /* * The pci-driver core runtime PM routines always save the device state * before going into suspended state. If the device is going into low power @@ -537,6 +571,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; + vfio_pci_core_map_bars(vdev); return 0; @@ -616,7 +651,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) for (i = 0; i < PCI_STD_NUM_BARS; i++) { bar = i + PCI_STD_RESOURCES; - if (!vdev->barmap[bar]) + if (IS_ERR_OR_NULL(vdev->barmap[bar])) continue; pci_iounmap(pdev, vdev->barmap[bar]); pci_release_selected_regions(pdev, 1 << bar); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index a0595c745732a..36372163c5275 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -206,27 +206,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, } EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw); +/* + * The barmap is set up in vfio_pci_core_enable(). Callers use this + * function to check that the BAR resources are requested or that the + * pci_iomap() was done. + */ int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar) { - struct pci_dev *pdev = vdev->pdev; - int ret; - void __iomem *io; - - if (vdev->barmap[bar]) - return 0; - - ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); - if (ret) - return ret; - - io = pci_iomap(pdev, bar, 0); - if (!io) { - pci_release_selected_regions(pdev, 1 << bar); - return -ENOMEM; - } - - vdev->barmap[bar] = io; - + if (IS_ERR(vdev->barmap[bar])) + return PTR_ERR(vdev->barmap[bar]); return 0; } EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2e836d44f7386..0e0b08a615372 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -97,6 +97,13 @@ #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) +/* + * Faulting in a VM_IO/VM_PFNMAP mapping failed because its fault handler + * declined to install a PTE, e.g. a passed-through PCI BAR whose device memory + * is currently disabled (guest cleared PCI_COMMAND.MEM). The memslot is valid; + * the access should be treated as MMIO rather than a fatal -EFAULT. + */ +#define KVM_PFN_ERR_PFNMAP (KVM_PFN_ERR_MASK + 4) /* * error pfns indicate that the gfn is in slot but faild to diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aba4078ae2250..24a6166f917df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2991,7 +2991,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, if (r == -EAGAIN) goto retry; if (r < 0) - pfn = KVM_PFN_ERR_FAULT; + /* + * The mapping's fault handler declined to install a PTE + * (e.g. a passed-through PCI BAR with device memory + * disabled). Flag it distinctly so the fault handler can + * treat the access as MMIO instead of a fatal -EFAULT. + */ + pfn = KVM_PFN_ERR_PFNMAP; } else { if (async && vma_is_valid(vma, write_fault)) *async = true;