diff --git a/Makefile b/Makefile index 9c16001ccdcd1..7ee3beedcecaa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 18 -SUBLEVEL = 37 +SUBLEVEL = 38 EXTRAVERSION = NAME = Baby Opossum Posse @@ -806,12 +806,6 @@ endif # KBUILD_EXTMOD # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux -CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -ifdef CONFIG_CC_IS_GCC -CFLAGS_GCOV += -fno-tree-loop-im -endif -export CFLAGS_GCOV - # The arch Makefiles can override CC_FLAGS_FTRACE. We may also append it later. ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg @@ -1082,6 +1076,27 @@ endif # Ensure compilers do not transform certain loops into calls to wcslen() KBUILD_CFLAGS += -fno-builtin-wcslen +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage +ifdef CONFIG_CC_IS_GCC +CFLAGS_GCOV += -fno-tree-loop-im +# Use atomic counter updates to avoid concurrent-access crashes in GCOV. +# Only enable if -fprofile-update=prefer-atomic does not introduce new +# undefined symbols (e.g. libatomic calls that the kernel cannot link). +CFLAGS_GCOV += $(call try-run,\ + echo 'long long x; void f(void){x++;}' | \ + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -w -fprofile-arcs \ + -ftest-coverage -x c - -c -o "$$TMP.base" && \ + echo 'long long x; void f(void){x++;}' | \ + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -w -fprofile-arcs \ + -ftest-coverage -fprofile-update=prefer-atomic \ + -x c - -c -o "$$TMP" && \ + $(NM) "$$TMP.base" | grep ' U ' > "$$TMP.ubase" || true ; \ + $(NM) "$$TMP" | grep ' U ' > "$$TMP.utest" || true ; \ + cmp -s "$$TMP.ubase" "$$TMP.utest",\ + -fprofile-update=prefer-atomic) +endif +export CFLAGS_GCOV + # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0d38dc72dfc62..403a635483513 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1444,6 +1444,11 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, if (!kvm_has_mte(kvm)) return; + if (is_zero_pfn(pfn)) { + WARN_ON_ONCE(nr_pages != 1); + return; + } + if (folio_test_hugetlb(folio)) { /* Hugetlb has MTE flags set on head page only */ if (folio_try_hugetlb_mte_tagging(folio)) { diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 46036d98da75b..5d6a60294fb2d 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -688,6 +688,7 @@ static void stop_this_cpu(void *dummy) set_cpu_online(smp_processor_id(), false); calculate_cpu_foreign_map(); local_irq_disable(); + rcutree_report_cpu_dead(); while (true); } diff --git a/arch/mips/dec/prom/console.c b/arch/mips/dec/prom/console.c index 31a8441d84314..b4f0dba3fa20e 100644 --- a/arch/mips/dec/prom/console.c +++ b/arch/mips/dec/prom/console.c @@ -2,8 +2,9 @@ /* * DECstation PROM-based early console support. * - * Copyright (C) 2004, 2007 Maciej W. Rozycki + * Copyright (C) 2004, 2007, 2026 Maciej W. Rozycki */ +#include #include #include #include @@ -14,9 +15,11 @@ static void __init prom_console_write(struct console *con, const char *s, unsigned int c) { - char buf[81]; + static char buf[81] __initdata = { 0 }; unsigned int chunk = sizeof(buf) - 1; + BUG_ON((long)buf != (int)(long)buf); + while (c > 0) { if (chunk > c) chunk = c; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 4868e79f3b30e..0f28b4a62e72e 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -422,6 +423,7 @@ static void stop_this_cpu(void *dummy) set_cpu_online(smp_processor_id(), false); calculate_cpu_foreign_map(); local_irq_disable(); + rcutree_report_cpu_dead(); while (1); } diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0092513c3376c..b1a2ac6657926 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -43,20 +43,23 @@ do { \ #ifdef CONFIG_64BIT extern u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1]; extern char _end[]; +static inline void mark_new_valid_map(void) +{ + int i; + + /* + * We don't care if concurrently a cpu resets this value since + * the only place this can happen is in handle_exception() where + * an sfence.vma is emitted. + */ + for (i = 0; i < ARRAY_SIZE(new_vmalloc); ++i) + new_vmalloc[i] = -1ULL; +} #define flush_cache_vmap flush_cache_vmap static inline void flush_cache_vmap(unsigned long start, unsigned long end) { - if (is_vmalloc_or_module_addr((void *)start)) { - int i; - - /* - * We don't care if concurrently a cpu resets this value since - * the only place this can happen is in handle_exception() where - * an sfence.vma is emitted. - */ - for (i = 0; i < ARRAY_SIZE(new_vmalloc); ++i) - new_vmalloc[i] = -1ULL; - } + if (is_vmalloc_or_module_addr((void *)start)) + mark_new_valid_map(); } #define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end) #endif diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index d08bf7fb3aee6..29cb3a6ee113d 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -6,6 +6,7 @@ #include #include #include +#include #include static inline bool arch_kfence_init_pool(void) @@ -17,10 +18,12 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (protect) + if (protect) { set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT)); - else + } else { set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); + mark_new_valid_map(); + } preempt_disable(); local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 9b9dec6893b81..e7bd03a9ced53 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -107,8 +107,10 @@ SYM_CODE_START(handle_exception) #ifdef CONFIG_64BIT /* - * The RISC-V kernel does not eagerly emit a sfence.vma after each - * new vmalloc mapping, which may result in exceptions: + * The RISC-V kernel does not flush TLBs on all CPUS after each new + * vmalloc mapping or kfence_unprotect(), which may result in + * exceptions: + * * - if the uarch caches invalid entries, the new mapping would not be * observed by the page table walker and an invalidation is needed. * - if the uarch does not cache invalid entries, a reordered access diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f279561154bc9..4ebb3c40c6bbc 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1839,6 +1839,11 @@ static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_ba int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK; unsigned long sbank; + BUILD_BUG_ON(BITS_PER_TYPE(valid_bank_mask) != HV_MAX_SPARSE_VCPU_BANKS); + + if (valid_bit_nr >= HV_MAX_SPARSE_VCPU_BANKS) + return false; + if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask)) return false; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0bd0cb8992c9f..eadeca9786f6f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2453,13 +2453,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, bool direct, unsigned int access) { - union kvm_mmu_page_role role; + union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access); - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) && - spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn) + if (is_shadow_present_pte(*sptep) && + !is_large_pte(*sptep) && + spte_to_child_sp(*sptep) && + spte_to_child_sp(*sptep)->gfn == gfn && + spte_to_child_sp(*sptep)->role.word == role.word) return ERR_PTR(-EEXIST); - role = kvm_mmu_child_role(sptep, direct, access); return kvm_mmu_get_shadow_page(vcpu, gfn, role); } @@ -7179,13 +7181,19 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, sp = sptep_to_sp(sptep); /* - * We cannot do huge page mapping for indirect shadow pages, - * which are found on the last rmap (level = 1) when not using - * tdp; such shadow pages are synced with the page table in - * the guest, and the guest page table is using 4K page size - * mapping if the indirect sp has level = 1. + * Direct shadow page can be replaced by a hugepage if the host + * mapping level allows it and the memslot maps all of the host + * hugepage. Note! If the memslot maps only part of the + * hugepage, sp->gfn may be below slot->base_gfn, and querying + * the max mapping level would cause an out-of-bounds lpage_info + * access. So the gfn bounds check *must* be done first. + * + * Indirect shadow pages are created when the guest page tables + * are using 4K pages. Since the host mapping is always + * constrained by the page size in the guest, indirect shadow + * pages are never collapsible. */ - if (sp->role.direct && + if (sp->role.direct && is_gfn_in_memslot(slot, sp->gfn) && sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6d4574eb8a4ce..8b807f8eb0672 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1340,6 +1340,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) s_off = vaddr & ~PAGE_MASK; d_off = dst_vaddr & ~PAGE_MASK; len = min_t(size_t, (PAGE_SIZE - s_off), size); + len = min_t(size_t, len, PAGE_SIZE - d_off); if (dec) ret = __sev_dbg_decrypt_user(kvm, diff --git a/block/bdev.c b/block/bdev.c index 638f0cd458ae4..57e78692084b1 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -438,15 +438,10 @@ EXPORT_SYMBOL_GPL(blockdev_superblock); void __init bdev_cache_init(void) { - int err; - bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT|SLAB_PANIC), init_once); - err = register_filesystem(&bd_type); - if (err) - panic("Cannot register bdev pseudo-fs"); blockdev_mnt = kern_mount(&bd_type); if (IS_ERR(blockdev_mnt)) panic("Cannot create bdev pseudo-fs"); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f1ea69743c544..a2347b5795874 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -164,20 +164,10 @@ static void blkg_free(struct blkcg_gq *blkg) static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); - struct blkcg *blkcg = blkg->blkcg; - int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif - /* - * Flush all the non-empty percpu lockless lists before releasing - * us, given these stat belongs to us. - * - * blkg_stat_lock is for serializing blkg stat update - */ - for_each_possible_cpu(cpu) - __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -195,6 +185,17 @@ static void __blkg_release(struct rcu_head *rcu) static void blkg_release(struct percpu_ref *ref) { struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); + struct blkcg *blkcg = blkg->blkcg; + int cpu; + + /* + * Flush all the non-empty percpu lockless lists before releasing + * us, given these stat belongs to us. + * + * blkg_stat_lock is for serializing blkg stat update + */ + for_each_possible_cpu(cpu) + __blkcg_rstat_flush(blkcg, cpu); call_rcu(&blkg->rcu_head, __blkg_release); } diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 78135fb13f5c2..1b7509e2ce443 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -714,15 +714,13 @@ int nx_crypto_ctx_aes_xcbc_init(struct crypto_shash *tfm) /** * nx_crypto_ctx_exit - destroy a crypto api context * - * @tfm: the crypto transform pointer for the context + * @nx_ctx: the crypto api context * * As crypto API contexts are destroyed, this exit hook is called to free the * memory associated with it. */ -void nx_crypto_ctx_exit(struct crypto_tfm *tfm) +void nx_crypto_ctx_exit(struct nx_crypto_ctx *nx_ctx) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); - kfree_sensitive(nx_ctx->kmem); nx_ctx->csbcpb = NULL; nx_ctx->csbcpb_aead = NULL; diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 36974f08490a9..6dfabfbf8192c 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -153,7 +153,7 @@ int nx_crypto_ctx_aes_ctr_init(struct crypto_skcipher *tfm); int nx_crypto_ctx_aes_cbc_init(struct crypto_skcipher *tfm); int nx_crypto_ctx_aes_ecb_init(struct crypto_skcipher *tfm); int nx_crypto_ctx_sha_init(struct crypto_shash *tfm); -void nx_crypto_ctx_exit(struct crypto_tfm *tfm); +void nx_crypto_ctx_exit(struct nx_crypto_ctx *nx_ctx); void nx_crypto_ctx_skcipher_exit(struct crypto_skcipher *tfm); void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm); void nx_crypto_ctx_shash_exit(struct crypto_shash *tfm); diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c index caa091224dc54..9107a5b461d35 100644 --- a/drivers/fpga/of-fpga-region.c +++ b/drivers/fpga/of-fpga-region.c @@ -168,11 +168,10 @@ static int child_regions_with_firmware(struct device_node *overlay) fpga_region_of_match); } - of_node_put(child_region); - if (ret) pr_err("firmware-name not allowed in child FPGA region: %pOF", child_region); + of_node_put(child_region); return ret; } diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ae7e9c8b65a65..05b315405639b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1570,6 +1570,10 @@ static int i2c_register_adapter(struct i2c_adapter *adap) pm_suspend_ignore_children(&adap->dev, true); pm_runtime_enable(&adap->dev); + mutex_lock(&core_lock); + idr_replace(&i2c_adapter_idr, adap, adap->nr); + mutex_unlock(&core_lock); + res = device_add(&adap->dev); if (res) { pr_err("adapter '%s': can't register device (%d)\n", adap->name, res); @@ -1628,7 +1632,7 @@ static int __i2c_add_numbered_adapter(struct i2c_adapter *adap) int id; mutex_lock(&core_lock); - id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, GFP_KERNEL); + id = idr_alloc(&i2c_adapter_idr, NULL, adap->nr, adap->nr + 1, GFP_KERNEL); mutex_unlock(&core_lock); if (WARN(id < 0, "couldn't get idr")) return id == -ENOSPC ? -EBUSY : id; @@ -1662,7 +1666,7 @@ int i2c_add_adapter(struct i2c_adapter *adapter) } mutex_lock(&core_lock); - id = idr_alloc(&i2c_adapter_idr, adapter, + id = idr_alloc(&i2c_adapter_idr, NULL, __i2c_first_dynamic_bus_num, 0, GFP_KERNEL); mutex_unlock(&core_lock); if (WARN(id < 0, "couldn't get idr")) diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c index e9ef2f5a7207e..4feef4ab5fecc 100644 --- a/drivers/irqchip/irq-imgpdc.c +++ b/drivers/irqchip/irq-imgpdc.c @@ -378,6 +378,7 @@ static int pdc_intc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "cannot add IRQ domain\n"); return -ENOMEM; } + priv->domain->flags |= IRQ_DOMAIN_FLAG_DESTROY_GC; /* * Set up 2 generic irq chips with 2 chip types. @@ -465,6 +466,11 @@ static void pdc_intc_remove(struct platform_device *pdev) { struct pdc_intc_priv *priv = platform_get_drvdata(pdev); + for (unsigned int i = 0; i < priv->nr_perips; ++i) + irq_set_chained_handler_and_data(priv->perip_irqs[i], NULL, NULL); + + irq_set_chained_handler_and_data(priv->syswake_irq, NULL, NULL); + irq_domain_remove(priv->domain); } diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 7496a2e9a2820..22affdcb4ceae 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -621,7 +621,6 @@ static void ppp_start(struct net_device *dev) struct proto *proto = &ppp->protos[i]; proto->dev = dev; - timer_setup(&proto->timer, ppp_timer, 0); proto->state = CLOSED; } ppp->protos[IDX_LCP].pid = PID_LCP; @@ -641,6 +640,15 @@ static void ppp_close(struct net_device *dev) ppp_tx_flush(); } +static void ppp_timer_release(struct net_device *dev) +{ + struct ppp *ppp = get_ppp(dev); + int i; + + for (i = 0; i < IDX_COUNT; i++) + timer_shutdown_sync(&ppp->protos[i].timer); +} + static struct hdlc_proto proto = { .start = ppp_start, .stop = ppp_stop, @@ -649,6 +657,7 @@ static struct hdlc_proto proto = { .ioctl = ppp_ioctl, .netif_rx = ppp_rx, .module = THIS_MODULE, + .detach = ppp_timer_release, }; static const struct header_ops ppp_header_ops = { @@ -659,7 +668,7 @@ static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs) { hdlc_device *hdlc = dev_to_hdlc(dev); struct ppp *ppp; - int result; + int i, result; switch (ifs->type) { case IF_GET_PROTO: @@ -687,6 +696,8 @@ static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs) return result; ppp = get_ppp(dev); + for (i = 0; i < IDX_COUNT; i++) + timer_setup(&ppp->protos[i].timer, ppp_timer, 0); spin_lock_init(&ppp->lock); ppp->req_timeout = 2; ppp->cr_retries = 10; diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c index 56b1a657e0b0f..68c7ef190780a 100644 --- a/drivers/net/wireless/ath/ath11k/dp.c +++ b/drivers/net/wireless/ath/ath11k/dp.c @@ -1042,6 +1042,7 @@ void ath11k_dp_free(struct ath11k_base *ab) idr_destroy(&dp->tx_ring[i].txbuf_idr); spin_unlock_bh(&dp->tx_ring[i].tx_idr_lock); kfree(dp->tx_ring[i].tx_status); + dp->tx_ring[i].tx_status = NULL; } /* Deinit any SOC level resource */ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/agg.c b/drivers/net/wireless/intel/iwlwifi/mld/agg.c index 3bf36f8f68742..e3627ad0321c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/agg.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/agg.c @@ -64,6 +64,9 @@ static void iwl_mld_release_frames_from_notif(struct iwl_mld *mld, } /* pick any STA ID to find the pointer */ + if (WARN_ON_ONCE(!ba_data->sta_mask)) + goto out_unlock; + sta_id = ffs(ba_data->sta_mask) - 1; link_sta = rcu_dereference(mld->fw_id_to_link_sta[sta_id]); if (WARN_ON_ONCE(IS_ERR_OR_NULL(link_sta) || !link_sta->sta)) @@ -166,6 +169,9 @@ void iwl_mld_del_ba(struct iwl_mld *mld, int queue, goto out_unlock; /* pick any STA ID to find the pointer */ + if (WARN_ON_ONCE(!ba_data->sta_mask)) + goto out_unlock; + sta_id = ffs(ba_data->sta_mask) - 1; link_sta = rcu_dereference(mld->fw_id_to_link_sta[sta_id]); if (WARN_ON_ONCE(IS_ERR_OR_NULL(link_sta) || !link_sta->sta)) @@ -347,6 +353,9 @@ static void iwl_mld_rx_agg_session_expired(struct timer_list *t) } /* timer expired, pick any STA ID to find the pointer */ + if (WARN_ON_ONCE(!ba_data->sta_mask)) + goto unlock; + sta_id = ffs(ba_data->sta_mask) - 1; link_sta = rcu_dereference(ba_data->mld->fw_id_to_link_sta[sta_id]); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c index 231920425c066..b40182320801d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c @@ -319,10 +319,10 @@ void iwl_mld_ptp_remove(struct iwl_mld *mld) mld->ptp_data.ptp_clock_info.name, ptp_clock_index(mld->ptp_data.ptp_clock)); + cancel_delayed_work_sync(&mld->ptp_data.dwork); ptp_clock_unregister(mld->ptp_data.ptp_clock); mld->ptp_data.ptp_clock = NULL; mld->ptp_data.last_gp2 = 0; mld->ptp_data.wrap_counter = 0; - cancel_delayed_work_sync(&mld->ptp_data.dwork); } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c index ad156b82eaa94..efb291ceb0e58 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c @@ -323,11 +323,11 @@ void iwl_mvm_ptp_remove(struct iwl_mvm *mvm) mvm->ptp_data.ptp_clock_info.name, ptp_clock_index(mvm->ptp_data.ptp_clock)); + cancel_delayed_work_sync(&mvm->ptp_data.dwork); ptp_clock_unregister(mvm->ptp_data.ptp_clock); mvm->ptp_data.ptp_clock = NULL; memset(&mvm->ptp_data.ptp_clock_info, 0, sizeof(mvm->ptp_data.ptp_clock_info)); mvm->ptp_data.last_gp2 = 0; - cancel_delayed_work_sync(&mvm->ptp_data.dwork); } } diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 76eb740428b8d..2d24f9fa51a50 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1568,6 +1568,7 @@ mt76_sta_add(struct mt76_phy *phy, struct ieee80211_vif *vif, { struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv; struct mt76_dev *dev = phy->dev; + struct mt76_wcid *published; int ret; int i; @@ -1587,11 +1588,19 @@ mt76_sta_add(struct mt76_phy *phy, struct ieee80211_vif *vif, mtxq->wcid = wcid->idx; } - ewma_signal_init(&wcid->rssi); - rcu_assign_pointer(dev->wcid[wcid->idx], wcid); + published = rcu_dereference_protected(dev->wcid[wcid->idx], + lockdep_is_held(&dev->mutex)); + if (published != wcid) { + WARN_ON_ONCE(published); + ewma_signal_init(&wcid->rssi); + rcu_assign_pointer(dev->wcid[wcid->idx], wcid); + mt76_wcid_init(wcid, phy->band_idx); + } else { + wcid->phy_idx = phy->band_idx; + } + phy->num_sta++; - mt76_wcid_init(wcid, phy->band_idx); out: mutex_unlock(&dev->mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index 96cecc576a986..427b294423d3c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -16,6 +16,7 @@ static const struct usb_device_id mt76x2u_device_table[] = { { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USBAC1200 - Alfa AWUS036ACM */ { USB_DEVICE(0x057c, 0x8503) }, /* Avm FRITZ!WLAN AC860 */ { USB_DEVICE(0x7392, 0xb711) }, /* Edimax EW 7722 UAC */ + { USB_DEVICE(0x056e, 0x400a) }, /* ELECOM WDC-867SU3S */ { USB_DEVICE(0x0e8d, 0x7632) }, /* HC-M7662BU1 */ { USB_DEVICE(0x0471, 0x2126) }, /* LiteOn WN4516R module, nonstandard USB connector */ { USB_DEVICE(0x0471, 0x7600) }, /* LiteOn WN4519R module, nonstandard USB connector */ diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 00126f563dfd9..9a8ef3449eea1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1144,6 +1144,9 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, if (vif->type == NL80211_IFTYPE_AP) break; + if (vif->type == NL80211_IFTYPE_STATION && sta->tdls) + continue; + link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); if (!link_sta) continue; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h index 1155365348f32..d5de09d75f451 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h @@ -291,7 +291,7 @@ static inline int get_rx_desc_paggr(__le32 *__pdesc) static inline int get_rx_status_desc_rpt_sel(__le32 *__pdesc) { - return le32_get_bits(*(__pdesc + 1), BIT(28)); + return le32_get_bits(*(__pdesc + 2), BIT(28)); } static inline int get_rx_desc_rxmcs(__le32 *__pdesc) diff --git a/drivers/net/wireless/realtek/rtw88/tx.c b/drivers/net/wireless/realtek/rtw88/tx.c index 2ab440cb2d67b..9e24741b2cb15 100644 --- a/drivers/net/wireless/realtek/rtw88/tx.c +++ b/drivers/net/wireless/realtek/rtw88/tx.c @@ -196,6 +196,7 @@ void rtw_tx_report_purge_timer(struct timer_list *t) void rtw_tx_report_enqueue(struct rtw_dev *rtwdev, struct sk_buff *skb, u8 sn) { struct rtw_tx_report *tx_report = &rtwdev->tx_report; + unsigned long timeout = RTW_TX_PROBE_TIMEOUT; unsigned long flags; u8 *drv_data; @@ -207,7 +208,11 @@ void rtw_tx_report_enqueue(struct rtw_dev *rtwdev, struct sk_buff *skb, u8 sn) __skb_queue_tail(&tx_report->queue, skb); spin_unlock_irqrestore(&tx_report->q_lock, flags); - mod_timer(&tx_report->purge_timer, jiffies + RTW_TX_PROBE_TIMEOUT); + if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D && + rtwdev->hci.type == RTW_HCI_TYPE_USB) + timeout = msecs_to_jiffies(2500); + + mod_timer(&tx_report->purge_timer, jiffies + timeout); } EXPORT_SYMBOL(rtw_tx_report_enqueue); diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 6e841a11c752b..c7f723c084338 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -399,6 +399,7 @@ static bool rtw_usb_tx_agg_skb(struct rtw_usb *rtwusb, struct sk_buff_head *list int agg_num = 0; unsigned int align_next = 0; u8 qsel; + int ret; if (skb_queue_empty(list)) return false; @@ -456,7 +457,13 @@ static bool rtw_usb_tx_agg_skb(struct rtw_usb *rtwusb, struct sk_buff_head *list tx_desc = (struct rtw_tx_desc *)skb_head->data; qsel = le32_get_bits(tx_desc->w1, RTW_TX_DESC_W1_QSEL); - rtw_usb_write_port(rtwdev, qsel, skb_head, rtw_usb_write_port_tx_complete, txcb); + ret = rtw_usb_write_port(rtwdev, qsel, skb_head, + rtw_usb_write_port_tx_complete, txcb); + if (ret) { + ieee80211_purge_tx_queue(rtwdev->hw, &txcb->tx_ack_queue); + kfree(txcb); + return false; + } return true; } @@ -518,8 +525,10 @@ static int rtw_usb_write_data(struct rtw_dev *rtwdev, ret = rtw_usb_write_port(rtwdev, qsel, skb, rtw_usb_write_port_complete, skb); - if (unlikely(ret)) + if (unlikely(ret)) { rtw_err(rtwdev, "failed to do USB write, ret=%d\n", ret); + dev_kfree_skb_any(skb); + } return ret; } diff --git a/drivers/ntb/hw/epf/ntb_hw_epf.c b/drivers/ntb/hw/epf/ntb_hw_epf.c index d3ecf25a51625..9935da48a52ea 100644 --- a/drivers/ntb/hw/epf/ntb_hw_epf.c +++ b/drivers/ntb/hw/epf/ntb_hw_epf.c @@ -646,7 +646,8 @@ static void ntb_epf_deinit_pci(struct ntb_epf_dev *ndev) struct pci_dev *pdev = ndev->ntb.pdev; pci_iounmap(pdev, ndev->ctrl_reg); - pci_iounmap(pdev, ndev->peer_spad_reg); + if (ndev->barno_map[BAR_PEER_SPAD] != ndev->barno_map[BAR_CONFIG]) + pci_iounmap(pdev, ndev->peer_spad_reg); pci_iounmap(pdev, ndev->db_reg); pci_release_regions(pdev); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 43555ad9e5dcf..789cc0e3c10da 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -341,20 +341,15 @@ static void qcom_pcie_clear_aspm_l0s(struct dw_pcie *pci) dw_pcie_dbi_ro_wr_dis(pci); } -static void qcom_pcie_set_slot_nccs(struct dw_pcie *pci) +static void qcom_pcie_clear_hpc(struct dw_pcie *pci) { u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); u32 val; dw_pcie_dbi_ro_wr_en(pci); - /* - * Qcom PCIe Root Ports do not support generating command completion - * notifications for the Hot-Plug commands. So set the NCCS field to - * avoid waiting for the completions. - */ val = readl(pci->dbi_base + offset + PCI_EXP_SLTCAP); - val |= PCI_EXP_SLTCAP_NCCS; + val &= ~PCI_EXP_SLTCAP_HPC; writel(val, pci->dbi_base + offset + PCI_EXP_SLTCAP); dw_pcie_dbi_ro_wr_dis(pci); @@ -554,7 +549,7 @@ static int qcom_pcie_post_init_2_1_0(struct qcom_pcie *pcie) writel(CFG_BRIDGE_SB_INIT, pci->dbi_base + AXI_MSTR_RESP_COMP_CTRL1); - qcom_pcie_set_slot_nccs(pcie->pci); + qcom_pcie_clear_hpc(pcie->pci); return 0; } @@ -634,7 +629,7 @@ static int qcom_pcie_post_init_1_0_0(struct qcom_pcie *pcie) writel(val, pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT); } - qcom_pcie_set_slot_nccs(pcie->pci); + qcom_pcie_clear_hpc(pcie->pci); return 0; } @@ -727,7 +722,7 @@ static int qcom_pcie_post_init_2_3_2(struct qcom_pcie *pcie) val |= EN; writel(val, pcie->parf + PARF_AXI_MSTR_WR_ADDR_HALT_V2); - qcom_pcie_set_slot_nccs(pcie->pci); + qcom_pcie_clear_hpc(pcie->pci); return 0; } @@ -1033,7 +1028,7 @@ static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie) writel(WR_NO_SNOOP_OVERRIDE_EN | RD_NO_SNOOP_OVERRIDE_EN, pcie->parf + PARF_NO_SNOOP_OVERRIDE); - qcom_pcie_set_slot_nccs(pcie->pci); + qcom_pcie_clear_hpc(pcie->pci); return 0; } diff --git a/drivers/power/reset/linkstation-poweroff.c b/drivers/power/reset/linkstation-poweroff.c index 02f5fdb8ffc4c..e56d75bfcc43f 100644 --- a/drivers/power/reset/linkstation-poweroff.c +++ b/drivers/power/reset/linkstation-poweroff.c @@ -163,10 +163,10 @@ static int __init linkstation_poweroff_init(void) dn = of_find_matching_node(NULL, ls_poweroff_of_match); if (!dn) return -ENODEV; - of_node_put(dn); match = of_match_node(ls_poweroff_of_match, dn); cfg = match->data; + of_node_put(dn); dn = of_find_node_by_name(NULL, cfg->mdio_node_name); if (!dn) diff --git a/drivers/power/sequencing/core.c b/drivers/power/sequencing/core.c index 1fcf0af7cc0bb..d5da006e1ee85 100644 --- a/drivers/power/sequencing/core.c +++ b/drivers/power/sequencing/core.c @@ -990,8 +990,9 @@ static void *pwrseq_debugfs_seq_start(struct seq_file *seq, loff_t *pos) ctx.index = *pos; /* - * We're holding the lock for the entire printout so no need to fiddle - * with device reference count. + * Hold the lock for the entire printout to prevent device removal. + * Reference counts are managed by start()/next()/stop() as required + * by the seq_file contract. */ down_read(&pwrseq_sem); @@ -999,7 +1000,7 @@ static void *pwrseq_debugfs_seq_start(struct seq_file *seq, loff_t *pos) if (!ctx.index) return NULL; - return ctx.dev; + return get_device(ctx.dev); } static void *pwrseq_debugfs_seq_next(struct seq_file *seq, void *data, @@ -1009,8 +1010,9 @@ static void *pwrseq_debugfs_seq_next(struct seq_file *seq, void *data, ++*pos; - struct device *next __free(put_device) = - bus_find_next_device(&pwrseq_bus, curr); + struct device *next = bus_find_next_device(&pwrseq_bus, curr); + + put_device(curr); return next; } @@ -1059,6 +1061,8 @@ static int pwrseq_debugfs_seq_show(struct seq_file *seq, void *data) static void pwrseq_debugfs_seq_stop(struct seq_file *seq, void *data) { + if (data) + put_device(data); up_read(&pwrseq_sem); } diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index 96fcdd2d7093c..d918cb30db9b0 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -104,6 +104,9 @@ static int rpmsg_ept_cb(struct rpmsg_device *rpdev, void *buf, int len, struct rpmsg_eptdev *eptdev = priv; struct sk_buff *skb; + if (!eptdev) + return 0; + skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -124,6 +127,9 @@ static int rpmsg_ept_flow_cb(struct rpmsg_device *rpdev, void *priv, bool enable { struct rpmsg_eptdev *eptdev = priv; + if (!eptdev) + return 0; + eptdev->remote_flow_restricted = enable; eptdev->remote_flow_updated = true; @@ -490,6 +496,7 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) struct rpmsg_channel_info chinfo; struct rpmsg_eptdev *eptdev; struct device *dev = &rpdev->dev; + int ret; memcpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE); chinfo.src = rpdev->src; @@ -502,13 +509,17 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) /* Set the default_ept to the rpmsg device endpoint */ eptdev->default_ept = rpdev->ept; + ret = rpmsg_chrdev_eptdev_add(eptdev, chinfo); + + if (ret) + return ret; /* * The rpmsg_ept_cb uses *priv parameter to get its rpmsg_eptdev context. - * Storedit in default_ept *priv field. + * Stored it in default_ept *priv field. */ eptdev->default_ept->priv = eptdev; - return rpmsg_chrdev_eptdev_add(eptdev, chinfo); + return 0; } static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 4103178725e39..7a00083635439 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -846,8 +846,10 @@ static int dw8250_probe(struct platform_device *pdev) */ if (data->clk) { err = clk_notifier_register(data->clk, &data->clk_notifier); - if (err) + if (err) { + serial8250_unregister_port(data->data.line); return dev_err_probe(dev, err, "Failed to set the clock notifier\n"); + } queue_work(system_unbound_wq, &data->clk_work); } diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index eff757ebbed14..30a2c0d47e5c8 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -734,6 +734,18 @@ int fb_new_modelist(struct fb_info *info) if (list_empty(&info->modelist)) return 1; + /* + * The new modelist may not contain the current mode (info->var), and + * fbcon_new_modelist() below only re-points consoles mapped to this + * framebuffer. Add the current mode here so info->var keeps a match + * even when fbcon is unbound. + */ + if (!fb_match_mode(&info->var, &info->modelist)) { + fb_var_to_videomode(&mode, &info->var); + if (fb_add_videomode(&mode, &info->modelist)) + return 1; + } + fbcon_new_modelist(info); return 0; diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index baa2bae0fb5b3..fe8bd33e64ab1 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -11,6 +11,7 @@ #include #include "fb_internal.h" +#include "fbcon.h" static int activate(struct fb_info *fb_info, struct fb_var_screeninfo *var) { @@ -111,8 +112,15 @@ static ssize_t store_modes(struct device *device, if (fb_new_modelist(fb_info)) { fb_destroy_modelist(&fb_info->modelist); list_splice(&old_list, &fb_info->modelist); - } else + } else { + /* + * fb_display[i].mode and fb_info->mode both point into the old + * list. Clear them before it is freed. + */ + fbcon_delete_modelist(&old_list); + fb_info->mode = NULL; fb_destroy_modelist(&old_list); + } unlock_fb_info(fb_info); console_unlock(); diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index 53a610948c4a5..0b1f3c5bbcdb3 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -259,7 +259,7 @@ static const struct fb_videomode modedb[] = { FB_VMODE_DOUBLE }, /* 1920x1080 @ 60 Hz, 67.3 kHz hsync */ - { NULL, 60, 1920, 1080, 6734, 148, 88, 36, 4, 44, 5, 0, + { NULL, 60, 1920, 1080, 6734, 148, 88, 36, 4, 44, 5, FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED }, @@ -626,7 +626,7 @@ int fb_find_mode(struct fb_var_screeninfo *var, const struct fb_videomode *default_mode, unsigned int default_bpp) { - char *mode_option_buf = NULL; + char *mode_option_buf __free(kfree) = NULL; int i; /* Set up defaults */ @@ -724,7 +724,6 @@ int fb_find_mode(struct fb_var_screeninfo *var, res_specified = 1; } done: - kfree(mode_option_buf); if (cvt) { struct fb_videomode cvt_mode; int ret; diff --git a/fs/backing-file.c b/fs/backing-file.c index 15a7f80310848..e049a627d78fb 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internal.h" @@ -29,14 +30,15 @@ * returned file into a container structure that also stores the stacked * file's path, which can be retrieved using backing_file_user_path(). */ -struct file *backing_file_open(const struct path *user_path, int flags, +struct file *backing_file_open(const struct file *user_file, int flags, const struct path *real_path, const struct cred *cred) { + const struct path *user_path = &user_file->f_path; struct file *f; int error; - f = alloc_empty_backing_file(flags, cred); + f = alloc_empty_backing_file(flags, cred, user_file); if (IS_ERR(f)) return f; @@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags, } EXPORT_SYMBOL_GPL(backing_file_open); -struct file *backing_tmpfile_open(const struct path *user_path, int flags, +struct file *backing_tmpfile_open(const struct file *user_file, int flags, const struct path *real_parentpath, umode_t mode, const struct cred *cred) { struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt); + const struct path *user_path = &user_file->f_path; struct file *f; int error; - f = alloc_empty_backing_file(flags, cred); + f = alloc_empty_backing_file(flags, cred, user_file); if (IS_ERR(f)) return f; @@ -339,6 +342,12 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, vma_set_file(vma, file); old_cred = override_creds(ctx->cred); + ret = security_mmap_backing_file(vma, file, user_file); + if (ret) { + revert_creds(old_cred); + return ret; + } + ret = vfs_mmap(vma->vm_file, vma); revert_creds(old_cred); diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 7229146fe2bf6..97f2ff87220fe 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -1081,12 +1081,12 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, continue; } - brelse(bh); if (entry_type == TYPE_EXTEND) { unsigned short entry_uniname[16], unichar; if (step != DIRENT_STEP_NAME || name_len >= MAX_NAME_LENGTH) { + brelse(bh); step = DIRENT_STEP_FILE; continue; } @@ -1097,6 +1097,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, uniname += EXFAT_FILE_NAME_LEN; len = exfat_extract_uni_name(ep, entry_uniname); + brelse(bh); name_len += len; unichar = *(uniname+len); @@ -1115,6 +1116,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, continue; } + brelse(bh); if (entry_type & (TYPE_CRITICAL_SEC | TYPE_BENIGN_SEC)) { if (step == DIRENT_STEP_SECD) { diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index d4d7f329d23f4..4d735c2a7d0b7 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -46,6 +46,7 @@ static inline int f2fs_acl_count(size_t size) static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) { int i, count; + int err = -EINVAL; struct posix_acl *acl; struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); @@ -69,8 +70,11 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) for (i = 0; i < count; i++) { - if ((char *)entry > end) + if (unlikely((char *)entry + + sizeof(struct f2fs_acl_entry_short) > end)) { + err = -EFSCORRUPTED; goto fail; + } acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); @@ -85,6 +89,11 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) break; case ACL_USER: + if (unlikely((char *)entry + + sizeof(struct f2fs_acl_entry) > end)) { + err = -EFSCORRUPTED; + goto fail; + } acl->a_entries[i].e_uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); @@ -92,6 +101,11 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) sizeof(struct f2fs_acl_entry)); break; case ACL_GROUP: + if (unlikely((char *)entry + + sizeof(struct f2fs_acl_entry) > end)) { + err = -EFSCORRUPTED; + goto fail; + } acl->a_entries[i].e_gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); @@ -107,7 +121,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) return acl; fail: posix_acl_release(acl); - return ERR_PTR(-EINVAL); + return ERR_PTR(err); } static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bbe07e3a6c75a..c27509d4b6ef4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -745,6 +745,7 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) for (i = 0; i < orphan_blocks; i++) { struct folio *folio; struct f2fs_orphan_block *orphan_blk; + unsigned int entry_count; folio = f2fs_get_meta_folio(sbi, start_blk + i); if (IS_ERR(folio)) { @@ -753,7 +754,18 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) } orphan_blk = folio_address(folio); - for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { + entry_count = le32_to_cpu(orphan_blk->entry_count); + if (entry_count > F2FS_ORPHANS_PER_BLOCK) { + f2fs_err(sbi, "invalid orphan inode entry count %u", + entry_count); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_ORPHAN); + err = -EFSCORRUPTED; + f2fs_folio_put(folio, true); + goto out; + } + + for (j = 0; j < entry_count; j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); err = recover_orphan_inode(sbi, ino); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f468cba89f303..89105b22a0243 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3537,6 +3537,7 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; + bool cow_has_reserved_block = false; /* If pos is beyond the end of file, reserve a new block in COW inode */ if ((pos & PAGE_MASK) >= i_size_read(inode)) @@ -3546,9 +3547,11 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, err = __find_data_block(cow_inode, index, blk_addr); if (err) { return err; - } else if (*blk_addr != NULL_ADDR) { + } else if (__is_valid_data_blkaddr(*blk_addr)) { *use_cow = true; return 0; + } else if (*blk_addr == NEW_ADDR) { + cow_has_reserved_block = true; } if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) @@ -3561,10 +3564,13 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, reserve_block: /* Finally, we should reserve a new block in COW inode for the update */ - err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); - if (err) - return err; - inc_atomic_write_cnt(inode); + if (!cow_has_reserved_block) { + err = __reserve_data_block(cow_inode, index, blk_addr, + node_changed); + if (err) + return err; + inc_atomic_write_cnt(inode); + } if (ori_blk_addr != NULL_ADDR) *blk_addr = ori_blk_addr; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 87169fd29d897..129eed892a66f 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -119,10 +119,9 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (!__init_may_extent_tree(inode, type)) return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; @@ -645,14 +644,10 @@ static unsigned int __destroy_extent_node(struct inode *inode, while (atomic_read(&et->node_cnt)) { write_lock(&et->lock); - if (!is_inode_flag_set(inode, FI_NO_EXTENT)) - set_inode_flag(inode, FI_NO_EXTENT); node_cnt += __free_extent_tree(sbi, et, nr_shrink); write_unlock(&et->lock); } - f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); - return node_cnt; } @@ -691,12 +686,12 @@ static void __update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } - if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + prev = et->largest; dei.len = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6d42e2d28861c..271221485d66a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1895,8 +1895,15 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (f2fs_is_pinned_file(inode)) { block_t sec_blks = CAP_BLKS_PER_SEC(sbi); - block_t sec_len = roundup(map.m_len, sec_blks); + block_t sec_len; + if (map.m_lblk % sec_blks) { + map.m_lblk = rounddown(map.m_lblk, sec_blks); + map.m_len = pg_end - map.m_lblk; + if (off_end) + map.m_len++; + } + sec_len = roundup(map.m_len, sec_blks); map.m_len = sec_blks; next_alloc: f2fs_down_write(&sbi->pin_sem); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 49470f4c9362a..171cd4172025f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -555,8 +555,13 @@ static int do_read_inode(struct inode *inode) static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino) { - return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) || - ino == F2FS_COMPRESS_INO(sbi); + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + return true; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (test_opt(sbi, COMPRESS_CACHE) && ino == F2FS_COMPRESS_INO(sbi)) + return true; +#endif + return false; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ca3dad7418b26..feb152c21d2eb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1853,7 +1853,7 @@ int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, } if (!__write_node_folio(node_folio, false, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) + &wbc, false, io_type, NULL)) err = -EAGAIN; goto release_folio; out_folio: diff --git a/fs/file_table.c b/fs/file_table.c index 762f03dcbcd77..987e01da99389 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -50,6 +50,9 @@ struct backing_file { struct path user_path; freeptr_t bf_freeptr; }; +#ifdef CONFIG_SECURITY + void *security; +#endif }; #define backing_file(f) container_of(f, struct backing_file, file) @@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path) } EXPORT_SYMBOL_GPL(backing_file_set_user_path); +#ifdef CONFIG_SECURITY +void *backing_file_security(const struct file *f) +{ + return backing_file(f)->security; +} + +void backing_file_set_security(struct file *f, void *security) +{ + backing_file(f)->security = security; +} +#endif /* CONFIG_SECURITY */ + static inline void backing_file_free(struct backing_file *ff) { + security_backing_file_free(&ff->file); path_put(&ff->user_path); kmem_cache_free(bfilp_cachep, ff); } @@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) return f; } -static int init_backing_file(struct backing_file *ff) +static int init_backing_file(struct backing_file *ff, + const struct file *user_file) { memset(&ff->user_path, 0, sizeof(ff->user_path)); - return 0; + backing_file_set_security(&ff->file, NULL); + return security_backing_file_alloc(&ff->file, user_file); } /* @@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff) * This is only for kernel internal use, and the allocate file must not be * installed into file tables or such. */ -struct file *alloc_empty_backing_file(int flags, const struct cred *cred) +struct file *alloc_empty_backing_file(int flags, const struct cred *cred, + const struct file *user_file) { struct backing_file *ff; int error; @@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred) /* The f_mode flags must be set before fput(). */ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; - error = init_backing_file(ff); + error = init_backing_file(ff, user_file); if (unlikely(error)) { fput(&ff->file); return ERR_PTR(error); diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 72de97c03d0ee..f2d08ac2459b7 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id) goto out; /* Allocate backing file per fuse file to store fuse path */ - backing_file = backing_file_open(&file->f_path, file->f_flags, + backing_file = backing_file_open(file, file->f_flags, &fb->file->f_path, fb->cred); err = PTR_ERR(backing_file); if (IS_ERR(backing_file)) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 54c6f2098f01e..7b5fcd78a3a1a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -640,6 +640,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_delete_debugfs_file(sdp); gfs2_sys_fs_del(sdp); + rcu_barrier(); free_sbd(sdp); } diff --git a/fs/internal.h b/fs/internal.h index 9b2b4d1168802..51107fd515145 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -100,7 +100,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *); */ struct file *alloc_empty_file(int flags, const struct cred *cred); struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); -struct file *alloc_empty_backing_file(int flags, const struct cred *cred); +struct file *alloc_empty_backing_file(int flags, const struct cred *cred, + const struct file *user_file); void backing_file_set_user_path(struct file *f, const struct path *path); static inline void file_put_write_access(struct file *file) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4b6f18d977343..75e020a8bfd07 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -26,6 +26,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; + bool is_test = (rqstp->rq_proc == NLMPROC_TEST || + rqstp->rq_proc == NLMPROC_TEST_MSG); __be32 error = 0; /* nfsd callbacks must have been installed for this procedure */ @@ -46,15 +48,20 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (filp != NULL) { int mode = lock_to_openmode(&lock->fl); + if (is_test) + mode = O_RDWR; + lock->fl.c.flc_flags = FL_POSIX; - error = nlm_lookup_file(rqstp, &file, lock); + error = nlm_lookup_file(rqstp, &file, lock, mode); if (error) goto no_locks; *filp = file; - /* Set up the missing parts of the file_lock structure */ - lock->fl.c.flc_file = file->f_file[mode]; + if (is_test) + lock->fl.c.flc_file = nlmsvc_file_file(file); + else + lock->fl.c.flc_file = file->f_file[mode]; lock->fl.c.flc_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; lock->fl.fl_end = lock->lock_len ? diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d66e828515999..c35ffa1b4b89d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -611,7 +611,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *conflock) { int error; - int mode; __be32 ret; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", @@ -626,14 +625,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - mode = lock_to_openmode(&lock->fl); locks_init_lock(&conflock->fl); /* vfs_test_lock only uses start, end, and owner, but tests flc_file */ conflock->fl.c.flc_file = lock->fl.c.flc_file; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; conflock->fl.c.flc_owner = lock->fl.c.flc_owner; - error = vfs_test_lock(file->f_file[mode], &conflock->fl); + error = vfs_test_lock(lock->fl.c.flc_file, &conflock->fl); if (error) { /* We can't currently deal with deferred test requests */ if (error == FILE_LOCK_DEFERRED) diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 5817ef272332d..d98e8d684376b 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -55,6 +55,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; + bool is_test = (rqstp->rq_proc == NLMPROC_TEST || + rqstp->rq_proc == NLMPROC_TEST_MSG); int mode; __be32 error = 0; @@ -70,15 +72,22 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - error = cast_status(nlm_lookup_file(rqstp, &file, lock)); + mode = lock_to_openmode(&lock->fl); + + if (is_test) + mode = O_RDWR; + + error = cast_status(nlm_lookup_file(rqstp, &file, lock, mode)); if (error != 0) goto no_locks; *filp = file; /* Set up the missing parts of the file_lock structure */ - mode = lock_to_openmode(&lock->fl); lock->fl.c.flc_flags = FL_POSIX; - lock->fl.c.flc_file = file->f_file[mode]; + if (is_test) + lock->fl.c.flc_file = nlmsvc_file_file(file); + else + lock->fl.c.flc_file = file->f_file[mode]; lock->fl.c.flc_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9103896164f68..7ea204eadfcac 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -82,18 +82,35 @@ int lock_to_openmode(struct file_lock *lock) * * We have to make sure we have the right credential to open * the file. + * + * mode can be O_RDONLY(0), O_WRONLY(1) or O_RDWR(2). The latter + * means success can be achieved with EITHER O_RDONLY or O_WRONLY. + * It does NOT mean both read and write are required. */ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, struct nlm_file *file, int mode) { - struct file **fp = &file->f_file[mode]; - __be32 nfserr; + __be32 nfserr = nlm_lck_denied_nolocks; + __be32 deferred = 0; + struct file **fp; + int m; - if (*fp) - return 0; - nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode); - if (nfserr) - dprintk("lockd: open failed (error %d)\n", nfserr); + for (m = O_RDONLY ; m <= O_WRONLY ; m++) { + if (mode != O_RDWR && mode != m) + continue; + + fp = &file->f_file[m]; + if (*fp) + return 0; + nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, m); + if (!nfserr) + return 0; + if (nfserr == nlm_drop_reply) + deferred = nfserr; + } + if (deferred) + return deferred; + dprintk("lockd: open failed (error %d)\n", ntohl(nfserr)); return nfserr; } @@ -103,17 +120,15 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, */ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, - struct nlm_lock *lock) + struct nlm_lock *lock, int mode) { struct nlm_file *file; unsigned int hash; __be32 nfserr; - int mode; nlm_debug_print_fh("nlm_lookup_file", &lock->fh); hash = file_hash(&lock->fh); - mode = lock_to_openmode(&lock->fl); /* Lock file table */ mutex_lock(&nlm_file_mutex); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2aaea9c98c2cd..404e75b0444d7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1070,6 +1070,7 @@ struct nfs_server *nfs_alloc_server(void) server->io_stats = nfs_alloc_iostats(); if (!server->io_stats) { + ida_free(&s_sysfs_ids, server->s_sysfs_id); kfree(server); return NULL; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9056f05a67dc2..bb3f6c18630cb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -552,6 +552,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, if (!p) goto out_err_free; fh_count = be32_to_cpup(p); + if (fh_count == 0) { + rc = -EINVAL; + goto out_err_free; + } dss_info->fh_versions = kcalloc(fh_count, sizeof(struct nfs_fh), diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 711812d839909..fc858b5c3f66d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5303,10 +5303,9 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, do { alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err); trace_nfs4_mkdir(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); if (err) - alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), - err, - &exception)); + alias = ERR_PTR(err); } while (exception.retry); nfs4_label_release_security(label); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b3cb5ee9d821e..d3c1f49f39242 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2217,11 +2217,11 @@ pnfs_update_layout(struct inode *ino, dprintk("%s wait for layoutreturn\n", __func__); lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); if (!IS_ERR(lseg)) { - pnfs_put_layout_hdr(lo); dprintk("%s retrying\n", __func__); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); + pnfs_put_layout_hdr(lo); goto lookup_again; } trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 9976cc16b6890..37cf4020f3d5d 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -1075,14 +1075,14 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) /* r_netid */ nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ, gfp_flags); - if (unlikely(nlen < 0)) + if (unlikely(nlen <= 0)) goto out_err; /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ /* port is ".ABC.DEF", 8 chars max */ rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8, gfp_flags); - if (unlikely(rlen < 0)) + if (unlikely(rlen <= 0)) goto out_free_netid; /* replace port '.' with '-' */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 0ac538c761800..76305b86c1a95 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -131,10 +131,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) resp->status = fh_getattr(fh, &resp->stat); out: - /* argp->acl_{access,default} may have been allocated in - nfssvc_decode_setaclargs. */ - posix_acl_release(argp->acl_access); - posix_acl_release(argp->acl_default); + /* argp->acl_{access,default} are released in nfsaclsvc_release_setacl. */ return rpc_success; out_drop_lock: @@ -310,6 +307,16 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp) fh_put(&resp->fh); } +static void nfsaclsvc_release_setacl(struct svc_rqst *rqstp) +{ + struct nfsd3_setaclargs *argp = rqstp->rq_argp; + struct nfsd_attrstat *resp = rqstp->rq_resp; + + fh_put(&resp->fh); + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); +} + #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ @@ -343,7 +350,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_func = nfsacld_proc_setacl, .pc_decode = nfsaclsvc_decode_setaclargs, .pc_encode = nfssvc_encode_attrstatres, - .pc_release = nfssvc_release_attrstat, + .pc_release = nfsaclsvc_release_setacl, .pc_argsize = sizeof(struct nfsd3_setaclargs), .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 7b5433bd30197..e87731380be85 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -118,10 +118,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) out_errno: resp->status = nfserrno(error); out: - /* argp->acl_{access,default} may have been allocated in - nfs3svc_decode_setaclargs. */ - posix_acl_release(argp->acl_access); - posix_acl_release(argp->acl_default); + /* argp->acl_{access,default} are released in nfs3svc_release_setacl. */ return rpc_success; } @@ -223,6 +220,16 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } +static void nfs3svc_release_setacl(struct svc_rqst *rqstp) +{ + struct nfsd3_setaclargs *argp = rqstp->rq_argp; + struct nfsd3_attrstat *resp = rqstp->rq_resp; + + fh_put(&resp->fh); + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); +} + #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ @@ -256,7 +263,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_func = nfsd3_proc_setacl, .pc_decode = nfs3svc_decode_setaclargs, .pc_encode = nfs3svc_encode_setaclres, - .pc_release = nfs3svc_release_fhandle, + .pc_release = nfs3svc_release_setacl, .pc_argsize = sizeof(struct nfsd3_setaclargs), .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd3_attrstat), diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8dada7ef97cb1..9d876f9d98be2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1404,7 +1404,7 @@ void nfsd4_async_copy_reaper(struct nfsd_net *nn) list_for_each_safe(pos, next, &clp->async_copies) { copy = list_entry(pos, struct nfsd4_copy, copies); if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { - if (--copy->cp_ttl) { + if (!--copy->cp_ttl) { list_del_init(©->copies); list_add(©->copies, &reaplist); } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e2b9472e5c78c..3ea4b8a861631 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -800,7 +800,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (IS_ERR(name.data)) return PTR_ERR(name.data); name.len = namelen; - get_user(princhashlen, &ci->cc_princhash.cp_len); + if (get_user(princhashlen, &ci->cc_princhash.cp_len)) + return -EFAULT; if (princhashlen > 0) { princhash.data = memdup_user( &ci->cc_princhash.cp_data, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a9e95df2fdb68..b02fe9926b093 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5176,6 +5176,7 @@ find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, /* Replace unconfirmed owners without checking for replay. */ release_openowner(oo); oo = NULL; + goto retry; } if (oo) { if (new) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b1b52c816ebdf..a96029c967e6b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1871,10 +1871,11 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; + + sin->sin_exp = NULL; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; - sin->sin_exp = NULL; return nfs_ok; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e32a5fcd6ac85..2033037238780 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1264,8 +1264,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_stats_io_write_add(nn, exp, *cnt); fsnotify_modify(file); host_err = filemap_check_wb_err(file->f_mapping, since); - if (host_err < 0) + if (host_err < 0) { + commit_reset_write_verifier(nn, rqstp, host_err); goto out_nfserr; + } if (stable && fhp->fh_use_wgather) { host_err = wait_for_concurrent_writes(file); @@ -1445,6 +1447,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, nfsd_copy_write_verifier(verf, nn); err2 = filemap_check_wb_err(nf->nf_file->f_mapping, since); + if (err2 < 0) + commit_reset_write_verifier(nn, rqstp, err2); err = nfserrno(err2); break; case -EINVAL: diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 142ecb3847e59..d719e1073dbb1 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -845,6 +845,12 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, return err; } +static bool ntfs_is_reserved_lxattr(const char *name) +{ + return !strcmp(name, "$LXUID") || !strcmp(name, "$LXGID") || + !strcmp(name, "$LXMOD") || !strcmp(name, "$LXDEV"); +} + /* * ntfs_setxattr - inode_operations::setxattr */ @@ -949,6 +955,12 @@ static noinline int ntfs_setxattr(const struct xattr_handler *handler, goto out; } + /* Do not allow non privileged users to change $LXUID/$LXGID... */ + if (ntfs_is_reserved_lxattr(name) && !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; + } + /* Deal with NTFS extended attribute. */ err = ntfs_set_ea(inode, name, strlen(name), value, size, flags, 0, NULL); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e93fc842bb203..a752ea59a1675 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -205,8 +205,16 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, int resize) { unsigned int max_bits; + unsigned int max_bitmap_bits; + unsigned int max_bitmap_size; + int suballocator; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + suballocator = le64_to_cpu(di->i_blkno) != OCFS2_SB(sb)->bitmap_blkno; + max_bitmap_size = ocfs2_group_bitmap_size(sb, suballocator, + OCFS2_SB(sb)->s_feature_incompat); + max_bitmap_bits = max_bitmap_size * 8; + if (di->i_blkno != gd->bg_parent_dinode) { do_error("Group descriptor #%llu has bad parent pointer (%llu, expected %llu)\n", (unsigned long long)bh->b_blocknr, @@ -214,6 +222,20 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, (unsigned long long)le64_to_cpu(di->i_blkno)); } + if (le16_to_cpu(gd->bg_size) > max_bitmap_size) { + do_error("Group descriptor #%llu has bitmap size %u but physical max of %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_size), + max_bitmap_size); + } + + if (le16_to_cpu(gd->bg_bits) > max_bitmap_bits) { + do_error("Group descriptor #%llu has bit count %u but physical max of %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + max_bitmap_bits); + } + max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { do_error("Group descriptor #%llu has bit count of %u\n", diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index a5e9ddf3023b3..e924321b64025 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1355,7 +1355,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry, } ovl_path_upper(dentry->d_parent, &realparentpath); - realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath, + realfile = backing_tmpfile_open(file, flags, &realparentpath, mode, current_cred()); err = PTR_ERR_OR_ZERO(realfile); pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 7ab2c9daffd01..3fedfdddfa758 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file, if (!inode_owner_or_capable(real_idmap, realinode)) flags &= ~O_NOATIME; - realfile = backing_file_open(file_user_path(file), + realfile = backing_file_open(file, flags, realpath, current_cred()); } ovl_revert_creds(old_cred); diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index e3c512675c632..f5864dd1dd5f1 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1480,7 +1480,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, break; aces_size -= ace_size; - if (ace->sid.num_subauth > SID_MAX_SUB_AUTHORITIES) + if (ace->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || + ace_size < offsetof(struct smb_ace, sid) + CIFS_SID_BASE_SIZE + + sizeof(__le32) * ace->sid.num_subauth) break; if (!compare_sids(&sid, &ace->sid) || diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index df18fb4534037..a67afdbbf77af 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -767,6 +767,8 @@ void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *vm_ctx) if (!ctx) return; + atomic_dec(&ctx->mmap_changing); + VM_WARN_ON_ONCE(atomic_read(&ctx->mmap_changing) < 0); userfaultfd_ctx_put(ctx); } diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h index 36b89a933310f..01e42ee5f4099 100644 --- a/include/keys/request_key_auth-type.h +++ b/include/keys/request_key_auth-type.h @@ -9,12 +9,14 @@ #define _KEYS_REQUEST_KEY_AUTH_TYPE_H #include +#include /* * Authorisation record for request_key(). */ struct request_key_auth { struct rcu_head rcu; + refcount_t usage; struct key *target_key; struct key *dest_keyring; const struct cred *cred; diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 1476a6ed1bfd7..c939cd222730c 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -18,10 +18,10 @@ struct backing_file_ctx { void (*end_write)(struct kiocb *iocb, ssize_t); }; -struct file *backing_file_open(const struct path *user_path, int flags, +struct file *backing_file_open(const struct file *user_file, int flags, const struct path *real_path, const struct cred *cred); -struct file *backing_tmpfile_open(const struct path *user_path, int flags, +struct file *backing_tmpfile_open(const struct file *user_file, int flags, const struct path *real_parentpath, umode_t mode, const struct cred *cred); ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59e54550a053f..47b06e53fa2cb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1187,16 +1187,12 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } -/* - * tsk == current here - */ -static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +static __always_inline void blk_plug_invalidate_ts(void) { - struct blk_plug *plug = tsk->plug; - - if (plug) - plug->cur_ktime = 0; - current->flags &= ~PF_BLOCK_TS; + if (unlikely(current->flags & PF_BLOCK_TS)) { + current->plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; + } } int blkdev_issue_flush(struct block_device *bdev); @@ -1222,7 +1218,7 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +static inline void blk_plug_invalidate_ts(void) { } diff --git a/include/linux/err.h b/include/linux/err.h index 1d60aa86db53b..281115ff04252 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -36,7 +36,7 @@ * * Return: A pointer with @error encoded within its value. */ -static inline void * __must_check ERR_PTR(long error) +static __always_inline void * __must_check ERR_PTR(long error) { return (void *) error; } @@ -52,7 +52,7 @@ static inline void * __must_check ERR_PTR(long error) * @ptr: An error pointer. * Return: The error code within @ptr. */ -static inline long __must_check PTR_ERR(__force const void *ptr) +static __always_inline long __must_check PTR_ERR(__force const void *ptr) { return (long) ptr; } @@ -65,7 +65,7 @@ static inline long __must_check PTR_ERR(__force const void *ptr) * @ptr: The pointer to check. * Return: true if @ptr is an error pointer, false otherwise. */ -static inline bool __must_check IS_ERR(__force const void *ptr) +static __always_inline bool __must_check IS_ERR(__force const void *ptr) { return IS_ERR_VALUE((unsigned long)ptr); } @@ -79,7 +79,7 @@ static inline bool __must_check IS_ERR(__force const void *ptr) * * Like IS_ERR(), but also returns true for a null pointer. */ -static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) +static __always_inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) { return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); } @@ -91,7 +91,7 @@ static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ -static inline void * __must_check ERR_CAST(__force const void *ptr) +static __always_inline void * __must_check ERR_CAST(__force const void *ptr) { /* cast away the const */ return (void *) ptr; @@ -114,7 +114,7 @@ static inline void * __must_check ERR_CAST(__force const void *ptr) * * Return: The error code within @ptr if it is an error pointer; 0 otherwise. */ -static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +static __always_inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) { if (IS_ERR(ptr)) return PTR_ERR(ptr); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index dc41722fcc9dd..81c309b27467e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -104,6 +104,7 @@ enum f2fs_error { ERROR_CORRUPTED_XATTR, ERROR_INVALID_NODE_REFERENCE, ERROR_INCONSISTENT_NAT, + ERROR_INCONSISTENT_ORPHAN, ERROR_MAX, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 014cb04eefbe6..f3e798184a58e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2890,6 +2890,19 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); const struct path *backing_file_user_path(const struct file *f); +#ifdef CONFIG_SECURITY +void *backing_file_security(const struct file *f); +void backing_file_set_security(struct file *f, void *security); +#else +static inline void *backing_file_security(const struct file *f) +{ + return NULL; +} +static inline void backing_file_set_security(struct file *f, void *security) +{ +} +#endif /* CONFIG_SECURITY */ + /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file * stored in ->vm_file is a backing file whose f_inode is on the underlying diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 398e5695dc076..3c4e2401de072 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1793,6 +1793,11 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); +static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) +{ + return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages; +} + /* * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. @@ -1803,7 +1808,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) if (!slot) return NULL; - if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) + if (is_gfn_in_memslot(slot, gfn)) return slot; else return NULL; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c8f0f9458f2cc..d9930fc43ca54 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -293,7 +293,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); * File handling for the server personality */ __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, - struct nlm_lock *); + struct nlm_lock *, int); void nlm_release_file(struct nlm_file *); void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 382c56a97bba1..584db296e43b2 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -94,7 +94,7 @@ struct common_audit_data { #endif char *kmod_name; struct lsm_ioctlop_audit *op; - struct file *file; + const struct file *file; struct lsm_ibpkey_audit *ibpkey; struct lsm_ibendport_audit *ibendport; int reason; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 8c42b4bde09c0..b4958167e3819 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) +LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file, + const struct file *user_file) +LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, @@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) +LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma, + struct file *backing_file, struct file *user_file) LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 79ec5a2bdcca7..ea4b0f5ca7f0f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -104,6 +104,7 @@ struct security_hook_list { struct lsm_blob_sizes { int lbs_cred; int lbs_file; + int lbs_backing_file; int lbs_ib; int lbs_inode; int lbs_sock; diff --git a/include/linux/security.h b/include/linux/security.h index b64598e5d65d7..e540253624268 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -473,11 +473,17 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_release(struct file *file); void security_file_free(struct file *file); +int security_backing_file_alloc(struct file *backing_file, + const struct file *user_file); +void security_backing_file_free(struct file *backing_file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int security_file_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); +int security_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1142,6 +1148,15 @@ static inline void security_file_release(struct file *file) static inline void security_file_free(struct file *file) { } +static inline int security_backing_file_alloc(struct file *backing_file, + const struct file *user_file) +{ + return 0; +} + +static inline void security_backing_file_free(struct file *backing_file) +{ } + static inline int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1161,6 +1176,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot, return 0; } +static inline int security_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file) +{ + return 0; +} + static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 829b281d6c9c2..4ee7e4680a6e1 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -4,6 +4,7 @@ #ifndef _LINUX_SKMSG_H #define _LINUX_SKMSG_H +#include #include #include #include @@ -199,11 +200,14 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, int which, u32 size) { dst->sg.data[which] = src->sg.data[which]; + __assign_bit(which, dst->sg.copy, test_bit(which, src->sg.copy)); dst->sg.data[which].length = size; dst->sg.size += size; src->sg.size -= size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; + if (!src->sg.data[which].length) + __clear_bit(which, src->sg.copy); } static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) @@ -273,16 +277,19 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { - if (copy_state) - __set_bit(i, msg->sg.copy); - else - __clear_bit(i, msg->sg.copy); + __assign_bit(i, msg->sg.copy, copy_state); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; } while (1); } +static inline void sk_msg_sg_copy_assign(struct sk_msg *dst, u32 dst_i, + const struct sk_msg *src, u32 src_i) +{ + __assign_bit(dst_i, dst->sg.copy, test_bit(src_i, src->sg.copy)); +} + static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start) { sk_msg_sg_copy(msg, start, true); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ec65a8cebb992..2bff41aacc987 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -256,6 +256,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, struct netlink_ext_ack *exterr); struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); +bool rtnl_dev_link_net_capable(const struct net_device *dev, + const struct net *link_net); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 248f517d66d04..159e29e809362 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1940,7 +1940,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, kfree(ctx.cur_val); if (ret == 1 && ctx.new_updated) { - kfree(*buf); + kvfree(*buf); *buf = ctx.new_val; *pcount = ctx.new_len; } else { diff --git a/kernel/fork.c b/kernel/fork.c index 1215d3f52c6d2..8b1238d692916 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2230,6 +2230,7 @@ __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_BLOCK p->plug = NULL; + p->flags &= ~PF_BLOCK_TS; #endif futex_init_task(p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 46fc94f2338e8..d089b1f211556 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5206,6 +5206,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) */ kmap_local_sched_in(); + /* + * Any cached block-layer timestamp (plug->cur_ktime) is stale now, + * invalidate it. + */ + blk_plug_invalidate_ts(); + fire_sched_in_preempt_notifiers(current); /* * When switching through a kernel thread, the loop in @@ -7000,12 +7006,10 @@ static inline void sched_submit_work(struct task_struct *tsk) static void sched_update_worker(struct task_struct *tsk) { - if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER | PF_BLOCK_TS)) { - if (tsk->flags & PF_BLOCK_TS) - blk_plug_invalidate_ts(tsk); + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { if (tsk->flags & PF_WQ_WORKER) wq_worker_running(tsk); - else if (tsk->flags & PF_IO_WORKER) + else io_wq_worker_running(tsk); } } diff --git a/net/9p/client.c b/net/9p/client.c index 5c1ca57ccd285..9c9d249dabaea 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1215,7 +1215,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, clunk_fid: kfree(wqids); - p9_fid_put(fid); + if (fid != oldfid) + p9_fid_put(fid); fid = NULL; error: diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b8b1b997960a9..6e79f69c2fede 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -311,14 +311,23 @@ batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, const struct batadv_ogm_packet *ogm_packet) { int next_buff_pos = 0; + u16 tvlv_len; /* check if there is enough space for the header */ next_buff_pos += buff_pos + sizeof(*ogm_packet); if (next_buff_pos > packet_len) return false; + tvlv_len = ntohs(ogm_packet->tvlv_len); + + /* the fields of an aggregated OGM are accessed assuming (at least) + * 2-byte alignment, so a following OGM must start at an even offset. + */ + if (tvlv_len & 1) + return false; + /* check if there is enough space for the optional TVLV */ - next_buff_pos += ntohs(ogm_packet->tvlv_len); + next_buff_pos += tvlv_len; return next_buff_pos <= packet_len; } diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index de94447142642..17d2a1ccdce67 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -817,6 +817,7 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) hard_iface->bat_v.aggr_len = 0; skb_queue_head_init(&hard_iface->bat_v.aggr_list); + hard_iface->bat_v.aggr_list_enabled = false; INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq, batadv_v_ogm_aggr_work); } diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index d66ca77b1aaa3..1f9b2d2b4831c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -252,11 +252,18 @@ static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv, } spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); + if (!hard_iface->bat_v.aggr_list_enabled) { + kfree_skb(skb); + goto unlock; + } + if (!batadv_v_ogm_queue_left(skb, hard_iface)) batadv_v_ogm_aggr_send(bat_priv, hard_iface); hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); + +unlock: spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); } @@ -417,6 +424,10 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); + spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); + hard_iface->bat_v.aggr_list_enabled = true; + spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); + batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); @@ -432,6 +443,7 @@ void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); + hard_iface->bat_v.aggr_list_enabled = false; batadv_v_ogm_aggr_list_free(hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); } @@ -837,14 +849,23 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, const struct batadv_ogm2_packet *ogm2_packet) { int next_buff_pos = 0; + u16 tvlv_len; /* check if there is enough space for the header */ next_buff_pos += buff_pos + sizeof(*ogm2_packet); if (next_buff_pos > packet_len) return false; + tvlv_len = ntohs(ogm2_packet->tvlv_len); + + /* the fields of an aggregated OGMv2 are accessed assuming (at least) + * 2-byte alignment, so a following OGMv2 must start at an even offset. + */ + if (tvlv_len & 1) + return false; + /* check if there is enough space for the optional TVLV */ - next_buff_pos += ntohs(ogm2_packet->tvlv_len); + next_buff_pos += tvlv_len; return next_buff_pos <= packet_len; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 3072f94275ac6..2c6e2b0d1ded4 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -513,7 +513,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, return NULL; entry->vid = vid; - entry->lasttime = jiffies; + WRITE_ONCE(entry->lasttime, jiffies); entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; spin_lock_init(&entry->crc_lock); @@ -581,7 +581,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, if (unlikely(!backbone_gw)) return; - backbone_gw->lasttime = jiffies; + WRITE_ONCE(backbone_gw->lasttime, jiffies); batadv_backbone_gw_put(backbone_gw); } @@ -715,7 +715,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, ether_addr_copy(claim->addr, mac); spin_lock_init(&claim->backbone_lock); claim->vid = vid; - claim->lasttime = jiffies; + WRITE_ONCE(claim->lasttime, jiffies); kref_get(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; kref_init(&claim->refcount); @@ -737,7 +737,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, return; } } else { - claim->lasttime = jiffies; + WRITE_ONCE(claim->lasttime, jiffies); if (claim->backbone_gw == backbone_gw) /* no need to register a new backbone */ goto claim_free_ref; @@ -770,7 +770,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, spin_lock_bh(&backbone_gw->crc_lock); backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); spin_unlock_bh(&backbone_gw->crc_lock); - backbone_gw->lasttime = jiffies; + WRITE_ONCE(backbone_gw->lasttime, jiffies); claim_free_ref: batadv_claim_put(claim); @@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, return true; /* handle as ANNOUNCE frame */ - backbone_gw->lasttime = jiffies; + WRITE_ONCE(backbone_gw->lasttime, jiffies); crc = ntohs(*((__force __be16 *)(&an_addr[4]))); batadv_dbg(BATADV_DBG_BLA, bat_priv, @@ -1254,7 +1254,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) head, hash_entry) { if (now) goto purge_now; - if (!batadv_has_timed_out(backbone_gw->lasttime, + if (!batadv_has_timed_out(READ_ONCE(backbone_gw->lasttime), BATADV_BLA_BACKBONE_TIMEOUT)) continue; @@ -1335,7 +1335,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, primary_if->net_dev->dev_addr)) goto skip; - if (!batadv_has_timed_out(claim->lasttime, + if (!batadv_has_timed_out(READ_ONCE(claim->lasttime), BATADV_BLA_CLAIM_TIMEOUT)) goto skip; @@ -1495,7 +1495,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) eth_random_addr(bat_priv->bla.loopdetect_addr); bat_priv->bla.loopdetect_addr[0] = 0xba; bat_priv->bla.loopdetect_addr[1] = 0xbe; - bat_priv->bla.loopdetect_lasttime = jiffies; + WRITE_ONCE(bat_priv->bla.loopdetect_lasttime, jiffies); atomic_set(&bat_priv->bla.loopdetect_next, BATADV_BLA_LOOPDETECT_PERIODS); @@ -1516,7 +1516,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) primary_if->net_dev->dev_addr)) continue; - backbone_gw->lasttime = jiffies; + WRITE_ONCE(backbone_gw->lasttime, jiffies); batadv_bla_send_announce(bat_priv, backbone_gw); if (send_loopdetect) @@ -1934,7 +1934,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, /* If the packet came too late, don't forward it on the mesh * but don't consider that as loop. It might be a coincidence. */ - if (batadv_has_timed_out(bat_priv->bla.loopdetect_lasttime, + if (batadv_has_timed_out(READ_ONCE(bat_priv->bla.loopdetect_lasttime), BATADV_BLA_LOOPDETECT_TIMEOUT)) return true; @@ -2049,7 +2049,7 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (own_claim) { /* ... allow it in any case */ - claim->lasttime = jiffies; + WRITE_ONCE(claim->lasttime, jiffies); goto allow; } @@ -2151,7 +2151,7 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* if yes, the client has roamed and we have * to unclaim it. */ - if (batadv_has_timed_out(claim->lasttime, 100)) { + if (batadv_has_timed_out(READ_ONCE(claim->lasttime), 100)) { /* only unclaim if the last claim entry is * older than 100 ms to make sure we really * have a roaming client here. @@ -2396,7 +2396,7 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, backbone_crc = backbone_gw->crc; spin_unlock_bh(&backbone_gw->crc_lock); - msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); + msecs = jiffies_to_msecs(jiffies - READ_ONCE(backbone_gw->lasttime)); if (is_own) if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 031c295fff1b1..860db505e8697 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -214,10 +214,13 @@ static void batadv_dat_purge(struct work_struct *work) */ static bool batadv_compare_dat(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct batadv_dat_entry, - hash_entry); + const struct batadv_dat_entry *entry1; + const struct batadv_dat_entry *entry2; - return memcmp(data1, data2, sizeof(__be32)) == 0; + entry1 = container_of(node, struct batadv_dat_entry, hash_entry); + entry2 = data2; + + return entry1->ip == entry2->ip && entry1->vid == entry2->vid; } /** @@ -344,6 +347,9 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, if (dat_entry->ip != ip) continue; + if (dat_entry->vid != vid) + continue; + if (!kref_get_unless_zero(&dat_entry->refcount)) continue; diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 31395281692cb..4779741e7273e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -384,6 +384,8 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, * @skb: skb to forward * @recv_if: interface that the skb is received on * @orig_node_src: originator that the skb is received from + * @rx_result: set to NET_RX_SUCCESS when the fragment was forwarded and + * NET_RX_DROP when it was dropped; only valid when true is returned * * Look up the next-hop of the fragments payload and check if the merged packet * will exceed the MTU towards the next-hop. If so, the fragment is forwarded @@ -393,7 +395,8 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, */ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_hard_iface *recv_if, - struct batadv_orig_node *orig_node_src) + struct batadv_orig_node *orig_node_src, + int *rx_result) { struct batadv_priv *bat_priv = netdev_priv(recv_if->mesh_iface); struct batadv_neigh_node *neigh_node = NULL; @@ -412,12 +415,29 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, */ total_size = ntohs(packet->total_size); if (total_size > neigh_node->if_incoming->net_dev->mtu) { + if (packet->ttl < 2) { + kfree_skb(skb); + *rx_result = NET_RX_DROP; + ret = true; + goto out; + } + + if (skb_cow(skb, ETH_HLEN) < 0) { + kfree_skb(skb); + *rx_result = NET_RX_DROP; + ret = true; + goto out; + } + + packet = (struct batadv_frag_packet *)skb->data; + batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, skb->len + ETH_HLEN); packet->ttl--; batadv_send_unicast_skb(skb, neigh_node); + *rx_result = NET_RX_SUCCESS; ret = true; } diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index dbf0871f87030..51e281027ab63 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -19,7 +19,8 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig, bool (*check_cb)(struct batadv_frag_table_entry *)); bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_hard_iface *recv_if, - struct batadv_orig_node *orig_node_src); + struct batadv_orig_node *orig_node_src, + int *rx_result); bool batadv_frag_skb_buffer(struct sk_buff **skb, struct batadv_orig_node *orig_node); int batadv_frag_send_packet(struct sk_buff *skb, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1c488049d5546..39b1ed813497d 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -786,30 +786,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, return ret; } -/** - * batadv_hardif_cnt() - get number of interfaces enslaved to mesh interface - * @mesh_iface: mesh interface to check - * - * This function is only using RCU for locking - the result can therefore be - * off when another function is modifying the list at the same time. The - * caller can use the rtnl_lock to make sure that the count is accurate. - * - * Return: number of connected/enslaved hard interfaces - */ -static size_t batadv_hardif_cnt(struct net_device *mesh_iface) -{ - struct batadv_hard_iface *hard_iface; - struct list_head *iter; - size_t count = 0; - - rcu_read_lock(); - netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) - count++; - rcu_read_unlock(); - - return count; -} - /** * batadv_hardif_disable_interface() - Remove hard interface from mesh interface * @hard_iface: hard interface to be removed @@ -850,8 +826,8 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->mesh_iface); batadv_hardif_recalc_extra_skbroom(hard_iface->mesh_iface); - /* nobody uses this interface anymore */ - if (batadv_hardif_cnt(hard_iface->mesh_iface) <= 1) + /* nobody uses this mesh interface anymore */ + if (list_empty(&hard_iface->mesh_iface->adj_list.lower)) batadv_gw_check_client_stop(bat_priv); hard_iface->mesh_iface = NULL; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 78c651f634cd3..1d144d8cc0928 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -917,9 +917,15 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, #ifdef CONFIG_BATMAN_ADV_BATMAN_V if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) { + u32 elp_interval; + attr = info->attrs[BATADV_ATTR_ELP_INTERVAL]; + elp_interval = nla_get_u32(attr); + + elp_interval = min_t(u32, elp_interval, INT_MAX); + elp_interval = max_t(u32, elp_interval, BATADV_JITTER); - atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr)); + atomic_set(&hard_iface->bat_v.elp_interval, elp_interval); } if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) { diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 12c16f81cc51d..41951c7a1c50b 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -8,6 +8,7 @@ #include "main.h" #include +#include #include #include #include @@ -204,6 +205,59 @@ bool batadv_check_management_packet(struct sk_buff *skb, return true; } +/** + * batadv_skb_decrement_ttl() - decrement ttl in a batman-adv header, csum-safe + * @skb: the received packet with @skb->data pointing to the batman-adv header + * + * Supports the following packet types, all of which carry the TTL at offset 2: + * + * - batadv_ogm_packet + * - batadv_ogm2_packet + * - batadv_icmp_header + * - batadv_icmp_packet + * - batadv_icmp_tp_packet + * - batadv_icmp_packet_rr + * - batadv_unicast_packet + * - batadv_frag_packet + * - batadv_bcast_packet + * - batadv_mcast_packet + * - batadv_coded_packet + * - batadv_unicast_tvlv_packet + * + * Return: true if the packet may be forwarded (ttl decremented), + * false if it must be dropped (ttl would expire) + */ +static bool batadv_skb_decrement_ttl(struct sk_buff *skb) +{ + static const size_t ttl_offset = 2; + u8 *ttl_pos; + + BUILD_BUG_ON(offsetof(struct batadv_ogm_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_ogm2_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_icmp_header, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_icmp_tp_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_frag_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_bcast_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_mcast_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_coded_packet, ttl) != ttl_offset); + BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, ttl) != ttl_offset); + + ttl_pos = skb->data + ttl_offset; + + /* would expire on this hop -> drop, leave header + csum untouched */ + if (*ttl_pos < 2) + return false; + + skb_postpull_rcsum(skb, ttl_pos, 1); + (*ttl_pos)--; + skb_postpush_rcsum(skb, ttl_pos, 1); + + return true; +} + /** * batadv_recv_my_icmp_packet() - receive an icmp packet locally * @bat_priv: the bat priv with all the mesh interface information @@ -1114,10 +1168,9 @@ int batadv_recv_frag_packet(struct sk_buff *skb, /* Route the fragment if it is not for us and too big to be merged. */ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && - batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { + batadv_frag_skb_fwd(skb, recv_if, orig_node_src, &ret)) { /* skb was consumed */ skb = NULL; - ret = NET_RX_SUCCESS; goto put_orig_node; } @@ -1191,7 +1244,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto free_skb; - if (bcast_packet->ttl-- < 2) + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, ETH_HLEN) < 0) + goto free_skb; + + bcast_packet = (struct batadv_bcast_packet *)skb->data; + + if (!batadv_skb_decrement_ttl(skb)) goto free_skb; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); @@ -1298,7 +1357,7 @@ int batadv_recv_mcast_packet(struct sk_buff *skb, goto free_skb; mcast_packet = (struct batadv_mcast_packet *)skb->data; - if (mcast_packet->ttl-- < 2) + if (!batadv_skb_decrement_ttl(skb)) goto free_skb; tvlv_buff = (unsigned char *)(skb->data + hdr_size); @@ -1307,6 +1366,12 @@ int batadv_recv_mcast_packet(struct sk_buff *skb, if (tvlv_buff_len > skb->len - hdr_size) goto free_skb; + /* the fields of an multicast payload are accessed assuming (at least) + * 2-byte alignment, so a following packet must start at an even offset. + */ + if (tvlv_buff_len & 1) + goto free_skb; + ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb, tvlv_buff, tvlv_buff_len); if (ret >= 0) { diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index b1629e0ac8268..02af19aaaff29 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -87,6 +87,11 @@ #define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \ sizeof(struct batadv_unicast_packet)) +/** + * BATADV_TP_MAX_UNACKED - maximum number of packets a receiver didn't yet ack + */ +#define BATADV_TP_MAX_UNACKED 100 + static u8 batadv_tp_prerandom[4096] __read_mostly; /** @@ -154,9 +159,12 @@ static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss) return; } + /* prevent overflow in (mss * mss) << 3 */ + mss = min_t(u32, mss, (1U << 14) - 1); + /* increment CWND at least of 1 (section 3.1 of RFC5681) */ tp_vars->dec_cwnd += max_t(u32, 1U << 3, - ((mss * mss) << 6) / (tp_vars->cwnd << 3)); + ((mss * mss) << 3) / tp_vars->cwnd); if (tp_vars->dec_cwnd < (mss << 3)) { spin_unlock_bh(&tp_vars->cwnd_lock); return; @@ -730,7 +738,7 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, if (atomic_read(&tp_vars->dup_acks) != 3) goto out; - if (recv_ack >= tp_vars->recover) + if (!batadv_seq_before(tp_vars->recover, recv_ack)) goto out; /* if this is the third duplicate ACK do Fast Retransmit */ @@ -817,10 +825,15 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars, size_t payload_len) { + u32 last_sent = READ_ONCE(tp_vars->last_sent); u32 win_left, win_limit; win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd; - win_left = win_limit - tp_vars->last_sent; + + if (batadv_seq_before(last_sent, win_limit)) + win_left = win_limit - last_sent; + else + win_left = 0; return win_left >= payload_len; } @@ -1045,6 +1058,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars->icmp_uid = icmp_uid; tp_vars->last_sent = BATADV_TP_FIRST_SEQ; + atomic_set(&tp_vars->dup_acks, 0); atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ); tp_vars->fast_recovery = false; tp_vars->recover = BATADV_TP_FIRST_SEQ; @@ -1054,6 +1068,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, * mesh_interface, hence its MTU */ tp_vars->cwnd = BATADV_TP_PLEN * 3; + tp_vars->dec_cwnd = 0; + /* at the beginning initialise the SS threshold to the biggest possible * window size, hence the AWND size */ @@ -1085,21 +1101,21 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars->prerandom_offset = 0; spin_lock_init(&tp_vars->prerandom_lock); - kref_get(&tp_vars->refcount); - hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); - spin_unlock_bh(&bat_priv->tp_list_lock); - tp_vars->test_length = test_length; if (!tp_vars->test_length) tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH; + /* init work item for finished tp tests */ + INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish); + + kref_get(&tp_vars->refcount); + hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: starting throughput meter towards %pM (length=%ums)\n", dst, test_length); - /* init work item for finished tp tests */ - INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish); - /* start tp kthread. This way the write() call issued from userspace can * happily return and avoid to block */ @@ -1167,7 +1183,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) bat_priv = tp_vars->bat_priv; /* if there is recent activity rearm the timer */ - if (!batadv_has_timed_out(tp_vars->last_recv_time, + if (!batadv_has_timed_out(READ_ONCE(tp_vars->last_recv_time), BATADV_TP_RECV_TIMEOUT)) { /* reset the receiver shutdown timer */ batadv_tp_reset_receiver_timer(tp_vars); @@ -1184,6 +1200,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { list_del(&un->list); kfree(un); + tp_vars->unacked_count--; } spin_unlock_bh(&tp_vars->unacked_lock); @@ -1267,7 +1284,8 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /** * batadv_tp_handle_out_of_order() - store an out of order packet * @tp_vars: the private data of the current TP meter session - * @skb: the buffer containing the received packet + * @seqno: sequence number of new received packet + * @payload_len: length of the received packet * * Store the out of order packet in the unacked list for late processing. This * packets are kept in this list so that they can be ACKed at once as soon as @@ -1276,28 +1294,24 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, * Return: true if the packed has been successfully processed, false otherwise */ static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars, - const struct sk_buff *skb) + u32 seqno, u32 payload_len) + __must_hold(&tp_vars->unacked_lock) { - const struct batadv_icmp_tp_packet *icmp; struct batadv_tp_unacked *un, *new; - u32 payload_len; bool added = false; new = kmalloc(sizeof(*new), GFP_ATOMIC); if (unlikely(!new)) return false; - icmp = (struct batadv_icmp_tp_packet *)skb->data; - - new->seqno = ntohl(icmp->seqno); - payload_len = skb->len - sizeof(struct batadv_unicast_packet); + new->seqno = seqno; new->len = payload_len; - spin_lock_bh(&tp_vars->unacked_lock); /* if the list is empty immediately attach this new object */ if (list_empty(&tp_vars->unacked_list)) { list_add(&new->list, &tp_vars->unacked_list); - goto out; + tp_vars->unacked_count++; + return true; } /* otherwise loop over the list and either drop the packet because this @@ -1325,17 +1339,26 @@ static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars, * one is attached _after_ it. In this way the list is kept in * ascending order */ - list_add_tail(&new->list, &un->list); + list_add(&new->list, &un->list); added = true; + tp_vars->unacked_count++; break; } /* received packet with smallest seqno out of order; add it to front */ - if (!added) + if (!added) { list_add(&new->list, &tp_vars->unacked_list); + tp_vars->unacked_count++; + } -out: - spin_unlock_bh(&tp_vars->unacked_lock); + /* remove the last (biggest) unacked seqno when list is too large */ + if (tp_vars->unacked_count > BATADV_TP_MAX_UNACKED) { + un = list_last_entry(&tp_vars->unacked_list, + struct batadv_tp_unacked, list); + list_del(&un->list); + kfree(un); + tp_vars->unacked_count--; + } return true; } @@ -1346,6 +1369,7 @@ static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars, * @tp_vars: the private data of the current TP meter session */ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) + __must_hold(&tp_vars->unacked_lock) { struct batadv_tp_unacked *un, *safe; u32 to_ack; @@ -1353,7 +1377,6 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) /* go through the unacked packet list and possibly ACK them as * well */ - spin_lock_bh(&tp_vars->unacked_lock); list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { /* the list is ordered, therefore it is possible to stop as soon * there is a gap between the last acked seqno and the seqno of @@ -1369,8 +1392,8 @@ static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) list_del(&un->list); kfree(un); + tp_vars->unacked_count--; } - spin_unlock_bh(&tp_vars->unacked_lock); } /** @@ -1392,8 +1415,10 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, icmp->session, BATADV_TP_RECEIVER); - if (tp_vars) + if (tp_vars) { + WRITE_ONCE(tp_vars->last_recv_time, jiffies); goto out_unlock; + } if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, @@ -1417,12 +1442,15 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, spin_lock_init(&tp_vars->unacked_lock); INIT_LIST_HEAD(&tp_vars->unacked_list); + tp_vars->unacked_count = 0; kref_get(&tp_vars->refcount); - hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); + timer_setup(&tp_vars->timer, batadv_tp_receiver_shutdown, 0); + + WRITE_ONCE(tp_vars->last_recv_time, jiffies); kref_get(&tp_vars->refcount); - timer_setup(&tp_vars->timer, batadv_tp_receiver_shutdown, 0); + hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); batadv_tp_reset_receiver_timer(tp_vars); @@ -1444,7 +1472,8 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, { const struct batadv_icmp_tp_packet *icmp; struct batadv_tp_vars *tp_vars; - size_t packet_size; + u32 payload_len; + u32 to_ack; u32 seqno; icmp = (struct batadv_icmp_tp_packet *)skb->data; @@ -1469,41 +1498,49 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, icmp->orig); goto out; } + + WRITE_ONCE(tp_vars->last_recv_time, jiffies); } - tp_vars->last_recv_time = jiffies; + spin_lock_bh(&tp_vars->unacked_lock); /* if the packet is a duplicate, it may be the case that an ACK has been * lost. Resend the ACK */ - if (batadv_seq_before(seqno, tp_vars->last_recv)) + payload_len = skb->len - sizeof(struct batadv_unicast_packet); + to_ack = seqno + payload_len; + if (batadv_seq_before(to_ack, tp_vars->last_recv)) goto send_ack; /* if the packet is out of order enqueue it */ - if (ntohl(icmp->seqno) != tp_vars->last_recv) { + if (batadv_seq_before(tp_vars->last_recv, seqno)) { /* exit immediately (and do not send any ACK) if the packet has * not been enqueued correctly */ - if (!batadv_tp_handle_out_of_order(tp_vars, skb)) + if (!batadv_tp_handle_out_of_order(tp_vars, seqno, payload_len)) { + spin_unlock_bh(&tp_vars->unacked_lock); goto out; + } /* send a duplicate ACK */ goto send_ack; } /* if everything was fine count the ACKed bytes */ - packet_size = skb->len - sizeof(struct batadv_unicast_packet); - tp_vars->last_recv += packet_size; + tp_vars->last_recv = to_ack; /* check if this ordered message filled a gap.... */ batadv_tp_ack_unordered(tp_vars); send_ack: + to_ack = tp_vars->last_recv; + spin_unlock_bh(&tp_vars->unacked_lock); + /* send the ACK. If the received packet was out of order, the ACK that * is going to be sent is a duplicate (the sender will count them and * possibly enter Fast Retransmit as soon as it has reached 3) */ - batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv, + batadv_tp_send_ack(bat_priv, icmp->orig, to_ack, icmp->timestamp, icmp->session, icmp->uid); out: batadv_tp_vars_put(tp_vars); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9f6e67771ffa8..83dfd804a143a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -446,6 +446,9 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; + if (entry->change.vid != tt_change_node->change.vid) + continue; + del_op_entry = entry->change.flags & BATADV_TT_CLIENT_DEL; if (del_op_requested != del_op_entry) { /* DEL+ADD in the same orig interval have no effect and @@ -3439,6 +3442,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) * batadv_tt_check_roam_count() - check if a client has roamed too frequently * @bat_priv: the bat priv with all the mesh interface information * @client: mac address of the roaming client + * @vid: VLAN identifier * * This function checks whether the client already reached the * maximum number of possible roaming phases. In this case the ROAMING_ADV @@ -3446,7 +3450,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) * * Return: true if the ROAMING_ADV can be sent, false otherwise */ -static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) +static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client, u16 vid) { struct batadv_tt_roam_node *tt_roam_node; bool ret = false; @@ -3459,6 +3463,9 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) if (!batadv_compare_eth(tt_roam_node->addr, client)) continue; + if (tt_roam_node->vid != vid) + continue; + if (batadv_has_timed_out(tt_roam_node->first_time, BATADV_ROAMING_MAX_TIME)) continue; @@ -3480,6 +3487,7 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) atomic_set(&tt_roam_node->counter, BATADV_ROAMING_MAX_COUNT - 1); ether_addr_copy(tt_roam_node->addr, client); + tt_roam_node->vid = vid; list_add(&tt_roam_node->list, &bat_priv->tt.roam_list); ret = true; @@ -3516,7 +3524,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, /* before going on we have to check whether the client has * already roamed to us too many times */ - if (!batadv_tt_check_roam_count(bat_priv, client)) + if (!batadv_tt_check_roam_count(bat_priv, client, vid)) goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index cde798c82dcf1..a91f1891747c0 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -398,7 +398,6 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, tvlv_handler->ogm_handler(bat_priv, orig_node, BATADV_NO_FLAGS, tvlv_value, tvlv_value_len); - tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; break; case BATADV_UNICAST_TVLV: if (!skb) @@ -430,6 +429,48 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, return NET_RX_SUCCESS; } +/** + * batadv_tvlv_containers_contain() - check if a tvlv buffer holds a container + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * @type: tvlv container type to look for + * @version: tvlv container version to look for + * + * Return: true if a container of the given type and version is present in the + * tvlv buffer, false otherwise. + */ +static bool batadv_tvlv_containers_contain(void *tvlv_value, + u16 tvlv_value_len, u8 type, + u8 version) +{ + struct batadv_tvlv_hdr *tvlv_hdr; + u16 tvlv_value_cont_len; + + while (tvlv_value_len >= sizeof(*tvlv_hdr)) { + tvlv_hdr = tvlv_value; + tvlv_value_cont_len = ntohs(tvlv_hdr->len); + tvlv_value = tvlv_hdr + 1; + tvlv_value_len -= sizeof(*tvlv_hdr); + + if (tvlv_value_cont_len > tvlv_value_len) + break; + + /* the next tvlv header is accessed assuming (at least) 2-byte + * alignment, so it must start at an even offset. + */ + if (tvlv_value_cont_len & 1) + break; + + if (tvlv_hdr->type == type && tvlv_hdr->version == version) + return true; + + tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; + tvlv_value_len -= tvlv_value_cont_len; + } + + return false; +} + /** * batadv_tvlv_containers_process() - parse the given tvlv buffer to call the * appropriate handlers @@ -449,7 +490,9 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, struct sk_buff *skb, void *tvlv_value, u16 tvlv_value_len) { + u16 tvlv_value_start_len = tvlv_value_len; struct batadv_tvlv_handler *tvlv_handler; + void *tvlv_value_start = tvlv_value; struct batadv_tvlv_hdr *tvlv_hdr; u16 tvlv_value_cont_len; u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; @@ -464,6 +507,12 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, if (tvlv_value_cont_len > tvlv_value_len) break; + /* the next tvlv header is accessed assuming (at least) 2-byte + * alignment, so it must start at an even offset. + */ + if (tvlv_value_cont_len & 1) + break; + tvlv_handler = batadv_tvlv_handler_get(bat_priv, tvlv_hdr->type, tvlv_hdr->version); @@ -487,12 +536,20 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, if (!tvlv_handler->ogm_handler) continue; - if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && - !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) - tvlv_handler->ogm_handler(bat_priv, orig_node, - cifnotfound, NULL, 0); + if (!(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)) + continue; + + /* if the corresponding container was present then the handler + * was already called from the loop above + */ + if (batadv_tvlv_containers_contain(tvlv_value_start, + tvlv_value_start_len, + tvlv_handler->type, + tvlv_handler->version)) + continue; - tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; + tvlv_handler->ogm_handler(bat_priv, orig_node, + cifnotfound, NULL, 0); } rcu_read_unlock(); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index c9bd49d23547e..ac4494f1b8e2a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -130,6 +130,12 @@ struct batadv_hard_iface_bat_v { /** @aggr_list: queue for to be aggregated OGM packets */ struct sk_buff_head aggr_list; + /** + * @aggr_list_enabled: aggr_list is active and new skbs can be + * enqueued. Protected by aggr_list.lock after initialization + */ + bool aggr_list_enabled:1; + /** @aggr_len: size of the OGM aggregate (excluding ethernet header) */ unsigned int aggr_len; @@ -1417,9 +1423,12 @@ struct batadv_tp_vars { /** @unacked_list: list of unacked packets (meta-info only) */ struct list_head unacked_list; - /** @unacked_lock: protect unacked_list */ + /** @unacked_lock: protect unacked_list + &batadv_tp_receiver.last_recv */ spinlock_t unacked_lock; + /** @unacked_count: number of unacked entries */ + size_t unacked_count; + /** @last_recv_time: time (jiffies) a msg was received */ unsigned long last_recv_time; @@ -1903,6 +1912,9 @@ struct batadv_tt_roam_node { /** @addr: mac address of the client in the roaming phase */ u8 addr[ETH_ALEN]; + /** @vid: VLAN identifier */ + u16 vid; + /** * @counter: number of allowed roaming events per client within a single * OGM interval (changes are committed with each OGM) @@ -2233,13 +2245,6 @@ enum batadv_tvlv_handler_flags { * will call this handler even if its type was not found (with no data) */ BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1), - - /** - * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the - * API marks a handler as being called, so it won't be called if the - * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set - */ - BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/core/filter.c b/net/core/filter.c index 0b61945491054..6dd9bdbef1996 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2732,11 +2732,13 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, poffset += len; sge->length = 0; put_page(sg_page(sge)); + __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); } while (i != last_sge); sg_set_page(&msg->sg.data[first_sge], page, copy, 0); + __clear_bit(first_sge, msg->sg.copy); /* To repair sg ring we need to shift entries. If we only * had a single entry though we can just replace it and @@ -2762,9 +2764,11 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, break; msg->sg.data[i] = msg->sg.data[move_from]; + sk_msg_sg_copy_assign(msg, i, msg, move_from); msg->sg.data[move_from].length = 0; msg->sg.data[move_from].page_link = 0; msg->sg.data[move_from].offset = 0; + __clear_bit(move_from, msg->sg.copy); sk_msg_iter_var_next(i); } while (1); @@ -2793,6 +2797,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, { struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; u32 new, i = 0, l = 0, space, copy = 0, offset = 0; + bool sge_copy, nsge_copy, nnsge_copy, rsge_copy = false; u8 *raw, *to, *from; struct page *page; @@ -2865,6 +2870,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_prev(i); psge = sk_msg_elem(msg, i); rsge = sk_msg_elem_cpy(msg, i); + rsge_copy = test_bit(i, msg->sg.copy); psge->length = start - offset; rsge.length -= psge->length; @@ -2889,24 +2895,32 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Shift one or two slots as needed */ sge = sk_msg_elem_cpy(msg, new); + sge_copy = test_bit(new, msg->sg.copy); sg_unmark_end(&sge); nsge = sk_msg_elem_cpy(msg, i); + nsge_copy = test_bit(i, msg->sg.copy); if (rsge.length) { sk_msg_iter_var_next(i); nnsge = sk_msg_elem_cpy(msg, i); + nnsge_copy = test_bit(i, msg->sg.copy); sk_msg_iter_next(msg, end); } while (i != msg->sg.end) { msg->sg.data[i] = sge; + __assign_bit(i, msg->sg.copy, sge_copy); sge = nsge; + sge_copy = nsge_copy; sk_msg_iter_var_next(i); if (rsge.length) { nsge = nnsge; + nsge_copy = nnsge_copy; nnsge = sk_msg_elem_cpy(msg, i); + nnsge_copy = test_bit(i, msg->sg.copy); } else { nsge = sk_msg_elem_cpy(msg, i); + nsge_copy = test_bit(i, msg->sg.copy); } } @@ -2920,6 +2934,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, get_page(sg_page(&rsge)); sk_msg_iter_var_next(new); msg->sg.data[new] = rsge; + __assign_bit(new, msg->sg.copy, rsge_copy); } sk_msg_reset_curr(msg); @@ -2947,25 +2962,33 @@ static void sk_msg_shift_left(struct sk_msg *msg, int i) prev = i; sk_msg_iter_var_next(i); msg->sg.data[prev] = msg->sg.data[i]; + sk_msg_sg_copy_assign(msg, prev, msg, i); } while (i != msg->sg.end); sk_msg_iter_prev(msg, end); + __clear_bit(msg->sg.end, msg->sg.copy); } static void sk_msg_shift_right(struct sk_msg *msg, int i) { struct scatterlist tmp, sge; + bool tmp_copy, sge_copy; sk_msg_iter_next(msg, end); sge = sk_msg_elem_cpy(msg, i); + sge_copy = test_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); tmp = sk_msg_elem_cpy(msg, i); + tmp_copy = test_bit(i, msg->sg.copy); while (i != msg->sg.end) { msg->sg.data[i] = sge; + __assign_bit(i, msg->sg.copy, sge_copy); sk_msg_iter_var_next(i); sge = tmp; + sge_copy = tmp_copy; tmp = sk_msg_elem_cpy(msg, i); + tmp_copy = test_bit(i, msg->sg.copy); } } @@ -3025,6 +3048,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); int a = start - offset; int b = sge->length - pop - a; + u32 sge_i = i; + bool sge_copy = test_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); @@ -3037,6 +3062,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sg_set_page(nsge, sg_page(sge), b, sge->offset + pop + a); + __assign_bit(i, msg->sg.copy, sge_copy); } else { struct page *page, *orig; u8 *to, *from; @@ -3053,6 +3079,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, memcpy(to, from, a); memcpy(to + a, from + a + pop, b); sg_set_page(sge, page, a + b, 0); + __clear_bit(sge_i, msg->sg.copy); put_page(orig); } pop = 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0a43c3881e3f8..4909f20ff4d63 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2414,6 +2414,14 @@ struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid) } EXPORT_SYMBOL_GPL(rtnl_get_net_ns_capable); +bool rtnl_dev_link_net_capable(const struct net_device *dev, + const struct net *link_net) +{ + return net_eq(link_net, dev_net(dev)) || + ns_capable(link_net->user_ns, CAP_NET_ADMIN); +} +EXPORT_SYMBOL_GPL(rtnl_dev_link_net_capable); + static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, bool strict_check, struct nlattr **tb, struct netlink_ext_ack *extack) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 75ea4fdb27642..c1c315ae2b22b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -66,6 +66,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, sge = &msg->sg.data[msg->sg.end]; sg_unmark_end(sge); sg_set_page(sge, pfrag->page, use, orig_offset); + __clear_bit(msg->sg.end, msg->sg.copy); get_page(pfrag->page); sk_msg_iter_next(msg, end); } @@ -186,6 +187,7 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i, sk_mem_uncharge(sk, len); put_page(sg_page(sge)); } + __clear_bit(i, msg->sg.copy); memset(sge, 0, sizeof(*sge)); return len; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 35f0baa99d409..879d37c557faf 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1456,6 +1456,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], __u32 fwmark = t->fwmark; int err; + if (!rtnl_dev_link_net_capable(dev, t->net)) + return -EPERM; + err = ipgre_newlink_encap_setup(dev, data); if (err) return err; @@ -1485,6 +1488,9 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], __u32 fwmark = t->fwmark; int err; + if (!rtnl_dev_link_net_capable(dev, t->net)) + return -EPERM; + err = ipgre_newlink_encap_setup(dev, data); if (err) return err; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7c005263262ff..7eaf35a6e24ba 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1117,8 +1117,8 @@ static int __ip_append_data(struct sock *sk, !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { - alloclen = fragheaderlen + transhdrlen; - pagedlen = datalen - transhdrlen; + alloclen = fragheaderlen + transhdrlen + fraggap; + pagedlen = datalen - transhdrlen - fraggap; } alloclen += alloc_extra; @@ -1165,9 +1165,6 @@ static int __ip_append_data(struct sock *sk, } copy = datalen - transhdrlen - fraggap - pagedlen; - /* [!] NOTE: copy will be negative if pagedlen>0 - * because then the equation reduces to -fraggap. - */ if (copy > 0 && INDIRECT_CALL_1(getfrag, ip_generic_getfrag, from, data + transhdrlen, offset, diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index aa624434b5556..46eb5b4683bb5 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1776,6 +1776,10 @@ static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, * them and we can just free all resources in RCU fashion. */ if (del_async) { + if (ao_info->current_key == key) + WRITE_ONCE(ao_info->current_key, NULL); + if (ao_info->rnext_key == key) + WRITE_ONCE(ao_info->rnext_key, NULL); atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); call_rcu(&key->rcu, tcp_ao_key_free_rcu); return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f5ca0267e7706..8f37c9cc868b0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1648,8 +1648,8 @@ static int __ip6_append_data(struct sock *sk, !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { - alloclen = fragheaderlen + transhdrlen; - pagedlen = datalen - transhdrlen; + alloclen = fragheaderlen + transhdrlen + fraggap; + pagedlen = datalen - transhdrlen - fraggap; } alloclen += alloc_extra; @@ -1664,10 +1664,7 @@ static int __ip6_append_data(struct sock *sk, fraglen = datalen + fragheaderlen; copy = datalen - transhdrlen - fraggap - pagedlen; - /* [!] NOTE: copy may be negative if pagedlen>0 - * because then the equation may reduces to -fraggap. - */ - if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) { + if (copy < 0) { err = -EINVAL; goto error; } diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index f13b07ebfb98a..09a47104b5772 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -710,6 +710,7 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) { struct ieee802154_hdr hdr; int rc, authlen, hlen; + struct sk_buff *trailer; struct mac802154_llsec_key *key; u32 frame_ctr; @@ -769,6 +770,12 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) skb->mac_len = ieee802154_hdr_push(skb, &hdr); skb_reset_mac_header(skb); + rc = skb_cow_data(skb, 0, &trailer); + if (rc < 0) { + llsec_key_put(key); + return rc; + } + rc = llsec_do_encrypt(skb, sec, &hdr, key); llsec_key_put(key); @@ -908,6 +915,13 @@ llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec, const struct ieee802154_hdr *hdr, struct mac802154_llsec_key *key, __le64 dev_addr) { + struct sk_buff *trailer; + int err; + + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) + return err; + if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr); else diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index a3f9ca28c3d53..d388fe8e477e2 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -950,12 +950,20 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, goto exit; } + /* Get net to avoid freed tipc_crypto when delete namespace */ + if (!maybe_get_net(net)) { + tipc_bearer_put(b); + rc = -ENODEV; + goto exit; + } + /* Now, do decrypt */ rc = crypto_aead_decrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); + put_net(net); exit: kfree(ctx); @@ -993,6 +1001,7 @@ static void tipc_aead_decrypt_done(void *data, int err) } tipc_bearer_put(b); + put_net(net); } static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 034f322054e53..9949ae0270811 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -623,6 +623,7 @@ static int tls_split_open_record(struct sock *sk, struct tls_rec *from, struct scatterlist *sge, *osge, *nsge; u32 orig_size = msg_opl->sg.size; struct scatterlist tmp = { }; + u32 tmp_i = 0; struct sk_msg *msg_npl; struct tls_rec *new; int ret; @@ -644,6 +645,7 @@ static int tls_split_open_record(struct sock *sk, struct tls_rec *from, if (sge->length > apply) { u32 len = sge->length - apply; + tmp_i = i; get_page(sg_page(sge)); sg_set_page(&tmp, sg_page(sge), len, sge->offset + apply); @@ -675,6 +677,7 @@ static int tls_split_open_record(struct sock *sk, struct tls_rec *from, nsge = sk_msg_elem(msg_npl, j); if (tmp.length) { memcpy(nsge, &tmp, sizeof(*nsge)); + sk_msg_sg_copy_assign(msg_npl, j, msg_opl, tmp_i); sk_msg_iter_var_next(j); nsge = sk_msg_elem(msg_npl, j); } @@ -682,6 +685,7 @@ static int tls_split_open_record(struct sock *sk, struct tls_rec *from, osge = sk_msg_elem(msg_opl, i); while (osge->length) { memcpy(nsge, osge, sizeof(*nsge)); + sk_msg_sg_copy_assign(msg_npl, j, msg_opl, i); sg_unmark_end(nsge); sk_msg_iter_var_next(i); sk_msg_iter_var_next(j); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 529b21d043d92..3986717090266 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -606,6 +606,8 @@ static void __unix_gc(struct work_struct *work) struct sk_buff_head hitlist; struct sk_buff *skb; + WRITE_ONCE(gc_in_progress, true); + spin_lock(&unix_gc_lock); if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h index e5a95dc4da1f7..b9de0fdf9ee52 100644 --- a/security/apparmor/include/policy_unpack.h +++ b/security/apparmor/include/policy_unpack.h @@ -163,6 +163,25 @@ aa_get_profile_loaddata(struct aa_loaddata *data) return data; } +/** + * aa_get_profile_loaddata_not0 - get a profile reference count if not zero + * @data: reference to get a count on + * + * Like aa_get_profile_loaddata(), but safe to call on an entry that may + * be on a list (e.g. ns->rawdata_list) where the last pcount has already + * dropped and the deferred cleanup has not yet run. + * + * Returns: pointer to reference, or %NULL if @data is NULL or its + * profile refcount has already reached zero. + */ +static inline struct aa_loaddata * +aa_get_profile_loaddata_not0(struct aa_loaddata *data) +{ + if (data && kref_get_unless_zero(&data->pcount)) + return data; + return NULL; +} + void __aa_loaddata_update(struct aa_loaddata *data, long revision); bool aa_rawdata_eq(struct aa_loaddata *l, struct aa_loaddata *r); void aa_loaddata_kref(struct kref *kref); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 3a4ef7bd3b5d0..e47696a7eacf8 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1423,7 +1423,21 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, static int apparmor_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); + int error = aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); + + if (error) + return error; + + /* TCP fast open carries connect() semantics in sendmsg(); mediate + * the implicit connect so it cannot bypass the connect permission. + */ + if ((msg->msg_flags & MSG_FASTOPEN) && msg->msg_name && + (sk_is_tcp(sock->sk) || + (sk_is_inet(sock->sk) && sock->sk->sk_type == SOCK_STREAM && + sock->sk->sk_protocol == IPPROTO_MPTCP))) + error = aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); + + return error; } static int apparmor_socket_recvmsg(struct socket *sock, diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 44c04102062f3..1fc6145ccbb8d 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -22,12 +22,14 @@ struct aa_sfs_entry aa_sfs_entry_network[] = { AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), + AA_SFS_FILE_BOOLEAN("tcp-fast-open", 1), { } }; struct aa_sfs_entry aa_sfs_entry_networkv9[] = { AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), AA_SFS_FILE_BOOLEAN("af_unix", 1), + AA_SFS_FILE_BOOLEAN("tcp-fast-open", 1), { } }; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index b92db1b2f26e2..c474a55fed22c 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -1206,8 +1206,12 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label, if (aa_rawdata_eq(rawdata_ent, udata)) { struct aa_loaddata *tmp; - tmp = aa_get_profile_loaddata(rawdata_ent); - /* check we didn't fail the race */ + /* + * Entries remain on rawdata_list with + * pcount == 0 until do_ploaddata_rmfs() + * runs; only take a live profile ref. + */ + tmp = aa_get_profile_loaddata_not0(rawdata_ent); if (tmp) { aa_put_profile_loaddata(udata); udata = tmp; diff --git a/security/keys/internal.h b/security/keys/internal.h index 2cffa6dc82557..b7b622bc36a13 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -208,6 +208,8 @@ extern struct key *request_key_auth_new(struct key *target, const void *callout_info, size_t callout_len, struct key *dest_keyring); +struct request_key_auth *request_key_auth_get(struct key *authkey); +void request_key_auth_put(struct request_key_auth *rka); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index ab927a142f515..11bedca4e1c6d 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1197,9 +1197,13 @@ static long keyctl_instantiate_key_common(key_serial_t id, if (!instkey) goto error; - rka = instkey->payload.data[0]; - if (rka->target_key->serial != id) + rka = request_key_auth_get(instkey); + if (!rka) { + ret = -EKEYREVOKED; goto error; + } + if (rka->target_key->serial != id) + goto error_put_rka; /* pull the payload in if one was supplied */ payload = NULL; @@ -1208,7 +1212,7 @@ static long keyctl_instantiate_key_common(key_serial_t id, ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) - goto error; + goto error_put_rka; ret = -EFAULT; if (!copy_from_iter_full(payload, plen, from)) @@ -1234,6 +1238,8 @@ static long keyctl_instantiate_key_common(key_serial_t id, error2: kvfree_sensitive(payload, plen); +error_put_rka: + request_key_auth_put(rka); error: return ret; } @@ -1358,15 +1364,19 @@ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, if (!instkey) goto error; - rka = instkey->payload.data[0]; - if (rka->target_key->serial != id) + rka = request_key_auth_get(instkey); + if (!rka) { + ret = -EKEYREVOKED; goto error; + } + if (rka->target_key->serial != id) + goto error_put_rka; /* find the destination keyring if present (which must also be * writable) */ ret = get_instantiation_keyring(ringid, rka, &dest_keyring); if (ret < 0) - goto error; + goto error_put_rka; /* instantiate the key and link it into a keyring */ ret = key_reject_and_link(rka->target_key, timeout, error, @@ -1379,6 +1389,8 @@ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, if (ret == 0) keyctl_change_reqkey_auth(NULL); +error_put_rka: + request_key_auth_put(rka); error: return ret; } diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c index 97bc27bbf0797..ba150ee2d4a37 100644 --- a/security/keys/keyctl_pkey.c +++ b/security/keys/keyctl_pkey.c @@ -138,28 +138,35 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par if (uparams.in_len > info.max_dec_size || uparams.out_len > info.max_enc_size) return -EINVAL; + + params->out_len = info.max_enc_size; break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; + + params->out_len = info.max_dec_size; break; case KEYCTL_PKEY_SIGN: if (uparams.in_len > info.max_data_size || uparams.out_len > info.max_sig_size) return -EINVAL; + + params->out_len = info.max_sig_size; break; case KEYCTL_PKEY_VERIFY: if (uparams.in_len > info.max_data_size || uparams.in2_len > info.max_sig_size) return -EINVAL; + + params->out_len = info.max_sig_size; break; default: BUG(); } params->in_len = uparams.in_len; - params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 8f33cd170e421..bd34317b58cc8 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -23,6 +23,7 @@ static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); static long request_key_auth_read(const struct key *, char *, size_t); +static void request_key_auth_rcu_disposal(struct rcu_head *); /* * The request-key authorisation key type definition. @@ -115,6 +116,31 @@ static void free_request_key_auth(struct request_key_auth *rka) kfree(rka); } +/* + * Take a reference to the request-key authorisation payload so callers can + * drop authkey->sem before doing operations that may sleep. + */ +struct request_key_auth *request_key_auth_get(struct key *authkey) +{ + struct request_key_auth *rka; + + down_read(&authkey->sem); + rka = dereference_key_locked(authkey); + if (rka && !test_bit(KEY_FLAG_REVOKED, &authkey->flags)) + refcount_inc(&rka->usage); + else + rka = NULL; + up_read(&authkey->sem); + + return rka; +} + +void request_key_auth_put(struct request_key_auth *rka) +{ + if (rka && refcount_dec_and_test(&rka->usage)) + call_rcu(&rka->rcu, request_key_auth_rcu_disposal); +} + /* * Dispose of the request_key_auth record under RCU conditions */ @@ -136,8 +162,10 @@ static void request_key_auth_revoke(struct key *key) struct request_key_auth *rka = dereference_key_locked(key); kenter("{%d}", key->serial); + if (!rka) + return; rcu_assign_keypointer(key, NULL); - call_rcu(&rka->rcu, request_key_auth_rcu_disposal); + request_key_auth_put(rka); } /* @@ -150,7 +178,7 @@ static void request_key_auth_destroy(struct key *key) kenter("{%d}", key->serial); if (rka) { rcu_assign_keypointer(key, NULL); - call_rcu(&rka->rcu, request_key_auth_rcu_disposal); + request_key_auth_put(rka); } } @@ -174,6 +202,7 @@ struct key *request_key_auth_new(struct key *target, const char *op, rka = kzalloc(sizeof(*rka), GFP_KERNEL); if (!rka) goto error; + refcount_set(&rka->usage, 1); rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL); if (!rka->callout_info) goto error_free_rka; diff --git a/security/security.c b/security/security.c index 603c3c6d5635d..9285909908ab8 100644 --- a/security/security.c +++ b/security/security.c @@ -94,6 +94,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain); static struct kmem_cache *lsm_file_cache; +static struct kmem_cache *lsm_backing_file_cache; static struct kmem_cache *lsm_inode_cache; char *lsm_names; @@ -265,6 +266,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed) lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred); lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file); + lsm_set_blob_size(&needed->lbs_backing_file, &blob_sizes.lbs_backing_file); lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib); /* * The inode blob gets an rcu_head in addition to @@ -470,6 +472,7 @@ static void __init ordered_lsm_init(void) init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); + init_debug("lsm_backing_file_cache = %d\n", blob_sizes.lbs_backing_file); init_debug("ib blob size = %d\n", blob_sizes.lbs_ib); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); @@ -495,6 +498,11 @@ static void __init ordered_lsm_init(void) lsm_file_cache = kmem_cache_create("lsm_file_cache", blob_sizes.lbs_file, 0, SLAB_PANIC, NULL); + if (blob_sizes.lbs_backing_file) + lsm_backing_file_cache = kmem_cache_create( + "lsm_backing_file_cache", + blob_sizes.lbs_backing_file, + 0, SLAB_PANIC, NULL); if (blob_sizes.lbs_inode) lsm_inode_cache = kmem_cache_create("lsm_inode_cache", blob_sizes.lbs_inode, 0, @@ -671,6 +679,30 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_blocking_lsm_notifier); +/** + * lsm_backing_file_alloc - allocate a composite backing file blob + * @backing_file: the backing file + * + * Allocate the backing file blob for all the modules. + * + * Returns 0, or -ENOMEM if memory can't be allocated. + */ +static int lsm_backing_file_alloc(struct file *backing_file) +{ + void *blob; + + if (!lsm_backing_file_cache) { + backing_file_set_security(backing_file, NULL); + return 0; + } + + blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL); + backing_file_set_security(backing_file, blob); + if (!blob) + return -ENOMEM; + return 0; +} + /** * lsm_blob_alloc - allocate a composite blob * @dest: the destination for the blob @@ -2965,6 +2997,57 @@ void security_file_free(struct file *file) } } +/** + * security_backing_file_alloc() - Allocate and setup a backing file blob + * @backing_file: the backing file + * @user_file: the associated user visible file + * + * Allocate a backing file LSM blob and perform any necessary initialization of + * the LSM blob. There will be some operations where the LSM will not have + * access to @user_file after this point, so any important state associated + * with @user_file that is important to the LSM should be captured in the + * backing file's LSM blob. + * + * LSM's should avoid taking a reference to @user_file in this hook as it will + * result in problems later when the system attempts to drop/put the file + * references due to a circular dependency. + * + * Return: Return 0 if the hook is successful, negative values otherwise. + */ +int security_backing_file_alloc(struct file *backing_file, + const struct file *user_file) +{ + int rc; + + rc = lsm_backing_file_alloc(backing_file); + if (rc) + return rc; + rc = call_int_hook(backing_file_alloc, backing_file, user_file); + if (unlikely(rc)) + security_backing_file_free(backing_file); + + return rc; +} + +/** + * security_backing_file_free() - Free a backing file blob + * @backing_file: the backing file + * + * Free any LSM state associate with a backing file's LSM blob, including the + * blob itself. + */ +void security_backing_file_free(struct file *backing_file) +{ + void *blob = backing_file_security(backing_file); + + call_void_hook(backing_file_free, backing_file); + + if (blob) { + backing_file_set_security(backing_file, NULL); + kmem_cache_free(lsm_backing_file_cache, blob); + } +} + /** * security_file_ioctl() - Check if an ioctl is allowed * @file: associated file @@ -3053,6 +3136,32 @@ int security_mmap_file(struct file *file, unsigned long prot, flags); } +/** + * security_mmap_backing_file - Check if mmap'ing a backing file is allowed + * @vma: the vm_area_struct for the mmap'd region + * @backing_file: the backing file being mmap'd + * @user_file: the user file being mmap'd + * + * Check permissions for a mmap operation on a stacked filesystem. This hook + * is called after the security_mmap_file() and is responsible for authorizing + * the mmap on @backing_file. It is important to note that the mmap operation + * on @user_file has already been authorized and the @vma->vm_file has been + * set to @backing_file. + * + * Return: Returns 0 if permission is granted. + */ +int security_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file) +{ + /* recommended by the stackable filesystem devs */ + if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING))) + return -EIO; + + return call_int_hook(mmap_backing_file, vma, backing_file, user_file); +} +EXPORT_SYMBOL_GPL(security_mmap_backing_file); + /** * security_mmap_addr() - Check if mmap'ing an address is allowed * @addr: address diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3da3017ad2ca0..f96ee8f372e3b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1739,49 +1739,72 @@ static inline int file_path_has_perm(const struct cred *cred, static int bpf_fd_pass(const struct file *file, u32 sid); #endif -/* Check whether a task can use an open file descriptor to - access an inode in a given way. Check access to the - descriptor itself, and then use dentry_has_perm to - check a particular permission to the file. - Access to the descriptor is implicitly granted if it - has the same SID as the process. If av is zero, then - access to the file is not checked, e.g. for cases - where only the descriptor is affected like seek. */ -static int file_has_perm(const struct cred *cred, - struct file *file, - u32 av) +static int __file_has_perm(const struct cred *cred, const struct file *file, + u32 av, bool bf_user_file) + { - struct file_security_struct *fsec = selinux_file(file); - struct inode *inode = file_inode(file); struct common_audit_data ad; - u32 sid = cred_sid(cred); + struct inode *inode; + u32 ssid = cred_sid(cred); + u32 tsid_fd; int rc; - ad.type = LSM_AUDIT_DATA_FILE; - ad.u.file = file; + if (bf_user_file) { + struct backing_file_security_struct *bfsec; + const struct path *path; - if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, - SECCLASS_FD, - FD__USE, - &ad); + if (WARN_ON(!(file->f_mode & FMODE_BACKING))) + return -EIO; + + bfsec = selinux_backing_file(file); + path = backing_file_user_path(file); + tsid_fd = bfsec->uf_sid; + inode = d_inode(path->dentry); + + ad.type = LSM_AUDIT_DATA_PATH; + ad.u.path = *path; + } else { + struct file_security_struct *fsec = selinux_file(file); + + tsid_fd = fsec->sid; + inode = file_inode(file); + + ad.type = LSM_AUDIT_DATA_FILE; + ad.u.file = file; + } + + if (ssid != tsid_fd) { + rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad); if (rc) - goto out; + return rc; } #ifdef CONFIG_BPF_SYSCALL - rc = bpf_fd_pass(file, cred_sid(cred)); + /* regardless of backing vs user file, use the underlying file here */ + rc = bpf_fd_pass(file, ssid); if (rc) return rc; #endif /* av is zero if only checking access to the descriptor. */ - rc = 0; if (av) - rc = inode_has_perm(cred, inode, av, &ad); + return inode_has_perm(cred, inode, av, &ad); -out: - return rc; + return 0; +} + +/* Check whether a task can use an open file descriptor to + access an inode in a given way. Check access to the + descriptor itself, and then use dentry_has_perm to + check a particular permission to the file. + Access to the descriptor is implicitly granted if it + has the same SID as the process. If av is zero, then + access to the file is not checked, e.g. for cases + where only the descriptor is affected like seek. */ +static inline int file_has_perm(const struct cred *cred, + const struct file *file, u32 av) +{ + return __file_has_perm(cred, file, av, false); } /* @@ -3799,6 +3822,17 @@ static int selinux_file_alloc_security(struct file *file) return 0; } +static int selinux_backing_file_alloc(struct file *backing_file, + const struct file *user_file) +{ + struct backing_file_security_struct *bfsec; + + bfsec = selinux_backing_file(backing_file); + bfsec->uf_sid = selinux_file(user_file)->sid; + + return 0; +} + /* * Check whether a task has the ioctl permission and cmd * operation to an inode. @@ -3916,42 +3950,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd, static int default_noexec __ro_after_init; -static int file_map_prot_check(struct file *file, unsigned long prot, int shared) +static int __file_map_prot_check(const struct cred *cred, + const struct file *file, unsigned long prot, + bool shared, bool bf_user_file) { - const struct cred *cred = current_cred(); - u32 sid = cred_sid(cred); - int rc = 0; + struct inode *inode = NULL; + bool prot_exec = prot & PROT_EXEC; + bool prot_write = prot & PROT_WRITE; + + if (file) { + if (bf_user_file) + inode = d_inode(backing_file_user_path(file)->dentry); + else + inode = file_inode(file); + } + + if (default_noexec && prot_exec && + (!file || IS_PRIVATE(inode) || (!shared && prot_write))) { + int rc; + u32 sid = cred_sid(cred); - if (default_noexec && - (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || - (!shared && (prot & PROT_WRITE)))) { /* - * We are making executable an anonymous mapping or a - * private file mapping that will also be writable. - * This has an additional check. + * We are making executable an anonymous mapping or a private + * file mapping that will also be writable. */ - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, - PROCESS__EXECMEM, NULL); + rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM, + NULL); if (rc) - goto error; + return rc; } if (file) { - /* read access is always possible with a mapping */ + /* "read" always possible, "write" only if shared */ u32 av = FILE__READ; - - /* write access only matters if the mapping is shared */ - if (shared && (prot & PROT_WRITE)) + if (shared && prot_write) av |= FILE__WRITE; - - if (prot & PROT_EXEC) + if (prot_exec) av |= FILE__EXECUTE; - return file_has_perm(cred, file, av); + return __file_has_perm(cred, file, av, bf_user_file); } -error: - return rc; + return 0; +} + +static inline int file_map_prot_check(const struct cred *cred, + const struct file *file, + unsigned long prot, bool shared) +{ + return __file_map_prot_check(cred, file, prot, shared, false); } static int selinux_mmap_addr(unsigned long addr) @@ -3967,36 +4014,80 @@ static int selinux_mmap_addr(unsigned long addr) return rc; } -static int selinux_mmap_file(struct file *file, - unsigned long reqprot __always_unused, - unsigned long prot, unsigned long flags) +static int selinux_mmap_file_common(const struct cred *cred, struct file *file, + unsigned long prot, bool shared) { - struct common_audit_data ad; - int rc; - if (file) { + int rc; + struct common_audit_data ad; + ad.type = LSM_AUDIT_DATA_FILE; ad.u.file = file; - rc = inode_has_perm(current_cred(), file_inode(file), - FILE__MAP, &ad); + rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad); if (rc) return rc; } - return file_map_prot_check(file, prot, - (flags & MAP_TYPE) == MAP_SHARED); + return file_map_prot_check(cred, file, prot, shared); +} + +static int selinux_mmap_file(struct file *file, + unsigned long reqprot __always_unused, + unsigned long prot, unsigned long flags) +{ + return selinux_mmap_file_common(current_cred(), file, prot, + (flags & MAP_TYPE) == MAP_SHARED); +} + +/** + * selinux_mmap_backing_file - Check mmap permissions on a backing file + * @vma: memory region + * @backing_file: stacked filesystem backing file + * @user_file: user visible file + * + * This is called after selinux_mmap_file() on stacked filesystems, and it + * is this function's responsibility to verify access to @backing_file and + * setup the SELinux state for possible later use in the mprotect() code path. + * + * By the time this function is called, mmap() access to @user_file has already + * been authorized and @vma->vm_file has been set to point to @backing_file. + * + * Return zero on success, negative values otherwise. + */ +static int selinux_mmap_backing_file(struct vm_area_struct *vma, + struct file *backing_file, + struct file *user_file __always_unused) +{ + unsigned long prot = 0; + + /* translate vma->vm_flags perms into PROT perms */ + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + return selinux_mmap_file_common(backing_file->f_cred, backing_file, + prot, vma->vm_flags & VM_SHARED); } static int selinux_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot __always_unused, unsigned long prot) { + int rc; const struct cred *cred = current_cred(); u32 sid = cred_sid(cred); + const struct file *file = vma->vm_file; + bool backing_file; + bool shared = vma->vm_flags & VM_SHARED; + + /* check if we need to trigger the "backing files are awful" mode */ + backing_file = file && (file->f_mode & FMODE_BACKING); if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { - int rc = 0; /* * We don't use the vma_is_initial_heap() helper as it has * a history of problems and is currently broken on systems @@ -4010,11 +4101,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); - } else if (!vma->vm_file && (vma_is_initial_stack(vma) || + if (rc) + return rc; + } else if (!file && (vma_is_initial_stack(vma) || vma_is_stack_for_current(vma))) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECSTACK, NULL); - } else if (vma->vm_file && vma->anon_vma) { + if (rc) + return rc; + } else if (file && vma->anon_vma) { /* * We are making executable a file mapping that has * had some COW done. Since pages might have been @@ -4022,13 +4117,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, * modified content. This typically should only * occur for text relocations. */ - rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); + rc = __file_has_perm(cred, file, FILE__EXECMOD, + backing_file); + if (rc) + return rc; + if (backing_file) { + rc = file_has_perm(file->f_cred, file, + FILE__EXECMOD); + if (rc) + return rc; + } } + } + + rc = __file_map_prot_check(cred, file, prot, shared, backing_file); + if (rc) + return rc; + if (backing_file) { + rc = file_map_prot_check(file->f_cred, file, prot, shared); if (rc) return rc; } - return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED); + return 0; } static int selinux_file_lock(struct file *file, unsigned int cmd) @@ -7140,6 +7251,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = { .lbs_cred = sizeof(struct cred_security_struct), .lbs_task = sizeof(struct task_security_struct), .lbs_file = sizeof(struct file_security_struct), + .lbs_backing_file = sizeof(struct backing_file_security_struct), .lbs_inode = sizeof(struct inode_security_struct), .lbs_ipc = sizeof(struct ipc_security_struct), .lbs_key = sizeof(struct key_security_struct), @@ -7363,9 +7475,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), + LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), + LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect), LSM_HOOK_INIT(file_lock, selinux_file_lock), diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 816fde5a5896c..fcb46793898f5 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -86,6 +86,10 @@ struct file_security_struct { u32 pseqno; /* Policy seqno at the time of file open */ }; +struct backing_file_security_struct { + u32 uf_sid; /* associated user file fsec->sid */ +}; + struct superblock_security_struct { u32 sid; /* SID of file system superblock */ u32 def_sid; /* default SID for labeling */ @@ -190,6 +194,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file) return file->f_security + selinux_blob_sizes.lbs_file; } +static inline struct backing_file_security_struct * +selinux_backing_file(const struct file *backing_file) +{ + void *blob = backing_file_security(backing_file); + return blob + selinux_blob_sizes.lbs_backing_file; +} + static inline struct inode_security_struct * selinux_inode(const struct inode *inode) { diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 1a64266341b19..5bf90affcbb73 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -780,21 +781,18 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) return true; /* otherwise, we have to actually compare the data */ - - BUG_ON(!IS_ALIGNED((unsigned long)val, len)); - switch (len) { case 1: - _val = *(u8 *)val; + _val = get_unaligned((u8 *)val); break; case 2: - _val = *(u16 *)val; + _val = get_unaligned((u16 *)val); break; case 4: - _val = *(u32 *)val; + _val = get_unaligned((u32 *)val); break; case 8: - _val = *(u64 *)val; + _val = get_unaligned((u64 *)val); break; default: return false;