-rw-r--r--  .mailmap | 9
-rw-r--r--  Documentation/accounting/psi.txt | 12
-rw-r--r--  Documentation/bpf/btf.rst | 57
-rw-r--r--  Documentation/devicetree/bindings/arm/cpus.yaml | 2
-rw-r--r--  Documentation/lzo.txt | 8
-rw-r--r--  Documentation/networking/devlink-info-versions.rst | 5
-rw-r--r--  Documentation/networking/dsa/bcm_sf2.rst (renamed from Documentation/networking/dsa/bcm_sf2.txt) | 27
-rw-r--r--  Documentation/networking/dsa/dsa.rst (renamed from Documentation/networking/dsa/dsa.txt) | 279
-rw-r--r--  Documentation/networking/dsa/index.rst | 10
-rw-r--r--  Documentation/networking/dsa/lan9303.rst (renamed from Documentation/networking/dsa/lan9303.txt) | 6
-rw-r--r--  Documentation/networking/index.rst | 1
-rw-r--r--  MAINTAINERS | 7
-rw-r--r--  Makefile | 5
-rw-r--r--  arch/arc/include/asm/syscall.h | 7
-rw-r--r--  arch/arm/boot/dts/am335x-evm.dts | 26
-rw-r--r--  arch/arm/boot/dts/am335x-evmsk.dts | 26
-rw-r--r--  arch/arm/boot/dts/am33xx-l4.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/rk3288-tinker.dtsi | 3
-rw-r--r--  arch/arm/boot/dts/rk3288-veyron.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/rk3288.dtsi | 20
-rw-r--r--  arch/arm/boot/dts/sama5d2-pinfunc.h | 2
-rw-r--r--  arch/arm/boot/dts/ste-nomadik-nhk15.dts | 9
-rw-r--r--  arch/arm/include/asm/syscall.h | 47
-rw-r--r--  arch/arm/mach-at91/pm.c | 6
-rw-r--r--  arch/arm/mach-iop13xx/setup.c | 8
-rw-r--r--  arch/arm/mach-iop13xx/tpmi.c | 10
-rw-r--r--  arch/arm/mach-milbeaut/platsmp.c | 4
-rw-r--r--  arch/arm/mach-omap1/board-ams-delta.c | 2
-rw-r--r--  arch/arm/mach-omap2/display.c | 4
-rw-r--r--  arch/arm/plat-iop/adma.c | 6
-rw-r--r--  arch/arm/plat-orion/common.c | 4
-rw-r--r--  arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 3
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328.dtsi | 58
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts | 1
-rw-r--r--  arch/arm64/include/asm/syscall.h | 46
-rw-r--r--  arch/arm64/kernel/sdei.c | 6
-rw-r--r--  arch/c6x/include/asm/syscall.h | 79
-rw-r--r--  arch/csky/include/asm/syscall.h | 26
-rw-r--r--  arch/h8300/include/asm/syscall.h | 34
-rw-r--r--  arch/hexagon/include/asm/syscall.h | 4
-rw-r--r--  arch/ia64/include/asm/syscall.h | 13
-rw-r--r--  arch/ia64/kernel/ptrace.c | 7
-rw-r--r--  arch/microblaze/include/asm/syscall.h | 8
-rw-r--r--  arch/mips/include/asm/syscall.h | 3
-rw-r--r--  arch/mips/kernel/ptrace.c | 2
-rw-r--r--  arch/nds32/include/asm/syscall.h | 62
-rw-r--r--  arch/nios2/include/asm/syscall.h | 84
-rw-r--r--  arch/openrisc/include/asm/syscall.h | 12
-rw-r--r--  arch/parisc/include/asm/ptrace.h | 5
-rw-r--r--  arch/parisc/include/asm/syscall.h | 30
-rw-r--r--  arch/parisc/kernel/process.c | 6
-rw-r--r--  arch/parisc/kernel/setup.c | 3
-rw-r--r--  arch/powerpc/include/asm/syscall.h | 15
-rw-r--r--  arch/powerpc/kernel/kvm.c | 7
-rw-r--r--  arch/riscv/include/asm/syscall.h | 24
-rw-r--r--  arch/s390/include/asm/syscall.h | 28
-rw-r--r--  arch/sh/boards/of-generic.c | 4
-rw-r--r--  arch/sh/include/asm/syscall_32.h | 47
-rw-r--r--  arch/sh/include/asm/syscall_64.h | 8
-rw-r--r--  arch/sparc/include/asm/syscall.h | 11
-rw-r--r--  arch/um/include/asm/syscall-generic.h | 78
-rw-r--r--  arch/x86/include/asm/syscall.h | 142
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 3
-rw-r--r--  arch/x86/kvm/svm.c | 22
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 74
-rw-r--r--  arch/xtensa/include/asm/processor.h | 21
-rw-r--r--  arch/xtensa/include/asm/syscall.h | 33
-rw-r--r--  arch/xtensa/kernel/entry.S | 6
-rw-r--r--  arch/xtensa/kernel/stacktrace.c | 6
-rw-r--r--  arch/xtensa/mm/mmu.c | 2
-rw-r--r--  block/bfq-iosched.c | 2
-rw-r--r--  block/bfq-wf2q.c | 2
-rw-r--r--  block/blk-core.c | 4
-rw-r--r--  block/blk-mq-sched.c | 8
-rw-r--r--  block/blk-mq.c | 129
-rw-r--r--  block/blk-mq.h | 6
-rw-r--r--  drivers/block/null_blk_main.c | 5
-rw-r--r--  drivers/block/paride/pcd.c | 14
-rw-r--r--  drivers/block/paride/pf.c | 12
-rw-r--r--  drivers/block/xsysace.c | 2
-rw-r--r--  drivers/char/tpm/eventlog/tpm2.c | 4
-rw-r--r--  drivers/char/tpm/tpm-dev-common.c | 9
-rw-r--r--  drivers/char/tpm/tpm-interface.c | 14
-rw-r--r--  drivers/crypto/caam/caamhash.c | 13
-rw-r--r--  drivers/i2c/busses/i2c-imx.c | 4
-rw-r--r--  drivers/infiniband/core/addr.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c | 2
-rw-r--r--  drivers/md/dm-core.h | 1
-rw-r--r--  drivers/md/dm-init.c | 2
-rw-r--r--  drivers/md/dm-integrity.c | 16
-rw-r--r--  drivers/md/dm-rq.c | 11
-rw-r--r--  drivers/md/dm-table.c | 39
-rw-r--r--  drivers/md/dm.c | 30
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0002.c | 6
-rw-r--r--  drivers/net/appletalk/ipddp.c | 6
-rw-r--r--  drivers/net/dummy.c | 15
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c | 8
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.h | 4
-rw-r--r--  drivers/net/ethernet/cadence/macb_main.c | 4
-rw-r--r--  drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 16
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 3
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.h | 9
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 268
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 15
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 956
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 400
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 29
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 42
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 8
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 15
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 68
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 301
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/events.c | 75
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c | 44
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 473
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 29
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/uar.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core.c | 41
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/reg.h | 55
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 6
-rw-r--r--  drivers/net/ethernet/netronome/nfp/Makefile | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/abm/ctrl.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 236
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/fw.h | 33
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 12
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h | 17
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3
-rw-r--r--  drivers/net/ethernet/netronome/nfp/ccm.c | 220
-rw-r--r--  drivers/net/ethernet/netronome/nfp/ccm.h | 81
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net.h | 23
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 103
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 7
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c | 413
-rw-r--r--  drivers/net/ethernet/rocker/rocker_main.c | 9
-rw-r--r--  drivers/net/loopback.c | 14
-rw-r--r--  drivers/net/netdevsim/Makefile | 6
-rw-r--r--  drivers/net/netdevsim/bpf.c | 87
-rw-r--r--  drivers/net/netdevsim/netdev.c | 82
-rw-r--r--  drivers/net/netdevsim/netdevsim.h | 35
-rw-r--r--  drivers/net/netdevsim/sdev.c | 69
-rw-r--r--  drivers/net/phy/aquantia_main.c | 14
-rw-r--r--  drivers/net/phy/marvell10g.c | 1
-rw-r--r--  drivers/net/phy/phy-c45.c | 17
-rw-r--r--  drivers/net/phy/phy-core.c | 270
-rw-r--r--  drivers/net/phy/phy_device.c | 38
-rw-r--r--  drivers/net/usb/qmi_wwan.c | 65
-rw-r--r--  drivers/net/veth.c | 14
-rw-r--r--  drivers/net/vrf.c | 20
-rw-r--r--  drivers/net/xen-netback/common.h | 18
-rw-r--r--  drivers/net/xen-netback/xenbus.c | 17
-rw-r--r--  drivers/parisc/iosapic.c | 6
-rw-r--r--  drivers/reset/reset-meson-audio-arb.c | 1
-rw-r--r--  drivers/rtc/Kconfig | 4
-rw-r--r--  drivers/rtc/rtc-cros-ec.c | 4
-rw-r--r--  drivers/rtc/rtc-da9063.c | 7
-rw-r--r--  drivers/rtc/rtc-sh.c | 2
-rw-r--r--  drivers/scsi/lpfc/lpfc_scsi.c | 7
-rw-r--r--  drivers/scsi/qedi/qedi_main.c | 7
-rw-r--r--  drivers/scsi/scsi_devinfo.c | 1
-rw-r--r--  drivers/scsi/scsi_dh.c | 1
-rw-r--r--  drivers/scsi/storvsc_drv.c | 15
-rw-r--r--  drivers/vfio/pci/vfio_pci.c | 4
-rw-r--r--  drivers/vfio/vfio_iommu_spapr_tce.c | 2
-rw-r--r--  drivers/vfio/vfio_iommu_type1.c | 14
-rw-r--r--  drivers/xen/privcmd-buf.c | 3
-rw-r--r--  drivers/xen/xenbus/xenbus_dev_frontend.c | 4
-rw-r--r--  fs/hugetlbfs/inode.c | 20
-rw-r--r--  fs/io_uring.c | 1
-rw-r--r--  fs/open.c | 18
-rw-r--r--  fs/proc/base.c | 17
-rw-r--r--  fs/read_write.c | 5
-rw-r--r--  include/asm-generic/syscall.h | 21
-rw-r--r--  include/dt-bindings/reset/amlogic,meson-g12a-reset.h | 5
-rw-r--r--  include/keys/trusted.h | 2
-rw-r--r--  include/linux/bitrev.h | 46
-rw-r--r--  include/linux/bpf.h | 75
-rw-r--r--  include/linux/bpf_verifier.h | 27
-rw-r--r--  include/linux/btf.h | 1
-rw-r--r--  include/linux/fs.h | 4
-rw-r--r--  include/linux/memcontrol.h | 5
-rw-r--r--  include/linux/mlx5/cq.h | 2
-rw-r--r--  include/linux/mlx5/device.h | 1
-rw-r--r--  include/linux/mlx5/doorbell.h | 39
-rw-r--r--  include/linux/mlx5/driver.h | 3
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 51
-rw-r--r--  include/linux/mm_types.h | 2
-rw-r--r--  include/linux/phy.h | 2
-rw-r--r--  include/linux/ptrace.h | 11
-rw-r--r--  include/linux/rhashtable.h | 226
-rw-r--r--  include/linux/string.h | 3
-rw-r--r--  include/net/ip6_fib.h | 8
-rw-r--r--  include/net/ip6_route.h | 2
-rw-r--r--  include/net/ip_fib.h | 18
-rw-r--r--  include/net/ipv6_stubs.h | 6
-rw-r--r--  include/net/ndisc.h | 40
-rw-r--r--  include/net/neighbour.h | 5
-rw-r--r--  include/net/nfc/nci_core.h | 2
-rw-r--r--  include/net/route.h | 43
-rw-r--r--  include/net/sch_generic.h | 80
-rw-r--r--  include/net/sctp/ulpqueue.h | 2
-rw-r--r--  include/trace/events/fib.h | 4
-rw-r--r--  include/trace/events/syscalls.h | 2
-rw-r--r--  include/uapi/linux/bpf.h | 37
-rw-r--r--  include/uapi/linux/btf.h | 32
-rw-r--r--  include/uapi/linux/ethtool.h | 2
-rw-r--r--  kernel/bpf/arraymap.c | 53
-rw-r--r--  kernel/bpf/btf.c | 419
-rw-r--r--  kernel/bpf/core.c | 14
-rw-r--r--  kernel/bpf/disasm.c | 5
-rw-r--r--  kernel/bpf/hashtab.c | 6
-rw-r--r--  kernel/bpf/local_storage.c | 6
-rw-r--r--  kernel/bpf/lpm_trie.c | 3
-rw-r--r--  kernel/bpf/queue_stack_maps.c | 6
-rw-r--r--  kernel/bpf/syscall.c | 144
-rw-r--r--  kernel/bpf/verifier.c | 397
-rw-r--r--  kernel/seccomp.c | 2
-rw-r--r--  kernel/sysctl.c | 3
-rw-r--r--  kernel/trace/trace_syscalls.c | 9
-rw-r--r--  lib/Kconfig.debug | 8
-rw-r--r--  lib/lzo/lzo1x_compress.c | 9
-rw-r--r--  lib/lzo/lzo1x_decompress_safe.c | 4
-rw-r--r--  lib/rhashtable.c | 27
-rw-r--r--  lib/string.c | 20
-rw-r--r--  lib/syscall.c | 57
-rw-r--r--  lib/test_rhashtable.c | 2
-rw-r--r--  mm/huge_memory.c | 36
-rw-r--r--  mm/kmemleak.c | 16
-rw-r--r--  mm/memcontrol.c | 20
-rw-r--r--  mm/slab.c | 3
-rw-r--r--  mm/util.c | 2
-rw-r--r--  net/atm/clip.c | 4
-rw-r--r--  net/bpf/Makefile | 2
-rw-r--r--  net/bpf/test_run.c | 149
-rw-r--r--  net/caif/caif_dev.c | 12
-rw-r--r--  net/core/filter.c | 37
-rw-r--r--  net/core/gen_stats.c | 2
-rw-r--r--  net/core/net_namespace.c | 2
-rw-r--r--  net/dns_resolver/dns_query.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 69
-rw-r--r--  net/ipv4/fib_semantics.c | 433
-rw-r--r--  net/ipv4/fou.c | 8
-rw-r--r--  net/ipv4/inet_connection_sock.c | 4
-rw-r--r--  net/ipv4/ip_forward.c | 2
-rw-r--r--  net/ipv4/ip_gre.c | 15
-rw-r--r--  net/ipv4/ip_output.c | 13
-rw-r--r--  net/ipv4/route.c | 112
-rw-r--r--  net/ipv4/xfrm4_policy.c | 7
-rw-r--r--  net/ipv6/addrconf.c | 2
-rw-r--r--  net/ipv6/addrconf_core.c | 9
-rw-r--r--  net/ipv6/af_inet6.c | 2
-rw-r--r--  net/ipv6/ip6_fib.c | 2
-rw-r--r--  net/ipv6/ip6_gre.c | 20
-rw-r--r--  net/ipv6/ip6_output.c | 2
-rw-r--r--  net/ipv6/route.c | 280
-rw-r--r--  net/mpls/mpls_iptunnel.c | 12
-rw-r--r--  net/nfc/nci/hci.c | 8
-rw-r--r--  net/sched/cls_flower.c | 23
-rw-r--r--  net/sched/sch_api.c | 15
-rw-r--r--  net/sched/sch_cbs.c | 98
-rw-r--r--  net/sched/sch_generic.c | 67
-rw-r--r--  net/sched/sch_taprio.c | 97
-rw-r--r--  net/sctp/stream_interleave.c | 60
-rw-r--r--  net/sctp/ulpqueue.c | 50
-rw-r--r--  net/smc/af_smc.c | 355
-rw-r--r--  net/smc/smc.h | 11
-rw-r--r--  net/smc/smc_clc.c | 10
-rw-r--r--  net/smc/smc_clc.h | 20
-rw-r--r--  net/smc/smc_core.c | 93
-rw-r--r--  net/smc/smc_core.h | 25
-rw-r--r--  net/smc/smc_pnet.c | 47
-rw-r--r--  net/smc/smc_pnet.h | 7
-rw-r--r--  net/strparser/strparser.c | 2
-rw-r--r--  net/tipc/node.c | 5
-rw-r--r--  samples/bpf/asm_goto_workaround.h | 1
-rw-r--r--  samples/bpf/offwaketime_user.c | 5
-rw-r--r--  samples/bpf/sampleip_user.c | 5
-rw-r--r--  samples/bpf/spintest_user.c | 7
-rw-r--r--  samples/bpf/trace_event_user.c | 5
-rw-r--r--  scripts/coccinelle/api/stream_open.cocci | 363
-rwxr-xr-x  scripts/link-vmlinux.sh | 20
-rw-r--r--  security/keys/trusted.c | 32
-rw-r--r--  tools/arch/arm64/include/asm/barrier.h | 10
-rw-r--r--  tools/arch/x86/include/asm/barrier.h | 7
-rw-r--r--  tools/bpf/bpftool/btf_dumper.c | 59
-rw-r--r--  tools/bpf/bpftool/map.c | 10
-rw-r--r--  tools/bpf/bpftool/prog.c | 6
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.c | 3
-rw-r--r--  tools/include/linux/filter.h | 21
-rw-r--r--  tools/include/uapi/linux/bpf.h | 37
-rw-r--r--  tools/include/uapi/linux/btf.h | 32
-rw-r--r--  tools/lib/bpf/.gitignore | 1
-rw-r--r--  tools/lib/bpf/Makefile | 20
-rw-r--r--  tools/lib/bpf/bpf.c | 36
-rw-r--r--  tools/lib/bpf/bpf.h | 8
-rw-r--r--  tools/lib/bpf/btf.c | 97
-rw-r--r--  tools/lib/bpf/btf.h | 3
-rw-r--r--  tools/lib/bpf/libbpf.c | 567
-rw-r--r--  tools/lib/bpf/libbpf.h | 6
-rw-r--r--  tools/lib/bpf/libbpf.map | 7
-rw-r--r--  tools/lib/bpf/libbpf.pc.template | 12
-rw-r--r--  tools/lib/bpf/xsk.c | 9
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 4
-rw-r--r--  tools/testing/selftests/bpf/bpf_helpers.h | 8
-rw-r--r--  tools/testing/selftests/bpf/config | 8
-rw-r--r--  tools/testing/selftests/bpf/flow_dissector_load.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 8
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 49
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/global_data.c | 157
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 89
-rw-r--r--  tools/testing/selftests/bpf/progs/test_global_data.c | 106
-rw-r--r--  tools/testing/selftests/bpf/progs/test_jhash.h | 70
-rw-r--r--  tools/testing/selftests/bpf/progs/test_skb_ctx.c | 21
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 321
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale1.c | 30
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale2.c | 30
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verif_scale3.c | 30
-rw-r--r--  tools/testing/selftests/bpf/test_btf.c | 697
-rwxr-xr-x  tools/testing/selftests/bpf/test_offload.py | 2
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 6
-rw-r--r--  tools/testing/selftests/bpf/test_progs.h | 1
-rwxr-xr-x  tools/testing/selftests/bpf/test_tc_tunnel.sh | 136
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 86
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.c | 4
-rw-r--r--  tools/testing/selftests/bpf/verifier/array_access.c | 159
-rw-r--r--  tools/testing/selftests/bpf/verifier/ctx_skb.c | 1
-rw-r--r--  tools/testing/selftests/bpf/verifier/direct_value_access.c | 347
-rw-r--r--  tools/testing/selftests/bpf/verifier/ld_dw.c | 9
-rw-r--r--  tools/testing/selftests/bpf/verifier/var_off.c | 186
-rwxr-xr-x  tools/testing/selftests/net/fib_tests.sh | 70
-rwxr-xr-x  tools/testing/selftests/net/pmtu.sh | 213
-rw-r--r--  tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 40
-rw-r--r--  tools/testing/selftests/tpm2/tpm2.py | 5
-rw-r--r--  tools/testing/selftests/tpm2/tpm2_tests.py | 63
358 files changed, 11058 insertions(+), 4691 deletions(-)
diff --git a/.mailmap b/.mailmap
index ae2bcad06f4b..a51547ac96f9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -16,6 +16,9 @@ Alan Cox <alan@lxorguk.ukuu.org.uk>
Alan Cox <root@hraefn.swansea.linux.org.uk>
Aleksey Gorelov <aleksey_gorelov@phoenix.com>
Aleksandar Markovic <aleksandar.markovic@mips.com> <aleksandar.markovic@imgtec.com>
+Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
+Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Al Viro <viro@ftp.linux.org.uk>
Al Viro <viro@zenIV.linux.org.uk>
Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
@@ -46,6 +49,12 @@ Christoph Hellwig <hch@lst.de>
Christophe Ricard <christophe.ricard@gmail.com>
Corey Minyard <minyard@acm.org>
Damian Hobson-Garcia <dhobsong@igel.co.jp>
+Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
+Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
+Daniel Borkmann <daniel@iogearbox.net> <danborkmann@iogearbox.net>
+Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch>
+Daniel Borkmann <daniel@iogearbox.net> <danborkmann@googlemail.com>
+Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
David Brownell <david-b@pacbell.net>
David Woodhouse <dwmw2@shinybook.infradead.org>
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com>
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
index b8ca28b60215..7e71c9c1d8e9 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.txt
@@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.
-The ratios are tracked as recent trends over ten, sixty, and three
-hundred second windows, which gives insight into short term events as
-well as medium and long term trends. The total absolute stall time is
-tracked and exported as well, to allow detection of latency spikes
-which wouldn't necessarily make a dent in the time averages, or to
-average trends over custom time frames.
+The ratios (in %) are tracked as recent trends over ten, sixty, and
+three hundred second windows, which gives insight into short term events
+as well as medium and long term trends. The total absolute stall time
+(in us) is tracked and exported as well, to allow detection of latency
+spikes which wouldn't necessarily make a dent in the time averages,
+or to average trends over custom time frames.
Cgroup2 interface
=================
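
For context, the units clarified above surface directly in the ``/proc/pressure/*`` files. A minimal userspace sketch that reads them back, assuming only the ``some``/``full`` line format documented in psi.txt itself::

    #include <stdio.h>

    /* Illustrative only: parse /proc/pressure/memory, where avg10/60/300
     * are percentages and "total" is absolute stall time in microseconds,
     * as the amended text states. */
    int main(void)
    {
        char kind[8];
        double avg10, avg60, avg300;
        unsigned long long total_us;
        FILE *f = fopen("/proc/pressure/memory", "r");

        if (!f)
            return 1;
        while (fscanf(f, "%7s avg10=%lf avg60=%lf avg300=%lf total=%llu",
                      kind, &avg10, &avg60, &avg300, &total_us) == 5)
            printf("%s: %.2f%% over 10s, %llu us total\n",
                   kind, avg10, total_us);
        fclose(f);
        return 0;
    }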
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 7313d354f20e..29396e6943b0 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -82,6 +82,8 @@ sequentially and type id is assigned to each recognized type starting from id
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+ #define BTF_KIND_VAR 14 /* Variable */
+ #define BTF_KIND_DATASEC 15 /* Section */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -393,6 +395,61 @@ refers to parameter type.
If the function has variable arguments, the last parameter is encoded with
``name_off = 0`` and ``type = 0``.
+2.2.14 BTF_KIND_VAR
+~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid C identifier
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_VAR
+ * ``info.vlen``: 0
+ * ``type``: the type of the variable
+
+``btf_type`` is followed by a single ``struct btf_variable`` with the
+following data::
+
+ struct btf_var {
+ __u32 linkage;
+ };
+
+``struct btf_var`` encoding:
+ * ``linkage``: currently only 0 (static variable) or 1 (globally
+ allocated variable in ELF sections)
+
+Not all types of global variables are supported by LLVM at this point.
+The following is currently available:
+
+ * static variables with or without section attributes
+ * global variables with section attributes
+
+The latter is for future extraction of map key/value type IDs from a
+map definition.
+
+2.2.15 BTF_KIND_DATASEC
+~~~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a valid name associated with a variable or
+ one of .data/.bss/.rodata
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_DATASEC
+ * ``info.vlen``: # of variables
+ * ``size``: total section size in bytes (0 at compilation time, patched
+ to actual size by BPF loaders such as libbpf)
+
+``btf_type`` is followed by ``info.vlen`` number of ``struct btf_var_secinfo``.::
+
+ struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+ };
+
+``struct btf_var_secinfo`` encoding:
+ * ``type``: the type of the BTF_KIND_VAR variable
+ * ``offset``: the in-section offset of the variable
+ * ``size``: the size of the variable in bytes
+
3. BTF Kernel API
*****************
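
To make the new ``BTF_KIND_DATASEC`` layout concrete, here is a hedged C sketch of how a loader might walk the ``btf_var_secinfo`` records that follow such a type entry. The struct layouts are copied from the text above (not from the kernel headers), and the vlen extraction assumes the bits 0-15 of ``info`` encoding described earlier in this document::

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative copies of the layouts documented above. */
    struct btf_type {
        uint32_t name_off;
        uint32_t info;
        uint32_t size;              /* for BTF_KIND_DATASEC: section size */
    };

    struct btf_var_secinfo {
        uint32_t type;              /* type id of the BTF_KIND_VAR entry */
        uint32_t offset;            /* in-section offset of the variable */
        uint32_t size;              /* size of the variable in bytes */
    };

    /* Walk the vlen secinfo records that follow a DATASEC btf_type. */
    static void dump_datasec(const struct btf_type *t)
    {
        const struct btf_var_secinfo *vsi =
            (const struct btf_var_secinfo *)(t + 1);
        uint32_t i, vlen = t->info & 0xffff;    /* bits 0-15: vlen */

        for (i = 0; i < vlen; i++)
            printf("var type=%u offset=%u size=%u\n",
                   vsi[i].type, vsi[i].offset, vsi[i].size);
    }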
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 365dcf384d73..82dd7582e945 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -228,7 +228,7 @@ patternProperties:
- renesas,r9a06g032-smp
- rockchip,rk3036-smp
- rockchip,rk3066-smp
- - socionext,milbeaut-m10v-smp
+ - socionext,milbeaut-m10v-smp
- ste,dbx500-smp
cpu-release-addr:
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index f79934225d8d..ca983328976b 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -102,9 +102,11 @@ Byte sequences
dictionary which is empty, and that it will always be
invalid at this place.
- 17 : bitstream version. If the first byte is 17, the next byte
- gives the bitstream version (version 1 only). If the first byte
- is not 17, the bitstream version is 0.
+ 17 : bitstream version. If the first byte is 17, and compressed
+ stream length is at least 5 bytes (length of shortest possible
+ versioned bitstream), the next byte gives the bitstream version
+ (version 1 only).
+ Otherwise, the bitstream version is 0.
18..21 : copy 0..3 literals
state = (byte - 17) = 0..3 [ copy <state> literals ]
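
The amended versioning rule is small enough to state as code. A hedged sketch of the check (the function name is invented for illustration)::

    #include <stddef.h>

    /* First byte 17 announces a versioned bitstream only when the
     * compressed stream is at least 5 bytes long (the shortest possible
     * versioned bitstream); anything else is treated as version 0. */
    static int lzo_bitstream_version(const unsigned char *in, size_t len)
    {
        if (len >= 5 && in[0] == 17)
            return in[1];       /* only version 1 is defined so far */
        return 0;
    }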
diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
index c79ad8593383..4316342b7746 100644
--- a/Documentation/networking/devlink-info-versions.rst
+++ b/Documentation/networking/devlink-info-versions.rst
@@ -41,3 +41,8 @@ fw.ncsi
Version of the software responsible for supporting/handling the
Network Controller Sideband Interface.
+
+fw.psid
+=======
+
+Unique identifier of the firmware parameter set.
diff --git a/Documentation/networking/dsa/bcm_sf2.txt b/Documentation/networking/dsa/bcm_sf2.rst
index eba3a2431e91..dee234039e1e 100644
--- a/Documentation/networking/dsa/bcm_sf2.txt
+++ b/Documentation/networking/dsa/bcm_sf2.rst
@@ -1,3 +1,4 @@
+=============================================
Broadcom Starfighter 2 Ethernet switch driver
=============================================
@@ -25,27 +26,27 @@ are connected at a lower speed.
The switch hardware block is typically interfaced using MMIO accesses and
contains a bunch of sub-blocks/registers:
-* SWITCH_CORE: common switch registers
-* SWITCH_REG: external interfaces switch register
-* SWITCH_MDIO: external MDIO bus controller (there is another one in SWITCH_CORE,
+- ``SWITCH_CORE``: common switch registers
+- ``SWITCH_REG``: external interfaces switch register
+- ``SWITCH_MDIO``: external MDIO bus controller (there is another one in SWITCH_CORE,
which is used for indirect PHY accesses)
-* SWITCH_INDIR_RW: 64-bits wide register helper block
-* SWITCH_INTRL2_0/1: Level-2 interrupt controllers
-* SWITCH_ACB: Admission control block
-* SWITCH_FCB: Fail-over control block
+- ``SWITCH_INDIR_RW``: 64-bits wide register helper block
+- ``SWITCH_INTRL2_0/1``: Level-2 interrupt controllers
+- ``SWITCH_ACB``: Admission control block
+- ``SWITCH_FCB``: Fail-over control block
Implementation details
======================
-The driver is located in drivers/net/dsa/bcm_sf2.c and is implemented as a DSA
-driver; see Documentation/networking/dsa/dsa.txt for details on the subsystem
+The driver is located in ``drivers/net/dsa/bcm_sf2.c`` and is implemented as a DSA
+driver; see ``Documentation/networking/dsa/dsa.rst`` for details on the subsystem
and what it provides.
The SF2 switch is configured to enable a Broadcom specific 4-bytes switch tag
which gets inserted by the switch for every packet forwarded to the CPU
interface, conversely, the CPU network interface should insert a similar tag for
packets entering the CPU port. The tag format is described in
-net/dsa/tag_brcm.c.
+``net/dsa/tag_brcm.c``.
Overall, the SF2 driver is a fairly regular DSA driver; there are a few
specifics covered below.
@@ -54,7 +55,7 @@ Device Tree probing
-------------------
The DSA platform device driver is probed using a specific compatible string
-provided in net/dsa/dsa.c. The reason for that is because the DSA subsystem gets
+provided in ``net/dsa/dsa.c``. The reason for that is because the DSA subsystem gets
registered as a platform device driver currently. DSA will provide the needed
device_node pointers which are then accessible by the switch driver setup
function to setup resources such as register ranges and interrupts. This
@@ -70,7 +71,7 @@ Broadcom switches connected to a SF2 require the use of the DSA slave MDIO bus
in order to properly configure them. By default, the SF2 pseudo-PHY address, and
an external switch pseudo-PHY address will both be snooping for incoming MDIO
transactions, since they are at the same address (30), resulting in some kind of
-"double" programming. Using DSA, and setting ds->phys_mii_mask accordingly, we
+"double" programming. Using DSA, and setting ``ds->phys_mii_mask`` accordingly, we
selectively divert reads and writes towards external Broadcom switches
pseudo-PHY addresses. Newer revisions of the SF2 hardware have introduced a
configurable pseudo-PHY address which circumvents the initial design limitation.
@@ -86,7 +87,7 @@ firmware gets reloaded. The SF2 driver relies on such events to properly set its
MoCA interface carrier state and properly report this to the networking stack.
The MoCA interfaces are supported using the PHY library's fixed PHY/emulated PHY
-device and the switch driver registers a fixed_link_update callback for such
+device and the switch driver registers a ``fixed_link_update`` callback for such
PHYs which reflects the link state obtained from the interrupt handler.
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.rst
index 43ef767bc440..ca87068b9ab9 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.rst
@@ -1,10 +1,8 @@
-Distributed Switch Architecture
-===============================
-
-Introduction
+============
+Architecture
============
-This document describes the Distributed Switch Architecture (DSA) subsystem
+This document describes the **Distributed Switch Architecture (DSA)** subsystem
design principles, limitations, interactions with other subsystems, and how to
develop drivers for this subsystem as well as a TODO for developers interested
in joining the effort.
@@ -70,11 +68,11 @@ Switch tagging protocols
DSA currently supports 5 different tagging protocols, and a tag-less mode as
well. The different protocols are implemented in:
-net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy)
-net/dsa/tag_dsa.c: Marvell's original DSA tag
-net/dsa/tag_edsa.c: Marvell's enhanced DSA tag
-net/dsa/tag_brcm.c: Broadcom's 4 bytes tag
-net/dsa/tag_qca.c: Qualcomm's 2 bytes tag
+- ``net/dsa/tag_trailer.c``: Marvell's 4 trailer tag mode (legacy)
+- ``net/dsa/tag_dsa.c``: Marvell's original DSA tag
+- ``net/dsa/tag_edsa.c``: Marvell's enhanced DSA tag
+- ``net/dsa/tag_brcm.c``: Broadcom's 4 bytes tag
+- ``net/dsa/tag_qca.c``: Qualcomm's 2 bytes tag
The exact format of the tag protocol is vendor specific, but in general, they
all contain something which:
@@ -89,7 +87,7 @@ Master network devices are regular, unmodified Linux network device drivers for
the CPU/management Ethernet interface. Such a driver might occasionally need to
know whether DSA is enabled (e.g.: to enable/disable specific offload features),
but the DSA subsystem has been proven to work with industry standard drivers:
-e1000e, mv643xx_eth etc. without having to introduce modifications to these
+``e1000e``, ``mv643xx_eth`` etc. without having to introduce modifications to these
drivers. Such network devices are also often referred to as conduit network
devices since they act as a pipe between the host processor and the hardware
Ethernet switch.
@@ -100,40 +98,42 @@ Networking stack hooks
When a master netdev is used with DSA, a small hook is placed in the
networking stack in order to have the DSA subsystem process the Ethernet
switch specific tagging protocol. DSA accomplishes this by registering a
-specific (and fake) Ethernet type (later becoming skb->protocol) with the
-networking stack, this is also known as a ptype or packet_type. A typical
+specific (and fake) Ethernet type (later becoming ``skb->protocol``) with the
+networking stack; this is also known as a ``ptype`` or ``packet_type``. A typical
Ethernet Frame receive sequence looks like this:
Master network device (e.g.: e1000e):
-Receive interrupt fires:
-- receive function is invoked
-- basic packet processing is done: getting length, status etc.
-- packet is prepared to be processed by the Ethernet layer by calling
- eth_type_trans
+1. Receive interrupt fires:
+
+ - receive function is invoked
+ - basic packet processing is done: getting length, status etc.
+ - packet is prepared to be processed by the Ethernet layer by calling
+ ``eth_type_trans``
+
+2. net/ethernet/eth.c::
+
+ eth_type_trans(skb, dev)
+ if (dev->dsa_ptr != NULL)
+ -> skb->protocol = ETH_P_XDSA
-net/ethernet/eth.c:
+3. drivers/net/ethernet/\*::
-eth_type_trans(skb, dev)
- if (dev->dsa_ptr != NULL)
- -> skb->protocol = ETH_P_XDSA
+ netif_receive_skb(skb)
+ -> iterate over registered packet_type
+ -> invoke handler for ETH_P_XDSA, calls dsa_switch_rcv()
-drivers/net/ethernet/*:
+4. net/dsa/dsa.c::
-netif_receive_skb(skb)
- -> iterate over registered packet_type
- -> invoke handler for ETH_P_XDSA, calls dsa_switch_rcv()
+ -> dsa_switch_rcv()
+ -> invoke switch tag specific protocol handler in 'net/dsa/tag_*.c'
-net/dsa/dsa.c:
- -> dsa_switch_rcv()
- -> invoke switch tag specific protocol handler in
- net/dsa/tag_*.c
+5. net/dsa/tag_*.c:
-net/dsa/tag_*.c:
- -> inspect and strip switch tag protocol to determine originating port
- -> locate per-port network device
- -> invoke eth_type_trans() with the DSA slave network device
- -> invoked netif_receive_skb()
+ - inspect and strip switch tag protocol to determine originating port
+ - locate per-port network device
+ - invoke ``eth_type_trans()`` with the DSA slave network device
+ - invoked ``netif_receive_skb()``
Past this point, the DSA slave network devices get delivered regular Ethernet
frames that can be processed by the networking stack.
@@ -162,7 +162,7 @@ invoke a specific transmit routine which takes care of adding the relevant
switch tag in the Ethernet frames.
These frames are then queued for transmission using the master network device
-ndo_start_xmit() function, since they contain the appropriate switch tag, the
+``ndo_start_xmit()`` function; since they contain the appropriate switch tag, the
Ethernet switch will be able to process these incoming frames from the
management interface and deliver these frames to the physical switch port.
@@ -170,23 +170,25 @@ Graphical representation
------------------------
Summarized, this is basically how DSA looks like from a network device
-perspective:
-
-
- |---------------------------
- | CPU network device (eth0)|
- ----------------------------
- | <tag added by switch |
- | |
- | |
- | tag added by CPU> |
- |--------------------------------------------|
- | Switch driver |
- |--------------------------------------------|
- || || ||
- |-------| |-------| |-------|
- | sw0p0 | | sw0p1 | | sw0p2 |
- |-------| |-------| |-------|
+perspective::
+
+
+ |---------------------------
+ | CPU network device (eth0)|
+ ----------------------------
+ | <tag added by switch |
+ | |
+ | |
+ | tag added by CPU> |
+ |--------------------------------------------|
+ | Switch driver |
+ |--------------------------------------------|
+ || || ||
+ |-------| |-------| |-------|
+ | sw0p0 | | sw0p1 | | sw0p2 |
+ |-------| |-------| |-------|
+
+
Slave MDIO bus
--------------
@@ -207,31 +209,32 @@ PHYs, external PHYs, or even external switches.
Data structures
---------------
-DSA data structures are defined in include/net/dsa.h as well as
-net/dsa/dsa_priv.h.
+DSA data structures are defined in ``include/net/dsa.h`` as well as
+``net/dsa/dsa_priv.h``:
-dsa_chip_data: platform data configuration for a given switch device, this
-structure describes a switch device's parent device, its address, as well as
-various properties of its ports: names/labels, and finally a routing table
-indication (when cascading switches)
+- ``dsa_chip_data``: platform data configuration for a given switch device,
+ this structure describes a switch device's parent device, its address, as
+ well as various properties of its ports: names/labels, and finally a routing
+ table indication (when cascading switches)
-dsa_platform_data: platform device configuration data which can reference a
-collection of dsa_chip_data structure if multiples switches are cascaded, the
-master network device this switch tree is attached to needs to be referenced
+- ``dsa_platform_data``: platform device configuration data which can reference
+ a collection of dsa_chip_data structure if multiples switches are cascaded,
+ the master network device this switch tree is attached to needs to be
+ referenced
-dsa_switch_tree: structure assigned to the master network device under
-"dsa_ptr", this structure references a dsa_platform_data structure as well as
-the tagging protocol supported by the switch tree, and which receive/transmit
-function hooks should be invoked, information about the directly attached switch
-is also provided: CPU port. Finally, a collection of dsa_switch are referenced
-to address individual switches in the tree.
+- ``dsa_switch_tree``: structure assigned to the master network device under
+ ``dsa_ptr``, this structure references a dsa_platform_data structure as well as
+ the tagging protocol supported by the switch tree, and which receive/transmit
+ function hooks should be invoked, information about the directly attached
+ switch is also provided: CPU port. Finally, a collection of dsa_switch are
+ referenced to address individual switches in the tree.
-dsa_switch: structure describing a switch device in the tree, referencing a
-dsa_switch_tree as a backpointer, slave network devices, master network device,
-and a reference to the backing dsa_switch_ops
+- ``dsa_switch``: structure describing a switch device in the tree, referencing
+ a ``dsa_switch_tree`` as a backpointer, slave network devices, master network
+ device, and a reference to the backing ``dsa_switch_ops``
-dsa_switch_ops: structure referencing function pointers, see below for a full
-description.
+- ``dsa_switch_ops``: structure referencing function pointers, see below for a
+ full description.
Design limitations
==================
@@ -240,7 +243,7 @@ Limits on the number of devices and ports
-----------------------------------------
DSA currently limits the number of maximum switches within a tree to 4
-(DSA_MAX_SWITCHES), and the number of ports per switch to 12 (DSA_MAX_PORTS).
+(``DSA_MAX_SWITCHES``), and the number of ports per switch to 12 (``DSA_MAX_PORTS``).
These limits could be extended to support larger configurations should this need
arise.
@@ -279,15 +282,15 @@ Interactions with other subsystems
DSA currently leverages the following subsystems:
-- MDIO/PHY library: drivers/net/phy/phy.c, mdio_bus.c
-- Switchdev: net/switchdev/*
+- MDIO/PHY library: ``drivers/net/phy/phy.c``, ``mdio_bus.c``
+- Switchdev: ``net/switchdev/*``
- Device Tree for various of_* functions
MDIO/PHY library
----------------
Slave network devices exposed by DSA may or may not be interfacing with PHY
-devices (struct phy_device as defined in include/linux/phy.h), but the DSA
+devices (``struct phy_device`` as defined in ``include/linux/phy.h``), but the DSA
subsystem deals with all possible combinations:
- internal PHY devices, built into the Ethernet switch hardware
@@ -296,16 +299,16 @@ subsystem deals with all possible combinations:
- special, non-autonegotiated or non MDIO-managed PHY devices: SFPs, MoCA; a.k.a
fixed PHYs
-The PHY configuration is done by the dsa_slave_phy_setup() function and the
+The PHY configuration is done by the ``dsa_slave_phy_setup()`` function and the
logic basically looks like this:
- if Device Tree is used, the PHY device is looked up using the standard
"phy-handle" property, if found, this PHY device is created and registered
- using of_phy_connect()
+ using ``of_phy_connect()``
- if Device Tree is used, and the PHY device is "fixed", that is, conforms to
the definition of a non-MDIO managed PHY as defined in
- Documentation/devicetree/bindings/net/fixed-link.txt, the PHY is registered
+ ``Documentation/devicetree/bindings/net/fixed-link.txt``, the PHY is registered
and connected transparently using the special fixed MDIO bus driver
- finally, if the PHY is built into the switch, as is very common with
@@ -331,8 +334,8 @@ Device Tree
-----------
DSA features a standardized binding which is documented in
-Documentation/devicetree/bindings/net/dsa/dsa.txt. PHY/MDIO library helper
-functions such as of_get_phy_mode(), of_phy_connect() are also used to query
+``Documentation/devicetree/bindings/net/dsa/dsa.txt``. PHY/MDIO library helper
+functions such as ``of_get_phy_mode()``, ``of_phy_connect()`` are also used to query
per-port PHY specific details: interface connection, MDIO bus location etc..
Driver development
@@ -341,8 +344,8 @@ Driver development
DSA switch drivers need to implement a dsa_switch_ops structure which will
contain the various members described below.
-register_switch_driver() registers this dsa_switch_ops in its internal list
-of drivers to probe for. unregister_switch_driver() does the exact opposite.
+``register_switch_driver()`` registers this dsa_switch_ops in its internal list
+of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
Unless requested differently by setting the priv_size member accordingly, DSA
does not allocate any driver private context space.
@@ -350,17 +353,17 @@ does not allocate any driver private context space.
Switch configuration
--------------------
-- tag_protocol: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the dsa_tag_protocol enum
+- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
+ should be a valid value from the ``dsa_tag_protocol`` enum
-- probe: probe routine which will be invoked by the DSA platform device upon
+- ``probe``: probe routine which will be invoked by the DSA platform device upon
registration to test for the presence/absence of a switch device. For MDIO
devices, it is recommended to issue a read towards internal registers using
the switch pseudo-PHY and return whether this is a supported device. For other
buses, return a non-NULL string
-- setup: setup function for the switch, this function is responsible for setting
- up the dsa_switch_ops private structure with all it needs: register maps,
+- ``setup``: setup function for the switch, this function is responsible for setting
+ up the ``dsa_switch_ops`` private structure with all it needs: register maps,
interrupts, mutexes, locks etc.. This function is also expected to properly
configure the switch to separate all network interfaces from each other, that
is, they should be isolated by the switch hardware itself, typically by creating
@@ -375,27 +378,27 @@ Switch configuration
PHY devices and link management
-------------------------------
-- get_phy_flags: Some switches are interfaced to various kinds of Ethernet PHYs,
+- ``get_phy_flags``: Some switches are interfaced to various kinds of Ethernet PHYs,
if the PHY library PHY driver needs to know about information it cannot obtain
on its own (e.g.: coming from switch memory mapped registers), this function
should return a 32-bits bitmask of "flags", that is private between the switch
- driver and the Ethernet PHY driver in drivers/net/phy/*.
+ driver and the Ethernet PHY driver in ``drivers/net/phy/*``.
-- phy_read: Function invoked by the DSA slave MDIO bus when attempting to read
+- ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read
the switch port MDIO registers. If unavailable, return 0xffff for each read.
For builtin switch Ethernet PHYs, this function should allow reading the link
status, auto-negotiation results, link partner pages etc..
-- phy_write: Function invoked by the DSA slave MDIO bus when attempting to write
+- ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write
to the switch port MDIO registers. If unavailable return a negative error
code.
-- adjust_link: Function invoked by the PHY library when a slave network device
+- ``adjust_link``: Function invoked by the PHY library when a slave network device
is attached to a PHY device. This function is responsible for appropriately
configuring the switch port link parameters: speed, duplex, pause based on
- what the phy_device is providing.
+ what the ``phy_device`` is providing.
-- fixed_link_update: Function invoked by the PHY library, and specifically by
+- ``fixed_link_update``: Function invoked by the PHY library, and specifically by
the fixed PHY driver asking the switch driver for link parameters that could
not be auto-negotiated, or obtained by reading the PHY registers through MDIO.
This is particularly useful for specific kinds of hardware such as QSGMII,
@@ -405,87 +408,87 @@ PHY devices and link management
Ethtool operations
------------------
-- get_strings: ethtool function used to query the driver's strings, will
+- ``get_strings``: ethtool function used to query the driver's strings, will
typically return statistics strings, private flags strings etc.
-- get_ethtool_stats: ethtool function used to query per-port statistics and
+- ``get_ethtool_stats``: ethtool function used to query per-port statistics and
return their values. DSA overlays slave network devices general statistics:
RX/TX counters from the network device, with switch driver specific statistics
per port
-- get_sset_count: ethtool function used to query the number of statistics items
+- ``get_sset_count``: ethtool function used to query the number of statistics items
-- get_wol: ethtool function used to obtain Wake-on-LAN settings per-port, this
+- ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this
function may, for certain implementations also query the master network device
Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN
-- set_wol: ethtool function used to configure Wake-on-LAN settings per-port,
+- ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port,
direct counterpart to get_wol with similar restrictions
-- set_eee: ethtool function which is used to configure a switch port EEE (Green
+- ``set_eee``: ethtool function which is used to configure a switch port EEE (Green
Ethernet) settings, can optionally invoke the PHY library to enable EEE at the
PHY level if relevant. This function should enable EEE at the switch port MAC
controller and data-processing logic
-- get_eee: ethtool function which is used to query a switch port EEE settings,
+- ``get_eee``: ethtool function which is used to query a switch port EEE settings,
this function should return the EEE state of the switch port MAC controller
and data-processing logic as well as query the PHY for its currently configured
EEE settings
-- get_eeprom_len: ethtool function returning for a given switch the EEPROM
+- ``get_eeprom_len``: ethtool function returning for a given switch the EEPROM
length/size in bytes
-- get_eeprom: ethtool function returning for a given switch the EEPROM contents
+- ``get_eeprom``: ethtool function returning for a given switch the EEPROM contents
-- set_eeprom: ethtool function writing specified data to a given switch EEPROM
+- ``set_eeprom``: ethtool function writing specified data to a given switch EEPROM
-- get_regs_len: ethtool function returning the register length for a given
+- ``get_regs_len``: ethtool function returning the register length for a given
switch
-- get_regs: ethtool function returning the Ethernet switch internal register
+- ``get_regs``: ethtool function returning the Ethernet switch internal register
contents. This function might require user-land code in ethtool to
pretty-print register values and registers
Power management
----------------
-- suspend: function invoked by the DSA platform device when the system goes to
+- ``suspend``: function invoked by the DSA platform device when the system goes to
suspend, should quiesce all Ethernet switch activities, but keep ports
participating in Wake-on-LAN active as well as additional wake-up logic if
supported
-- resume: function invoked by the DSA platform device when the system resumes,
+- ``resume``: function invoked by the DSA platform device when the system resumes,
should resume all Ethernet switch activities and re-configure the switch to be
in a fully active state
-- port_enable: function invoked by the DSA slave network device ndo_open
+- ``port_enable``: function invoked by the DSA slave network device ndo_open
function when a port is administratively brought up, this function should be
fully enabling a given switch port. DSA takes care of marking the port with
- BR_STATE_BLOCKING if the port is a bridge member, or BR_STATE_FORWARDING if it
+ ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it
was not, and propagating these changes down to the hardware
-- port_disable: function invoked by the DSA slave network device ndo_close
+- ``port_disable``: function invoked by the DSA slave network device ndo_close
function when a port is administratively brought down, this function should be
fully disabling a given switch port. DSA takes care of marking the port with
- BR_STATE_DISABLED and propagating changes to the hardware if this port is
+ ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
Bridge layer
------------
-- port_bridge_join: bridge layer function invoked when a given switch port is
+- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should be doing the necessary at the switch
level to permit the joining port to be added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
-- port_bridge_leave: bridge layer function invoked when a given switch port is
+- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should be doing the necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
remaining bridge members. When the port leaves the bridge, it should be aged
out at the switch hardware for the switch to (re) learn MAC addresses behind
this port.
-- port_stp_state_set: bridge layer function invoked when a given switch port STP
+- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
hardware to forward/block/learn traffic. The switch driver is responsible for
computing a STP state change based on current and asked parameters and perform
@@ -494,7 +497,7 @@ Bridge layer
Bridge VLAN filtering
---------------------
-- port_vlan_filtering: bridge layer function invoked when the bridge gets
+- ``port_vlan_filtering``: bridge layer function invoked when the bridge gets
configured for turning on or off VLAN filtering. If nothing specific needs to
be done at the hardware level, this callback does not need to be implemented.
When VLAN filtering is turned on, the hardware must be programmed with
@@ -504,61 +507,61 @@ Bridge VLAN filtering
accept any 802.1Q frames irrespective of their VLAN ID, and untagged frames are
allowed.
-- port_vlan_prepare: bridge layer function invoked when the bridge prepares the
+- ``port_vlan_prepare``: bridge layer function invoked when the bridge prepares the
configuration of a VLAN on the given port. If the operation is not supported
- by the hardware, this function should return -EOPNOTSUPP to inform the bridge
+ by the hardware, this function should return ``-EOPNOTSUPP`` to inform the bridge
code to fallback to a software implementation. No hardware setup must be done
in this function. See port_vlan_add for this and details.
-- port_vlan_add: bridge layer function invoked when a VLAN is configured
+- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
(tagged or untagged) for the given switch port
-- port_vlan_del: bridge layer function invoked when a VLAN is removed from the
+- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- port_vlan_dump: bridge layer function invoked with a switchdev callback
+- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each VLAN the given port is a member
of. A switchdev object is used to carry the VID and bridge flags.
-- port_fdb_add: bridge layer function invoked when the bridge wants to install a
+- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
associated with this VLAN ID. If the operation is not supported, this
- function should return -EOPNOTSUPP to inform the bridge code to fallback to
+ function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
a software implementation.
-Note: VLAN ID 0 corresponds to the port private database, which, in the context
-of DSA, would be the its port-based VLAN, used by the associated bridge device.
+.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
+ of DSA, would be its port-based VLAN, used by the associated bridge device.
-- port_fdb_del: bridge layer function invoked when the bridge wants to remove a
+- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- port_fdb_dump: bridge layer function invoked with a switchdev callback
+- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each MAC address known to be behind
the given port. A switchdev object is used to carry the VID and FDB info.
-- port_mdb_prepare: bridge layer function invoked when the bridge prepares the
+- ``port_mdb_prepare``: bridge layer function invoked when the bridge prepares the
installation of a multicast database entry. If the operation is not supported,
- this function should return -EOPNOTSUPP to inform the bridge code to fallback
+ this function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback
to a software implementation. No hardware setup must be done in this function.
- See port_fdb_add for this and details.
+ See ``port_fdb_add`` for this and details.
-- port_mdb_add: bridge layer function invoked when the bridge wants to install
+- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
a multicast database entry, the switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-Note: VLAN ID 0 corresponds to the port private database, which, in the context
-of DSA, would be the its port-based VLAN, used by the associated bridge device.
+.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
+ of DSA, would be its port-based VLAN, used by the associated bridge device.
-- port_mdb_del: bridge layer function invoked when the bridge wants to remove a
+- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- port_mdb_dump: bridge layer function invoked with a switchdev callback
+- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
function that the driver has to call for each MAC address known to be behind
the given port. A switchdev object is used to carry the VID and MDB info.
@@ -577,7 +580,7 @@ two subsystems and get the best of both worlds.
Other hanging fruits
--------------------
-- making the number of ports fully dynamic and not dependent on DSA_MAX_PORTS
+- making the number of ports fully dynamic and not dependent on ``DSA_MAX_PORTS``
- allowing more than one CPU/management interface:
http://comments.gmane.org/gmane.linux.network/365657
- porting more drivers from other vendors:
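
As a companion to step 5 of the receive sequence above, here is a purely illustrative, userspace-compilable model of what a tag parser does: recover the originating port and strip the vendor tag in place so the frame looks like ordinary Ethernet again. The 4-byte tag position after the MAC addresses and the ``PORT_MASK`` value are assumptions for the sketch, not the real layout from ``net/dsa/tag_brcm.c``::

    #include <stdint.h>
    #include <string.h>

    #define TAG_LEN   4             /* hypothetical 4-byte switch tag */
    #define PORT_MASK 0x1f          /* hypothetical port field mask */

    /* Returns the source port, or -1 if the frame is too short; on
     * success the tag is removed in place and *len is shrunk. */
    static int strip_switch_tag(uint8_t *frame, size_t *len)
    {
        uint8_t *tag = frame + 2 * 6;       /* after dst + src MAC */
        int port;

        if (*len < 2 * 6 + TAG_LEN)
            return -1;
        port = tag[3] & PORT_MASK;          /* originating port */
        memmove(tag, tag + TAG_LEN, *len - (2 * 6 + TAG_LEN));
        *len -= TAG_LEN;
        return port;
    }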
diff --git a/Documentation/networking/dsa/index.rst b/Documentation/networking/dsa/index.rst
new file mode 100644
index 000000000000..5c488d345a1e
--- /dev/null
+++ b/Documentation/networking/dsa/index.rst
@@ -0,0 +1,10 @@
+===============================
+Distributed Switch Architecture
+===============================
+
+.. toctree::
+ :maxdepth: 1
+
+ dsa
+ bcm_sf2
+ lan9303
diff --git a/Documentation/networking/dsa/lan9303.txt b/Documentation/networking/dsa/lan9303.rst
index 144b02b95207..e3c820db28ad 100644
--- a/Documentation/networking/dsa/lan9303.txt
+++ b/Documentation/networking/dsa/lan9303.rst
@@ -1,3 +1,4 @@
+==============================
LAN9303 Ethernet switch driver
==============================
@@ -9,10 +10,9 @@ host master network interface (e.g. fixed link).
Driver details
==============
-The driver is implemented as a DSA driver, see
-Documentation/networking/dsa/dsa.txt.
+The driver is implemented as a DSA driver, see ``Documentation/networking/dsa/dsa.rst``.
-See Documentation/devicetree/bindings/net/dsa/lan9303.txt for device tree
+See ``Documentation/devicetree/bindings/net/dsa/lan9303.txt`` for device tree
binding.
The LAN9303 can be managed both via MDIO and I2C, both supported by this driver.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 984e68f9e026..269d6f2661d5 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -25,6 +25,7 @@ Contents:
device_drivers/intel/i40e
device_drivers/intel/iavf
device_drivers/intel/ice
+ dsa/index
devlink-info-versions
ieee802154
kapi
diff --git a/MAINTAINERS b/MAINTAINERS
index cdb088103b2e..2b08063c8275 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1893,14 +1893,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
ARM/NUVOTON NPCM ARCHITECTURE
M: Avi Fishman <avifishman70@gmail.com>
M: Tomer Maimon <tmaimon77@gmail.com>
+M: Tali Perry <tali.perry1@gmail.com>
R: Patrick Venture <venture@google.com>
R: Nancy Yuen <yuenn@google.com>
-R: Brendan Higgins <brendanhiggins@google.com>
+R: Benjamin Fair <benjaminfair@google.com>
L: openbmc@lists.ozlabs.org (moderated for non-subscribers)
S: Supported
F: arch/arm/mach-npcm/
F: arch/arm/boot/dts/nuvoton-npcm*
-F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h
+F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h
F: drivers/*/*npcm*
F: Documentation/devicetree/bindings/*/*npcm*
F: Documentation/devicetree/bindings/*/*/*npcm*
@@ -7518,7 +7519,7 @@ F: include/net/mac802154.h
F: include/net/af_ieee802154.h
F: include/net/cfg802154.h
F: include/net/ieee802154_netdev.h
-F: Documentation/networking/ieee802154.txt
+F: Documentation/networking/ieee802154.rst
IFE PROTOCOL
M: Yotam Gigi <yotam.gi@gmail.com>
diff --git a/Makefile b/Makefile
index 026fbc450906..079c9a90bf90 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
NAME = Shy Crocodile
# *DOCUMENTATION*
@@ -401,6 +401,7 @@ NM = $(CROSS_COMPILE)nm
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
+PAHOLE = pahole
LEX = flex
YACC = bison
AWK = awk
@@ -455,7 +456,7 @@ KBUILD_LDFLAGS :=
GCC_PLUGINS_CFLAGS :=
export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
+export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
export MAKE LEX YACC AWK INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE
export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 29de09804306..c7a4201ed62b 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
*/
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
unsigned long *inside_ptregs = &(regs->r0);
- inside_ptregs -= i;
-
- BUG_ON((i + n) > 6);
+ unsigned int n = 6;
+ unsigned int i = 0;
while (n--) {
args[i++] = (*inside_ptregs);
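
Across every architecture touched by this series, syscall_get_arguments() and
syscall_set_arguments() drop the (i, n) window parameters and always operate on
all six arguments. A minimal caller sketch under the new contract (illustrative
only; it mirrors updated in-tree callers such as the MIPS seccomp path later in
this series):

	unsigned long args[6];

	/* The destination buffer must now always hold six slots. */
	syscall_get_arguments(current, task_pt_regs(current), args);
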
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index dce5be5df97b..edcff79879e7 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -57,6 +57,24 @@
enable-active-high;
};
+ /* TPS79501 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS79501 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
matrix_keypad: matrix_keypad0 {
compatible = "gpio-matrix-keypad";
debounce-delay-ms = <5>;
@@ -499,10 +517,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index b128998097ce..2c2d8b5b8cf5 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -73,6 +73,24 @@
enable-active-high;
};
+ /* TPS79518 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS78633 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
leds {
pinctrl-names = "default";
pinctrl-0 = <&user_leds_s0>;
@@ -501,10 +519,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
index f459ec316a22..ca6d9f02a800 100644
--- a/arch/arm/boot/dts/am33xx-l4.dtsi
+++ b/arch/arm/boot/dts/am33xx-l4.dtsi
@@ -1762,7 +1762,7 @@
reg = <0xcc000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -1785,7 +1785,7 @@
reg = <0xd0000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi
index aa107ee41b8b..ef653c3209bc 100644
--- a/arch/arm/boot/dts/rk3288-tinker.dtsi
+++ b/arch/arm/boot/dts/rk3288-tinker.dtsi
@@ -254,6 +254,7 @@
};
vccio_sd: LDO_REG5 {
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vccio_sd";
@@ -430,7 +431,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- card-detect-delay = <200>;
+ broken-cd;
disable-wp; /* wp not hooked up */
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 0bc2409f6903..192dbc089ade 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -25,8 +25,6 @@
gpio_keys: gpio-keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pwr_key_l>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index ca7d52daa8fb..a024d1e7e74c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -70,7 +70,7 @@
compatible = "arm,cortex-a12";
reg = <0x501>;
resets = <&cru SRST_CORE1>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -80,7 +80,7 @@
compatible = "arm,cortex-a12";
reg = <0x502>;
resets = <&cru SRST_CORE2>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -90,7 +90,7 @@
compatible = "arm,cortex-a12";
reg = <0x503>;
resets = <&cru SRST_CORE3>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -1119,8 +1119,6 @@
clock-names = "ref", "pclk";
power-domains = <&power RK3288_PD_VIO>;
rockchip,grf = <&grf>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
@@ -1282,27 +1280,27 @@
gpu_opp_table: gpu-opp-table {
compatible = "operating-points-v2";
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <950000>;
};
- opp@200000000 {
+ opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
opp-microvolt = <950000>;
};
- opp@300000000 {
+ opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
opp-microvolt = <1000000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1100000>;
};
- opp@500000000 {
+ opp-500000000 {
opp-hz = /bits/ 64 <500000000>;
opp-microvolt = <1200000>;
};
- opp@600000000 {
+ opp-600000000 {
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <1250000>;
};
diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1c01a6f843d8..28a2e45752fe 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
@@ -518,7 +518,7 @@
#define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0)
#define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3)
#define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1)
-#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1)
+#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1)
#define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2)
#define PIN_PC10 74
#define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0)
diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
index f2f6558a00f1..04066f9cb8a3 100644
--- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts
+++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts
@@ -213,13 +213,12 @@
gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>;
gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>;
/*
- * This chipselect is active high. Just setting the flags
- * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings,
- * it will be ignored, only the special "spi-cs-high" flag
- * really counts.
+	 * It's not actually active high, but the frameworks assume
+	 * the polarity of the passed-in GPIO is "normal" (active
+	 * high) and then actively drive the line low to select the
+	 * chip.
*/
cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
- spi-cs-high;
num-chipselects = <1>;
/*
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 06dea6bce293..080ce70cab12 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -55,53 +55,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- args[0] = regs->ARM_ORIG_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->ARM_r0 + i, n * sizeof(args[0]));
+ args[0] = regs->ARM_ORIG_r0;
+ args++;
+
+ memcpy(args, &regs->ARM_r0 + 1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->ARM_ORIG_r0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
+ regs->ARM_ORIG_r0 = args[0];
+ args++;
+
+ memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 51e808adb00c..2a757dcaa1a5 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void)
np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam");
if (!np)
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
pdev = of_find_device_by_node(np);
of_node_put(np);
if (!pdev) {
pr_warn("%s: failed to find securam device!\n", __func__);
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
}
sram_pool = gen_pool_get(&pdev->dev, NULL);
@@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void)
return 0;
securam_fail:
+ put_device(&pdev->dev);
+securam_fail_no_ref_dev:
iounmap(pm_data.sfrbu);
pm_data.sfrbu = NULL;
return ret;
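
The relabeled jump targets encode a get/put pairing: of_find_device_by_node()
takes a reference on the device, so failures after the lookup must drop it,
while failures before or during the lookup must not. A minimal sketch of the
pattern, with a hypothetical setup() step standing in for the SRAM mapping
code:

	pdev = of_find_device_by_node(np);	/* takes a device reference */
	if (!pdev)
		goto fail_no_ref;		/* lookup failed: nothing to put */

	if (setup(pdev))			/* hypothetical follow-up step */
		goto fail;			/* reference is held: must be put */

	return 0;

	fail:
		put_device(&pdev->dev);
	fail_no_ref:
		return ret;
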
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 53c316f7301e..fe4932fda01d 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = {
}
};
-static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
+static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32);
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
@@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = {
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
@@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = {
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
@@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = {
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_2_data,
},
};
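
These IOP ADMA units only address 32 bits, so the 64-bit coherent mask
overstated the hardware; the same correction repeats for the TPMI, plat-iop,
and plat-orion devices below. For reference, DMA_BIT_MASK() from
include/linux/dma-mapping.h expands to a mask of the low n bits:

	#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

	/* so DMA_BIT_MASK(32) == 0x00000000ffffffffULL */
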
diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c
index db511ec2b1df..116feb6b261e 100644
--- a/arch/arm/mach-iop13xx/tpmi.c
+++ b/arch/arm/mach-iop13xx/tpmi.c
@@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = {
}
};
-u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
+u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32);
static struct platform_device iop13xx_tpmi_0_device = {
.name = "iop-tpmi",
.id = 0,
@@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = {
.resource = iop13xx_tpmi_0_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = {
.resource = iop13xx_tpmi_1_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = {
.resource = iop13xx_tpmi_2_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = {
.resource = iop13xx_tpmi_3_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c
index 591543c81399..3ea880f5fcb7 100644
--- a/arch/arm/mach-milbeaut/platsmp.c
+++ b/arch/arm/mach-milbeaut/platsmp.c
@@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus)
writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4);
}
+#ifdef CONFIG_HOTPLUG_CPU
static void m10v_cpu_die(unsigned int l_cpu)
{
gic_cpu_if_down(0);
@@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu)
return 1;
}
+#endif
static struct smp_operations m10v_smp_ops __initdata = {
.smp_prepare_cpus = m10v_smp_init,
.smp_boot_secondary = m10v_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = m10v_cpu_die,
.cpu_kill = m10v_cpu_kill,
+#endif
};
CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops);
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index be30c3c061b4..1b15d593837e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -182,6 +182,7 @@ static struct resource latch1_resources[] = {
static struct bgpio_pdata latch1_pdata = {
.label = LATCH1_LABEL,
+ .base = -1,
.ngpio = LATCH1_NGPIO,
};
@@ -219,6 +220,7 @@ static struct resource latch2_resources[] = {
static struct bgpio_pdata latch2_pdata = {
.label = LATCH2_LABEL,
+ .base = -1,
.ngpio = LATCH2_NGPIO,
};
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 1444b4b4bd9f..439e143cad7b 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -250,8 +250,10 @@ static int __init omapdss_init_of(void)
if (!node)
return 0;
- if (!of_device_is_available(node))
+ if (!of_device_is_available(node)) {
+ of_node_put(node);
return 0;
+ }
pdev = of_find_device_by_node(node);
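
The of_find_*() lookups return the node with its reference count raised, so
the early "not available" return needs a matching of_node_put(). A standalone
sketch of the rule; the compatible string is hypothetical, not from the patch:

	node = of_find_compatible_node(NULL, NULL, "vendor,display-subsystem");
	if (!node)
		return 0;		/* nothing acquired, nothing to drop */

	if (!of_device_is_available(node)) {
		of_node_put(node);	/* drop the lookup's reference */
		return 0;
	}
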
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index a4d1f8de3b5b..d9612221e484 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = {
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
@@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = {
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
@@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = {
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_aau_data,
},
};
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index a6c81ce00f52..8647cb80a93b 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = {
.resource = orion_xor0_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor0_pdata,
},
};
@@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = {
.resource = orion_xor1_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor1_pdata,
},
};
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 7c649f6b14cb..cd7c76e58b09 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -162,6 +162,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 1>;
+ altr,sysmgr-syscon = <&sysmgr 0x44 0>;
status = "disabled";
};
@@ -179,6 +180,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 2>;
+ altr,sysmgr-syscon = <&sysmgr 0x48 0>;
status = "disabled";
};
@@ -196,6 +198,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 3>;
+ altr,sysmgr-syscon = <&sysmgr 0x4c 0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
index 33c44e857247..0e34354b2092 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
@@ -108,8 +108,8 @@
snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 50000>;
- tx_delay = <0x25>;
- rx_delay = <0x11>;
+ tx_delay = <0x24>;
+ rx_delay = <0x18>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index 2157a528276b..79b4d1d4b5d6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -46,8 +46,7 @@
vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
+ gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&usb20_host_drv>;
regulator-name = "vcc_host1_5v";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 84f14b132e8f..dabef1a21649 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -1445,11 +1445,11 @@
sdmmc0 {
sdmmc0_clk: sdmmc0-clk {
- rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>;
+ rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>;
};
sdmmc0_cmd: sdmmc0-cmd {
- rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>;
};
sdmmc0_dectn: sdmmc0-dectn {
@@ -1461,14 +1461,14 @@
};
sdmmc0_bus1: sdmmc0-bus1 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>;
};
sdmmc0_bus4: sdmmc0-bus4 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>,
- <1 RK_PA1 1 &pcfg_pull_up_4ma>,
- <1 RK_PA2 1 &pcfg_pull_up_4ma>,
- <1 RK_PA3 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA1 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA2 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA3 1 &pcfg_pull_up_8ma>;
};
sdmmc0_gpio: sdmmc0-gpio {
@@ -1642,50 +1642,50 @@
rgmiim1_pins: rgmiim1-pins {
rockchip,pins =
/* mac_txclk */
- <1 RK_PB4 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB4 2 &pcfg_pull_none_8ma>,
/* mac_rxclk */
- <1 RK_PB5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB5 2 &pcfg_pull_none_4ma>,
/* mac_mdio */
- <1 RK_PC3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC3 2 &pcfg_pull_none_4ma>,
/* mac_txen */
- <1 RK_PD1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PD1 2 &pcfg_pull_none_8ma>,
/* mac_clk */
- <1 RK_PC5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC5 2 &pcfg_pull_none_4ma>,
/* mac_rxdv */
- <1 RK_PC6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC6 2 &pcfg_pull_none_4ma>,
/* mac_mdc */
- <1 RK_PC7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC7 2 &pcfg_pull_none_4ma>,
/* mac_rxd1 */
- <1 RK_PB2 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB2 2 &pcfg_pull_none_4ma>,
/* mac_rxd0 */
- <1 RK_PB3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB3 2 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <1 RK_PB0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB0 2 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <1 RK_PB1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB1 2 &pcfg_pull_none_8ma>,
/* mac_rxd3 */
- <1 RK_PB6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB6 2 &pcfg_pull_none_4ma>,
/* mac_rxd2 */
- <1 RK_PB7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB7 2 &pcfg_pull_none_4ma>,
/* mac_txd3 */
- <1 RK_PC0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC0 2 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <1 RK_PC1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC1 2 &pcfg_pull_none_8ma>,
/* mac_txclk */
- <0 RK_PB0 1 &pcfg_pull_none>,
+ <0 RK_PB0 1 &pcfg_pull_none_8ma>,
/* mac_txen */
- <0 RK_PB4 1 &pcfg_pull_none>,
+ <0 RK_PB4 1 &pcfg_pull_none_8ma>,
/* mac_clk */
- <0 RK_PD0 1 &pcfg_pull_none>,
+ <0 RK_PD0 1 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <0 RK_PC0 1 &pcfg_pull_none>,
+ <0 RK_PC0 1 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <0 RK_PC1 1 &pcfg_pull_none>,
+ <0 RK_PC1 1 &pcfg_pull_none_8ma>,
/* mac_txd3 */
- <0 RK_PC7 1 &pcfg_pull_none>,
+ <0 RK_PC7 1 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <0 RK_PC6 1 &pcfg_pull_none>;
+ <0 RK_PC6 1 &pcfg_pull_none_8ma>;
};
rmiim1_pins: rmiim1-pins {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
index 4a543f2117d4..844eac939a97 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
@@ -158,6 +158,7 @@
};
&hdmi {
+ ddc-i2c-bus = <&i2c3>;
pinctrl-names = "default";
pinctrl-0 = <&hdmi_cec>;
status = "okay";
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ad8be16a39c9..a179df3674a1 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -65,52 +65,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_x0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+ args[0] = regs->orig_x0;
+ args++;
+
+ memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_x0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+ regs->orig_x0 = args[0];
+ args++;
+
+ memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
}
/*
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 5ba4465e44f0..ea94cf8f9dc6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
@@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index ae2be315ee9c..15ba8599858e 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -46,78 +46,27 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->a4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->b4;
- case 2:
- if (!n--)
- break;
- *args++ = regs->a6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->b6;
- case 4:
- if (!n--)
- break;
- *args++ = regs->a8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->b8;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->a4;
+ *args++ = regs->b4;
+ *args++ = regs->a6;
+ *args++ = regs->b6;
+ *args++ = regs->a8;
+ *args = regs->b8;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->a4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->b4 = *args++;
- case 2:
- if (!n--)
- break;
- regs->a6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->b6 = *args++;
- case 4:
- if (!n--)
- break;
- regs->a8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->a9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->a4 = *args++;
+ regs->b4 = *args++;
+ regs->a6 = *args++;
+ regs->b6 = *args++;
+ regs->a8 = *args++;
+ regs->a9 = *args;
}
#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index d637445737b7..bda0a446c63e 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -43,30 +43,20 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int
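
Besides dropping (i, n), the csky rewrite quietly fixes the removed
"&regs->a1 + i * sizeof(regs->a1)" expression, which scaled the offset twice:
pointer arithmetic on an unsigned long * already advances one element per
unit. A standalone demonstration (assumes a 64-bit build where
sizeof(unsigned long) == 8):

	#include <assert.h>

	int main(void)
	{
		unsigned long regs_a[16];
		unsigned long *p = &regs_a[1];	/* stands in for &regs->a1 */
		unsigned int i = 1;

		assert(p + i == &regs_a[2]);			/* correct: +1 element */
		assert(p + i * sizeof(*p) == &regs_a[9]);	/* old code: 8 elements too far */
		return 0;
	}
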
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index 924990401237..ddd483c6ca95 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- while (n > 0) {
- switch (i) {
- case 0:
- *args++ = regs->er1;
- break;
- case 1:
- *args++ = regs->er2;
- break;
- case 2:
- *args++ = regs->er3;
- break;
- case 3:
- *args++ = regs->er4;
- break;
- case 4:
- *args++ = regs->er5;
- break;
- case 5:
- *args++ = regs->er6;
- break;
- }
- i++;
- n--;
- }
+ *args++ = regs->er1;
+ *args++ = regs->er2;
+ *args++ = regs->er3;
+ *args++ = regs->er4;
+ *args++ = regs->er5;
+ *args = regs->er6;
}
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index 4af9c7b6f13a..ae3a1e24fabd 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+ memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
}
#endif
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 1d0b875fec44..0d9e7fab4a79 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -59,26 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
extern void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw);
+ struct pt_regs *regs, unsigned long *args, int rw);
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+ ia64_syscall_get_set_arguments(task, regs, args, 0);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+ ia64_syscall_get_set_arguments(task, regs, args, 1);
}
static inline int syscall_get_arch(void)
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6d50ede0ed69..bf9c24d9ce84 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
}
void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw)
+ struct pt_regs *regs, unsigned long *args, int rw)
{
struct syscall_get_set_args data = {
- .i = i,
- .n = n,
+ .i = 0,
+ .n = 6,
.args = args,
.regs = regs,
.rw = rw,
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 220decd605a4..833d3a53dab3 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -82,18 +82,22 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
*args++ = microblaze_get_syscall_arg(regs, i++);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
microblaze_set_syscall_arg(regs, i++, *args++);
}
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 6cf8ffb5367e..a2b4748655df 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
int ret;
/* O32 ABI syscall() */
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0057c910bc2f..3a62f80958e1 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
sd.nr = syscall;
sd.arch = syscall_get_arch();
- syscall_get_arguments(current, regs, 0, 6, args);
+ syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
sd.args[i] = args[i];
sd.instruction_pointer = KSTK_EIP(current);
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index f7e5e86765fe..671ebd357496 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -108,81 +108,41 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
#define SYSCALL_MAX_ARGS 6
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- if (n == 0)
- return;
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+ args[0] = regs->orig_r0;
+ args++;
+ memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
}
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_r0 = args[0];
- args++;
- i++;
- n--;
- }
+ regs->orig_r0 = args[0];
+ args++;
- memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+ memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 9de220854c4a..d7624ed06efb 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -58,81 +58,25 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args)
+ struct pt_regs *regs, unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->r4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->r5;
- case 2:
- if (!n--)
- break;
- *args++ = regs->r6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->r7;
- case 4:
- if (!n--)
- break;
- *args++ = regs->r8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->r9;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->r4;
+ *args++ = regs->r5;
+ *args++ = regs->r6;
+ *args++ = regs->r7;
+ *args++ = regs->r8;
+ *args = regs->r9;
}
static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- const unsigned long *args)
+ struct pt_regs *regs, const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->r4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->r5 = *args++;
- case 2:
- if (!n--)
- break;
- regs->r6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->r7 = *args++;
- case 4:
- if (!n--)
- break;
- regs->r8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->r9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->r4 = *args++;
+ regs->r5 = *args++;
+ regs->r6 = *args++;
+ regs->r7 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
}
#endif
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index 2db9f1cf0694..b4ff07c1baed 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -56,20 +56,16 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->gpr[3], 6 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 2a27b275ab09..9ff033d261ab 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -22,13 +22,14 @@ unsigned long profile_pc(struct pt_regs *);
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
- return regs->gr[20];
+ return regs->gr[28];
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->iaoq[0] = val;
+ regs->iaoq[0] = val;
+ regs->iaoq[1] = val + 4;
}
/* Query offset/name of register from its name/offset */
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 8bff1a58c97f..62a6d477fae0 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk,
}
static inline void syscall_get_arguments(struct task_struct *tsk,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- BUG_ON(i);
-
- switch (n) {
- case 6:
- args[5] = regs->gr[21];
- case 5:
- args[4] = regs->gr[22];
- case 4:
- args[3] = regs->gr[23];
- case 3:
- args[2] = regs->gr[24];
- case 2:
- args[1] = regs->gr[25];
- case 1:
- args[0] = regs->gr[26];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->gr[21];
+ args[4] = regs->gr[22];
+ args[3] = regs->gr[23];
+ args[2] = regs->gr[24];
+ args[1] = regs->gr[25];
+ args[0] = regs->gr[26];
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index eb39e7e380d7..841db71958cd 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void)
static int __init parisc_idle_init(void)
{
- const char *marker;
-
- /* check QEMU/SeaBIOS marker in PAGE0 */
- marker = (char *) &PAGE0->pad0;
- running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
-
if (!running_on_qemu)
cpu_idle_poll_ctrl(1);
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 15dd9e21be7e..d908058d05c1 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -397,6 +397,9 @@ void __init start_parisc(void)
int ret, cpunum;
struct pdc_coproc_cfg coproc_cfg;
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0);
+
cpunum = smp_processor_id();
init_cpu_topology();
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 1a0e7a8b1c81..1243045bad2d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long val, mask = -1UL;
-
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_32BIT))
mask = 0xffffffff;
#endif
while (n--) {
- if (n == 0 && i == 0)
+ if (n == 0)
val = regs->orig_gpr3;
else
- val = regs->gpr[3 + i + n];
+ val = regs->gpr[3 + n];
args[n] = val & mask;
}
@@ -88,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
/* Also copy the first argument into orig_gpr3 */
- if (i == 0 && n > 0)
- regs->orig_gpr3 = args[0];
+ regs->orig_gpr3 = args[0];
}
static inline int syscall_get_arch(void)
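
The mask idiom keeps one code path for both ABIs: native tasks use a
full-width mask, while 32-bit (compat) tasks get each argument truncated to
its low 32 bits. The same trick recurs in the s390 and sparc hunks below. In
isolation (standalone sketch, not kernel code):

	static void fetch_masked_args(const unsigned long raw[6],
				      unsigned long args[6], int is_compat)
	{
		unsigned long mask = is_compat ? 0xffffffffUL : -1UL;
		int n;

		for (n = 0; n < 6; n++)
			args[n] = raw[n] & mask;
	}
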
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 683b5b3805bd..cd381e2291df 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
@@ -712,6 +713,12 @@ static void kvm_use_magic_page(void)
static __init void kvm_free_tmp(void)
{
+ /*
+ * Inform kmemleak about the hole in the .bss section since the
+ * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
+ */
+ kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
+ ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index bba3da6ef157..a3d5273ded7c 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -72,32 +72,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int syscall_get_arch(void)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 96f9a9151fde..59c3e91f2cdb 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -56,40 +56,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long mask = -1UL;
+ unsigned int n = 6;
- /*
- * No arguments for this syscall, there's nothing to do.
- */
- if (!n)
- return;
-
- BUG_ON(i + n > 6);
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
mask = 0xffffffff;
#endif
while (n-- > 0)
- if (i + n > 0)
- args[n] = regs->gprs[2 + i + n] & mask;
- if (i == 0)
- args[0] = regs->orig_gpr2 & mask;
+ if (n > 0)
+ args[n] = regs->gprs[2 + n] & mask;
+
+ args[0] = regs->orig_gpr2 & mask;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
+
while (n-- > 0)
- if (i + n > 0)
- regs->gprs[2 + i + n] = args[n];
- if (i == 0)
- regs->orig_gpr2 = args[0];
+ if (n > 0)
+ regs->gprs[2 + n] = args[n];
+ regs->orig_gpr2 = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 958f46da3a79..d91065e81a4e 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = {
struct sh_clk_ops;
-void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
+void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
{
}
-void __init plat_irq_setup(void)
+void __init __weak plat_irq_setup(void)
{
}
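
With the stubs marked __weak, a board or CPU file can provide a strong
definition of the same symbol and the linker picks it over the stub; no other
wiring is needed. A hypothetical override:

	/* Hypothetical platform file: this non-weak definition replaces the
	 * __weak stub above at link time. */
	void __init plat_irq_setup(void)
	{
		/* board-specific interrupt controller bring-up */
	}
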
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 6e118799831c..8c9d7e5e5dcc 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -48,51 +48,28 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- /*
- * Do this simply for now. If we need to start supporting
- * fetching arguments from arbitrary indices, this will need some
- * extra logic. Presently there are no in-tree users that depend
- * on this behaviour.
- */
- BUG_ON(i);
/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
- switch (n) {
- case 6: args[5] = regs->regs[1];
- case 5: args[4] = regs->regs[0];
- case 4: args[3] = regs->regs[7];
- case 3: args[2] = regs->regs[6];
- case 2: args[1] = regs->regs[5];
- case 1: args[0] = regs->regs[4];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->regs[1];
+ args[4] = regs->regs[0];
+ args[3] = regs->regs[7];
+ args[2] = regs->regs[6];
+ args[1] = regs->regs[5];
+ args[0] = regs->regs[4];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- /* Same note as above applies */
- BUG_ON(i);
-
- switch (n) {
- case 6: regs->regs[1] = args[5];
- case 5: regs->regs[0] = args[4];
- case 4: regs->regs[7] = args[3];
- case 3: regs->regs[6] = args[2];
- case 2: regs->regs[5] = args[1];
- case 1: regs->regs[4] = args[0];
- break;
- default:
- BUG();
- }
+ regs->regs[1] = args[5];
+ regs->regs[0] = args[4];
+ regs->regs[7] = args[3];
+ regs->regs[6] = args[2];
+ regs->regs[5] = args[1];
+ regs->regs[4] = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 43882580c7f9..22fad97da066 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -47,20 +47,16 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->regs[2 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->regs[2], 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->regs[2 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->regs[2], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 053989e3f6a6..4d075434e816 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
int zero_extend = 0;
unsigned int j;
+ unsigned int n = 6;
#ifdef CONFIG_SPARC64
if (test_tsk_thread_flag(task, TIF_32BIT))
@@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
#endif
for (j = 0; j < n; j++) {
- unsigned long val = regs->u_regs[UREG_I0 + i + j];
+ unsigned long val = regs->u_regs[UREG_I0 + j];
if (zero_extend)
args[j] = (u32) val;
@@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- unsigned int j;
+ unsigned int i;
- for (j = 0; j < n; j++)
- regs->u_regs[UREG_I0 + i + j] = args[j];
+ for (i = 0; i < 6; i++)
+ regs->u_regs[UREG_I0 + i] = args[i];
}
static inline int syscall_get_arch(void)
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
index 9fb9cf8cd39a..98e50c50c12e 100644
--- a/arch/um/include/asm/syscall-generic.h
+++ b/arch/um/include/asm/syscall-generic.h
@@ -53,84 +53,30 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
const struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG1(r);
- case 1:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG2(r);
- case 2:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG3(r);
- case 3:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG4(r);
- case 4:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG5(r);
- case 5:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG6(r);
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ *args++ = UPT_SYSCALL_ARG1(r);
+ *args++ = UPT_SYSCALL_ARG2(r);
+ *args++ = UPT_SYSCALL_ARG3(r);
+ *args++ = UPT_SYSCALL_ARG4(r);
+ *args++ = UPT_SYSCALL_ARG5(r);
+ *args = UPT_SYSCALL_ARG6(r);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- UPT_SYSCALL_ARG1(r) = *args++;
- case 1:
- if (!n--)
- break;
- UPT_SYSCALL_ARG2(r) = *args++;
- case 2:
- if (!n--)
- break;
- UPT_SYSCALL_ARG3(r) = *args++;
- case 3:
- if (!n--)
- break;
- UPT_SYSCALL_ARG4(r) = *args++;
- case 4:
- if (!n--)
- break;
- UPT_SYSCALL_ARG5(r) = *args++;
- case 5:
- if (!n--)
- break;
- UPT_SYSCALL_ARG6(r) = *args++;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ UPT_SYSCALL_ARG1(r) = *args++;
+ UPT_SYSCALL_ARG2(r) = *args++;
+ UPT_SYSCALL_ARG3(r) = *args++;
+ UPT_SYSCALL_ARG4(r) = *args++;
+ UPT_SYSCALL_ARG5(r) = *args++;
+ UPT_SYSCALL_ARG6(r) = *args;
}
/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d653139857af..4c305471ec33 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ memcpy(args, &regs->bx, 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
@@ -116,124 +114,50 @@ static inline int syscall_get_arch(void)
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->r10;
- case 4:
- if (!n--) break;
- *args++ = regs->r8;
- case 5:
- if (!n--) break;
- *args++ = regs->r9;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ *args++ = regs->di;
+ *args++ = regs->si;
+ *args++ = regs->dx;
+ *args++ = regs->r10;
+ *args++ = regs->r8;
+ *args = regs->r9;
+ }
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- regs->bx = *args++;
- case 1:
- if (!n--) break;
- regs->cx = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->si = *args++;
- case 4:
- if (!n--) break;
- regs->di = *args++;
- case 5:
- if (!n--) break;
- regs->bp = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ regs->bx = *args++;
+ regs->cx = *args++;
+ regs->dx = *args++;
+ regs->si = *args++;
+ regs->di = *args++;
+ regs->bp = *args;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- regs->di = *args++;
- case 1:
- if (!n--) break;
- regs->si = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->r10 = *args++;
- case 4:
- if (!n--) break;
- regs->r8 = *args++;
- case 5:
- if (!n--) break;
- regs->r9 = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ regs->di = *args++;
+ regs->si = *args++;
+ regs->dx = *args++;
+ regs->r10 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
+ }
}
static inline int syscall_get_arch(void)
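
The unrolled bodies spell out the two x86 conventions: di, si, dx, r10, r8, r9
for native 64-bit syscalls, and bx, cx, dx, si, di, bp under IA32 emulation.
The native ordering is also what a userspace tracer sees; a sketch using the
standard ptrace interface (illustrative, error handling omitted):

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/user.h>

	/* Read a stopped x86-64 tracee's six syscall arguments in the same
	 * register order as the kernel helper above. */
	static void get_syscall_args64(pid_t pid, unsigned long args[6])
	{
		struct user_regs_struct r;

		ptrace(PTRACE_GETREGS, pid, NULL, &r);
		args[0] = r.rdi;
		args[1] = r.rsi;
		args[2] = r.rdx;
		args[3] = r.r10;
		args[4] = r.r8;
		args[5] = r.r9;
	}
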
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index de6f0d59a24f..2863c2026655 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -206,6 +206,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
+ return -EINVAL;
+
asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
: [thunk_target] "a" (&hypercall_page[call])
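
The new check rejects out-of-range hypercall numbers before they are used to
index the hypercall page: each stub occupies sizeof(hypercall_page[0]) bytes,
so PAGE_SIZE divided by that is the number of valid entries. A standalone
sketch of the guard; the 32-byte stub size and 4 KiB page are stated here as
assumptions matching the common x86 layout:

	#include <errno.h>

	#define HYPERCALL_STUB_SIZE	32	/* assumed, illustrative */
	#define NR_HYPERCALL_STUBS	(4096 / HYPERCALL_STUB_SIZE)	/* 128 */

	/* Reject a guest-influenced index before it addresses the stub table. */
	static long check_hypercall_nr(unsigned int call)
	{
		if (call >= NR_HYPERCALL_STUBS)
			return -EINVAL;
		return 0;
	}
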
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 426039285fd1..e0a791c3d4fc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -6422,11 +6422,11 @@ e_free:
return ret;
}
-static int get_num_contig_pages(int idx, struct page **inpages,
- unsigned long npages)
+static unsigned long get_num_contig_pages(unsigned long idx,
+ struct page **inpages, unsigned long npages)
{
unsigned long paddr, next_paddr;
- int i = idx + 1, pages = 1;
+ unsigned long i = idx + 1, pages = 1;
/* find the number of contiguous pages starting from idx */
paddr = __sme_page_pa(inpages[idx]);
@@ -6445,12 +6445,12 @@ static int get_num_contig_pages(int idx, struct page **inpages,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
- int i, ret, pages;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6799,7 +6799,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
- int ret, size;
+ unsigned int size;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6807,6 +6808,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
return -EFAULT;
+ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
+ return -EINVAL;
+ if (!debug.dst_uaddr)
+ return -EINVAL;
+
vaddr = debug.src_uaddr;
size = debug.len;
vaddr_end = vaddr + size;
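The `debug.src_uaddr + debug.len < debug.src_uaddr` test is the idiomatic unsigned-overflow check: for a nonzero len, the sum can only be smaller than the base if it wrapped around the address space. A standalone sketch:

#include <stdio.h>
#include <stdbool.h>

/* a [uaddr, uaddr + len) range is valid iff len is nonzero and the
 * end of the range does not wrap around */
static bool range_ok(unsigned long uaddr, unsigned long len)
{
	return len && uaddr + len >= uaddr;
}

int main(void)
{
	printf("%d\n", range_ok(0x1000, 0x100));   /* 1: normal range */
	printf("%d\n", range_ok(~0UL - 8, 64));    /* 0: wraps around */
	printf("%d\n", range_ok(0x1000, 0));       /* 0: empty range */
	return 0;
}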
@@ -6857,8 +6863,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
dst_vaddr,
len, &argp->error);
- sev_unpin_memory(kvm, src_p, 1);
- sev_unpin_memory(kvm, dst_p, 1);
+ sev_unpin_memory(kvm, src_p, n);
+ sev_unpin_memory(kvm, dst_p, n);
if (ret)
goto err;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 153e539c29c9..7ec9bb1dd723 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
+{
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap[word] = ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+}
+
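The helper depends on the VMX MSR-bitmap layout: read-intercept bits for MSRs 0x000-0x1fff occupy the first 1 KiB of the 4 KiB page, and the matching write-intercept bits sit 0x800 bytes higher, so a short loop of whole-word stores covers all 256 read and 256 write bits for the 0x800-0x8ff range. A standalone sketch of the same arithmetic (user-space, made-up buffer):

#include <stdio.h>
#include <string.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

int main(void)
{
	/* 4 KiB bitmap: read bits in the first half, write bits offset
	 * by 0x800 bytes, mirroring the VMX layout */
	unsigned long bitmap[4096 / sizeof(long)];
	unsigned int msr;

	memset(bitmap, 0, sizeof(bitmap));

	/* set all 256 read and 256 write intercept bits for
	 * 0x800..0x8ff with whole-word stores instead of 512
	 * individual set_bit() calls */
	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned int word = msr / BITS_PER_LONG;

		bitmap[word] = ~0UL;                          /* reads  */
		bitmap[word + 0x800 / sizeof(long)] = ~0UL;   /* writes */
	}

	printf("read word for MSR 0x800 : %lx\n",
	       bitmap[0x800 / BITS_PER_LONG]);
	printf("write word for MSR 0x800: %lx\n",
	       bitmap[0x800 / BITS_PER_LONG + 0x800 / sizeof(long)]);
	return 0;
}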
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
return false;
msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (nested_cpu_has_apic_reg_virt(vmcs12)) {
- /*
- * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
- * just lets the processor take the value from the virtual-APIC page;
- * take those 256 bits directly from the L1 bitmap.
- */
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = msr_bitmap_l1[word];
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- } else {
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = ~0;
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- }
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_TASKPRI),
- MSR_TYPE_W);
+ /*
+ * To keep the control flow simple, pay eight 8-byte writes (sixteen
+ * 4-byte writes on 32-bit systems) up front to enable intercepts for
+ * the x2APIC MSR range and selectively disable them below.
+ */
+ enable_x2apic_msr_intercepts(msr_bitmap_l0);
+
+ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800
+ * and 0x8ff, it just lets the processor take the value
+ * from the virtual-APIC page; take those 256 bits
+ * directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ }
+ }
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_EOI),
- MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_SELF_IPI),
- MSR_TYPE_W);
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (nested_cpu_has_vid(vmcs12)) {
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
+ }
}
if (spec_ctrl)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index f7dd895b2353..0c14018d1c26 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -187,15 +187,18 @@ struct thread_struct {
/* Clearing a0 terminates the backtrace. */
#define start_thread(regs, new_pc, new_sp) \
- memset(regs, 0, sizeof(*regs)); \
- regs->pc = new_pc; \
- regs->ps = USER_PS_VALUE; \
- regs->areg[1] = new_sp; \
- regs->areg[0] = 0; \
- regs->wmask = 1; \
- regs->depc = 0; \
- regs->windowbase = 0; \
- regs->windowstart = 1;
+ do { \
+ memset((regs), 0, sizeof(*(regs))); \
+ (regs)->pc = (new_pc); \
+ (regs)->ps = USER_PS_VALUE; \
+ (regs)->areg[1] = (new_sp); \
+ (regs)->areg[0] = 0; \
+ (regs)->wmask = 1; \
+ (regs)->depc = 0; \
+ (regs)->windowbase = 0; \
+ (regs)->windowstart = 1; \
+ (regs)->syscall = NO_SYSCALL; \
+ } while (0)
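Wrapping the body in do { } while (0) is the standard fix for multi-statement macros: without it, only the first statement is governed by an un-braced if, and an if/else around the macro will not even compile. A minimal standalone demonstration with made-up macro names:

#include <stdio.h>

#define INIT_BAD(x, y)	(x) = 1; (y) = 2
#define INIT_GOOD(x, y)	do { (x) = 1; (y) = 2; } while (0)

int main(void)
{
	int a = 0, b = 0, cond = 0;

	if (cond)
		INIT_BAD(a, b);	/* (b) = 2 runs unconditionally! */
	printf("bad:  a=%d b=%d\n", a, b);	/* a=0 b=2 */

	a = b = 0;
	if (cond)
		INIT_GOOD(a, b);
	printf("good: a=%d b=%d\n", a, b);	/* a=0 b=0 */
	return 0;
}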
/* Forward declaration */
struct task_struct;
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index a168bf81c7f4..91dc06d58060 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -59,45 +59,24 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
+ unsigned int i;
- if (n == 0)
- return;
-
- WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS);
-
- for (j = 0; j < n; ++j) {
- if (i + j < SYSCALL_MAX_ARGS)
- args[j] = regs->areg[reg[i + j]];
- else
- args[j] = 0;
- }
+ for (i = 0; i < 6; ++i)
+ args[i] = regs->areg[reg[i]];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
-
- if (n == 0)
- return;
-
- if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) {
- if (i < SYSCALL_MAX_ARGS)
- n = SYSCALL_MAX_ARGS - i;
- else
- return;
- }
+ unsigned int i;
- for (j = 0; j < n; ++j)
- regs->areg[reg[i + j]] = args[j];
+ for (i = 0; i < 6; ++i)
+ regs->areg[reg[i]] = args[i];
}
asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index e50f5124dc6f..e54af8b7e0f8 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1860,6 +1860,8 @@ ENTRY(system_call)
l32i a7, a2, PT_SYSCALL
1:
+ s32i a7, a1, 4
+
/* syscall = sys_call_table[syscall_nr] */
movi a4, sys_call_table
@@ -1893,8 +1895,12 @@ ENTRY(system_call)
retw
1:
+ l32i a4, a1, 4
+ l32i a3, a2, PT_SYSCALL
+ s32i a4, a2, PT_SYSCALL
mov a6, a2
call4 do_syscall_trace_leave
+ s32i a3, a2, PT_SYSCALL
retw
ENDPROC(system_call)
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 174c11f13bba..b9f82510c650 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data)
return 1;
}
+/*
+ * level == 0 is for the return address from the caller of this function,
+ * not from this function itself.
+ */
unsigned long return_address(unsigned level)
{
struct return_addr_data r = {
- .skip = level + 1,
+ .skip = level,
};
walk_stackframe(stack_pointer(NULL), return_address_cb, &r);
return r.addr;
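The off-by-one came from pre-counting return_address()'s own frame even though walk_stackframe() already starts at the caller. A standalone sketch of the skip-counter logic (fake frames, not the xtensa walker):

#include <stdio.h>

struct ret_data {
	unsigned int skip;
	unsigned long addr;
};

/* callback invoked once per frame, innermost first */
static int frame_cb(unsigned long pc, void *data)
{
	struct ret_data *r = data;

	if (r->skip) {
		r->skip--;
		return 0;	/* keep walking */
	}
	r->addr = pc;
	return 1;		/* found it, stop */
}

int main(void)
{
	/* pretend return addresses, innermost (direct caller) first */
	unsigned long frames[] = { 0x1000, 0x2000, 0x3000 };
	unsigned int i;
	/* before the fix this was level + 1, which skipped one frame
	 * too many for every level */
	struct ret_data r = { .skip = 0, .addr = 0 };

	for (i = 0; i < 3; i++)
		if (frame_cb(frames[i], &r))
			break;
	printf("level 0 -> %#lx\n", r.addr);	/* 0x1000, the caller */
	return 0;
}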
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 2fb7d1172228..03678c4afc39 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE);
if (!pte)
- panic("%s: Failed to allocate %zu bytes align=%lx\n",
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; ++i)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4c592496a16a..fac188dd78fa 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
* at least two nodes.
*/
return !(varied_queue_weights || multiple_classes_busy
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
|| bfqd->num_groups_with_pending_reqs > 0
#endif
);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 63311d1ff1ed..a11bef75483d 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
entity->on_st = true;
}
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
struct bfq_group *bfqg =
container_of(entity, struct bfq_group, entity);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4673ebe42255..a55389ba8779 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
*/
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
- blk_qc_t unused;
-
if (blk_cloned_rq_check_limits(q, rq))
return BLK_STS_IOERR;
@@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
- return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true);
+ return blk_mq_request_issue_directly(rq, true);
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 40905539afed..aa6bc5c02643 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async)
+ if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
- else
- blk_mq_insert_requests(hctx, ctx, list);
+ if (list_empty(list))
+ return;
+ }
+ blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ff3d7b49969..a9354835cf51 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1711,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
unsigned int depth;
list_splice_init(&plug->mq_list, &list);
- plug->rq_count = 0;
if (plug->rq_count > 2 && plug->multiple_queues)
list_sort(NULL, &list, plug_rq_cmp);
+ plug->rq_count = 0;
+
this_q = NULL;
this_hctx = NULL;
this_ctx = NULL;
@@ -1800,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
return ret;
}
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie,
- bool bypass, bool last)
+ bool bypass_insert, bool last)
{
struct request_queue *q = rq->q;
bool run_queue = true;
- blk_status_t ret = BLK_STS_RESOURCE;
- int srcu_idx;
- bool force = false;
- hctx_lock(hctx, &srcu_idx);
/*
- * hctx_lock is needed before checking quiesced flag.
+ * RCU or SRCU read lock is needed before checking quiesced flag.
*
- * When queue is stopped or quiesced, ignore 'bypass', insert
- * and return BLK_STS_OK to caller, and avoid driver to try to
- * dispatch again.
+	 * When the queue is stopped or quiesced, ignore 'bypass_insert' from
+	 * blk_mq_request_issue_directly() and return BLK_STS_OK to the
+	 * caller, so that the driver does not try to dispatch again.
*/
- if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) {
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
- bypass = false;
- goto out_unlock;
+ bypass_insert = false;
+ goto insert;
}
- if (unlikely(q->elevator && !bypass))
- goto out_unlock;
+ if (q->elevator && !bypass_insert)
+ goto insert;
if (!blk_mq_get_dispatch_budget(hctx))
- goto out_unlock;
+ goto insert;
if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(hctx);
- goto out_unlock;
+ goto insert;
}
- /*
- * Always add a request that has been through
- *.queue_rq() to the hardware dispatch list.
- */
- force = true;
- ret = __blk_mq_issue_directly(hctx, rq, cookie, last);
-out_unlock:
+ return __blk_mq_issue_directly(hctx, rq, cookie, last);
+insert:
+ if (bypass_insert)
+ return BLK_STS_RESOURCE;
+
+ blk_mq_request_bypass_insert(rq, run_queue);
+ return BLK_STS_OK;
+}
+
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, blk_qc_t *cookie)
+{
+ blk_status_t ret;
+ int srcu_idx;
+
+ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
+ hctx_lock(hctx, &srcu_idx);
+
+ ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+ blk_mq_request_bypass_insert(rq, true);
+ else if (ret != BLK_STS_OK)
+ blk_mq_end_request(rq, ret);
+
+ hctx_unlock(hctx, srcu_idx);
+}
+
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
+{
+ blk_status_t ret;
+ int srcu_idx;
+ blk_qc_t unused_cookie;
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ hctx_lock(hctx, &srcu_idx);
+ ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
hctx_unlock(hctx, srcu_idx);
- switch (ret) {
- case BLK_STS_OK:
- break;
- case BLK_STS_DEV_RESOURCE:
- case BLK_STS_RESOURCE:
- if (force) {
- blk_mq_request_bypass_insert(rq, run_queue);
- /*
- * We have to return BLK_STS_OK for the DM
- * to avoid livelock. Otherwise, we return
- * the real result to indicate whether the
- * request is direct-issued successfully.
- */
- ret = bypass ? BLK_STS_OK : ret;
- } else if (!bypass) {
- blk_mq_sched_insert_request(rq, false,
- run_queue, false);
- }
- break;
- default:
- if (!bypass)
- blk_mq_end_request(rq, ret);
- break;
- }
return ret;
}
@@ -1875,20 +1878,22 @@ out_unlock:
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
- blk_qc_t unused;
- blk_status_t ret = BLK_STS_OK;
-
while (!list_empty(list)) {
+ blk_status_t ret;
struct request *rq = list_first_entry(list, struct request,
queuelist);
list_del_init(&rq->queuelist);
- if (ret == BLK_STS_OK)
- ret = blk_mq_try_issue_directly(hctx, rq, &unused,
- false,
+ ret = blk_mq_request_issue_directly(rq, list_empty(list));
+ if (ret != BLK_STS_OK) {
+ if (ret == BLK_STS_RESOURCE ||
+ ret == BLK_STS_DEV_RESOURCE) {
+ blk_mq_request_bypass_insert(rq,
list_empty(list));
- else
- blk_mq_sched_insert_request(rq, false, true, false);
+ break;
+ }
+ blk_mq_end_request(rq, ret);
+ }
}
/*
@@ -1896,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
* the driver there was more coming, but that turned out to
* be a lie.
*/
- if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)
+ if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
hctx->queue->mq_ops->commit_rqs(hctx);
}
@@ -2003,19 +2008,21 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
plug->rq_count--;
}
blk_add_rq_to_plug(plug, rq);
+ trace_block_plug(q);
blk_mq_put_ctx(data.ctx);
if (same_queue_rq) {
data.hctx = same_queue_rq->mq_hctx;
+ trace_block_unplug(q, 1, true);
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
- &cookie, false, true);
+ &cookie);
}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
!data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true);
+ blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
@@ -2332,7 +2339,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
return 0;
free_fq:
- kfree(hctx->fq);
+ blk_free_flush_queue(hctx->fq);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d704fc7766f4..423ea88ab6fb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq,
- blk_qc_t *cookie,
- bool bypass, bool last);
+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 417a9f15c116..d7ac09c092f2 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1748,6 +1748,11 @@ static int __init null_init(void)
return -EINVAL;
}
+ if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
+ pr_err("null_blk: invalid home_node value\n");
+ g_home_node = NUMA_NO_NODE;
+ }
+
if (g_queue_mode == NULL_Q_RQ) {
pr_err("null_blk: legacy IO path no longer available\n");
return -EINVAL;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 377a694dc228..6d415b20fb70 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -314,6 +314,7 @@ static void pcd_init_units(void)
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
1, BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) {
+ put_disk(disk);
disk->queue = NULL;
continue;
}
@@ -750,6 +751,8 @@ static int pcd_detect(void)
printk("%s: No CD-ROM drive found\n", name);
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
blk_cleanup_queue(cd->disk->queue);
cd->disk->queue = NULL;
blk_mq_free_tag_set(&cd->tag_set);
@@ -1010,8 +1013,14 @@ static int __init pcd_init(void)
pcd_probe_capabilities();
if (register_blkdev(major, name)) {
- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
+ blk_cleanup_queue(cd->disk->queue);
+ blk_mq_free_tag_set(&cd->tag_set);
put_disk(cd->disk);
+ }
return -EBUSY;
}
@@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void)
int unit;
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
if (cd->present) {
del_gendisk(cd->disk);
pi_release(cd->pi);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 103b617cdc31..35e6e271b219 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -762,6 +762,8 @@ static int pf_detect(void)
printk("%s: No ATAPI disk detected\n", name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
blk_cleanup_queue(pf->disk->queue);
pf->disk->queue = NULL;
blk_mq_free_tag_set(&pf->tag_set);
@@ -1029,8 +1031,13 @@ static int __init pf_init(void)
pf_busy = 0;
if (register_blkdev(major, name)) {
- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+ blk_cleanup_queue(pf->disk->queue);
+ blk_mq_free_tag_set(&pf->tag_set);
put_disk(pf->disk);
+ }
return -EBUSY;
}
@@ -1051,6 +1058,9 @@ static void __exit pf_exit(void)
int unit;
unregister_blkdev(major, name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+
if (pf->present)
del_gendisk(pf->disk);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 87ccef4bd69e..32a21b8d1d85 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace)
return 0;
err_read:
+ /* prevent double queue cleanup */
+ ace->gd->queue = NULL;
put_disk(ace->gd);
err_alloc_disk:
blk_cleanup_queue(ace->queue);
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index d8b77133a83a..f824563fc28d 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -37,8 +37,8 @@
*
* Returns size of the event. If it is an invalid event, returns 0.
*/
-static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
- struct tcg_pcr_event *event_header)
+static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ struct tcg_pcr_event *event_header)
{
struct tcg_efi_specid_event_head *efispecid;
struct tcg_event_field *event_field;
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index 8856cce5a23b..817ae09a369e 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -233,12 +233,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &priv->async_wait, wait);
+ mutex_lock(&priv->buffer_mutex);
- if (!priv->response_read || priv->response_length)
+ /*
+	 * response_length indicates whether there is still a response
+	 * (or part of one) to be consumed.  Partial reads decrease it
+	 * by the number of bytes read, and a write resets it to zero.
+ */
+ if (priv->response_length)
mask = EPOLLIN | EPOLLRDNORM;
else
mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_unlock(&priv->buffer_mutex);
return mask;
}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 83ece5639f86..ae1030c9b086 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -402,15 +402,13 @@ int tpm_pm_suspend(struct device *dev)
if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
return 0;
- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- mutex_lock(&chip->tpm_mutex);
- if (!tpm_chip_start(chip)) {
+ if (!tpm_chip_start(chip)) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
tpm2_shutdown(chip, TPM2_SU_STATE);
- tpm_chip_stop(chip);
- }
- mutex_unlock(&chip->tpm_mutex);
- } else {
- rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+ else
+ rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+
+ tpm_chip_stop(chip);
}
return rc;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index b1eadc6652b5..7205d9f4029e 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -865,19 +865,18 @@ static int ahash_update_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- if (mapped_nents) {
+ if (mapped_nents)
sg_to_sec4_sg_last(req->src, mapped_nents,
edesc->sec4_sg + sec4_sg_src_index,
0);
- if (*next_buflen)
- scatterwalk_map_and_copy(next_buf, req->src,
- to_hash - *buflen,
- *next_buflen, 0);
- } else {
+ else
sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
1);
- }
+ if (*next_buflen)
+ scatterwalk_map_and_copy(next_buf, req->src,
+ to_hash - *buflen,
+ *next_buflen, 0);
desc = edesc->hw_desc;
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 42fed40198a0..c0c3043b5d61 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1169,11 +1169,13 @@ static int i2c_imx_probe(struct platform_device *pdev)
/* Init DMA config if supported */
ret = i2c_imx_dma_request(i2c_imx, phy_addr);
if (ret < 0)
- goto clk_notifier_unregister;
+ goto del_adapter;
dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n");
return 0; /* Return OK */
+del_adapter:
+ i2c_del_adapter(&i2c_imx->adapter);
clk_notifier_unregister:
clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
rpm_disable:
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 2649e0f2ff65..f5ecb660fe7d 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -351,7 +351,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
if (family == AF_INET) {
rt = container_of(dst, struct rtable, dst);
- return rt->rt_uses_gateway;
+ return rt->rt_gw_family == AF_INET;
}
rt6 = container_of(dst, struct rt6_info, dst);
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6bcc63aaa50b..be95ac5aeb30 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -148,7 +148,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
return ret;
}
- *addr = pci_resource_start(dev->pdev, 0) +
+ *addr = dev->bar_addr +
MLX5_GET64(alloc_memic_out, out, memic_start_addr);
return 0;
@@ -167,7 +167,7 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
u64 start_page_idx;
int err;
- addr -= pci_resource_start(dev->pdev, 0);
+ addr -= dev->bar_addr;
start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 531ff20b32ad..0845e95d2d11 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2009,7 +2009,7 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
+ return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
static int get_command(unsigned long offset)
@@ -2199,7 +2199,7 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
page_idx + npages)
return -EINVAL;
- pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ pfn = ((dev->mdev->bar_addr +
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
@@ -2283,7 +2283,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
goto err_free;
start_offset = memic_addr & ~PAGE_MASK;
- page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+ page_idx = (memic_addr - memic->dev->bar_addr -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
@@ -2326,7 +2326,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
if (ret)
return ret;
- page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+ page_idx = (dm->dev_addr - memic->dev->bar_addr -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c85f00255884..ca921fd40499 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1194,8 +1194,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
MLX5_SET64(mkc, mkc, len, length);
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr,
- memic_addr - pci_resource_start(dev->mdev->pdev, 0));
+ MLX5_SET64(mkc, mkc, start_addr, memic_addr - dev->mdev->bar_addr);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7cd006da1dae..ef7d69269a88 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -5119,7 +5119,7 @@ out:
wmb();
/* currently we support only regular doorbells */
- mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 032883180f65..0010a3ed64f1 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1407,7 +1407,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
if (neigh->nud_state & NUD_VALID) {
nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
" is %pM, Gateway is 0x%08X \n", dst_ip,
- neigh->ha, ntohl(rt->rt_gateway));
+ neigh->ha, ntohl(rt->rt_gw4));
if (arpindex >= 0) {
if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 95c6d86ab5e8..c4ef1fceead6 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -115,6 +115,7 @@ struct mapped_device {
struct srcu_struct io_barrier;
};
+void disable_discard(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b53f30f16b4d..4b76f84424c3 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -36,7 +36,7 @@ struct dm_device {
struct list_head list;
};
-const char *dm_allowed_targets[] __initconst = {
+const char * const dm_allowed_targets[] __initconst = {
"crypt",
"delay",
"linear",
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d57d997a52c8..7c678f50aaa3 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
{
return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
- range2->logical_sector + range2->n_sectors > range2->logical_sector;
+ range1->logical_sector + range1->n_sectors > range2->logical_sector;
}
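The broken predicate compared range2 against itself, so its second clause was true for any nonzero length and the test degenerated into a one-sided comparison. The corrected form is the standard half-open interval overlap test, sketched standalone:

#include <stdio.h>
#include <stdbool.h>

struct range {
	unsigned long start, n;
};

/* half-open intervals [start, start + n) overlap iff each one
 * starts before the other one ends */
static bool overlap(struct range a, struct range b)
{
	return a.start < b.start + b.n && a.start + a.n > b.start;
}

int main(void)
{
	struct range a = { 0, 10 }, b = { 5, 10 }, c = { 10, 5 };

	printf("%d\n", overlap(a, b));	/* 1 */
	printf("%d\n", overlap(a, c));	/* 0: touching, not overlapping */
	return 0;
}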
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
@@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity
struct dm_integrity_range *last_range =
list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
struct task_struct *last_range_task;
- if (!ranges_overlap(range, last_range))
- break;
last_range_task = last_range->task;
list_del(&last_range->wait_entry);
if (!add_new_range(ic, last_range, false)) {
@@ -3185,7 +3183,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
journal_watermark = val;
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
sync_msec = val;
- else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) {
+ else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
if (ic->meta_dev) {
dm_put_device(ti, ic->meta_dev);
ic->meta_dev = NULL;
@@ -3204,17 +3202,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
- } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
+ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
+ } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
"Invalid journal_crypt argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
+ } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
"Invalid journal_mac argument");
if (r)
@@ -3616,7 +3614,7 @@ static struct target_type integrity_target = {
.io_hints = dm_integrity_io_hints,
};
-int __init dm_integrity_init(void)
+static int __init dm_integrity_init(void)
{
int r;
@@ -3635,7 +3633,7 @@ int __init dm_integrity_init(void)
return r;
}
-void dm_integrity_exit(void)
+static void __exit dm_integrity_exit(void)
{
dm_unregister_target(&integrity_target);
kmem_cache_destroy(journal_io_cache);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 09773636602d..b66745bd08bb 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped)
}
if (unlikely(error == BLK_STS_TARGET)) {
- if (req_op(clone) == REQ_OP_WRITE_SAME &&
- !clone->q->limits.max_write_same_sectors)
+ if (req_op(clone) == REQ_OP_DISCARD &&
+ !clone->q->limits.max_discard_sectors)
+ disable_discard(tio->md);
+ else if (req_op(clone) == REQ_OP_WRITE_SAME &&
+ !clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
- if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
- !clone->q->limits.max_write_zeroes_sectors)
+ else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+ !clone->q->limits.max_write_zeroes_sectors)
disable_write_zeroes(tio->md);
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ba9481f1bf3c..cde3b49b2a91 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
+static int device_requires_stable_pages(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && bdi_cap_stable_pages_required(q->backing_dev_info);
+}
+
+/*
+ * If any underlying device requires stable pages, a table must require
+ * them as well.  Only targets that support iterate_devices are considered,
+ * so that targets such as "error" and "zero" do not end up requiring them.
+ */
+static bool dm_table_requires_stable_pages(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
+ return true;
+ }
+
+ return false;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1897,6 +1927,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
dm_table_verify_integrity(t);
/*
+ * Some devices don't use blk_integrity but still want stable pages
+ * because they do their own checksumming.
+ */
+ if (dm_table_requires_stable_pages(t))
+ q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+ else
+ q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
+
+ /*
* Determine whether or not this queue's I/O timings contribute
* to the entropy pool, Only request-based targets use this.
* Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 68d24056d0b1..043f0761e4a0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
}
}
+void disable_discard(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support DISCARD, disable it */
+ limits->max_discard_sectors = 0;
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
+}
+
void disable_write_same(struct mapped_device *md)
{
struct queue_limits *limits = dm_get_queue_limits(md);
@@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio)
dm_endio_fn endio = tio->ti->type->end_io;
if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
- if (bio_op(bio) == REQ_OP_WRITE_SAME &&
- !bio->bi_disk->queue->limits.max_write_same_sectors)
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !bio->bi_disk->queue->limits.max_discard_sectors)
+ disable_discard(md);
+ else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+ !bio->bi_disk->queue->limits.max_write_same_sectors)
disable_write_same(md);
- if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
disable_write_zeroes(md);
}
@@ -1042,15 +1054,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
return -EINVAL;
}
- /*
- * BIO based queue uses its own splitting. When multipage bvecs
- * is switched on, size of the incoming bio may be too big to
- * be handled in some targets, such as crypt.
- *
- * When these targets are ready for the big bio, we can remove
- * the limit.
- */
- ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE);
+ ti->max_io_len = (uint32_t) len;
return 0;
}
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 72428b6bfc47..7b7286b4d81e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1876,7 +1876,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
continue;
}
- if (time_after(jiffies, timeo) && !chip_ready(map, adr))
+ /*
+	 * Check "time_after" together with "!chip_good" before the final
+	 * "chip_good" check, so that a timeout that only expired because of
+	 * scheduling delays is not reported as a failure when the write has
+	 * in fact completed.
+ */
+ if (time_after(jiffies, timeo) && !chip_good(map, adr, datum))
break;
if (chip_good(map, adr, datum)) {
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 3d27616d9c85..51cf5eca9c7f 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -116,11 +116,15 @@ static struct net_device * __init ipddp_init(void)
*/
static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
{
- __be32 paddr = skb_rtable(skb)->rt_gateway;
+ struct rtable *rtable = skb_rtable(skb);
+ __be32 paddr = 0;
struct ddpehdr *ddp;
struct ipddp_route *rt;
struct atalk_addr *our_addr;
+ if (rtable->rt_gw_family == AF_INET)
+ paddr = rtable->rt_gw4;
+
spin_lock(&ipddp_route_lock);
/*
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 0d15a12a4560..3568129fb7da 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -32,6 +32,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/rtnetlink.h>
@@ -131,21 +132,9 @@ static void dummy_get_drvinfo(struct net_device *dev,
strlcpy(info->version, DRV_VERSION, sizeof(info->version));
}
-static int dummy_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *ts_info)
-{
- ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
-
- ts_info->phc_index = -1;
-
- return 0;
-};
-
static const struct ethtool_ops dummy_ethtool_ops = {
.get_drvinfo = dummy_get_drvinfo,
- .get_ts_info = dummy_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
static void dummy_setup(struct net_device *dev)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4feac114b779..6528a597367b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1133,6 +1133,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
tpa_info = &rxr->rx_tpa[agg_id];
if (unlikely(cons != rxr->rx_next_cons)) {
+ netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return;
}
@@ -1585,15 +1587,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
cons = rxcmp->rx_cmp_opaque;
- rx_buf = &rxr->rx_buf_ring[cons];
- data = rx_buf->data;
- data_ptr = rx_buf->data_ptr;
if (unlikely(cons != rxr->rx_next_cons)) {
int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+ netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return rc1;
}
+ rx_buf = &rxr->rx_buf_ring[cons];
+ data = rx_buf->data;
+ data_ptr = rx_buf->data_ptr;
prefetch(data_ptr);
misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
@@ -1610,11 +1614,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rx_buf->data = NULL;
if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
+ u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2);
+
bnxt_reuse_rx_data(rxr, cons, data);
if (agg_bufs)
bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
rc = -EIO;
+ if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
+ netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
+ bnxt_sched_reset(bp, rxr);
+ }
goto next_rx;
}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d029468738ed..664fedf0cd80 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -4283,7 +4283,7 @@ static void tg3_power_down(struct tg3 *tp)
pci_set_power_state(tp->pdev, PCI_D3hot);
}
-static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex)
{
switch (val & MII_TG3_AUX_STAT_SPDMASK) {
case MII_TG3_AUX_STAT_10HALF:
@@ -4787,7 +4787,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset)
bool current_link_up;
u32 bmsr, val;
u32 lcl_adv, rmt_adv;
- u16 current_speed;
+ u32 current_speed;
u8 current_duplex;
int i, err;
@@ -5719,7 +5719,7 @@ out:
static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset)
{
u32 orig_pause_cfg;
- u16 orig_active_speed;
+ u32 orig_active_speed;
u8 orig_active_duplex;
u32 mac_status;
bool current_link_up;
@@ -5823,7 +5823,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset)
{
int err = 0;
u32 bmsr, bmcr;
- u16 current_speed = SPEED_UNKNOWN;
+ u32 current_speed = SPEED_UNKNOWN;
u8 current_duplex = DUPLEX_UNKNOWN;
bool current_link_up = false;
u32 local_adv, remote_adv, sgsr;
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index a772a33b685c..6953d0546acb 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2873,7 +2873,7 @@ struct tg3_tx_ring_info {
struct tg3_link_config {
/* Describes what we're trying to get. */
u32 advertising;
- u16 speed;
+ u32 speed;
u8 duplex;
u8 autoneg;
u8 flowctrl;
@@ -2882,7 +2882,7 @@ struct tg3_link_config {
u8 active_flowctrl;
u8 active_duplex;
- u16 active_speed;
+ u32 active_speed;
u32 rmt_adv;
};
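SPEED_UNKNOWN is -1 in the ethtool UAPI, so a u16 speed field silently truncates it to 0xffff and comparisons against SPEED_UNKNOWN always fail (a u16 also cannot represent speeds above 65535 Mb/s). A short standalone demonstration:

#include <stdio.h>
#include <stdint.h>

#define SPEED_UNKNOWN (-1)	/* as in <linux/ethtool.h> */

int main(void)
{
	uint16_t s16 = SPEED_UNKNOWN;	/* truncates to 0xffff */
	uint32_t s32 = SPEED_UNKNOWN;

	printf("u16: %#x, == SPEED_UNKNOWN? %d\n",
	       (unsigned int)s16, s16 == (uint32_t)SPEED_UNKNOWN);	/* 0 */
	printf("u32: %#x, == SPEED_UNKNOWN? %d\n",
	       (unsigned int)s32, s32 == (uint32_t)SPEED_UNKNOWN);	/* 1 */
	return 0;
}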
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a44171fddf47..009ed4c1baf3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -898,7 +898,9 @@ static void macb_tx_interrupt(struct macb_queue *queue)
/* First, update TX stats if needed */
if (skb) {
- if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP) &&
+ gem_ptp_do_txstamp(queue, skb, desc) == 0) {
/* skb now belongs to timestamp buffer
* and will be removed later
*/
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 673c57b8023f..81c281ada63b 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -962,13 +962,13 @@ static void bgx_poll_for_sgmii_link(struct lmac *lmac)
lmac->last_duplex = (an_result >> 1) & 0x1;
switch (speed) {
case 0:
- lmac->last_speed = 10;
+ lmac->last_speed = SPEED_10;
break;
case 1:
- lmac->last_speed = 100;
+ lmac->last_speed = SPEED_100;
break;
case 2:
- lmac->last_speed = 1000;
+ lmac->last_speed = SPEED_1000;
break;
default:
lmac->link_up = false;
@@ -1012,10 +1012,10 @@ static void bgx_poll_for_link(struct work_struct *work)
!(smu_link & SMU_RX_CTL_STATUS)) {
lmac->link_up = 1;
if (lmac->lmac_type == BGX_MODE_XLAUI)
- lmac->last_speed = 40000;
+ lmac->last_speed = SPEED_40000;
else
- lmac->last_speed = 10000;
- lmac->last_duplex = 1;
+ lmac->last_speed = SPEED_10000;
+ lmac->last_duplex = DUPLEX_FULL;
} else {
lmac->link_up = 0;
lmac->last_speed = SPEED_UNKNOWN;
@@ -1105,8 +1105,8 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
} else {
/* Default to below link speed and duplex */
lmac->link_up = true;
- lmac->last_speed = 1000;
- lmac->last_duplex = 1;
+ lmac->last_speed = SPEED_1000;
+ lmac->last_duplex = DUPLEX_FULL;
bgx_sgmii_change_link_state(lmac);
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 9d3d45f02be1..360463a40ba9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -43,6 +43,7 @@ enum HCLGE_MBX_OPCODE {
HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */
HCLGE_MBX_LINK_STAT_MODE, /* (PF -> VF) link mode has changed */
HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */
+ HLCGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
@@ -63,6 +64,8 @@ enum hclge_mbx_vlan_cfg_subcode {
HCLGE_MBX_VLAN_FILTER = 0, /* set vlan filter */
HCLGE_MBX_VLAN_TX_OFF_CFG, /* set tx side vlan offload */
HCLGE_MBX_VLAN_RX_OFF_CFG, /* set rx side vlan offload */
+ HCLGE_MBX_PORT_BASE_VLAN_CFG, /* set port based vlan configuration */
+ HCLGE_MBX_GET_PORT_BASE_VLAN_STATE, /* get port based vlan state */
};
#define HCLGE_MBX_MAX_MSG_SIZE 16
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 38b430f11fc1..681c1752c1e3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -147,6 +147,13 @@ enum hnae3_flr_state {
HNAE3_FLR_DONE,
};
+enum hnae3_port_base_vlan_state {
+ HNAE3_PORT_BASE_VLAN_DISABLE,
+ HNAE3_PORT_BASE_VLAN_ENABLE,
+ HNAE3_PORT_BASE_VLAN_MODIFY,
+ HNAE3_PORT_BASE_VLAN_NOCHANGE,
+};
+
struct hnae3_vector_info {
u8 __iomem *io_addr;
int vector;
@@ -578,6 +585,8 @@ struct hnae3_handle {
u32 numa_node_mask; /* for multi-chip support */
+ enum hnae3_port_base_vlan_state port_base_vlan_state;
+
u8 netdev_flags;
struct dentry *hnae3_dbgfs;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0f389a43ff8d..923343858f51 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -963,6 +963,16 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb,
{
#define HNS3_TX_VLAN_PRIO_SHIFT 13
+ struct hnae3_handle *handle = tx_ring->tqp->handle;
+
+	/* Due to a HW limitation, only one VLAN header is allowed in an skb
+	 * when port-based VLAN insertion is enabled; more than one causes
+	 * a RAS error.
+	 */
+ if (unlikely(skb_vlan_tagged_multi(skb) &&
+ handle->port_base_vlan_state ==
+ HNAE3_PORT_BASE_VLAN_ENABLE))
+ return -EINVAL;
+
if (skb->protocol == htons(ETH_P_8021Q) &&
!(tx_ring->tqp->handle->kinfo.netdev->features &
NETIF_F_HW_VLAN_CTAG_TX)) {
@@ -984,8 +994,16 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb,
* and use inner_vtag in one tag case.
*/
if (skb->protocol == htons(ETH_P_8021Q)) {
- hns3_set_field(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
- *out_vtag = vlan_tag;
+ if (handle->port_base_vlan_state ==
+			    HNAE3_PORT_BASE_VLAN_DISABLE) {
+ hns3_set_field(*out_vlan_flag,
+ HNS3_TXD_OVLAN_B, 1);
+ *out_vtag = vlan_tag;
+ } else {
+ hns3_set_field(*inner_vlan_flag,
+ HNS3_TXD_VLAN_B, 1);
+ *inner_vtag = vlan_tag;
+ }
} else {
hns3_set_field(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
*inner_vtag = vlan_tag;
@@ -2313,17 +2331,50 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
}
}
+static int hns3_gro_complete(struct sk_buff *skb)
+{
+ __be16 type = skb->protocol;
+ struct tcphdr *th;
+ int depth = 0;
+
+ while (type == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vh;
+
+ if ((depth + VLAN_HLEN) > skb_headlen(skb))
+ return -EFAULT;
+
+ vh = (struct vlan_hdr *)(skb->data + depth);
+ type = vh->h_vlan_encapsulated_proto;
+ depth += VLAN_HLEN;
+ }
+
+ if (type == htons(ETH_P_IP)) {
+ depth += sizeof(struct iphdr);
+ } else if (type == htons(ETH_P_IPV6)) {
+ depth += sizeof(struct ipv6hdr);
+ } else {
+ netdev_err(skb->dev,
+ "Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n",
+ be16_to_cpu(type), depth);
+ return -EFAULT;
+ }
+
+ th = (struct tcphdr *)(skb->data + depth);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ if (th->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ return 0;
+}
+
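hns3_gro_complete() locates the TCP header by hand because the firmware-merged skb arrives without the usual software-GRO bookkeeping, and the header walk has to skip any stacked VLAN tags first. The same walk in standalone form (made-up packet bytes):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

#define ETH_P_8021Q	0x8100
#define VLAN_HLEN	4

int main(void)
{
	/* payload after the outer ethertype: two stacked VLAN headers
	 * (2-byte TCI + 2-byte encapsulated proto); IPv4 would follow */
	uint8_t data[] = { 0x00, 0x64, 0x81, 0x00,	/* next: 802.1Q */
			   0x00, 0xc8, 0x08, 0x00 };	/* next: IPv4   */
	uint16_t type = htons(ETH_P_8021Q);	/* like skb->protocol */
	unsigned int depth = 0;

	while (type == htons(ETH_P_8021Q)) {
		if (depth + VLAN_HLEN > sizeof(data))
			return 1;	/* malformed: header truncated */
		memcpy(&type, data + depth + 2, sizeof(type));
		depth += VLAN_HLEN;
	}
	printf("L3 ethertype %#06x at offset %u\n", ntohs(type), depth);
	return 0;
}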
static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
- struct hns3_desc *desc)
+ u32 l234info, u32 bd_base_info)
{
struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
int l3_type, l4_type;
- u32 bd_base_info;
int ol4_type;
- u32 l234info;
-
- bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
- l234info = le32_to_cpu(desc->rx.l234_info);
skb->ip_summed = CHECKSUM_NONE;
@@ -2332,12 +2383,6 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
if (!(netdev->features & NETIF_F_RXCSUM))
return;
- /* We MUST enable hardware checksum before enabling hardware GRO */
- if (skb_shinfo(skb)->gso_size) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return;
- }
-
/* check if hardware has done checksum */
if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
return;
@@ -2390,6 +2435,7 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
struct hns3_desc *desc, u32 l234info,
u16 *vlan_tag)
{
+ struct hnae3_handle *handle = ring->tqp->handle;
struct pci_dev *pdev = ring->tqp->handle->pdev;
if (pdev->revision == 0x20) {
@@ -2402,15 +2448,36 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
#define HNS3_STRP_OUTER_VLAN 0x1
#define HNS3_STRP_INNER_VLAN 0x2
+#define HNS3_STRP_BOTH 0x3
+	/* When the hardware strips a VLAN tag from a packet it always
+	 * writes the tag into the RX descriptor; the driver has to decide
+	 * which tag to report to the stack.
+	 */
switch (hnae3_get_field(l234info, HNS3_RXD_STRP_TAGP_M,
HNS3_RXD_STRP_TAGP_S)) {
case HNS3_STRP_OUTER_VLAN:
+ if (handle->port_base_vlan_state !=
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ return false;
+
*vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
return true;
case HNS3_STRP_INNER_VLAN:
+ if (handle->port_base_vlan_state !=
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ return false;
+
*vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
return true;
+ case HNS3_STRP_BOTH:
+ if (handle->port_base_vlan_state ==
+ HNAE3_PORT_BASE_VLAN_DISABLE)
+ *vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
+ else
+ *vlan_tag = le16_to_cpu(desc->rx.vlan_tag);
+
+ return true;
default:
return false;
}
@@ -2532,8 +2599,9 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
return 0;
}
-static void hns3_set_gro_param(struct sk_buff *skb, u32 l234info,
- u32 bd_base_info)
+static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
+ struct sk_buff *skb, u32 l234info,
+ u32 bd_base_info)
{
u16 gro_count;
u32 l3_type;
@@ -2541,12 +2609,11 @@ static void hns3_set_gro_param(struct sk_buff *skb, u32 l234info,
gro_count = hnae3_get_field(l234info, HNS3_RXD_GRO_COUNT_M,
HNS3_RXD_GRO_COUNT_S);
/* if there is no HW GRO, do not set gro params */
- if (!gro_count)
- return;
+ if (!gro_count) {
+ hns3_rx_checksum(ring, skb, l234info, bd_base_info);
+ return 0;
+ }
- /* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
- * to skb_shinfo(skb)->gso_segs
- */
NAPI_GRO_CB(skb)->count = gro_count;
l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
@@ -2556,13 +2623,13 @@ static void hns3_set_gro_param(struct sk_buff *skb, u32 l234info,
else if (l3_type == HNS3_L3_TYPE_IPV6)
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
else
- return;
+ return -EFAULT;
skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
HNS3_RXD_GRO_SIZE_M,
HNS3_RXD_GRO_SIZE_S);
- if (skb_shinfo(skb)->gso_size)
- tcp_gro_complete(skb);
+
+ return hns3_gro_complete(skb);
}
static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
@@ -2587,16 +2654,85 @@ static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring,
skb_set_hash(skb, le32_to_cpu(desc->rx.rss_hash), rss_type);
}
-static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
- struct sk_buff **out_skb)
+static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb,
+ struct hns3_desc *desc)
{
struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+ u32 bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+ u32 l234info = le32_to_cpu(desc->rx.l234_info);
enum hns3_pkt_l2t_type l2_frame_type;
+ unsigned int len;
+ int ret;
+
+	/* Based on the hardware's stripping strategy, the offloaded tag is
+	 * stored in ot_vlan_tag in the two-layer-tag case and in vlan_tag
+	 * in the one-layer-tag case.
+	 */
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ u16 vlan_tag;
+
+ if (hns3_parse_vlan_tag(ring, desc, l234info, &vlan_tag))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ vlan_tag);
+ }
+
+ if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.non_vld_descs++;
+ u64_stats_update_end(&ring->syncp);
+
+ return -EINVAL;
+ }
+
+ if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
+ BIT(HNS3_RXD_L2E_B))))) {
+ u64_stats_update_begin(&ring->syncp);
+ if (l234info & BIT(HNS3_RXD_L2E_B))
+ ring->stats.l2_err++;
+ else
+ ring->stats.err_pkt_len++;
+ u64_stats_update_end(&ring->syncp);
+
+ return -EFAULT;
+ }
+
+ len = skb->len;
+
+	/* set skb->protocol before handing the skb up the stack */
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ /* This is needed in order to enable forwarding support */
+ ret = hns3_set_gro_and_checksum(ring, skb, l234info, bd_base_info);
+ if (unlikely(ret)) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.rx_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+ return ret;
+ }
+
+ l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M,
+ HNS3_RXD_DMAC_S);
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.rx_pkts++;
+ ring->stats.rx_bytes += len;
+
+ if (l2_frame_type == HNS3_L2_TYPE_MULTICAST)
+ ring->stats.rx_multicast++;
+
+ u64_stats_update_end(&ring->syncp);
+
+ ring->tqp_vector->rx_group.total_bytes += len;
+ return 0;
+}
+
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
+ struct sk_buff **out_skb)
+{
struct sk_buff *skb = ring->skb;
struct hns3_desc_cb *desc_cb;
struct hns3_desc *desc;
u32 bd_base_info;
- u32 l234info;
int length;
int ret;
@@ -2656,62 +2792,12 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
ALIGN(ring->pull_len, sizeof(long)));
}
- l234info = le32_to_cpu(desc->rx.l234_info);
- bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-
- /* Based on hw strategy, the tag offloaded will be stored at
- * ot_vlan_tag in two layer tag case, and stored at vlan_tag
- * in one layer tag case.
- */
- if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
- u16 vlan_tag;
-
- if (hns3_parse_vlan_tag(ring, desc, l234info, &vlan_tag))
- __vlan_hwaccel_put_tag(skb,
- htons(ETH_P_8021Q),
- vlan_tag);
- }
-
- if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) {
- u64_stats_update_begin(&ring->syncp);
- ring->stats.non_vld_descs++;
- u64_stats_update_end(&ring->syncp);
-
+ ret = hns3_handle_bdinfo(ring, skb, desc);
+ if (unlikely(ret)) {
dev_kfree_skb_any(skb);
- return -EINVAL;
- }
-
- if (unlikely((!desc->rx.pkt_len) ||
- (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
- BIT(HNS3_RXD_L2E_B))))) {
- u64_stats_update_begin(&ring->syncp);
- if (l234info & BIT(HNS3_RXD_L2E_B))
- ring->stats.l2_err++;
- else
- ring->stats.err_pkt_len++;
- u64_stats_update_end(&ring->syncp);
-
- dev_kfree_skb_any(skb);
- return -EFAULT;
+ return ret;
}
-
- l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M,
- HNS3_RXD_DMAC_S);
- u64_stats_update_begin(&ring->syncp);
- if (l2_frame_type == HNS3_L2_TYPE_MULTICAST)
- ring->stats.rx_multicast++;
-
- ring->stats.rx_pkts++;
- ring->stats.rx_bytes += skb->len;
- u64_stats_update_end(&ring->syncp);
-
- ring->tqp_vector->rx_group.total_bytes += skb->len;
-
- /* This is needed in order to enable forwarding support */
- hns3_set_gro_param(skb, l234info, bd_base_info);
-
- hns3_rx_checksum(ring, skb, desc);
*out_skb = skb;
hns3_set_rx_skb_rss_type(ring, skb);
@@ -2723,7 +2809,6 @@ int hns3_clean_rx_ring(
void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
{
#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
- struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
int recv_pkts, recv_bds, clean_count, err;
int unused_count = hns3_desc_unused(ring) - ring->pending_buf;
struct sk_buff *skb = ring->skb;
@@ -2760,8 +2845,6 @@ int hns3_clean_rx_ring(
continue;
}
- /* Do update ip stack process */
- skb->protocol = eth_type_trans(skb, netdev);
rx_fn(ring, skb);
recv_bds += ring->pending_buf;
clean_count += ring->pending_buf;
@@ -3718,13 +3801,13 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret;
- hns3_client_stop(handle);
-
hns3_remove_hw_addr(netdev);
if (netdev->reg_state != NETREG_UNINITIALIZED)
unregister_netdev(netdev);
+ hns3_client_stop(handle);
+
if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
netdev_warn(netdev, "already uninitialized\n");
goto out_netdev_free;
@@ -3877,6 +3960,13 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
ring_ptr_move_fw(ring, next_to_use);
}
+ /* Free the pending skb in rx ring */
+ if (ring->skb) {
+ dev_kfree_skb_any(ring->skb);
+ ring->skb = NULL;
+ ring->pending_buf = 0;
+ }
+
return 0;
}
@@ -4075,10 +4165,18 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
if (ret)
goto err_uninit_vector;
+ ret = hns3_client_start(handle);
+ if (ret) {
+ dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
+ goto err_uninit_ring;
+ }
+
set_bit(HNS3_NIC_STATE_INITED, &priv->state);
return ret;
+err_uninit_ring:
+ hns3_uninit_all_ring(priv);
err_uninit_vector:
hns3_nic_uninit_vector_data(priv);
priv->ring_data = NULL;
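
The reset-notify hunk above adds hns3_client_start() as a new init step together with a matching err_uninit_ring label, keeping the error path a strict reverse of the setup order. A self-contained sketch of that stacked-unwind idiom, assuming hypothetical init/uninit helpers:

/* Hedged sketch of the stacked-unwind idiom the hunk above extends.
 * init_vectors/init_rings/start_client are illustrative stand-ins. */
#include <stdio.h>

static int init_vectors(void) { return 0; }
static int init_rings(void)   { return 0; }
static int start_client(void) { return -1; }   /* pretend this fails */
static void uninit_rings(void)   { puts("uninit rings"); }
static void uninit_vectors(void) { puts("uninit vectors"); }

static int bring_up(void)
{
	int ret;

	ret = init_vectors();
	if (ret)
		return ret;
	ret = init_rings();
	if (ret)
		goto err_uninit_vectors;
	ret = start_client();
	if (ret)
		goto err_uninit_rings;   /* new step, new unwind label */
	return 0;

err_uninit_rings:
	uninit_rings();                  /* undo in reverse order */
err_uninit_vectors:
	uninit_vectors();
	return ret;
}

int main(void) { return bring_up() ? 1 : 0; }

Each additional setup step costs exactly one new label at the top of the unwind chain.
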
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 722bb3124bb6..fbd904e3077c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -355,7 +355,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
int ret;
spin_lock_bh(&hdev->hw.cmq.csq.lock);
- spin_lock_bh(&hdev->hw.cmq.crq.lock);
+ spin_lock(&hdev->hw.cmq.crq.lock);
hdev->hw.cmq.csq.next_to_clean = 0;
hdev->hw.cmq.csq.next_to_use = 0;
@@ -364,7 +364,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
hclge_cmd_init_regs(&hdev->hw);
- spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+ spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
@@ -373,21 +373,26 @@ int hclge_cmd_init(struct hclge_dev *hdev)
* reset may happen when lower level reset is being processed.
*/
if ((hclge_is_reset_pending(hdev))) {
- set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
- return -EBUSY;
+ ret = -EBUSY;
+ goto err_cmd_init;
}
ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
dev_err(&hdev->pdev->dev,
"firmware version query failed %d\n", ret);
- return ret;
+ goto err_cmd_init;
}
hdev->fw_version = version;
dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
return 0;
+
+err_cmd_init:
+ set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+
+ return ret;
}
static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
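
Two things change in hclge_cmd_init() above. First, the inner CRQ lock drops the _bh variant: the outer CSQ lock has already disabled bottom halves, and an inner spin_unlock_bh() would re-enable them while the outer lock is still held. Second, both failure paths now funnel through one err_cmd_init label so HCLGE_STATE_CMD_DISABLE is set exactly once on the way out. A hedged kernel-context sketch of the locking rule, with 'outer' and 'inner' standing in for the two queue locks:

/* Hedged kernel-context sketch of the locking fix above; 'outer' and
 * 'inner' stand in for the CSQ and CRQ locks. Taking the inner lock
 * with spin_lock_bh() would be harmless, but spin_unlock_bh() on it
 * re-enables bottom halves while the outer lock is still held, so the
 * inner lock must use the plain variants. */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer);   /* e.g. csq.lock */
static DEFINE_SPINLOCK(inner);   /* e.g. crq.lock */

static void init_both_queues(void)
{
	spin_lock_bh(&outer);    /* disables BHs exactly once  */
	spin_lock(&inner);       /* plain: BHs are already off */
	/* ... reset both rings, re-program queue registers ... */
	spin_unlock(&inner);     /* plain unlock: BHs stay off */
	spin_unlock_bh(&outer);  /* re-enables BHs exactly once */
}
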
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 1f52d11f77b5..62ef1619143b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -4,287 +4,468 @@
#include "hclge_err.h"
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
- { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
- { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
- { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
- { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
- { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
+ { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
- { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_igu_int[] = {
- { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "rx_buf_overflow" },
- { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" },
- { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" },
- { .int_msk = BIT(3), .msg = "tx_buf_overflow" },
- { .int_msk = BIT(4), .msg = "tx_buf_underrun" },
- { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow" },
+ { .int_msk = BIT(0), .msg = "rx_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(3), .msg = "tx_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(4), .msg = "tx_buf_underrun",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ncsi_err_int[] = {
- { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
- { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" },
- { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err" },
- { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err" },
- { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err" },
- { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err" },
- { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err" },
- { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err" },
- { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err" },
- { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err" },
- { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err" },
- { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err" },
- { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err" },
- { .int_msk = BIT(27),
- .msg = "flow_director_ad_mem0_ecc_mbit_err" },
- { .int_msk = BIT(28),
- .msg = "flow_director_ad_mem1_ecc_mbit_err" },
- { .int_msk = BIT(29),
- .msg = "rx_vlan_tag_memory_ecc_mbit_err" },
- { .int_msk = BIT(30),
- .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "tx_vlan_tag_err" },
- { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err" },
+ { .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" },
- { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err" },
+ { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_tm_sch_rint[] = {
- { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" },
- { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" },
- { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" },
- { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" },
- { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" },
- { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" },
- { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" },
- { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" },
- { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" },
- { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" },
- { .int_msk = BIT(12),
- .msg = "tm_sch_port_shap_offset_fifo_wr_err" },
- { .int_msk = BIT(13),
- .msg = "tm_sch_port_shap_offset_fifo_rd_err" },
- { .int_msk = BIT(14),
- .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" },
- { .int_msk = BIT(15),
- .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" },
- { .int_msk = BIT(16),
- .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" },
- { .int_msk = BIT(17),
- .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" },
- { .int_msk = BIT(18),
- .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" },
- { .int_msk = BIT(19),
- .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" },
- { .int_msk = BIT(20),
- .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" },
- { .int_msk = BIT(21),
- .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" },
- { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" },
- { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" },
- { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" },
- { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" },
- { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" },
- { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" },
- { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" },
- { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" },
- { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" },
- { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" },
+ { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
- { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" },
- { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" },
- { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" },
- { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" },
- { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" },
- { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" },
- { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" },
- { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" },
- { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" },
- { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err" },
- { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" },
- { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" },
- { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" },
- { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" },
- { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" },
- { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" },
- { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" },
- { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" },
+ { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
- { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" },
- { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err" },
- { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err" },
- { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err" },
- { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err" },
+ { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
- { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err" },
- { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err" },
- { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err" },
- { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err" },
- { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err" },
- { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err" },
- { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err" },
- { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err" },
- { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err" },
- { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err" },
- { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err" },
- { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err" },
- { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err" },
+ { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
- { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err" },
- { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err" },
- { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err" },
- { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err" },
- { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err" },
- { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err" },
- { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err" },
- { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err" },
- { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err" },
- { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err" },
- { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err" },
- { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err" },
- { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err" },
- { .int_msk = BIT(26), .msg = "rd_bus_err" },
- { .int_msk = BIT(27), .msg = "wr_bus_err" },
- { .int_msk = BIT(28), .msg = "reg_search_miss" },
- { .int_msk = BIT(29), .msg = "rx_q_search_miss" },
- { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect" },
- { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl" },
+ { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(26), .msg = "rd_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(27), .msg = "wr_bus_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(28), .msg = "reg_search_miss",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(29), .msg = "rx_q_search_miss",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
- { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err" },
- { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err" },
- { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err" },
- { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err" },
+ { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
+ { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
+ .reset_level = HNAE3_CORE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
- { .int_msk = BIT(0), .msg = "over_8bd_no_fe" },
- { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err" },
- { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err" },
- { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison" },
- { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison" },
- { .int_msk = BIT(5), .msg = "buf_wait_timeout" },
+ { .int_msk = BIT(0), .msg = "over_8bd_no_fe",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
+ .reset_level = HNAE3_FUNC_RESET },
+ { .int_msk = BIT(5), .msg = "buf_wait_timeout",
+ .reset_level = HNAE3_NONE_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
- { .int_msk = BIT(0), .msg = "buf_sum_err" },
- { .int_msk = BIT(1), .msg = "ppp_mb_num_err" },
- { .int_msk = BIT(2), .msg = "ppp_mbid_err" },
- { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err" },
- { .int_msk = BIT(4), .msg = "ppp_rlt_host_err" },
- { .int_msk = BIT(5), .msg = "cks_edit_position_err" },
- { .int_msk = BIT(6), .msg = "cks_edit_condition_err" },
- { .int_msk = BIT(7), .msg = "vlan_edit_condition_err" },
- { .int_msk = BIT(8), .msg = "vlan_num_ot_err" },
- { .int_msk = BIT(9), .msg = "vlan_num_in_err" },
+ { .int_msk = BIT(0), .msg = "buf_sum_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(1), .msg = "ppp_mb_num_err",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(2), .msg = "ppp_mbid_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "cks_edit_position_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "cks_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "vlan_num_ot_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "vlan_num_in_err",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
#define HCLGE_SSU_MEM_ECC_ERR(x) \
- { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err" }
+ { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
+ .reset_level = HNAE3_GLOBAL_RESET }
static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
HCLGE_SSU_MEM_ECC_ERR(0),
@@ -323,62 +504,106 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
};
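
HCLGE_SSU_MEM_ECC_ERR() above leans on the preprocessor's #x stringizing operator so each entry's bit mask and message text are generated from the same argument and cannot drift apart across the table. A hedged userspace sketch of the same trick, with illustrative names:

/* Hedged userspace sketch of the '#x' stringizing trick used by
 * HCLGE_SSU_MEM_ECC_ERR above: one macro expands both the bit mask
 * and the matching message text from a single argument. */
#include <stdio.h>

#define BIT(n) (1U << (n))
#define MEM_ECC_ERR(x) { .mask = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err" }

struct err_desc { unsigned int mask; const char *msg; };

int main(void)
{
	struct err_desc tab[] = { MEM_ECC_ERR(0), MEM_ECC_ERR(7) };
	printf("%#x %s\n", tab[1].mask, tab[1].msg);
	/* prints: 0x80 ssu_mem7_ecc_mbit_err */
	return 0;
}
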
static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
- { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port" },
- { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port" },
- { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port" },
- { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port" },
- { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port" },
- { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port" },
- { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port" },
- { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port" },
- { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port" },
- { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port" },
- { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port" },
- { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port" },
+ { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
- { .int_msk = BIT(0), .msg = "ig_mac_inf_int" },
- { .int_msk = BIT(1), .msg = "ig_host_inf_int" },
- { .int_msk = BIT(2), .msg = "ig_roc_buf_int" },
- { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int" },
- { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int" },
- { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int" },
- { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int" },
- { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int" },
- { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int" },
- { .int_msk = BIT(9), .msg = "qm_eof_fifo_int" },
- { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int" },
- { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int" },
- { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int" },
- { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int" },
- { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int" },
- { .int_msk = BIT(15), .msg = "host_cmd_fifo_int" },
- { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int" },
- { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int" },
- { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int" },
- { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int" },
- { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int" },
- { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int" },
- { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int" },
- { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int" },
+ { .int_msk = BIT(0), .msg = "ig_mac_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "ig_host_inf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "ig_roc_buf_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
- { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg" },
- { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg" },
- { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg" },
- { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg" },
+ { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
- { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
- { .int_msk = BIT(9), .msg = "low_water_line_err_port" },
- { .int_msk = BIT(10), .msg = "hi_water_line_err_port" },
+ { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
+ { .int_msk = BIT(9), .msg = "low_water_line_err_port",
+ .reset_level = HNAE3_NONE_RESET },
+ { .int_msk = BIT(10), .msg = "hi_water_line_err_port",
+ .reset_level = HNAE3_GLOBAL_RESET },
{ /* sentinel */ }
};
@@ -406,16 +631,29 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
{ /* sentinel */ }
};
-static void hclge_log_error(struct device *dev, char *reg,
- const struct hclge_hw_error *err,
- u32 err_sts)
+static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg,
+ const struct hclge_hw_error *err,
+ u32 err_sts)
{
+ enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET;
+ bool need_reset = false;
+
while (err->msg) {
- if (err->int_msk & err_sts)
+ if (err->int_msk & err_sts) {
dev_warn(dev, "%s %s found [error status=0x%x]\n",
reg, err->msg, err_sts);
+ if (err->reset_level != HNAE3_NONE_RESET &&
+ err->reset_level >= reset_level) {
+ reset_level = err->reset_level;
+ need_reset = true;
+ }
+ }
err++;
}
+ if (need_reset)
+ return reset_level;
+ else
+ return HNAE3_NONE_RESET;
}
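
The reworked hclge_log_error() above walks a sentinel-terminated table (the entry with a NULL msg ends the scan), logs every error bit that is set, and hands back the most severe reset level those bits requested, or HNAE3_NONE_RESET when nothing matched. A simplified, self-contained sketch of that shape; it folds the function's FUNC_RESET seed and need_reset flag into a single running maximum, and the enum values and names are illustrative stand-ins for the hnae3 definitions:

/* Hedged sketch of the sentinel-terminated table walk above. */
#include <stdio.h>

enum reset_type { NONE_RESET, FUNC_RESET, CORE_RESET, GLOBAL_RESET };

struct hw_error {
	unsigned int int_msk;
	const char *msg;
	enum reset_type reset_level;
};

static enum reset_type log_error(const struct hw_error *err,
				 unsigned int status)
{
	enum reset_type level = NONE_RESET;

	for (; err->msg; err++) {
		if (!(err->int_msk & status))
			continue;
		printf("%s found [status=0x%x]\n", err->msg, status);
		if (err->reset_level > level)   /* keep the worst case */
			level = err->reset_level;
	}
	return level;
}

int main(void)
{
	static const struct hw_error tab[] = {
		{ 1u << 0, "rx_buf_overflow", CORE_RESET },
		{ 1u << 1, "tx_vlan_tag_err", NONE_RESET },
		{ 0, NULL, NONE_RESET }          /* sentinel */
	};
	return log_error(tab, 0x3) == CORE_RESET ? 0 : 1;
}
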
/* hclge_cmd_query_error: read the error information
@@ -826,6 +1064,7 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
int num)
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+ enum hnae3_reset_type reset_level;
struct device *dev = &hdev->pdev->dev;
__le32 *desc_data;
u32 status;
@@ -845,78 +1084,94 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
/* log HNS common errors */
status = le32_to_cpu(desc[0].data[0]);
if (status) {
- hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
- &hclge_imp_tcm_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+ &hclge_imp_tcm_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[1]);
if (status) {
- hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
- &hclge_cmdq_nic_mem_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+ &hclge_cmdq_nic_mem_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) {
dev_warn(dev, "imp_rd_data_poison_err found\n");
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_NONE_RESET);
}
status = le32_to_cpu(desc[0].data[3]);
if (status) {
- hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
- &hclge_tqp_int_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+ &hclge_tqp_int_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[4]);
if (status) {
- hclge_log_error(dev, "MSIX_ECC_INT_STS",
- &hclge_msix_sram_ecc_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS",
+ &hclge_msix_sram_ecc_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log SSU(Storage Switch Unit) errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*(desc_data + 2));
if (status) {
- hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
- &hclge_ssu_mem_ecc_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
+ &hclge_ssu_mem_ecc_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
if (status) {
dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
}
status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
if (status) {
- hclge_log_error(dev, "SSU_COMMON_ERR_INT",
- &hclge_ssu_com_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT",
+ &hclge_ssu_com_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log IGU(Ingress Unit) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
- if (status)
- hclge_log_error(dev, "IGU_INT_STS",
- &hclge_igu_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "IGU_INT_STS",
+ &hclge_igu_int[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPP(Programmable Packet Process) errors */
desc_data = (__le32 *)&desc[4];
status = le32_to_cpu(*(desc_data + 1));
- if (status)
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
- &hclge_ppp_mpf_abnormal_int_st1[0], status);
+ if (status) {
+ reset_level =
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+ &hclge_ppp_mpf_abnormal_int_st1[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
- if (status)
- hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
- &hclge_ppp_mpf_abnormal_int_st3[0], status);
+ if (status) {
+ reset_level =
+ hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppp_mpf_abnormal_int_st3[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[5];
@@ -924,55 +1179,60 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
if (status) {
dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n",
"rpu_rx_pkt_ecc_mbit_err");
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
}
status = le32_to_cpu(*(desc_data + 2));
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
- &hclge_ppu_mpf_abnormal_int_st3[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
+ &hclge_ppu_mpf_abnormal_int_st3[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log TM(Traffic Manager) errors */
desc_data = (__le32 *)&desc[6];
status = le32_to_cpu(*desc_data);
if (status) {
- hclge_log_error(dev, "TM_SCH_RINT",
- &hclge_tm_sch_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "TM_SCH_RINT",
+ &hclge_tm_sch_rint[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log QCN(Quantized Congestion Control) errors */
desc_data = (__le32 *)&desc[7];
status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
if (status) {
- hclge_log_error(dev, "QCN_FIFO_RINT",
- &hclge_qcn_fifo_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "QCN_FIFO_RINT",
+ &hclge_qcn_fifo_rint[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
if (status) {
- hclge_log_error(dev, "QCN_ECC_RINT",
- &hclge_qcn_ecc_rint[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "QCN_ECC_RINT",
+ &hclge_qcn_ecc_rint[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log NCSI errors */
desc_data = (__le32 *)&desc[9];
status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
if (status) {
- hclge_log_error(dev, "NCSI_ECC_INT_RPT",
- &hclge_ncsi_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+ reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+ &hclge_ncsi_err_int[0], status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* clear all main PF RAS errors */
@@ -1000,6 +1260,7 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
{
struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
struct device *dev = &hdev->pdev->dev;
+ enum hnae3_reset_type reset_level;
__le32 *desc_data;
u32 status;
int ret;
@@ -1018,38 +1279,47 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
/* log SSU(Storage Switch Unit) errors */
status = le32_to_cpu(desc[0].data[0]);
if (status) {
- hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_err_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_err_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[1]);
if (status) {
- hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
- &hclge_ssu_fifo_overflow_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
+ &hclge_ssu_fifo_overflow_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
status = le32_to_cpu(desc[0].data[2]);
if (status) {
- hclge_log_error(dev, "SSU_ETS_TCG_INT",
- &hclge_ssu_ets_tcg_int[0], status);
- HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+ reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT",
+ &hclge_ssu_ets_tcg_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
}
/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
- if (status)
- hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
- &hclge_igu_egu_tnl_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+ &hclge_igu_egu_tnl_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
- if (status)
- hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
- &hclge_ppu_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
+ &hclge_ppu_pf_abnormal_int[0],
+ status);
+ HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level);
+ }
/* clear all PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false);
@@ -1343,14 +1613,12 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
{
struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
+ enum hnae3_reset_type reset_level;
struct hclge_desc desc_bd;
struct hclge_desc *desc;
__le32 *desc_data;
- int ret = 0;
u32 status;
-
- /* set default handling */
- set_bit(HNAE3_FUNC_RESET, reset_requests);
+ int ret;
/* query the number of bds for the MSIx int status */
hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM,
@@ -1390,9 +1658,10 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data);
if (status) {
- hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
- &hclge_mac_afifo_tnl_int[0], status);
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+ reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+ &hclge_mac_afifo_tnl_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* log PPU(RCB) MPF errors */
@@ -1400,9 +1669,11 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
status = le32_to_cpu(*(desc_data + 2)) &
HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
if (status) {
- hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
- &hclge_ppu_mpf_abnormal_int_st2[0], status);
- set_bit(HNAE3_CORE_RESET, reset_requests);
+ reset_level =
+ hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+ &hclge_ppu_mpf_abnormal_int_st2[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* clear all main PF MSIx errors */
@@ -1436,24 +1707,31 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
/* log SSU PF errors */
status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
if (status) {
- hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
- &hclge_ssu_port_based_pf_int[0], status);
- set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+ reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+ &hclge_ssu_port_based_pf_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
}
/* read and log PPP PF errors */
desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*desc_data);
- if (status)
- hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
- &hclge_ppp_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
+ &hclge_ppp_pf_abnormal_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
+ }
/* log PPU(RCB) PF errors */
desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
- if (status)
- hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
- &hclge_ppu_pf_abnormal_int[0], status);
+ if (status) {
+ reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
+ &hclge_ppu_pf_abnormal_int[0],
+ status);
+ set_bit(reset_level, reset_requests);
+ }
/* clear all PF MSIx errors */
hclge_cmd_reuse_desc(&desc[0], false);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index fc068280d391..4a2e82f7f112 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -112,6 +112,7 @@ struct hclge_hw_blk {
struct hclge_hw_error {
u32 int_msk;
const char *msg;
+ enum hnae3_reset_type reset_level;
};
int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f51e4c01b670..d2fb548e1f50 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -32,6 +32,7 @@
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
+static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
u16 *allocated_size, bool is_alloc);
@@ -1357,6 +1358,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
vport->back = hdev;
vport->vport_id = i;
vport->mps = HCLGE_MAC_DEFAULT_FRAME;
+ vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ vport->rxvlan_cfg.rx_vlan_offload_en = true;
INIT_LIST_HEAD(&vport->vlan_list);
INIT_LIST_HEAD(&vport->uc_mac_list);
INIT_LIST_HEAD(&vport->mc_mac_list);
@@ -1419,7 +1422,7 @@ static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
return ret;
}
-static int hclge_get_tc_num(struct hclge_dev *hdev)
+static u32 hclge_get_tc_num(struct hclge_dev *hdev)
{
int i, cnt = 0;
@@ -1429,17 +1432,6 @@ static int hclge_get_tc_num(struct hclge_dev *hdev)
return cnt;
}
-static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
-{
- int i, cnt = 0;
-
- for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
- if (hdev->hw_tc_map & BIT(i) &&
- hdev->tm_info.hw_pfc_map & BIT(i))
- cnt++;
- return cnt;
-}
-
/* Get the number of pfc enabled TCs, which have private buffer */
static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
struct hclge_pkt_buf_alloc *buf_alloc)
@@ -1503,14 +1495,12 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
struct hclge_pkt_buf_alloc *buf_alloc,
u32 rx_all)
{
- u32 shared_buf_min, shared_buf_tc, shared_std;
- int tc_num, pfc_enable_num;
+ u32 shared_buf_min, shared_buf_tc, shared_std, hi_thrd, lo_thrd;
+ u32 tc_num = hclge_get_tc_num(hdev);
u32 shared_buf, aligned_mps;
u32 rx_priv;
int i;
- tc_num = hclge_get_tc_num(hdev);
- pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT);
if (hnae3_dev_dcb_supported(hdev))
@@ -1519,9 +1509,7 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF
+ hdev->dv_buf_size;
- shared_buf_tc = pfc_enable_num * aligned_mps +
- (tc_num - pfc_enable_num) * aligned_mps / 2 +
- aligned_mps;
+ shared_buf_tc = tc_num * aligned_mps + aligned_mps;
shared_std = roundup(max_t(u32, shared_buf_min, shared_buf_tc),
HCLGE_BUF_SIZE_UNIT);
@@ -1538,19 +1526,26 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
} else {
buf_alloc->s_buf.self.high = aligned_mps +
HCLGE_NON_DCB_ADDITIONAL_BUF;
- buf_alloc->s_buf.self.low =
- roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT);
+ buf_alloc->s_buf.self.low = aligned_mps;
+ }
+
+ if (hnae3_dev_dcb_supported(hdev)) {
+ if (tc_num)
+ hi_thrd = (shared_buf - hdev->dv_buf_size) / tc_num;
+ else
+ hi_thrd = shared_buf - hdev->dv_buf_size;
+
+ hi_thrd = max_t(u32, hi_thrd, 2 * aligned_mps);
+ hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT);
+ lo_thrd = hi_thrd - aligned_mps / 2;
+ } else {
+ hi_thrd = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF;
+ lo_thrd = aligned_mps;
}
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- if ((hdev->hw_tc_map & BIT(i)) &&
- (hdev->tm_info.hw_pfc_map & BIT(i))) {
- buf_alloc->s_buf.tc_thrd[i].low = aligned_mps;
- buf_alloc->s_buf.tc_thrd[i].high = 2 * aligned_mps;
- } else {
- buf_alloc->s_buf.tc_thrd[i].low = 0;
- buf_alloc->s_buf.tc_thrd[i].high = aligned_mps;
- }
+ buf_alloc->s_buf.tc_thrd[i].low = lo_thrd;
+ buf_alloc->s_buf.tc_thrd[i].high = hi_thrd;
}
return true;
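
With the per-TC PFC split removed, the shared-buffer thresholds are now uniform across TCs. A small standalone model of the new arithmetic in the DCB branch; the constants are made up, the real inputs come from hdev and HCLGE_BUF_SIZE_UNIT:

    #include <stdio.h>

    #define BUF_UNIT 128u  /* stand-in for HCLGE_BUF_SIZE_UNIT */

    static unsigned int roundup_u(unsigned int x, unsigned int m)
    { return ((x + m - 1) / m) * m; }

    static unsigned int rounddown_u(unsigned int x, unsigned int m)
    { return (x / m) * m; }

    int main(void)
    {
        unsigned int aligned_mps = roundup_u(1500, BUF_UNIT);
        unsigned int shared_buf = 65536, dv_buf = 8192, tc_num = 4;
        unsigned int hi, lo;

        /* DCB case: split what is left of the shared buffer across TCs,
         * but never let the high threshold drop below two frames */
        hi = tc_num ? (shared_buf - dv_buf) / tc_num : shared_buf - dv_buf;
        if (hi < 2 * aligned_mps)
            hi = 2 * aligned_mps;
        hi = rounddown_u(hi, BUF_UNIT);
        lo = hi - aligned_mps / 2;

        printf("hi_thrd=%u lo_thrd=%u (applied to every TC)\n", hi, lo);
        return 0;
    }
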
@@ -2163,7 +2158,8 @@ static int hclge_mac_init(struct hclge_dev *hdev)
static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
{
- if (!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
+ !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
schedule_work(&hdev->mbx_service_task);
}
@@ -2677,7 +2673,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
return ret;
}
- if (!reset)
+ if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
continue;
/* Inform VF to process the reset.
@@ -2714,9 +2710,18 @@ int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
static void hclge_do_reset(struct hclge_dev *hdev)
{
+ struct hnae3_handle *handle = &hdev->vport[0].nic;
struct pci_dev *pdev = hdev->pdev;
u32 val;
+ if (hclge_get_hw_reset_stat(handle)) {
+ dev_info(&pdev->dev, "Hardware reset not finished\n");
+ dev_info(&pdev->dev, "func_rst_reg:0x%x, global_rst_reg:0x%x\n",
+ hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING),
+ hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
+ return;
+ }
+
switch (hdev->reset_type) {
case HNAE3_GLOBAL_RESET:
val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
@@ -2795,6 +2800,10 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev,
clear_bit(HNAE3_FLR_RESET, addr);
}
+ if (hdev->reset_type != HNAE3_NONE_RESET &&
+ rst_level < hdev->reset_type)
+ return HNAE3_NONE_RESET;
+
return rst_level;
}
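
The added check assumes the hnae3_reset_type enumerators are ordered by severity: while a reset of a given level is being handled, a newly derived level that compares lower is reported as HNAE3_NONE_RESET so it cannot preempt the ongoing one. A toy illustration of that gate, with the ordering an assumption of the sketch:

    #include <stdio.h>

    enum reset { NONE, FUNC, GLOBAL };  /* ascending severity, assumed */

    static enum reset gate(enum reset ongoing, enum reset derived)
    {
        if (ongoing != NONE && derived < ongoing)
            return NONE;  /* lower-level reset deferred */
        return derived;
    }

    int main(void)
    {
        printf("%d\n", gate(GLOBAL, FUNC));  /* 0: FUNC deferred */
        printf("%d\n", gate(FUNC, GLOBAL));  /* 2: escalation allowed */
        return 0;
    }
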
@@ -3022,6 +3031,7 @@ static void hclge_reset(struct hclge_dev *hdev)
hdev->last_reset_time = jiffies;
hdev->reset_fail_cnt = 0;
ae_dev->reset_type = HNAE3_NONE_RESET;
+ del_timer(&hdev->reset_timer);
return;
@@ -6567,30 +6577,6 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
return ret;
}
-int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
- u16 vlan_id, bool is_kill)
-{
- struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
-
- return hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, vlan_id,
- 0, is_kill);
-}
-
-static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
- u16 vlan, u8 qos, __be16 proto)
-{
- struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_dev *hdev = vport->back;
-
- if ((vfid >= hdev->num_alloc_vfs) || (vlan > 4095) || (qos > 7))
- return -EINVAL;
- if (proto != htons(ETH_P_8021Q))
- return -EPROTONOSUPPORT;
-
- return hclge_set_vlan_filter_hw(hdev, proto, vfid, vlan, qos, false);
-}
-
static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
{
struct hclge_tx_vtag_cfg *vcfg = &vport->txvlan_cfg;
@@ -6664,6 +6650,52 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
return status;
}
+static int hclge_vlan_offload_cfg(struct hclge_vport *vport,
+ u16 port_base_vlan_state,
+ u16 vlan_tag)
+{
+ int ret;
+
+ if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->txvlan_cfg.accept_tag1 = true;
+ vport->txvlan_cfg.insert_tag1_en = false;
+ vport->txvlan_cfg.default_tag1 = 0;
+ } else {
+ vport->txvlan_cfg.accept_tag1 = false;
+ vport->txvlan_cfg.insert_tag1_en = true;
+ vport->txvlan_cfg.default_tag1 = vlan_tag;
+ }
+
+ vport->txvlan_cfg.accept_untag1 = true;
+
+ /* accept_tag2 and accept_untag2 are not supported on
+ * pdev revision(0x20); newer revisions support them,
+ * but these two fields cannot be configured by the user.
+ */
+ vport->txvlan_cfg.accept_tag2 = true;
+ vport->txvlan_cfg.accept_untag2 = true;
+ vport->txvlan_cfg.insert_tag2_en = false;
+ vport->txvlan_cfg.default_tag2 = 0;
+
+ if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->rxvlan_cfg.strip_tag1_en = false;
+ vport->rxvlan_cfg.strip_tag2_en =
+ vport->rxvlan_cfg.rx_vlan_offload_en;
+ } else {
+ vport->rxvlan_cfg.strip_tag1_en =
+ vport->rxvlan_cfg.rx_vlan_offload_en;
+ vport->rxvlan_cfg.strip_tag2_en = true;
+ }
+ vport->rxvlan_cfg.vlan1_vlan_prionly = false;
+ vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+
+ ret = hclge_set_vlan_tx_offload_cfg(vport);
+ if (ret)
+ return ret;
+
+ return hclge_set_vlan_rx_offload_cfg(vport);
+}
+
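
hclge_vlan_offload_cfg() centralizes a mapping that previously lived inline in hclge_init_vlan_config(): which strip enable follows the rx offload knob flips with the port based VLAN state, while the other tag position is fixed. A standalone truth-table sketch; the struct and enumerators are illustrative stand-ins for the driver's:

    #include <stdio.h>
    #include <stdbool.h>

    enum { PORT_VLAN_DISABLE, PORT_VLAN_ENABLE };

    struct rx_vtag_cfg { bool strip_tag1_en, strip_tag2_en; };

    static struct rx_vtag_cfg rx_cfg(int state, bool rx_offload_en)
    {
        struct rx_vtag_cfg c;

        if (state == PORT_VLAN_DISABLE) {
            c.strip_tag1_en = false;
            c.strip_tag2_en = rx_offload_en;
        } else {
            c.strip_tag1_en = rx_offload_en;
            c.strip_tag2_en = true;
        }
        return c;
    }

    int main(void)
    {
        for (int s = PORT_VLAN_DISABLE; s <= PORT_VLAN_ENABLE; s++)
            for (int en = 0; en <= 1; en++) {
                struct rx_vtag_cfg c = rx_cfg(s, en);
                printf("state=%d offload=%d -> strip1=%d strip2=%d\n",
                       s, en, c.strip_tag1_en, c.strip_tag2_en);
            }
        return 0;
    }
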
static int hclge_set_vlan_protocol_type(struct hclge_dev *hdev)
{
struct hclge_rx_vlan_type_cfg_cmd *rx_req;
@@ -6754,34 +6786,14 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
return ret;
for (i = 0; i < hdev->num_alloc_vport; i++) {
- vport = &hdev->vport[i];
- vport->txvlan_cfg.accept_tag1 = true;
- vport->txvlan_cfg.accept_untag1 = true;
-
- /* accept_tag2 and accept_untag2 are not supported on
- * pdev revision(0x20), new revision support them. The
- * value of this two fields will not return error when driver
- * send command to fireware in revision(0x20).
- * This two fields can not configured by user.
- */
- vport->txvlan_cfg.accept_tag2 = true;
- vport->txvlan_cfg.accept_untag2 = true;
+ u16 vlan_tag;
- vport->txvlan_cfg.insert_tag1_en = false;
- vport->txvlan_cfg.insert_tag2_en = false;
- vport->txvlan_cfg.default_tag1 = 0;
- vport->txvlan_cfg.default_tag2 = 0;
-
- ret = hclge_set_vlan_tx_offload_cfg(vport);
- if (ret)
- return ret;
-
- vport->rxvlan_cfg.strip_tag1_en = false;
- vport->rxvlan_cfg.strip_tag2_en = true;
- vport->rxvlan_cfg.vlan1_vlan_prionly = false;
- vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+ vport = &hdev->vport[i];
+ vlan_tag = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
- ret = hclge_set_vlan_rx_offload_cfg(vport);
+ ret = hclge_vlan_offload_cfg(vport,
+ vport->port_base_vlan_cfg.state,
+ vlan_tag);
if (ret)
return ret;
}
@@ -6789,7 +6801,8 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
return hclge_set_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
}
-void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id)
+static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ bool written_to_tbl)
{
struct hclge_vport_vlan_cfg *vlan;
@@ -6801,14 +6814,38 @@ void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id)
if (!vlan)
return;
- vlan->hd_tbl_status = true;
+ vlan->hd_tbl_status = written_to_tbl;
vlan->vlan_id = vlan_id;
list_add_tail(&vlan->node, &vport->vlan_list);
}
-void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
- bool is_write_tbl)
+static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport)
+{
+ struct hclge_vport_vlan_cfg *vlan, *tmp;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+ if (!vlan->hd_tbl_status) {
+ ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+ vport->vport_id,
+ vlan->vlan_id, 0, false);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "restore vport vlan list failed, ret=%d\n",
+ ret);
+ return ret;
+ }
+ }
+ vlan->hd_tbl_status = true;
+ }
+
+ return 0;
+}
+
+static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
+ bool is_write_tbl)
{
struct hclge_vport_vlan_cfg *vlan, *tmp;
struct hclge_dev *hdev = vport->back;
@@ -6871,14 +6908,201 @@ int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- vport->rxvlan_cfg.strip_tag1_en = false;
- vport->rxvlan_cfg.strip_tag2_en = enable;
+ if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ vport->rxvlan_cfg.strip_tag1_en = false;
+ vport->rxvlan_cfg.strip_tag2_en = enable;
+ } else {
+ vport->rxvlan_cfg.strip_tag1_en = enable;
+ vport->rxvlan_cfg.strip_tag2_en = true;
+ }
vport->rxvlan_cfg.vlan1_vlan_prionly = false;
vport->rxvlan_cfg.vlan2_vlan_prionly = false;
+ vport->rxvlan_cfg.rx_vlan_offload_en = enable;
return hclge_set_vlan_rx_offload_cfg(vport);
}
+static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
+ u16 port_base_vlan_state,
+ struct hclge_vlan_info *new_info,
+ struct hclge_vlan_info *old_info)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_ENABLE) {
+ hclge_rm_vport_all_vlan_table(vport, false);
+ return hclge_set_vlan_filter_hw(hdev,
+ htons(new_info->vlan_proto),
+ vport->vport_id,
+ new_info->vlan_tag,
+ new_info->qos, false);
+ }
+
+ ret = hclge_set_vlan_filter_hw(hdev, htons(old_info->vlan_proto),
+ vport->vport_id, old_info->vlan_tag,
+ old_info->qos, true);
+ if (ret)
+ return ret;
+
+ return hclge_add_vport_all_vlan_table(vport);
+}
+
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info)
+{
+ struct hnae3_handle *nic = &vport->nic;
+ struct hclge_vlan_info *old_vlan_info;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ old_vlan_info = &vport->port_base_vlan_cfg.vlan_info;
+
+ ret = hclge_vlan_offload_cfg(vport, state, vlan_info->vlan_tag);
+ if (ret)
+ return ret;
+
+ if (state == HNAE3_PORT_BASE_VLAN_MODIFY) {
+ /* add new VLAN tag */
+ ret = hclge_set_vlan_filter_hw(hdev, vlan_info->vlan_proto,
+ vport->vport_id,
+ vlan_info->vlan_tag,
+ vlan_info->qos, false);
+ if (ret)
+ return ret;
+
+ /* remove old VLAN tag */
+ ret = hclge_set_vlan_filter_hw(hdev, old_vlan_info->vlan_proto,
+ vport->vport_id,
+ old_vlan_info->vlan_tag,
+ old_vlan_info->qos, true);
+ if (ret)
+ return ret;
+
+ goto update;
+ }
+
+ ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
+ old_vlan_info);
+ if (ret)
+ return ret;
+
+ /* update state only when disabling/enabling port based VLAN */
+ vport->port_base_vlan_cfg.state = state;
+ if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ else
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+update:
+ vport->port_base_vlan_cfg.vlan_info.vlan_tag = vlan_info->vlan_tag;
+ vport->port_base_vlan_cfg.vlan_info.qos = vlan_info->qos;
+ vport->port_base_vlan_cfg.vlan_info.vlan_proto = vlan_info->vlan_proto;
+
+ return 0;
+}
+
+static u16 hclge_get_port_base_vlan_state(struct hclge_vport *vport,
+ enum hnae3_port_base_vlan_state state,
+ u16 vlan)
+{
+ if (state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ if (!vlan)
+ return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+ else
+ return HNAE3_PORT_BASE_VLAN_ENABLE;
+ } else {
+ if (!vlan)
+ return HNAE3_PORT_BASE_VLAN_DISABLE;
+ else if (vport->port_base_vlan_cfg.vlan_info.vlan_tag == vlan)
+ return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+ else
+ return HNAE3_PORT_BASE_VLAN_MODIFY;
+ }
+}
+
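
hclge_get_port_base_vlan_state() is a four-way decision: from DISABLE, a non-zero vid enables; from ENABLE, vid 0 disables, the same vid is a no-op, and any other vid is a modify. The function is simple enough to exercise standalone; the enumerator values below are stand-ins:

    #include <stdio.h>

    enum { DISABLE, ENABLE, MODIFY, NOCHANGE };  /* stand-in values */

    static int next_state(int state, unsigned short cur_vid, unsigned short vid)
    {
        if (state == DISABLE)
            return vid ? ENABLE : NOCHANGE;
        if (!vid)
            return DISABLE;
        return cur_vid == vid ? NOCHANGE : MODIFY;
    }

    int main(void)
    {
        printf("%d %d %d %d\n",
               next_state(DISABLE, 0, 10),   /* ENABLE */
               next_state(ENABLE, 10, 0),    /* DISABLE */
               next_state(ENABLE, 10, 10),   /* NOCHANGE */
               next_state(ENABLE, 10, 20));  /* MODIFY */
        return 0;
    }
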
+static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+ u16 vlan, u8 qos, __be16 proto)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_vlan_info vlan_info;
+ u16 state;
+ int ret;
+
+ if (hdev->pdev->revision == 0x20)
+ return -EOPNOTSUPP;
+
+ /* qos is a 3-bit value, so it cannot be bigger than 7 */
+ if (vfid >= hdev->num_alloc_vfs || vlan > VLAN_N_VID - 1 || qos > 7)
+ return -EINVAL;
+ if (proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ vport = &hdev->vport[vfid];
+ state = hclge_get_port_base_vlan_state(vport,
+ vport->port_base_vlan_cfg.state,
+ vlan);
+ if (state == HNAE3_PORT_BASE_VLAN_NOCHANGE)
+ return 0;
+
+ vlan_info.vlan_tag = vlan;
+ vlan_info.qos = qos;
+ vlan_info.vlan_proto = ntohs(proto);
+
+ /* update port based VLAN for PF */
+ if (!vfid) {
+ hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ ret = hclge_update_port_base_vlan_cfg(vport, state, &vlan_info);
+ hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+
+ return ret;
+ }
+
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) {
+ return hclge_update_port_base_vlan_cfg(vport, state,
+ &vlan_info);
+ } else {
+ ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
+ (u8)vfid, state,
+ vlan, qos,
+ ntohs(proto));
+ return ret;
+ }
+}
+
+int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
+ u16 vlan_id, bool is_kill)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ bool written_to_tbl = false;
+ int ret = 0;
+
+ /* when port based VLAN is enabled, we use the port based VLAN as the
+ * VLAN filter entry. In this case, we don't update the VLAN filter
+ * table when the user adds a new VLAN or removes an existing one; we
+ * just update the vport VLAN list. The VLAN ids in the VLAN list won't
+ * be written into the VLAN filter table until port based VLAN is
+ * disabled.
+ */
+ if (handle->port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) {
+ ret = hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id,
+ vlan_id, 0, is_kill);
+ written_to_tbl = true;
+ }
+
+ if (ret)
+ return ret;
+
+ if (is_kill)
+ hclge_rm_vport_vlan_table(vport, vlan_id, false);
+ else
+ hclge_add_vport_vlan_table(vport, vlan_id,
+ written_to_tbl);
+
+ return 0;
+}
+
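
The rewritten hclge_set_vlan_filter() separates what the user asked for (the vport VLAN list) from what is programmed in hardware (the filter table): while port based VLAN is active, user VLANs are only queued in the list with hd_tbl_status false, and hclge_add_vport_all_vlan_table() above replays them into hardware once port based VLAN is disabled. A condensed model of that bookkeeping, with names chosen for the sketch:

    #include <stdio.h>
    #include <stdbool.h>

    struct vlan_entry { unsigned short vid; bool in_hw; };

    static struct vlan_entry list[8];
    static int nvlans;

    static void user_add_vlan(unsigned short vid, bool port_vlan_active)
    {
        bool wrote = false;

        if (!port_vlan_active) {  /* program hardware immediately */
            printf("hw: add vid %u\n", vid);
            wrote = true;
        }
        list[nvlans++] = (struct vlan_entry){ vid, wrote };
    }

    static void port_vlan_disabled(void)  /* replay deferred entries */
    {
        for (int i = 0; i < nvlans; i++)
            if (!list[i].in_hw) {
                printf("hw: restore vid %u\n", list[i].vid);
                list[i].in_hw = true;
            }
    }

    int main(void)
    {
        user_add_vlan(100, true);   /* deferred: port based VLAN owns the filter */
        user_add_vlan(200, false);  /* programmed immediately */
        port_vlan_disabled();
        return 0;
    }
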
static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps)
{
struct hclge_config_max_frm_size_cmd *req;
@@ -7732,7 +7956,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev)
int i;
for (i = 0; i < hdev->num_alloc_vport; i++) {
- hclge_vport_start(vport);
+ hclge_vport_stop(vport);
vport++;
}
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index b57ac4beb313..f04a52f143ae 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -807,10 +807,11 @@ struct hclge_tx_vtag_cfg {
/* VPort level vlan tag configuration for RX direction */
struct hclge_rx_vtag_cfg {
- bool strip_tag1_en; /* Whether strip inner vlan tag */
- bool strip_tag2_en; /* Whether strip outer vlan tag */
- bool vlan1_vlan_prionly;/* Inner VLAN Tag up to descriptor Enable */
- bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
+ u8 rx_vlan_offload_en; /* Whether enable rx vlan offload */
+ u8 strip_tag1_en; /* Whether strip inner vlan tag */
+ u8 strip_tag2_en; /* Whether strip outer vlan tag */
+ u8 vlan1_vlan_prionly; /* Inner VLAN Tag up to descriptor Enable */
+ u8 vlan2_vlan_prionly; /* Outer VLAN Tag up to descriptor Enable */
};
struct hclge_rss_tuple_cfg {
@@ -829,6 +830,17 @@ enum HCLGE_VPORT_STATE {
HCLGE_VPORT_STATE_MAX
};
+struct hclge_vlan_info {
+ u16 vlan_proto; /* only 802.1Q is supported so far */
+ u16 qos;
+ u16 vlan_tag;
+};
+
+struct hclge_port_base_vlan_config {
+ u16 state;
+ struct hclge_vlan_info vlan_info;
+};
+
struct hclge_vport {
u16 alloc_tqps; /* Allocated Tx/Rx queues */
@@ -845,6 +857,7 @@ struct hclge_vport {
u16 bw_limit; /* VSI BW Limit (0 = disabled) */
u8 dwrr;
+ struct hclge_port_base_vlan_config port_base_vlan_cfg;
struct hclge_tx_vtag_cfg txvlan_cfg;
struct hclge_rx_vtag_cfg rxvlan_cfg;
@@ -924,9 +937,11 @@ void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
enum HCLGE_MAC_ADDR_TYPE mac_type);
void hclge_uninit_vport_mac_table(struct hclge_dev *hdev);
-void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id);
-void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
- bool is_write_tbl);
void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
+int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
+ struct hclge_vlan_info *vlan_info);
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+ u16 state, u16 vlan_tag, u16 qos,
+ u16 vlan_proto);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 9aa9c643afcd..24386bd894f7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -289,9 +289,25 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
return 0;
}
+int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
+ u16 state, u16 vlan_tag, u16 qos,
+ u16 vlan_proto)
+{
+#define MSG_DATA_SIZE 8
+
+ u8 msg_data[MSG_DATA_SIZE];
+
+ memcpy(&msg_data[0], &state, sizeof(u16));
+ memcpy(&msg_data[2], &vlan_proto, sizeof(u16));
+ memcpy(&msg_data[4], &qos, sizeof(u16));
+ memcpy(&msg_data[6], &vlan_tag, sizeof(u16));
+
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
+ HLCGE_MBX_PUSH_VLAN_INFO, vfid);
+}
+
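
The eight-byte payload is four u16 fields copied at fixed offsets; the VF side (hclgevf_mbx_async_handler further below) reads the state out of msg_q[1] and hands the following words over as vlan_info. A standalone sketch of packing and unpacking that layout, in host byte order as in the driver's memcpy:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    #define MSG_DATA_SIZE 8

    static void pack(uint8_t *msg, uint16_t state, uint16_t proto,
                     uint16_t qos, uint16_t tag)
    {
        memcpy(&msg[0], &state, sizeof(uint16_t));
        memcpy(&msg[2], &proto, sizeof(uint16_t));
        memcpy(&msg[4], &qos,   sizeof(uint16_t));
        memcpy(&msg[6], &tag,   sizeof(uint16_t));
    }

    int main(void)
    {
        uint8_t msg[MSG_DATA_SIZE];
        uint16_t words[4];

        pack(msg, 1, 0x8100, 3, 100);
        memcpy(words, msg, sizeof(words));  /* receiver view: an array of u16 */
        printf("state=%u proto=0x%x qos=%u tag=%u\n",
               words[0], words[1], words[2], words[3]);
        return 0;
    }
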
static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
- bool gen_resp)
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
{
int status = 0;
@@ -305,19 +321,27 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
memcpy(&proto, &mbx_req->msg[5], sizeof(proto));
status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
vlan, is_kill);
- if (!status)
- is_kill ? hclge_rm_vport_vlan_table(vport, vlan, false)
- : hclge_add_vport_vlan_table(vport, vlan);
} else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) {
struct hnae3_handle *handle = &vport->nic;
bool en = mbx_req->msg[2] ? true : false;
status = hclge_en_hw_strip_rxvtag(handle, en);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) {
+ struct hclge_vlan_info *vlan_info;
+ u16 *state;
+
+ state = (u16 *)&mbx_req->msg[2];
+ vlan_info = (struct hclge_vlan_info *)&mbx_req->msg[4];
+ status = hclge_update_port_base_vlan_cfg(vport, *state,
+ vlan_info);
+ } else if (mbx_req->msg[1] == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) {
+ u8 state;
+
+ state = vport->port_base_vlan_cfg.state;
+ status = hclge_gen_resp_to_vf(vport, mbx_req, 0, &state,
+ sizeof(u8));
}
- if (gen_resp)
- status = hclge_gen_resp_to_vf(vport, mbx_req, status, NULL, 0);
-
return status;
}
@@ -587,7 +611,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
ret);
break;
case HCLGE_MBX_SET_VLAN:
- ret = hclge_set_vf_vlan_cfg(vport, req, false);
+ ret = hclge_set_vf_vlan_cfg(vport, req);
if (ret)
dev_err(&hdev->pdev->dev,
"PF failed(%d) to config VF's VLAN\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 48eda2c6fdae..12be4e293fcf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -121,12 +121,18 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
int hclge_mac_mdio_config(struct hclge_dev *hdev)
{
+#define PHY_INEXISTENT 255
+
struct hclge_mac *mac = &hdev->hw.mac;
struct phy_device *phydev;
struct mii_bus *mdio_bus;
int ret;
- if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
+ if (hdev->hw.mac.phy_addr == PHY_INEXISTENT) {
+ dev_info(&hdev->pdev->dev,
+ "no phy device is connected to mdio bus\n");
+ return 0;
+ } else if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR) {
dev_err(&hdev->pdev->dev, "phy_addr(%d) is too large.\n",
hdev->hw.mac.phy_addr);
return -EINVAL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 930b175a7090..1b428d4a1132 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -334,7 +334,7 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
int ret;
spin_lock_bh(&hdev->hw.cmq.csq.lock);
- spin_lock_bh(&hdev->hw.cmq.crq.lock);
+ spin_lock(&hdev->hw.cmq.crq.lock);
/* initialize the pointers of async rx queue of mailbox */
hdev->arq.hdev = hdev;
@@ -348,7 +348,7 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
hclgevf_cmd_init_regs(&hdev->hw);
- spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+ spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
@@ -357,8 +357,8 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
* reset may happen when lower level reset is being processed.
*/
if (hclgevf_is_reset_pending(hdev)) {
- set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
- return -EBUSY;
+ ret = -EBUSY;
+ goto err_cmd_init;
}
/* get firmware version */
@@ -366,13 +366,18 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
if (ret) {
dev_err(&hdev->pdev->dev,
"failed(%d) to query firmware version\n", ret);
- return ret;
+ goto err_cmd_init;
}
hdev->fw_version = version;
dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
return 0;
+
+err_cmd_init:
+ set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+
+ return ret;
}
static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 509ff93f8cf5..2e277c91a106 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -245,6 +245,27 @@ static int hclgevf_get_tc_info(struct hclgevf_dev *hdev)
return 0;
}
+static int hclgevf_get_port_base_vlan_filter_state(struct hclgevf_dev *hdev)
+{
+ struct hnae3_handle *nic = &hdev->nic;
+ u8 resp_msg;
+ int ret;
+
+ ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,
+ NULL, 0, true, &resp_msg, sizeof(u8));
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "VF request to get port based vlan state failed %d",
+ ret);
+ return ret;
+ }
+
+ nic->port_base_vlan_state = resp_msg;
+
+ return 0;
+}
+
static int hclgevf_get_queue_info(struct hclgevf_dev *hdev)
{
#define HCLGEVF_TQPS_RSS_INFO_LEN 6
@@ -1474,6 +1495,8 @@ err_reset:
*/
hclgevf_cmd_init(hdev);
dev_err(&hdev->pdev->dev, "failed to reset VF\n");
+ if (hclgevf_is_reset_pending(hdev))
+ hclgevf_reset_task_schedule(hdev);
return ret;
}
@@ -1583,8 +1606,7 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
- !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) {
set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->rst_service_task);
}
@@ -1833,6 +1855,11 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
{
int ret;
+ /* get current port based vlan state from PF */
+ ret = hclgevf_get_port_base_vlan_filter_state(hdev);
+ if (ret)
+ return ret;
+
/* get queue configuration from PF */
ret = hclgevf_get_queue_info(hdev);
if (ret)
@@ -2030,9 +2057,15 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive)
static int hclgevf_client_start(struct hnae3_handle *handle)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ int ret;
+
+ ret = hclgevf_set_alive(handle, true);
+ if (ret)
+ return ret;
mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ);
- return hclgevf_set_alive(handle, true);
+
+ return 0;
}
static void hclgevf_client_stop(struct hnae3_handle *handle)
@@ -2074,6 +2107,10 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
{
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+ if (hdev->keep_alive_timer.function)
+ del_timer_sync(&hdev->keep_alive_timer);
+ if (hdev->keep_alive_task.func)
+ cancel_work_sync(&hdev->keep_alive_task);
if (hdev->service_timer.function)
del_timer_sync(&hdev->service_timer);
if (hdev->service_task.func)
@@ -2779,6 +2816,31 @@ static void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version,
}
}
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+ u8 *port_base_vlan_info, u8 data_size)
+{
+ struct hnae3_handle *nic = &hdev->nic;
+
+ rtnl_lock();
+ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ rtnl_unlock();
+
+ /* send msg to PF and wait for the port based VLAN info to be updated */
+ hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_PORT_BASE_VLAN_CFG,
+ port_base_vlan_info, data_size,
+ false, NULL, 0);
+
+ if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
+ else
+ nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
+
+ rtnl_lock();
+ hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+ rtnl_unlock();
+}
+
static const struct hnae3_ae_ops hclgevf_ops = {
.init_ae_dev = hclgevf_init_ae_dev,
.uninit_ae_dev = hclgevf_uninit_ae_dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index c128863ee7d0..49e5bec53d45 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -290,4 +290,6 @@ void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
u8 duplex);
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
+ u8 *port_base_vlan_info, u8 data_size);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 3bcf49bde11a..bf570840b1f4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -198,6 +198,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
case HCLGE_MBX_LINK_STAT_CHANGE:
case HCLGE_MBX_ASSERTING_RESET:
case HCLGE_MBX_LINK_STAT_MODE:
+ case HLCGE_MBX_PUSH_VLAN_INFO:
/* set this mbx event as pending. This is required as we
* might lose interrupt event when mbx task is busy
* handling. This shall be cleared when mbx task just
@@ -243,8 +244,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
{
enum hnae3_reset_type reset_type;
- u16 link_status;
- u16 *msg_q;
+ u16 link_status, state;
+ u16 *msg_q, *vlan_info;
u8 duplex;
u32 speed;
u32 tail;
@@ -299,6 +300,12 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
hclgevf_reset_task_schedule(hdev);
break;
+ case HLCGE_MBX_PUSH_VLAN_INFO:
+ state = le16_to_cpu(msg_q[1]);
+ vlan_info = &msg_q[1];
+ hclgevf_update_port_base_vlan_info(hdev, state,
+ (u8 *)vlan_info, 8);
+ break;
default:
dev_err(&hdev->pdev->dev,
"fetched unsupported(%d) message from arq\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index be48c6440251..0a2ffe794a54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1347,7 +1347,7 @@ static void set_wqname(struct mlx5_core_dev *dev)
struct mlx5_cmd *cmd = &dev->cmd;
snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
- dev_name(&dev->pdev->dev));
+ dev->priv.name);
}
static void clean_debug_files(struct mlx5_core_dev *dev)
@@ -1902,9 +1902,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
memset(cmd, 0, sizeof(*cmd));
cmd_if_rev = cmdif_rev(dev);
if (cmd_if_rev != CMD_IF_REV) {
- dev_err(&dev->pdev->dev,
- "Driver cmdif rev(%d) differs from firmware's(%d)\n",
- CMD_IF_REV, cmd_if_rev);
+ mlx5_core_err(dev,
+ "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+ CMD_IF_REV, cmd_if_rev);
return -EINVAL;
}
@@ -1921,14 +1921,14 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
cmd->log_sz = cmd_l >> 4 & 0xf;
cmd->log_stride = cmd_l & 0xf;
if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
- dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
- 1 << cmd->log_sz);
+ mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n",
+ 1 << cmd->log_sz);
err = -EINVAL;
goto err_free_page;
}
if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
- dev_err(&dev->pdev->dev, "command queue size overflow\n");
+ mlx5_core_err(dev, "command queue size overflow\n");
err = -EINVAL;
goto err_free_page;
}
@@ -1939,8 +1939,8 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
if (cmd->cmdif_rev > CMD_IF_REV) {
- dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
- CMD_IF_REV, cmd->cmdif_rev);
+ mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n",
+ CMD_IF_REV, cmd->cmdif_rev);
err = -EOPNOTSUPP;
goto err_free_page;
}
@@ -1956,7 +1956,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
cmd_h = (u32)((u64)(cmd->dma) >> 32);
cmd_l = (u32)(cmd->dma);
if (cmd_l & 0xfff) {
- dev_err(&dev->pdev->dev, "invalid command queue address\n");
+ mlx5_core_err(dev, "invalid command queue address\n");
err = -ENOMEM;
goto err_free_page;
}
@@ -1976,7 +1976,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
set_wqname(dev);
cmd->wq = create_singlethread_workqueue(cmd->wq_name);
if (!cmd->wq) {
- dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+ mlx5_core_err(dev, "failed to create command workqueue\n");
err = -ENOMEM;
goto err_cache;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
index 83f90e9aff45..7b5901d42994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
@@ -47,7 +47,7 @@ TRACE_EVENT(mlx5_fw,
TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
TP_STRUCT__entry(
- __string(dev_name, dev_name(&tracer->dev->pdev->dev))
+ __string(dev_name, tracer->dev->priv.name)
__field(u64, trace_timestamp)
__field(bool, lost)
__field(u8, event_id)
@@ -55,7 +55,7 @@ TRACE_EVENT(mlx5_fw,
),
TP_fast_assign(
- __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+ __assign_str(dev_name, tracer->dev->priv.name);
__entry->trace_timestamp = trace_timestamp;
__entry->lost = lost;
__entry->event_id = event_id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3e1ea8b42c77..bd855ab8dfe2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -241,7 +241,6 @@ struct mlx5e_params {
struct net_dim_cq_moder rx_cq_moderation;
struct net_dim_cq_moder tx_cq_moderation;
bool lro_en;
- u32 lro_wqe_sz;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
bool scatter_fcs_en;
@@ -975,7 +974,7 @@ void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
*/
wmb();
- mlx5_write64((__be32 *)ctrl, uar_map, NULL);
+ mlx5_write64((__be32 *)ctrl, uar_map);
}
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
@@ -1087,6 +1086,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index fa2a3c444cdc..b9d5830e8344 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -70,7 +70,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
if (ret)
return ret;
- if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway)
+ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
return -ENETUNREACH;
#else
return -EOPNOTSUPP;
@@ -96,7 +96,7 @@ static const char *mlx5e_netdev_kind(struct net_device *dev)
if (dev->rtnl_link_ops)
return dev->rtnl_link_ops->kind;
else
- return "";
+ return "unknown";
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
@@ -636,8 +636,10 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
headers_c, headers_v);
} else {
netdev_warn(priv->netdev,
- "decapsulation offload is not supported for %s net device (%d)\n",
- mlx5e_netdev_kind(filter_dev), tunnel_type);
+ "decapsulation offload is not supported for %s (kind: \"%s\")\n",
+ netdev_name(filter_dev),
+ mlx5e_netdev_kind(filter_dev));
+
return -EOPNOTSUPP;
}
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e08a1eb04e22..ba705392b46b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -204,7 +204,6 @@ static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
- params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
params->log_rq_mtu_frames = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
@@ -2637,7 +2636,7 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
- (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
}
@@ -2812,6 +2811,21 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
return 0;
}
+void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
+
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
+ ETH_MAX_MTU);
+}
+
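
mlx5e_set_netdev_mtu_boundaries() folds the duplicated min/max MTU setup from the NIC and representor profiles into one helper; the only behavioral change is the min_t() clamp against ETH_MAX_MTU. A sketch of the clamp under the assumption that MLX5E_HW2SW_MTU subtracts a fixed per-profile overhead from the hardware limit:

    #include <stdio.h>

    #define ETH_MIN_MTU 68u
    #define ETH_MAX_MTU 65535u

    /* assumed shape of MLX5E_HW2SW_MTU: hardware max minus fixed overhead */
    static unsigned int hw2sw_mtu(unsigned int hw_max, unsigned int hard_mtu)
    {
        return hw_max - hard_mtu;
    }

    int main(void)
    {
        unsigned int hw_max = 9978, hard_mtu = 22;  /* illustrative values */
        unsigned int min_mtu = ETH_MIN_MTU;
        unsigned int max_mtu = hw2sw_mtu(hw_max, hard_mtu);

        if (max_mtu > ETH_MAX_MTU)  /* the new min_t() clamp */
            max_mtu = ETH_MAX_MTU;
        printf("mtu range: %u - %u\n", min_mtu, max_mtu);
        return 0;
    }
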
static void mlx5e_netdev_set_tcs(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4913,7 +4927,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
mlx5e_init_l2_addr(priv);
@@ -4921,10 +4934,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (!netif_running(netdev))
mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
- /* MTU range: 68 - hw-specific max */
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_netdev_mtu_boundaries(priv);
mlx5e_set_dev_port_mtu(priv);
mlx5_lag_add(mdev, netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a66b6ed80b30..6bfdefa8b9f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -795,7 +795,8 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev))
+ if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+ !is_vlan_dev(netdev))
return NOTIFY_OK;
switch (event) {
@@ -1623,13 +1624,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
- struct mlx5_core_dev *mdev = priv->mdev;
- u16 max_mtu;
-
- netdev->min_mtu = ETH_MIN_MTU;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_netdev_mtu_boundaries(priv);
}
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ffc4a36551c8..a2070817a627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1438,6 +1438,26 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return 0;
}
+static void *get_match_headers_criteria(u32 flags,
+ struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers) :
+ MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+}
+
+static void *get_match_headers_value(u32 flags,
+ struct mlx5_flow_spec *spec)
+{
+ return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+ MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers) :
+ MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+}
+
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f,
@@ -1503,10 +1523,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
/* In decap flow, header pointers should point to the inner
* headers, outer headers were already set by parse_tunnel_attr
*/
- headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- inner_headers);
+ headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+ spec);
+ headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
+ spec);
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -1521,11 +1541,23 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
if (match.mask->n_proto)
*match_level = MLX5_MATCH_L2;
}
-
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+ is_vlan_dev(filter_dev)) {
+ struct flow_dissector_key_vlan filter_dev_mask;
+ struct flow_dissector_key_vlan filter_dev_key;
struct flow_match_vlan match;
- flow_rule_match_vlan(rule, &match);
+ if (is_vlan_dev(filter_dev)) {
+ match.key = &filter_dev_key;
+ match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+ match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
+ match.key->vlan_priority = 0;
+ match.mask = &filter_dev_mask;
+ memset(match.mask, 0xff, sizeof(*match.mask));
+ match.mask->vlan_priority = 0;
+ } else {
+ flow_rule_match_vlan(rule, &match);
+ }
if (match.mask->vlan_id ||
match.mask->vlan_priority ||
match.mask->vlan_tpid) {
@@ -1875,39 +1907,73 @@ struct mlx5_fields {
u8 field;
u8 size;
u32 offset;
+ u32 match_offset;
};
-#define OFFLOAD(fw_field, size, field, off) \
- {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
+#define OFFLOAD(fw_field, size, field, off, match_field) \
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
+ offsetof(struct pedit_headers, field) + (off), \
+ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
+
+static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
+ void *matchmaskp, int size)
+{
+ bool same = false;
+
+ switch (size) {
+ case sizeof(u8):
+ same = ((*(u8 *)valp) & (*(u8 *)maskp)) ==
+ ((*(u8 *)matchvalp) & (*(u8 *)matchmaskp));
+ break;
+ case sizeof(u16):
+ same = ((*(u16 *)valp) & (*(u16 *)maskp)) ==
+ ((*(u16 *)matchvalp) & (*(u16 *)matchmaskp));
+ break;
+ case sizeof(u32):
+ same = ((*(u32 *)valp) & (*(u32 *)maskp)) ==
+ ((*(u32 *)matchvalp) & (*(u32 *)matchmaskp));
+ break;
+ }
+
+ return same;
+}
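
cmp_val_mask() lets offload_pedit_fields() drop a SET action when the flow already matches exactly the value being written: if (val & mask) equals (match_val & match_mask) for the field's width, the rewrite is a no-op for every packet the rule can see. A standalone demonstration specialized to the u16 case:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    /* same idea as cmp_val_mask(), specialized to u16 for brevity */
    static bool redundant_set_u16(uint16_t set_val, uint16_t set_mask,
                                  uint16_t match_val, uint16_t match_mask)
    {
        return (set_val & set_mask) == (match_val & match_mask);
    }

    int main(void)
    {
        /* rule matches tcp dport 80 exactly and pedit sets dport to 80 */
        bool skip = redundant_set_u16(80, 0xffff, 80, 0xffff);

        printf("skip rewrite: %s\n", skip ? "yes" : "no");
        return 0;
    }
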
static struct mlx5_fields fields[] = {
- OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
- OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
- OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
- OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
- OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
- OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0),
-
- OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
- OFFLOAD(SIPV4, 4, ip4.saddr, 0),
- OFFLOAD(DIPV4, 4, ip4.daddr, 0),
-
- OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
- OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
- OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
- OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
- OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
- OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
- OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
- OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
- OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
-
- OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
- OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
- OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
-
- OFFLOAD(UDP_SPORT, 2, udp.source, 0),
- OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
+ OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
+ OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0, dmac_15_0),
+ OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
+ OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0, smac_15_0),
+ OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0, ethertype),
+ OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0, first_vid),
+
+ OFFLOAD(IP_TTL, 1, ip4.ttl, 0, ttl_hoplimit),
+ OFFLOAD(SIPV4, 4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ OFFLOAD(DIPV4, 4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+
+ OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
+ OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
+ OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
+ OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
+ OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
+
+ OFFLOAD(TCP_SPORT, 2, tcp.source, 0, tcp_sport),
+ OFFLOAD(TCP_DPORT, 2, tcp.dest, 0, tcp_dport),
+ OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
+
+ OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
+ OFFLOAD(UDP_DPORT, 2, udp.dest, 0, udp_dport),
};
/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
@@ -1916,9 +1982,14 @@ static struct mlx5_fields fields[] = {
*/
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ void *headers_c = get_match_headers_criteria(*action_flags,
+ &parse_attr->spec);
+ void *headers_v = get_match_headers_value(*action_flags,
+ &parse_attr->spec);
int i, action_size, nactions, max_actions, first, last, next_z;
void *s_masks_p, *a_masks_p, *vals_p;
struct mlx5_fields *f;
@@ -1942,6 +2013,8 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
nactions = parse_attr->num_mod_hdr_actions;
for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ bool skip;
+
f = &fields[i];
/* avoid seeing bits set from previous iterations */
s_mask = 0;
@@ -1970,19 +2043,34 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
return -EOPNOTSUPP;
}
+ skip = false;
if (s_mask) {
+ void *match_mask = headers_c + f->match_offset;
+ void *match_val = headers_v + f->match_offset;
+
cmd = MLX5_ACTION_TYPE_SET;
mask = s_mask;
vals_p = (void *)set_vals + f->offset;
+ /* don't rewrite if we have a match on the same value */
+ if (cmp_val_mask(vals_p, s_masks_p, match_val,
+ match_mask, f->size))
+ skip = true;
/* clear to denote we consumed this field */
memset(s_masks_p, 0, f->size);
} else {
+ u32 zero = 0;
+
cmd = MLX5_ACTION_TYPE_ADD;
mask = a_mask;
vals_p = (void *)add_vals + f->offset;
+ /* adding 0 is a no-op */
+ if (!memcmp(vals_p, &zero, f->size))
+ skip = true;
/* clear to denote we consumed this field */
memset(a_masks_p, 0, f->size);
}
+ if (skip)
+ continue;
field_bsize = f->size * BITS_PER_BYTE;
@@ -2029,6 +2117,15 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
return 0;
}
+static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
+ int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
struct pedit_headers_action *hdrs,
int namespace,
@@ -2040,11 +2137,7 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
- if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
- max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
- else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
- max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
-
+ max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
/* can get up to a crazy 16 HW actions in a 32-bit pedit SW key */
max_actions = min(max_actions, nkeys * 16);
@@ -2077,6 +2170,12 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
goto out_err;
}
+ if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "The pedit offload action is not supported");
+ goto out_err;
+ }
+
mask = act->mangle.mask;
val = act->mangle.val;
offset = act->mangle.offset;
@@ -2095,6 +2194,7 @@ out_err:
static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct pedit_headers_action *hdrs,
+ u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *cmd_masks;
@@ -2107,7 +2207,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
goto out_err;
}
- err = offload_pedit_fields(hdrs, parse_attr, extack);
+ err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
if (err < 0)
goto out_dealloc_parsed_actions;
@@ -2219,11 +2319,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
u8 ip_proto;
int i;
- if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
- else
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
-
+ headers_v = get_match_headers_value(actions, spec);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
@@ -2269,7 +2365,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
actions = flow->nic_attr->action;
if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
- !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
+ (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@ -2309,10 +2406,25 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
.mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
.mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
};
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
int err;
- if (act->vlan.prio) {
- NL_SET_ERR_MSG_MOD(extack, "Setting VLAN prio is not supported");
+ headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing VLAN prio is not supported");
return -EOPNOTSUPP;
}
@@ -2406,16 +2518,22 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
}
break;
default:
- return -EINVAL;
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+ return -EOPNOTSUPP;
}
}
if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr, hdrs, extack);
+ parse_attr, hdrs, &action, extack);
if (err)
return err;
+ /* in case all pedit actions are skipped, remove the MOD_HDR
+ * flag.
+ */
+ if (parse_attr->num_mod_hdr_actions == 0)
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
attr->action = action;
@@ -2593,15 +2711,60 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
return 0;
}
+static int add_vlan_push_action(struct mlx5e_priv *priv,
+ struct mlx5_esw_flow_attr *attr,
+ struct net_device **out_dev,
+ u32 *action)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+ if (err)
+ return err;
+
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
+ dev_get_iflink(vlan_dev));
+ if (is_vlan_dev(*out_dev))
+ err = add_vlan_push_action(priv, attr, out_dev, action);
+
+ return err;
+}
+
+static int add_vlan_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action)
+{
+ int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev);
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int err = 0;
+
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
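
add_vlan_push_action() walks up the stacked-VLAN chain via dev_get_iflink(), emitting one push per VLAN device until it reaches the real uplink, and add_vlan_pop_action() mirrors that with one pop per encapsulation level of the ingress device, so a QinQ setup produces two pushes or two pops. A toy model of the recursion over an assumed device chain:

    #include <stdio.h>

    struct dev {
        const char *name;
        unsigned short vid;  /* 0 means not a vlan device */
        struct dev *lower;   /* what dev_get_iflink() would resolve to */
    };

    /* one push per vlan device, then return the underlying real device */
    static struct dev *push_vlans(struct dev *d, int *nactions)
    {
        if (!d->vid)
            return d;
        printf("action[%d]: vlan push vid %u\n", (*nactions)++, d->vid);
        return push_vlans(d->lower, nactions);
    }

    int main(void)
    {
        struct dev eth = { "eth0", 0, NULL };
        struct dev svlan = { "eth0.100", 100, &eth };
        struct dev cvlan = { "eth0.100.200", 200, &svlan };  /* QinQ */
        int n = 0;

        struct dev *out = push_vlans(&cvlan, &n);
        printf("forward to %s after %d pushes\n", out->name, n);
        return 0;
    }
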
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
const struct ip_tunnel_info *info = NULL;
const struct flow_action_entry *act;
@@ -2673,6 +2836,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
uplink_upper == out_dev)
out_dev = uplink_dev;
+ if (is_vlan_dev(out_dev)) {
+ err = add_vlan_push_action(priv, attr,
+ &out_dev,
+ &action);
+ if (err)
+ return err;
+ }
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = add_vlan_pop_action(priv, attr,
+ &action);
+ if (err)
+ return err;
+ }
+
if (!mlx5e_eswitch_rep(out_dev))
return -EOPNOTSUPP;
@@ -2686,7 +2863,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
out_dev->ifindex;
parse_attr->tun_info[attr->out_count] = *info;
encap = false;
- attr->parse_attr = parse_attr;
attr->dests[attr->out_count].flags |=
MLX5_ESW_DEST_ENCAP;
attr->out_count++;
@@ -2765,16 +2941,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
break;
}
default:
- return -EINVAL;
+ NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
+ return -EOPNOTSUPP;
}
}
if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr, hdrs, extack);
+ parse_attr, hdrs, &action, extack);
if (err)
return err;
+ /* in case all pedit actions are skipped, remove the MOD_HDR
+ * flag. we might have set split_count either by pedit or
+ * pop/push. if there is no pop/push either, reset it too.
+ */
+ if (parse_attr->num_mod_hdr_actions == 0) {
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
+ attr->split_count = 0;
+ }
}
attr->action = action;
@@ -2943,7 +3130,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 46a747f7c162..e9837aeb7088 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -707,7 +707,7 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
__raw_writel((__force u32)cpu_to_be32(val), addr);
/* We still want ordering, just not swabbing, so add a barrier */
- mb();
+ wmb();
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index fe770cd2151c..1a3cab34b850 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1698,8 +1698,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
{
int err;
- mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
-
err = esw_offloads_steering_init(esw, total_nvports);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index 5d5864e8df3c..a81e8d2168d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -21,6 +21,7 @@ struct mlx5_event_nb {
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
+static int pcie_core(struct notifier_block *, unsigned long, void *);
/* handler which forwards the event to events->nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);
@@ -30,6 +31,7 @@ static struct mlx5_nb events_nbs_ref[] = {
{.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
{.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
{.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
+ {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
{.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
@@ -51,11 +53,14 @@ static struct mlx5_nb events_nbs_ref[] = {
struct mlx5_events {
struct mlx5_core_dev *dev;
+ struct workqueue_struct *wq;
struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
/* driver notifier chain */
struct atomic_notifier_head nh;
/* port module events stats */
struct mlx5_pme_stats pme_stats;
+ /* pcie_core */
+ struct work_struct pcie_core_work;
};
static const char *eqe_type_str(u8 type)
@@ -249,6 +254,69 @@ static int port_module(struct notifier_block *nb, unsigned long type, void *data
return NOTIFY_OK;
}
+enum {
+ MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
+ MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
+ MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
+};
+
+static void mlx5_pcie_event(struct work_struct *work)
+{
+ u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
+ struct mlx5_events *events;
+ struct mlx5_core_dev *dev;
+ u8 power_status;
+ u16 pci_power;
+
+ events = container_of(work, struct mlx5_events, pcie_core_work);
+ dev = events->dev;
+
+ if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
+ return;
+
+ mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MPEIN, 0, 0);
+ power_status = MLX5_GET(mpein_reg, out, pwr_status);
+ pci_power = MLX5_GET(mpein_reg, out, pci_power);
+
+ switch (power_status) {
+ case MLX5_PCI_POWER_COULD_NOT_BE_READ:
+ mlx5_core_info_rl(dev,
+ "PCIe slot power capability was not advertised.\n");
+ break;
+ case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
+ mlx5_core_warn_rl(dev,
+ "Detected insufficient power on the PCIe slot (%uW).\n",
+ pci_power);
+ break;
+ case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
+ mlx5_core_info_rl(dev,
+ "PCIe slot advertised sufficient power (%uW).\n",
+ pci_power);
+ break;
+ }
+}
+
+static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
+{
+ struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb,
+ struct mlx5_event_nb,
+ nb);
+ struct mlx5_events *events = event_nb->ctx;
+ struct mlx5_eqe *eqe = data;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
+ queue_work(events->wq, &events->pcie_core_work);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
*stats = dev->priv.events->pme_stats;
@@ -277,11 +345,17 @@ int mlx5_events_init(struct mlx5_core_dev *dev)
ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
events->dev = dev;
dev->priv.events = events;
+ events->wq = create_singlethread_workqueue("mlx5_events");
+ if (!events->wq) {
+ kvfree(events);
+ return -ENOMEM;
+ }
+ INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
+
return 0;
}
void mlx5_events_cleanup(struct mlx5_core_dev *dev)
{
+ destroy_workqueue(dev->priv.events->wq);
kvfree(dev->priv.events);
}
@@ -304,6 +378,7 @@ void mlx5_events_stop(struct mlx5_core_dev *dev)
for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
+ flush_workqueue(events->wq);
}
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
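The pcie_core() notifier above runs from the EQ notifier chain, i.e. in atomic context, while reading MPEIN goes through a firmware command that can sleep; hence the dedicated single-threaded workqueue and the queue_work() deferral, with mlx5_events_stop() flushing the queue before teardown. The shape of that pattern as a self-contained sketch (hypothetical names, system_wq instead of a private queue):

	#include <linux/kernel.h>
	#include <linux/notifier.h>
	#include <linux/workqueue.h>

	struct pwr_ctx {
		struct notifier_block nb;
		struct work_struct work;
	};

	static void pwr_work(struct work_struct *work)
	{
		struct pwr_ctx *ctx = container_of(work, struct pwr_ctx, work);

		/* process context: sleeping register queries are safe here */
		(void)ctx;
	}

	static int pwr_notify(struct notifier_block *nb, unsigned long type,
			      void *data)
	{
		struct pwr_ctx *ctx = container_of(nb, struct pwr_ctx, nb);

		queue_work(system_wq, &ctx->work);	/* cheap, never sleeps */
		return NOTIFY_OK;
	}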
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 873541ef4c1b..ca2296a2f9ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -135,7 +135,7 @@ static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
*conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
/* Make sure that doorbell record is visible before ringing */
wmb();
- mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET);
}
static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 7e2e871dbf83..52c9dee91ea4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -37,6 +37,7 @@
#include <linux/mlx5/eq.h>
+#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/cmd.h"
@@ -62,26 +63,26 @@ struct mlx5_fpga_device {
};
#define mlx5_fpga_dbg(__adev, format, ...) \
- dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
#define mlx5_fpga_err(__adev, format, ...) \
- dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
#define mlx5_fpga_warn(__adev, format, ...) \
- dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, ##__VA_ARGS__)
+ mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
- dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
- format, __func__, __LINE__, ##__VA_ARGS__)
+ mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \
+ format, __func__, __LINE__, ##__VA_ARGS__)
#define mlx5_fpga_notice(__adev, format, ...) \
- dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
#define mlx5_fpga_info(__adev, format, ...) \
- dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+ mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__)
int mlx5_fpga_init(struct mlx5_core_dev *mdev);
void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
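With the redefinitions above, FPGA log lines go through the mlx5_core_* helpers and therefore carry the driver-level device name instead of dereferencing mdev->pdev, which keeps logging usable as the PCI device pointer is decoupled from the core device. A hypothetical call site (message text invented for illustration):

	/* expands, roughly, to mlx5_core_err((fdev)->mdev,
	 * "FPGA: %s:%d:(pid %d): QP create failed, err %d\n", ...)
	 */
	mlx5_fpga_err(fdev, "QP create failed, err %d\n", err);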
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 0be3eb86dd84..78e073243f40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -819,7 +819,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
struct mlx5_ft_underlay_qp *uqp;
int min_level = INT_MAX;
- int err;
+ int err = 0;
u32 qpn;
if (root->root_ft)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index cb9fa3430c53..3b98fcdd7d0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -152,11 +152,11 @@ static void health_recover(struct work_struct *work)
nic_state = mlx5_get_nic_state(dev);
if (nic_state == MLX5_NIC_IFC_INVALID) {
- dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+ mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n");
return;
}
- dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+ mlx5_core_err(dev, "starting health recovery flow\n");
mlx5_recover_device(dev);
}
@@ -180,8 +180,8 @@ static void health_care(struct work_struct *work)
if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
schedule_delayed_work(&health->recover_work, recover_delay);
else
- dev_err(&dev->pdev->dev,
- "new health works are not permitted at this stage\n");
+ mlx5_core_err(dev,
+ "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
}
@@ -228,18 +228,22 @@ static void print_health_info(struct mlx5_core_dev *dev)
return;
for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
- dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+ mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
+ ioread32be(h->assert_var + i));
- dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
- dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ mlx5_core_err(dev, "assert_exit_ptr 0x%08x\n",
+ ioread32be(&h->assert_exit_ptr));
+ mlx5_core_err(dev, "assert_callra 0x%08x\n",
+ ioread32be(&h->assert_callra));
sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
- dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
- dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
- dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
- dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
- dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ mlx5_core_err(dev, "fw_ver %s\n", fw_str);
+ mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+ mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
+ hsynd_str(ioread8(&h->synd)));
+ mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
fw = ioread32be(&h->fw_ver);
- dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw);
+ mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
}
static unsigned long get_next_poll_jiffies(void)
@@ -262,8 +266,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
queue_work(health->wq, &health->work);
else
- dev_err(&dev->pdev->dev,
- "new health works are not permitted at this stage\n");
+ mlx5_core_err(dev, "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
}
@@ -284,7 +287,7 @@ static void poll_health(struct timer_list *t)
health->prev = count;
if (health->miss_counter == MAX_MISSES) {
- dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
+ mlx5_core_err(dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
}
@@ -352,6 +355,13 @@ void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
cancel_delayed_work_sync(&dev->priv.health.recover_work);
}
+void mlx5_health_flush(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ flush_workqueue(health->wq);
+}
+
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
@@ -370,7 +380,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
return -ENOMEM;
strcpy(name, "mlx5_health");
- strcat(name, dev_name(&dev->pdev->dev));
+ strcat(name, dev->priv.name);
health->wq = create_singlethread_workqueue(name);
kfree(name);
if (!health->wq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 4eac42555c7d..9b03ae1e1e10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -77,15 +77,14 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
void *ppriv)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
- u16 max_mtu;
int err;
err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
if (err)
return err;
- mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
- netdev->mtu = max_mtu;
+ mlx5e_set_netdev_mtu_boundaries(priv);
+ netdev->mtu = netdev->max_mtu;
mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 76716419370d..5245b0b1770f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -567,24 +567,23 @@ query_ex:
static int set_hca_cap(struct mlx5_core_dev *dev)
{
- struct pci_dev *pdev = dev->pdev;
int err;
err = handle_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ mlx5_core_err(dev, "handle_hca_cap failed\n");
goto out;
}
err = handle_hca_cap_atomic(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
goto out;
}
err = handle_hca_cap_odp(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+ mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
goto out;
}
@@ -716,36 +715,29 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -EOPNOTSUPP;
}
-static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
- struct pci_dev *pdev = dev->pdev;
+ struct mlx5_priv *priv = &dev->priv;
int err = 0;
- pci_set_drvdata(dev->pdev, dev);
- strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
- priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
-
- mutex_init(&priv->pgdir_mutex);
- INIT_LIST_HEAD(&priv->pgdir_list);
- spin_lock_init(&priv->mkey_lock);
+ dev->pdev = pdev;
+ priv->pci_dev_data = id->driver_data;
- mutex_init(&priv->alloc_mutex);
+ pci_set_drvdata(dev->pdev, dev);
+ dev->bar_addr = pci_resource_start(pdev, 0);
priv->numa_node = dev_to_node(&dev->pdev->dev);
- if (mlx5_debugfs_root)
- priv->dbg_root =
- debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
-
err = mlx5_pci_enable_device(dev);
if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
- goto err_dbg;
+ mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
+ return err;
}
err = request_bar(pdev);
if (err) {
- dev_err(&pdev->dev, "error requesting BARs, aborting\n");
+ mlx5_core_err(dev, "error requesting BARs, aborting\n");
goto err_disable;
}
@@ -753,7 +745,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
err = set_dma_caps(pdev);
if (err) {
- dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+ mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
goto err_clr_master;
}
@@ -762,11 +754,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
mlx5_core_dbg(dev, "Enabling pci atomics failed\n");
- dev->iseg_base = pci_resource_start(dev->pdev, 0);
+ dev->iseg_base = dev->bar_addr;
dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
if (!dev->iseg) {
err = -ENOMEM;
- dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+ mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
goto err_clr_master;
}
@@ -777,52 +769,47 @@ err_clr_master:
release_bar(dev->pdev);
err_disable:
mlx5_pci_disable_device(dev);
-
-err_dbg:
- debugfs_remove(priv->dbg_root);
return err;
}
-static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static void mlx5_pci_close(struct mlx5_core_dev *dev)
{
iounmap(dev->iseg);
pci_clear_master(dev->pdev);
release_bar(dev->pdev);
mlx5_pci_disable_device(dev);
- debugfs_remove_recursive(priv->dbg_root);
}
-static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev)
{
- struct pci_dev *pdev = dev->pdev;
int err;
- priv->devcom = mlx5_devcom_register_device(dev);
- if (IS_ERR(priv->devcom))
- dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n",
- priv->devcom);
+ dev->priv.devcom = mlx5_devcom_register_device(dev);
+ if (IS_ERR(dev->priv.devcom))
+ mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
+ dev->priv.devcom);
err = mlx5_query_board_id(dev);
if (err) {
- dev_err(&pdev->dev, "query board id failed\n");
+ mlx5_core_err(dev, "query board id failed\n");
goto err_devcom;
}
err = mlx5_eq_table_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize eq\n");
+ mlx5_core_err(dev, "failed to initialize eq\n");
goto err_devcom;
}
err = mlx5_events_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize events\n");
+ mlx5_core_err(dev, "failed to initialize events\n");
goto err_eq_cleanup;
}
err = mlx5_cq_debugfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
+ mlx5_core_err(dev, "failed to initialize cq debugfs\n");
goto err_events_cleanup;
}
@@ -838,31 +825,31 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
err = mlx5_init_rl_table(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init rate limiting\n");
+ mlx5_core_err(dev, "Failed to init rate limiting\n");
goto err_tables_cleanup;
}
err = mlx5_mpfs_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+ mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
goto err_rl_cleanup;
}
err = mlx5_eswitch_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
goto err_mpfs_cleanup;
}
err = mlx5_sriov_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
goto err_eswitch_cleanup;
}
err = mlx5_fpga_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+ mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
goto err_sriov_cleanup;
}
@@ -912,93 +899,78 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
- bool boot)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
{
- struct pci_dev *pdev = dev->pdev;
int err;
- dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
- mutex_lock(&dev->intf_state_mutex);
- if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
- __func__);
- goto out;
- }
-
- dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
- fw_rev_min(dev), fw_rev_sub(dev));
+ mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
/* Only PFs hold the relevant PCIe information for this query */
if (mlx5_core_is_pf(dev))
pcie_print_link_status(dev->pdev);
- /* on load removing any previous indication of internal error, device is
- * up
- */
- dev->state = MLX5_DEVICE_STATE_UP;
-
/* wait for firmware to accept initialization segment configurations
*/
err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
if (err) {
- dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
- FW_PRE_INIT_TIMEOUT_MILI);
- goto out_err;
+ mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+ FW_PRE_INIT_TIMEOUT_MILI);
+ return err;
}
err = mlx5_cmd_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
- goto out_err;
+ mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
+ return err;
}
err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
if (err) {
- dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
- FW_INIT_TIMEOUT_MILI);
+ mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
+ FW_INIT_TIMEOUT_MILI);
goto err_cmd_cleanup;
}
err = mlx5_core_enable_hca(dev, 0);
if (err) {
- dev_err(&pdev->dev, "enable hca failed\n");
+ mlx5_core_err(dev, "enable hca failed\n");
goto err_cmd_cleanup;
}
err = mlx5_core_set_issi(dev);
if (err) {
- dev_err(&pdev->dev, "failed to set issi\n");
+ mlx5_core_err(dev, "failed to set issi\n");
goto err_disable_hca;
}
err = mlx5_satisfy_startup_pages(dev, 1);
if (err) {
- dev_err(&pdev->dev, "failed to allocate boot pages\n");
+ mlx5_core_err(dev, "failed to allocate boot pages\n");
goto err_disable_hca;
}
err = set_hca_ctrl(dev);
if (err) {
- dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+ mlx5_core_err(dev, "set_hca_ctrl failed\n");
goto reclaim_boot_pages;
}
err = set_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "set_hca_cap failed\n");
+ mlx5_core_err(dev, "set_hca_cap failed\n");
goto reclaim_boot_pages;
}
err = mlx5_satisfy_startup_pages(dev, 0);
if (err) {
- dev_err(&pdev->dev, "failed to allocate init pages\n");
+ mlx5_core_err(dev, "failed to allocate init pages\n");
goto reclaim_boot_pages;
}
err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
- dev_err(&pdev->dev, "init hca failed\n");
+ mlx5_core_err(dev, "init hca failed\n");
goto reclaim_boot_pages;
}
@@ -1008,23 +980,50 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
err = mlx5_query_hca_caps(dev);
if (err) {
- dev_err(&pdev->dev, "query hca failed\n");
- goto err_stop_poll;
+ mlx5_core_err(dev, "query hca failed\n");
+ goto stop_health;
}
- if (boot) {
- err = mlx5_init_once(dev, priv);
- if (err) {
- dev_err(&pdev->dev, "sw objs init failed\n");
- goto err_stop_poll;
- }
+ return 0;
+
+stop_health:
+ mlx5_stop_health_poll(dev, boot);
+reclaim_boot_pages:
+ mlx5_reclaim_startup_pages(dev);
+err_disable_hca:
+ mlx5_core_disable_hca(dev, 0);
+err_cmd_cleanup:
+ mlx5_cmd_cleanup(dev);
+
+ return err;
+}
+
+static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
+{
+ int err;
+
+ mlx5_stop_health_poll(dev, boot);
+ err = mlx5_cmd_teardown_hca(dev);
+ if (err) {
+ mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
+ return err;
}
+ mlx5_reclaim_startup_pages(dev);
+ mlx5_core_disable_hca(dev, 0);
+ mlx5_cmd_cleanup(dev);
+
+ return 0;
+}
+
+static int mlx5_load(struct mlx5_core_dev *dev)
+{
+ int err;
dev->priv.uar = mlx5_get_uars_page(dev);
if (IS_ERR(dev->priv.uar)) {
- dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+ mlx5_core_err(dev, "Failed allocating uar, aborting\n");
err = PTR_ERR(dev->priv.uar);
- goto err_get_uars;
+ return err;
}
mlx5_events_start(dev);
@@ -1032,132 +1031,155 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
err = mlx5_eq_table_create(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to create EQs\n");
+ mlx5_core_err(dev, "Failed to create EQs\n");
goto err_eq_table;
}
err = mlx5_fw_tracer_init(dev->tracer);
if (err) {
- dev_err(&pdev->dev, "Failed to init FW tracer\n");
+ mlx5_core_err(dev, "Failed to init FW tracer\n");
goto err_fw_tracer;
}
err = mlx5_fpga_device_start(dev);
if (err) {
- dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ mlx5_core_err(dev, "fpga device start failed %d\n", err);
goto err_fpga_start;
}
err = mlx5_accel_ipsec_init(dev);
if (err) {
- dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ mlx5_core_err(dev, "IPSec device start failed %d\n", err);
goto err_ipsec_start;
}
err = mlx5_accel_tls_init(dev);
if (err) {
- dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+ mlx5_core_err(dev, "TLS device start failed %d\n", err);
goto err_tls_start;
}
err = mlx5_init_fs(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init flow steering\n");
+ mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
}
err = mlx5_core_set_hca_defaults(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to set hca defaults\n");
+ mlx5_core_err(dev, "Failed to set hca defaults\n");
goto err_fs;
}
err = mlx5_sriov_attach(dev);
if (err) {
- dev_err(&pdev->dev, "sriov init failed %d\n", err);
+ mlx5_core_err(dev, "sriov init failed %d\n", err);
goto err_sriov;
}
err = mlx5_ec_init(dev);
if (err) {
- dev_err(&pdev->dev, "Failed to init embedded CPU\n");
+ mlx5_core_err(dev, "Failed to init embedded CPU\n");
goto err_ec;
}
- if (mlx5_device_registered(dev)) {
- mlx5_attach_device(dev);
- } else {
- err = mlx5_register_device(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
- goto err_reg_dev;
- }
- }
-
- set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-out:
- mutex_unlock(&dev->intf_state_mutex);
-
return 0;
-err_reg_dev:
- mlx5_ec_cleanup(dev);
-
err_ec:
mlx5_sriov_detach(dev);
-
err_sriov:
mlx5_cleanup_fs(dev);
-
err_fs:
mlx5_accel_tls_cleanup(dev);
-
err_tls_start:
mlx5_accel_ipsec_cleanup(dev);
-
err_ipsec_start:
mlx5_fpga_device_stop(dev);
-
err_fpga_start:
mlx5_fw_tracer_cleanup(dev->tracer);
-
err_fw_tracer:
mlx5_eq_table_destroy(dev);
-
err_eq_table:
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
- mlx5_put_uars_page(dev, priv->uar);
+ mlx5_put_uars_page(dev, dev->priv.uar);
+ return err;
+}
-err_get_uars:
- if (boot)
- mlx5_cleanup_once(dev);
+static void mlx5_unload(struct mlx5_core_dev *dev)
+{
+ mlx5_ec_cleanup(dev);
+ mlx5_sriov_detach(dev);
+ mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_accel_tls_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_eq_table_destroy(dev);
+ mlx5_pagealloc_stop(dev);
+ mlx5_events_stop(dev);
+ mlx5_put_uars_page(dev, dev->priv.uar);
+}
-err_stop_poll:
- mlx5_stop_health_poll(dev, boot);
- if (mlx5_cmd_teardown_hca(dev)) {
- dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
- goto out_err;
+static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+{
+ int err = 0;
+
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_warn(dev, "interface is up, NOP\n");
+ goto out;
}
+ /* remove any previous indication of internal error */
+ dev->state = MLX5_DEVICE_STATE_UP;
-reclaim_boot_pages:
- mlx5_reclaim_startup_pages(dev);
+ err = mlx5_function_setup(dev, boot);
+ if (err)
+ goto out;
-err_disable_hca:
- mlx5_core_disable_hca(dev, 0);
+ if (boot) {
+ err = mlx5_init_once(dev);
+ if (err) {
+ mlx5_core_err(dev, "sw objs init failed\n");
+ goto function_teardown;
+ }
+ }
-err_cmd_cleanup:
- mlx5_cmd_cleanup(dev);
+ err = mlx5_load(dev);
+ if (err)
+ goto err_load;
-out_err:
+ if (mlx5_device_registered(dev)) {
+ mlx5_attach_device(dev);
+ } else {
+ err = mlx5_register_device(dev);
+ if (err) {
+ mlx5_core_err(dev, "register device failed %d\n", err);
+ goto err_reg_dev;
+ }
+ }
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return err;
+
+err_reg_dev:
+ mlx5_unload(dev);
+err_load:
+ if (boot)
+ mlx5_cleanup_once(dev);
+function_teardown:
+ mlx5_function_teardown(dev, boot);
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mutex_unlock(&dev->intf_state_mutex);
return err;
}
-static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
- bool cleanup)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
{
int err = 0;
@@ -1166,8 +1188,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mutex_lock(&dev->intf_state_mutex);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
- dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
- __func__);
+ mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+ __func__);
if (cleanup)
mlx5_cleanup_once(dev);
goto out;
@@ -1178,30 +1200,12 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
- mlx5_ec_cleanup(dev);
- mlx5_sriov_detach(dev);
- mlx5_cleanup_fs(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_accel_tls_cleanup(dev);
- mlx5_fpga_device_stop(dev);
- mlx5_fw_tracer_cleanup(dev->tracer);
- mlx5_eq_table_destroy(dev);
- mlx5_pagealloc_stop(dev);
- mlx5_events_stop(dev);
- mlx5_put_uars_page(dev, priv->uar);
+ mlx5_unload(dev);
+
if (cleanup)
mlx5_cleanup_once(dev);
- mlx5_stop_health_poll(dev, cleanup);
-
- err = mlx5_cmd_teardown_hca(dev);
- if (err) {
- dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
- goto out;
- }
- mlx5_reclaim_startup_pages(dev);
- mlx5_core_disable_hca(dev, 0);
- mlx5_cmd_cleanup(dev);
+ mlx5_function_teardown(dev, cleanup);
out:
mutex_unlock(&dev->intf_state_mutex);
return err;
@@ -1218,29 +1222,15 @@ static const struct devlink_ops mlx5_devlink_ops = {
#endif
};
-#define MLX5_IB_MOD "mlx5_ib"
-static int init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx, const char *name)
{
- struct mlx5_core_dev *dev;
- struct devlink *devlink;
- struct mlx5_priv *priv;
+ struct mlx5_priv *priv = &dev->priv;
int err;
- devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
- if (!devlink) {
- dev_err(&pdev->dev, "kzalloc failed\n");
- return -ENOMEM;
- }
-
- dev = devlink_priv(devlink);
- priv = &dev->priv;
- priv->pci_dev_data = id->driver_data;
-
- pci_set_drvdata(pdev, dev);
+ strncpy(priv->name, name, MLX5_MAX_NAME_LEN);
+ priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
- dev->pdev = pdev;
- dev->profile = &profile[prof_sel];
+ dev->profile = &profile[profile_idx];
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
@@ -1252,25 +1242,72 @@ static int init_one(struct pci_dev *pdev,
INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
- err = mlx5_pci_init(dev, priv);
- if (err) {
- dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
- goto clean_dev;
+ mutex_init(&priv->alloc_mutex);
+ mutex_init(&priv->pgdir_mutex);
+ INIT_LIST_HEAD(&priv->pgdir_list);
+ spin_lock_init(&priv->mkey_lock);
+
+ priv->dbg_root = debugfs_create_dir(name, mlx5_debugfs_root);
+ if (!priv->dbg_root) {
+ pr_err("mlx5_core: %s error, Cannot create debugfs dir, aborting\n", name);
+ return -ENOMEM;
}
err = mlx5_health_init(dev);
- if (err) {
- dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
- goto close_pci;
- }
+ if (err)
+ goto err_health_init;
err = mlx5_pagealloc_init(dev);
if (err)
goto err_pagealloc_init;
- err = mlx5_load_one(dev, priv, true);
+ return 0;
+
+err_pagealloc_init:
+ mlx5_health_cleanup(dev);
+err_health_init:
+ debugfs_remove(dev->priv.dbg_root);
+
+ return err;
+}
+
+static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+{
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+ debugfs_remove_recursive(dev->priv.dbg_root);
+}
+
+#define MLX5_IB_MOD "mlx5_ib"
+static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+ if (!devlink) {
+ dev_err(&pdev->dev, "kzalloc failed\n");
+ return -ENOMEM;
+ }
+
+ dev = devlink_priv(devlink);
+
+ err = mlx5_mdev_init(dev, prof_sel, dev_name(&pdev->dev));
+ if (err)
+ goto mdev_init_err;
+
+ err = mlx5_pci_init(dev, pdev, id);
+ if (err) {
+ mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
+ err);
+ goto pci_init_err;
+ }
+
+ err = mlx5_load_one(dev, true);
if (err) {
- dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
+ mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+ err);
goto err_load_one;
}
@@ -1284,14 +1321,13 @@ static int init_one(struct pci_dev *pdev,
return 0;
clean_load:
- mlx5_unload_one(dev, priv, true);
+ mlx5_unload_one(dev, true);
+
err_load_one:
- mlx5_pagealloc_cleanup(dev);
-err_pagealloc_init:
- mlx5_health_cleanup(dev);
-close_pci:
- mlx5_pci_close(dev, priv);
-clean_dev:
+ mlx5_pci_close(dev);
+pci_init_err:
+ mlx5_mdev_uninit(dev);
+mdev_init_err:
devlink_free(devlink);
return err;
@@ -1301,20 +1337,18 @@ static void remove_one(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
- struct mlx5_priv *priv = &dev->priv;
devlink_unregister(devlink);
mlx5_unregister_device(dev);
- if (mlx5_unload_one(dev, priv, true)) {
- dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
- mlx5_health_cleanup(dev);
+ if (mlx5_unload_one(dev, true)) {
+ mlx5_core_err(dev, "mlx5_unload_one failed\n");
+ mlx5_health_flush(dev);
return;
}
- mlx5_pagealloc_cleanup(dev);
- mlx5_health_cleanup(dev);
- mlx5_pci_close(dev, priv);
+ mlx5_pci_close(dev);
+ mlx5_mdev_uninit(dev);
devlink_free(devlink);
}
@@ -1322,12 +1356,11 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
mlx5_enter_error_state(dev, false);
- mlx5_unload_one(dev, priv, false);
+ mlx5_unload_one(dev, false);
/* In case of kernel call drain the health wq */
if (state) {
mlx5_drain_health_wq(dev);
@@ -1354,7 +1387,9 @@ static int wait_vital(struct pci_dev *pdev)
count = ioread32be(health->health_counter);
if (count && count != 0xffffffff) {
if (last_count && last_count != count) {
- dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+ mlx5_core_info(dev,
+ "wait vital counter value 0x%x after %d iterations\n",
+ count, i);
return 0;
}
last_count = count;
@@ -1370,12 +1405,12 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
err = mlx5_pci_enable_device(dev);
if (err) {
- dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
- , __func__, err);
+ mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
+ __func__, err);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -1384,7 +1419,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
pci_save_state(pdev);
if (wait_vital(pdev)) {
- dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+ mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -1394,17 +1429,16 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
static void mlx5_pci_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
int err;
- dev_info(&pdev->dev, "%s was called\n", __func__);
+ mlx5_core_info(dev, "%s was called\n", __func__);
- err = mlx5_load_one(dev, priv, false);
+ err = mlx5_load_one(dev, false);
if (err)
- dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
- , __func__, err);
+ mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
+ __func__, err);
else
- dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+ mlx5_core_info(dev, "%s: device recovered\n", __func__);
}
static const struct pci_error_handlers mlx5_err_handler = {
@@ -1466,13 +1500,12 @@ succeed:
static void shutdown(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
- struct mlx5_priv *priv = &dev->priv;
int err;
- dev_info(&pdev->dev, "Shutdown was called\n");
+ mlx5_core_info(dev, "Shutdown was called\n");
err = mlx5_try_fast_unload(dev);
if (err)
- mlx5_unload_one(dev, priv, false);
+ mlx5_unload_one(dev, false);
mlx5_pci_disable_device(dev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 6fb99be60584..8213c994e205 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -48,12 +48,12 @@
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
- dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_debug("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_dbg_once(__dev, format, ...) \
- dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_debug_once("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
@@ -64,28 +64,37 @@ do { \
} while (0)
#define mlx5_core_err(__dev, format, ...) \
- dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_err("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
-#define mlx5_core_err_rl(__dev, format, ...) \
- dev_err_ratelimited(&(__dev)->pdev->dev, \
- "%s:%d:(pid %d): " format, \
- __func__, __LINE__, current->pid, \
+#define mlx5_core_err_rl(__dev, format, ...) \
+ pr_err_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_warn(__dev, format, ...) \
- dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_warn("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_warn_once(__dev, format, ...) \
- dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ pr_warn_once("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
+#define mlx5_core_warn_rl(__dev, format, ...) \
+ pr_warn_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
#define mlx5_core_info(__dev, format, ...) \
- dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+ pr_info("%s " format, (__dev)->priv.name, ##__VA_ARGS__)
+
+#define mlx5_core_info_rl(__dev, format, ...) \
+ pr_info_ratelimited("%s:%s:%d:(pid %d): " format, (__dev)->priv.name, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
enum {
MLX5_CMD_DATA, /* print command payload only */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 94464723ff77..0d006224d7b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -79,7 +79,7 @@ static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
else
system_page_index = index;
- return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+ return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index;
}
static void up_rel_func(struct kref *kref)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 027e393c7cb2..c2fba5c7c9ee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -934,6 +934,46 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
pool_type, p_cur, p_max);
}
+static int
+mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ char fw_info_psid[MLXSW_REG_MGIR_FW_INFO_PSID_SIZE];
+ u32 hw_rev, fw_major, fw_minor, fw_sub_minor;
+ char mgir_pl[MLXSW_REG_MGIR_LEN];
+ char buf[32];
+ int err;
+
+ err = devlink_info_driver_name_put(req,
+ mlxsw_core->bus_info->device_kind);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgir_pack(mgir_pl);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mgir_unpack(mgir_pl, &hw_rev, fw_info_psid, &fw_major,
+ &fw_minor, &fw_sub_minor);
+
+ sprintf(buf, "%X", hw_rev);
+ err = devlink_info_version_fixed_put(req, "hw.revision", buf);
+ if (err)
+ return err;
+
+ err = devlink_info_version_fixed_put(req, "fw.psid", fw_info_psid);
+ if (err)
+ return err;
+
+ sprintf(buf, "%d.%d.%d", fw_major, fw_minor, fw_sub_minor);
+ err = devlink_info_version_running_put(req, "fw.version", buf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
@@ -968,6 +1008,7 @@ static const struct devlink_ops mlxsw_devlink_ops = {
.sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get,
+ .info_get = mlxsw_devlink_info_get,
};
static int
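The new .info_get callback exposes the MGIR-derived versions through the generic devlink info interface, so userspace can read them with iproute2's devlink tool. A hypothetical session (values invented for illustration; exact layout depends on the iproute2 version):

	$ devlink dev info pci/0000:01:00.0
	pci/0000:01:00.0:
	  driver mlxsw_spectrum
	  versions:
	      fixed:
	        hw.revision A1
	        fw.psid MT_PLACEHOLDER
	      running:
	        fw.version 13.2000.1122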
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index eb4c5e8964cd..e1ee7f4994db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8534,6 +8534,60 @@ static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
mlxsw_reg_mpar_pa_id_set(payload, pa_id);
}
+/* MGIR - Management General Information Register
+ * ----------------------------------------------
+ * The MGIR register allows software to query general hardware and
+ * firmware information.
+ */
+#define MLXSW_REG_MGIR_ID 0x9020
+#define MLXSW_REG_MGIR_LEN 0x9C
+
+MLXSW_REG_DEFINE(mgir, MLXSW_REG_MGIR_ID, MLXSW_REG_MGIR_LEN);
+
+/* reg_mgir_hw_info_device_hw_revision
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, hw_info_device_hw_revision, 0x0, 16, 16);
+
+#define MLXSW_REG_MGIR_FW_INFO_PSID_SIZE 16
+
+/* reg_mgir_fw_info_psid
+ * PSID (ASCII string).
+ * Access: RO
+ */
+MLXSW_ITEM_BUF(reg, mgir, fw_info_psid, 0x30, MLXSW_REG_MGIR_FW_INFO_PSID_SIZE);
+
+/* reg_mgir_fw_info_extended_major
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_major, 0x44, 0, 32);
+
+/* reg_mgir_fw_info_extended_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_minor, 0x48, 0, 32);
+
+/* reg_mgir_fw_info_extended_sub_minor
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgir, fw_info_extended_sub_minor, 0x4C, 0, 32);
+
+static inline void mlxsw_reg_mgir_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mgir, payload);
+}
+
+static inline void
+mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,
+ u32 *fw_major, u32 *fw_minor, u32 *fw_sub_minor)
+{
+ *hw_rev = mlxsw_reg_mgir_hw_info_device_hw_revision_get(payload);
+ mlxsw_reg_mgir_fw_info_psid_memcpy_from(payload, fw_info_psid);
+ *fw_major = mlxsw_reg_mgir_fw_info_extended_major_get(payload);
+ *fw_minor = mlxsw_reg_mgir_fw_info_extended_minor_get(payload);
+ *fw_sub_minor = mlxsw_reg_mgir_fw_info_extended_sub_minor_get(payload);
+}
+
/* MRSR - Management Reset and Shutdown Register
* ---------------------------------------------
* MRSR register is used to reset or shutdown the switch or
@@ -9958,6 +10012,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcia),
MLXSW_REG(mpat),
MLXSW_REG(mpar),
+ MLXSW_REG(mgir),
MLXSW_REG(mrsr),
MLXSW_REG(mlcr),
MLXSW_REG(mpsc),
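For readers unfamiliar with the accessor macros used for MGIR above: MLXSW_ITEM32(reg, mgir, <field>, offset, shift, width) generates getters over the raw big-endian register payload, e.g. hw_info_device_hw_revision lives in the top 16 bits of the dword at offset 0x0. A simplified hand-written equivalent of such a getter (a sketch, not the actual macro expansion):

	static inline u32 item32_get(const char *payload, unsigned int off,
				     unsigned int shift, unsigned int width)
	{
		u32 reg = be32_to_cpu(*(const __be32 *)(payload + off));
		u32 mask = width == 32 ? ~0U : (1U << width) - 1;

		return (reg >> shift) & mask;
	}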
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0ba9daa05a52..5f05723011b4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4915,7 +4915,7 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
- return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_has_gw;
+ return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
}
static struct fib6_info *
@@ -5055,7 +5055,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
- return rt->fib6_nh.fib_nh_has_gw ||
+ return rt->fib6_nh.fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
@@ -6092,6 +6092,14 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
return notifier_from_errno(-EINVAL);
}
+ if (info->family == AF_INET) {
+ struct fib_entry_notifier_info *fen_info = ptr;
+
+ if (fen_info->fi->fib_nh_is_v6) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 536c23c578c3..560a60e522f9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -316,7 +316,11 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
dev = rt->dst.dev;
*saddrp = fl4.saddr;
- *daddrp = rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ *daddrp = rt->rt_gw4;
+ /* cannot offload if route has an IPv6 gateway */
+ else if (rt->rt_gw_family == AF_INET6)
+ dev = NULL;
out:
ip_rt_put(rt);
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 47c708f08ade..0673f3aa2c8d 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -15,6 +15,7 @@ nfp-objs := \
nfpcore/nfp_resource.o \
nfpcore/nfp_rtsym.o \
nfpcore/nfp_target.o \
+ ccm.o \
nfp_asm.o \
nfp_app.o \
nfp_app_nic.o \
diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
index 9584f03f3efa..69e84ff7f2e5 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c
@@ -261,10 +261,15 @@ int nfp_abm_ctrl_qm_disable(struct nfp_abm *abm)
int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET;
struct nfp_net *nn = alink->vnic;
unsigned int i;
int err;
+ err = nfp_net_mbox_lock(nn, alink->abm->prio_map_len);
+ if (err)
+ return err;
+
/* Write data_len and wipe reserved */
nn_writeq(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATALEN,
alink->abm->prio_map_len);
@@ -273,8 +278,7 @@ int nfp_abm_ctrl_prio_map_update(struct nfp_abm_link *alink, u32 *packed)
nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_ABM_MBOX_DATA + i,
packed[i / sizeof(u32)]);
- err = nfp_net_reconfig_mbox(nn,
- NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET);
+ err = nfp_net_mbox_reconfig_and_unlock(nn, cmd);
if (err)
nfp_err(alink->abm->app->cpp,
"setting DSCP -> VQ map failed with error %d\n", err);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 9b6cfa697879..bc9850e4ec5e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -6,48 +6,13 @@
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
-#include <linux/wait.h>
+#include "../ccm.h"
#include "../nfp_app.h"
#include "../nfp_net.h"
#include "fw.h"
#include "main.h"
-#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
-
-static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
-{
- u16 used_tags;
-
- used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last;
-
- return used_tags > NFP_BPF_TAG_ALLOC_SPAN;
-}
-
-static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf)
-{
- /* All FW communication for BPF is request-reply. To make sure we
- * don't reuse the message ID too early after timeout - limit the
- * number of requests in flight.
- */
- if (nfp_bpf_all_tags_busy(bpf)) {
- cmsg_warn(bpf, "all FW request contexts busy!\n");
- return -EAGAIN;
- }
-
- WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator));
- return bpf->tag_alloc_next++;
-}
-
-static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
- WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator));
-
- while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) &&
- bpf->tag_alloc_last != bpf->tag_alloc_next)
- bpf->tag_alloc_last++;
-}
-
static struct sk_buff *
nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
{
@@ -87,149 +52,6 @@ nfp_bpf_cmsg_map_reply_size(struct nfp_app_bpf *bpf, unsigned int n)
return size;
}
-static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
-{
- struct cmsg_hdr *hdr;
-
- hdr = (struct cmsg_hdr *)skb->data;
-
- return hdr->type;
-}
-
-static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
-{
- struct cmsg_hdr *hdr;
-
- hdr = (struct cmsg_hdr *)skb->data;
-
- return be16_to_cpu(hdr->tag);
-}
-
-static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
- unsigned int msg_tag;
- struct sk_buff *skb;
-
- skb_queue_walk(&bpf->cmsg_replies, skb) {
- msg_tag = nfp_bpf_cmsg_get_tag(skb);
- if (msg_tag == tag) {
- nfp_bpf_free_tag(bpf, tag);
- __skb_unlink(skb, &bpf->cmsg_replies);
- return skb;
- }
- }
-
- return NULL;
-}
-
-static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
-{
- struct sk_buff *skb;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- skb = __nfp_bpf_reply(bpf, tag);
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return skb;
-}
-
-static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag)
-{
- struct sk_buff *skb;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- skb = __nfp_bpf_reply(bpf, tag);
- if (!skb)
- nfp_bpf_free_tag(bpf, tag);
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return skb;
-}
-
-static struct sk_buff *
-nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
- int tag)
-{
- struct sk_buff *skb;
- int i, err;
-
- for (i = 0; i < 50; i++) {
- udelay(4);
- skb = nfp_bpf_reply(bpf, tag);
- if (skb)
- return skb;
- }
-
- err = wait_event_interruptible_timeout(bpf->cmsg_wq,
- skb = nfp_bpf_reply(bpf, tag),
- msecs_to_jiffies(5000));
- /* We didn't get a response - try last time and atomically drop
- * the tag even if no response is matched.
- */
- if (!skb)
- skb = nfp_bpf_reply_drop_tag(bpf, tag);
- if (err < 0) {
- cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n",
- err == ERESTARTSYS ? "interrupted" : "error",
- type, err);
- return ERR_PTR(err);
- }
- if (!skb) {
- cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n",
- type);
- return ERR_PTR(-ETIMEDOUT);
- }
-
- return skb;
-}
-
-static struct sk_buff *
-nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
- enum nfp_bpf_cmsg_type type, unsigned int reply_size)
-{
- struct cmsg_hdr *hdr;
- int tag;
-
- nfp_ctrl_lock(bpf->app->ctrl);
- tag = nfp_bpf_alloc_tag(bpf);
- if (tag < 0) {
- nfp_ctrl_unlock(bpf->app->ctrl);
- dev_kfree_skb_any(skb);
- return ERR_PTR(tag);
- }
-
- hdr = (void *)skb->data;
- hdr->ver = CMSG_MAP_ABI_VERSION;
- hdr->type = type;
- hdr->tag = cpu_to_be16(tag);
-
- __nfp_app_ctrl_tx(bpf->app, skb);
-
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag);
- if (IS_ERR(skb))
- return skb;
-
- hdr = (struct cmsg_hdr *)skb->data;
- if (hdr->type != __CMSG_REPLY(type)) {
- cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
- hdr->type, __CMSG_REPLY(type));
- goto err_free;
- }
- /* 0 reply_size means caller will do the validation */
- if (reply_size && skb->len != reply_size) {
- cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
- type, skb->len, reply_size);
- goto err_free;
- }
-
- return skb;
-err_free:
- dev_kfree_skb_any(skb);
- return ERR_PTR(-EIO);
-}
-
static int
nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf,
struct cmsg_reply_map_simple *reply)
@@ -275,8 +97,8 @@ nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map)
req->map_type = cpu_to_be32(map->map_type);
req->map_flags = 0;
- skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC,
- sizeof(*reply));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_ALLOC,
+ sizeof(*reply));
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -310,8 +132,8 @@ void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map)
req = (void *)skb->data;
req->tid = cpu_to_be32(nfp_map->tid);
- skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE,
- sizeof(*reply));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, NFP_CCM_TYPE_BPF_MAP_FREE,
+ sizeof(*reply));
if (IS_ERR(skb)) {
cmsg_warn(bpf, "leaking map - I/O error\n");
return;
@@ -354,8 +176,7 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
}
static int
-nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
- enum nfp_bpf_cmsg_type op,
+nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
{
struct nfp_bpf_map *nfp_map = offmap->dev_priv;
@@ -386,8 +207,8 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
map->value_size);
- skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
- nfp_bpf_cmsg_map_reply_size(bpf, 1));
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
+ nfp_bpf_cmsg_map_reply_size(bpf, 1));
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -415,34 +236,34 @@ err_free:
int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
void *key, void *value, u64 flags)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_UPDATE,
key, value, flags, NULL, NULL);
}
int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_DELETE,
key, NULL, 0, NULL, NULL);
}
int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
void *key, void *value)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_LOOKUP,
key, NULL, 0, NULL, value);
}
int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
void *next_key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETFIRST,
NULL, NULL, 0, next_key, NULL);
}
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key)
{
- return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT,
+ return nfp_bpf_ctrl_entry_op(offmap, NFP_CCM_TYPE_BPF_MAP_GETNEXT,
key, NULL, 0, next_key, NULL);
}
@@ -456,54 +277,35 @@ unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_app_bpf *bpf = app->priv;
- unsigned int tag;
if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) {
cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len);
- goto err_free;
+ dev_kfree_skb_any(skb);
+ return;
}
- if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+ if (nfp_ccm_get_type(skb) == NFP_CCM_TYPE_BPF_BPF_EVENT) {
if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
dev_consume_skb_any(skb);
else
dev_kfree_skb_any(skb);
return;
}
- nfp_ctrl_lock(bpf->app->ctrl);
-
- tag = nfp_bpf_cmsg_get_tag(skb);
- if (unlikely(!test_bit(tag, bpf->tag_allocator))) {
- cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n",
- tag);
- goto err_unlock;
- }
-
- __skb_queue_tail(&bpf->cmsg_replies, skb);
- wake_up_interruptible_all(&bpf->cmsg_wq);
-
- nfp_ctrl_unlock(bpf->app->ctrl);
-
- return;
-err_unlock:
- nfp_ctrl_unlock(bpf->app->ctrl);
-err_free:
- dev_kfree_skb_any(skb);
+ nfp_ccm_rx(&bpf->ccm, skb);
}
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
{
+ const struct nfp_ccm_hdr *hdr = data;
struct nfp_app_bpf *bpf = app->priv;
- const struct cmsg_hdr *hdr = data;
if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
return;
}
- if (hdr->type == CMSG_TYPE_BPF_EVENT)
+ if (hdr->type == NFP_CCM_TYPE_BPF_BPF_EVENT)
nfp_bpf_event_output(bpf, data, len);
else
cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
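The request/reply tag machinery deleted from cmsg.c above reappears, generalized, in the new ccm.c added later in this patch. Its core is a sliding window over a tag bitmap: allocation hands out sequential tags, frees clear the bit and let the trailing edge skip over already-freed tags, and the window width is capped so a timed-out tag is not recycled while a late reply could still match it. Condensed sketch (simplified, hypothetical names):

	#include <linux/bitmap.h>

	static DECLARE_BITMAP(tags, U16_MAX + 1);
	static u16 tag_next, tag_last;

	static int tag_alloc(void)
	{
		if ((u16)(tag_next - tag_last) > U16_MAX / 4)
			return -EAGAIN;	/* too many requests in flight */
		__set_bit(tag_next, tags);
		return tag_next++;
	}

	static void tag_free(u16 tag)
	{
		__clear_bit(tag, tags);
		while (tag_last != tag_next && !test_bit(tag_last, tags))
			tag_last++;
	}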
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 721921bcf120..06c4286bd79e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -6,6 +6,7 @@
#include <linux/bitops.h>
#include <linux/types.h>
+#include "../ccm.h"
/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
* our FW ABI. In that case we will do translation in the driver.
@@ -52,22 +53,6 @@ struct nfp_bpf_cap_tlv_maps {
/*
* Types defined for map related control messages
*/
-#define CMSG_MAP_ABI_VERSION 1
-
-enum nfp_bpf_cmsg_type {
- CMSG_TYPE_MAP_ALLOC = 1,
- CMSG_TYPE_MAP_FREE = 2,
- CMSG_TYPE_MAP_LOOKUP = 3,
- CMSG_TYPE_MAP_UPDATE = 4,
- CMSG_TYPE_MAP_DELETE = 5,
- CMSG_TYPE_MAP_GETNEXT = 6,
- CMSG_TYPE_MAP_GETFIRST = 7,
- CMSG_TYPE_BPF_EVENT = 8,
- __CMSG_TYPE_MAP_MAX,
-};
-
-#define CMSG_TYPE_MAP_REPLY_BIT 7
-#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
/* BPF ABIv2 fixed-length control message fields */
#define CMSG_MAP_KEY_LW 16
@@ -84,19 +69,13 @@ enum nfp_bpf_cmsg_status {
CMSG_RC_ERR_MAP_E2BIG = 7,
};
-struct cmsg_hdr {
- u8 type;
- u8 ver;
- __be16 tag;
-};
-
struct cmsg_reply_map_simple {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 rc;
};
struct cmsg_req_map_alloc_tbl {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 key_size; /* in bytes */
__be32 value_size; /* in bytes */
__be32 max_entries;
@@ -110,7 +89,7 @@ struct cmsg_reply_map_alloc_tbl {
};
struct cmsg_req_map_free_tbl {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 tid;
};
@@ -120,7 +99,7 @@ struct cmsg_reply_map_free_tbl {
};
struct cmsg_req_map_op {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 tid;
__be32 count;
__be32 flags;
@@ -135,7 +114,7 @@ struct cmsg_reply_map_op {
};
struct cmsg_bpf_event {
- struct cmsg_hdr hdr;
+ struct nfp_ccm_hdr hdr;
__be32 cpu_id;
__be64 map_ptr;
__be32 data_size;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 275de9f4c61c..9c136da25221 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -442,14 +442,16 @@ static int nfp_bpf_init(struct nfp_app *app)
bpf->app = app;
app->priv = bpf;
- skb_queue_head_init(&bpf->cmsg_replies);
- init_waitqueue_head(&bpf->cmsg_wq);
INIT_LIST_HEAD(&bpf->map_list);
- err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
+ err = nfp_ccm_init(&bpf->ccm, app);
if (err)
goto err_free_bpf;
+ err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
+ if (err)
+ goto err_clean_ccm;
+
nfp_bpf_init_capabilities(bpf);
err = nfp_bpf_parse_capabilities(app);
@@ -474,6 +476,8 @@ static int nfp_bpf_init(struct nfp_app *app)
err_free_neutral_maps:
rhashtable_destroy(&bpf->maps_neutral);
+err_clean_ccm:
+ nfp_ccm_clean(&bpf->ccm);
err_free_bpf:
kfree(bpf);
return err;
@@ -484,7 +488,7 @@ static void nfp_bpf_clean(struct nfp_app *app)
struct nfp_app_bpf *bpf = app->priv;
bpf_offload_dev_destroy(bpf->bpf_dev);
- WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
+ nfp_ccm_clean(&bpf->ccm);
WARN_ON(!list_empty(&bpf->map_list));
WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
rhashtable_free_and_destroy(&bpf->maps_neutral,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index b25a48218bcf..e54d1ac84df2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/wait.h>
+#include "../ccm.h"
#include "../nfp_asm.h"
#include "fw.h"
@@ -84,16 +85,10 @@ enum pkt_vec {
/**
* struct nfp_app_bpf - bpf app priv structure
* @app: backpointer to the app
+ * @ccm: common control message handler data
*
* @bpf_dev: BPF offload device handle
*
- * @tag_allocator: bitmap of control message tags in use
- * @tag_alloc_next: next tag bit to allocate
- * @tag_alloc_last: next tag bit to be freed
- *
- * @cmsg_replies: received cmsg replies waiting to be consumed
- * @cmsg_wq: work queue for waiting for cmsg replies
- *
* @cmsg_key_sz: size of key in cmsg element array
* @cmsg_val_sz: size of value in cmsg element array
*
@@ -132,16 +127,10 @@ enum pkt_vec {
*/
struct nfp_app_bpf {
struct nfp_app *app;
+ struct nfp_ccm ccm;
struct bpf_offload_dev *bpf_dev;
- DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
- u16 tag_alloc_next;
- u16 tag_alloc_last;
-
- struct sk_buff_head cmsg_replies;
- struct wait_queue_head cmsg_wq;
-
unsigned int cmsg_key_sz;
unsigned int cmsg_val_sz;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 15dce97650a5..39c9fec222b4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -22,6 +22,7 @@
#include <net/tc_act/tc_mirred.h>
#include "main.h"
+#include "../ccm.h"
#include "../nfp_app.h"
#include "../nfp_net_ctrl.h"
#include "../nfp_net.h"
@@ -452,7 +453,7 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
return -EINVAL;
- if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+ if (cbe->hdr.ver != NFP_CCM_ABI_VERSION)
return -EINVAL;
rcu_read_lock();
diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c
new file mode 100644
index 000000000000..94476e41e261
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/ccm.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#include <linux/bitops.h>
+
+#include "ccm.h"
+#include "nfp_app.h"
+#include "nfp_net.h"
+
+#define NFP_CCM_TYPE_REPLY_BIT 7
+#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req))
+
+#define ccm_warn(app, msg...) nn_dp_warn(&(app)->ctrl->dp, msg)
+
+#define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4)
+
+static bool nfp_ccm_all_tags_busy(struct nfp_ccm *ccm)
+{
+ u16 used_tags;
+
+ used_tags = ccm->tag_alloc_next - ccm->tag_alloc_last;
+
+ return used_tags > NFP_CCM_TAG_ALLOC_SPAN;
+}
+
+static int nfp_ccm_alloc_tag(struct nfp_ccm *ccm)
+{
+ /* CCM is used for request-reply FW communication. To make sure
+ * we don't reuse a message ID too early after a timeout, limit
+ * the number of requests in flight.
+ */
+ if (unlikely(nfp_ccm_all_tags_busy(ccm))) {
+ ccm_warn(ccm->app, "all FW request contexts busy!\n");
+ return -EAGAIN;
+ }
+
+ WARN_ON(__test_and_set_bit(ccm->tag_alloc_next, ccm->tag_allocator));
+ return ccm->tag_alloc_next++;
+}
+
+static void nfp_ccm_free_tag(struct nfp_ccm *ccm, u16 tag)
+{
+ WARN_ON(!__test_and_clear_bit(tag, ccm->tag_allocator));
+
+ while (!test_bit(ccm->tag_alloc_last, ccm->tag_allocator) &&
+ ccm->tag_alloc_last != ccm->tag_alloc_next)
+ ccm->tag_alloc_last++;
+}
+
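The allocator above keeps two u16 cursors over the 64K tag bitmap: tag_alloc_next advances on allocation, tag_alloc_last chases it as tags are freed, and the u16 subtraction in nfp_ccm_all_tags_busy() stays correct across wraparound. A minimal standalone C model of that window arithmetic (userspace only; names are illustrative, not part of the driver):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_SPAN (UINT16_MAX / 4)	/* mirrors NFP_CCM_TAG_ALLOC_SPAN */

static uint16_t tag_next, tag_last;	/* allocation and free cursors */

/* Tags currently in flight; u16 arithmetic makes wraparound a non-issue. */
static uint16_t tags_in_flight(void)
{
	return (uint16_t)(tag_next - tag_last);
}

int main(void)
{
	/* Wraparound case: next has wrapped past 0, last has not. */
	tag_last = 65534;
	tag_next = 3;	/* outstanding tags: 65534, 65535, 0, 1, 2 */
	assert(tags_in_flight() == 5);

	/* The in-flight cap limits reuse of recently timed-out tags. */
	printf("busy: %s\n", tags_in_flight() > TAG_SPAN ? "yes" : "no");
	return 0;
}
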
+static struct sk_buff *__nfp_ccm_reply(struct nfp_ccm *ccm, u16 tag)
+{
+ unsigned int msg_tag;
+ struct sk_buff *skb;
+
+ skb_queue_walk(&ccm->replies, skb) {
+ msg_tag = nfp_ccm_get_tag(skb);
+ if (msg_tag == tag) {
+ nfp_ccm_free_tag(ccm, tag);
+ __skb_unlink(skb, &ccm->replies);
+ return skb;
+ }
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *
+nfp_ccm_reply(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+ struct sk_buff *skb;
+
+ nfp_ctrl_lock(app->ctrl);
+ skb = __nfp_ccm_reply(ccm, tag);
+ nfp_ctrl_unlock(app->ctrl);
+
+ return skb;
+}
+
+static struct sk_buff *
+nfp_ccm_reply_drop_tag(struct nfp_ccm *ccm, struct nfp_app *app, u16 tag)
+{
+ struct sk_buff *skb;
+
+ nfp_ctrl_lock(app->ctrl);
+ skb = __nfp_ccm_reply(ccm, tag);
+ if (!skb)
+ nfp_ccm_free_tag(ccm, tag);
+ nfp_ctrl_unlock(app->ctrl);
+
+ return skb;
+}
+
+static struct sk_buff *
+nfp_ccm_wait_reply(struct nfp_ccm *ccm, struct nfp_app *app,
+ enum nfp_ccm_type type, int tag)
+{
+ struct sk_buff *skb;
+ int i, err;
+
+ for (i = 0; i < 50; i++) {
+ udelay(4);
+ skb = nfp_ccm_reply(ccm, app, tag);
+ if (skb)
+ return skb;
+ }
+
+ err = wait_event_interruptible_timeout(ccm->wq,
+ skb = nfp_ccm_reply(ccm, app,
+ tag),
+ msecs_to_jiffies(5000));
+ /* We didn't get a response - try one last time and atomically drop
+ * the tag even if no response is matched.
+ */
+ if (!skb)
+ skb = nfp_ccm_reply_drop_tag(ccm, app, tag);
+ if (err < 0) {
+ ccm_warn(app, "%s waiting for response to 0x%02x: %d\n",
+ err == -ERESTARTSYS ? "interrupted" : "error",
+ type, err);
+ return ERR_PTR(err);
+ }
+ if (!skb) {
+ ccm_warn(app, "timeout waiting for response to 0x%02x\n", type);
+ return ERR_PTR(-ETIMEDOUT);
+ }
+
+ return skb;
+}
+
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+ enum nfp_ccm_type type, unsigned int reply_size)
+{
+ struct nfp_app *app = ccm->app;
+ struct nfp_ccm_hdr *hdr;
+ int reply_type, tag;
+
+ nfp_ctrl_lock(app->ctrl);
+ tag = nfp_ccm_alloc_tag(ccm);
+ if (tag < 0) {
+ nfp_ctrl_unlock(app->ctrl);
+ dev_kfree_skb_any(skb);
+ return ERR_PTR(tag);
+ }
+
+ hdr = (void *)skb->data;
+ hdr->ver = NFP_CCM_ABI_VERSION;
+ hdr->type = type;
+ hdr->tag = cpu_to_be16(tag);
+
+ __nfp_app_ctrl_tx(app, skb);
+
+ nfp_ctrl_unlock(app->ctrl);
+
+ skb = nfp_ccm_wait_reply(ccm, app, type, tag);
+ if (IS_ERR(skb))
+ return skb;
+
+ reply_type = nfp_ccm_get_type(skb);
+ if (reply_type != __NFP_CCM_REPLY(type)) {
+ ccm_warn(app, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
+ reply_type, __NFP_CCM_REPLY(type));
+ goto err_free;
+ }
+ /* 0 reply_size means caller will do the validation */
+ if (reply_size && skb->len != reply_size) {
+ ccm_warn(app, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
+ type, skb->len, reply_size);
+ goto err_free;
+ }
+
+ return skb;
+err_free:
+ dev_kfree_skb_any(skb);
+ return ERR_PTR(-EIO);
+}
+
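The reply matching above depends on the firmware echoing the request type with bit 7 set (__NFP_CCM_REPLY). A tiny standalone check of that convention, with the macros re-declared here purely for illustration:

#include <assert.h>

#define BIT(n)			(1U << (n))
#define CCM_TYPE_REPLY_BIT	7
#define CCM_REPLY(req)		(BIT(CCM_TYPE_REPLY_BIT) | (req))

int main(void)
{
	/* A MAP_LOOKUP request (type 3) must be answered with type 0x83. */
	assert(CCM_REPLY(3) == 0x83);
	/* An echoed request without the reply bit set must not match. */
	assert(CCM_REPLY(3) != 3);
	return 0;
}
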
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb)
+{
+ struct nfp_app *app = ccm->app;
+ unsigned int tag;
+
+ if (unlikely(skb->len < sizeof(struct nfp_ccm_hdr))) {
+ ccm_warn(app, "cmsg drop - too short %d!\n", skb->len);
+ goto err_free;
+ }
+
+ nfp_ctrl_lock(app->ctrl);
+
+ tag = nfp_ccm_get_tag(skb);
+ if (unlikely(!test_bit(tag, ccm->tag_allocator))) {
+ ccm_warn(app, "cmsg drop - no one is waiting for tag %u!\n",
+ tag);
+ goto err_unlock;
+ }
+
+ __skb_queue_tail(&ccm->replies, skb);
+ wake_up_interruptible_all(&ccm->wq);
+
+ nfp_ctrl_unlock(app->ctrl);
+ return;
+
+err_unlock:
+ nfp_ctrl_unlock(app->ctrl);
+err_free:
+ dev_kfree_skb_any(skb);
+}
+
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app)
+{
+ ccm->app = app;
+ skb_queue_head_init(&ccm->replies);
+ init_waitqueue_head(&ccm->wq);
+ return 0;
+}
+
+void nfp_ccm_clean(struct nfp_ccm *ccm)
+{
+ WARN_ON(!skb_queue_empty(&ccm->replies));
+}
diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h
new file mode 100644
index 000000000000..e2fe4b867958
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/ccm.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2016-2019 Netronome Systems, Inc. */
+
+#ifndef NFP_CCM_H
+#define NFP_CCM_H 1
+
+#include <linux/bitmap.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+
+struct nfp_app;
+
+/* Firmware ABI */
+
+enum nfp_ccm_type {
+ NFP_CCM_TYPE_BPF_MAP_ALLOC = 1,
+ NFP_CCM_TYPE_BPF_MAP_FREE = 2,
+ NFP_CCM_TYPE_BPF_MAP_LOOKUP = 3,
+ NFP_CCM_TYPE_BPF_MAP_UPDATE = 4,
+ NFP_CCM_TYPE_BPF_MAP_DELETE = 5,
+ NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6,
+ NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7,
+ NFP_CCM_TYPE_BPF_BPF_EVENT = 8,
+ __NFP_CCM_TYPE_MAX,
+};
+
+#define NFP_CCM_ABI_VERSION 1
+
+struct nfp_ccm_hdr {
+ u8 type;
+ u8 ver;
+ __be16 tag;
+};
+
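The header above is four bytes on the wire, with the tag stored big-endian. A hedged userspace sketch of parsing it from a raw buffer, mirroring what nfp_ccm_get_tag() below does; uint16_t plus an explicit ntohs() stands in for __be16, and the padding-free layout is assumed (it holds on common ABIs):

#include <arpa/inet.h>	/* ntohs */
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Userspace mirror of the 4-byte wire header. */
struct ccm_hdr {
	uint8_t type;
	uint8_t ver;
	uint16_t tag;	/* big-endian on the wire */
};

int main(void)
{
	const uint8_t wire[4] = { 0x03, 0x01, 0x12, 0x34 }; /* type, ver, tag */
	struct ccm_hdr hdr;

	assert(sizeof(hdr) == 4);	/* no padding on common ABIs */
	memcpy(&hdr, wire, sizeof(hdr));
	assert(hdr.type == 0x03 && hdr.ver == 0x01);
	assert(ntohs(hdr.tag) == 0x1234);
	return 0;
}
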
+static inline u8 nfp_ccm_get_type(struct sk_buff *skb)
+{
+ struct nfp_ccm_hdr *hdr;
+
+ hdr = (struct nfp_ccm_hdr *)skb->data;
+
+ return hdr->type;
+}
+
+static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb)
+{
+ struct nfp_ccm_hdr *hdr;
+
+ hdr = (struct nfp_ccm_hdr *)skb->data;
+
+ return be16_to_cpu(hdr->tag);
+}
+
+/* Implementation */
+
+/**
+ * struct nfp_ccm - common control message handling
+ * @tag_allocator: bitmap of control message tags in use
+ * @tag_alloc_next: next tag bit to allocate
+ * @tag_alloc_last: next tag bit to be freed
+ *
+ * @replies: received cmsg replies waiting to be consumed
+ * @wq: work queue for waiting for cmsg replies
+ */
+struct nfp_ccm {
+ struct nfp_app *app;
+
+ DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
+ u16 tag_alloc_next;
+ u16 tag_alloc_last;
+
+ struct sk_buff_head replies;
+ struct wait_queue_head wq;
+};
+
+int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app);
+void nfp_ccm_clean(struct nfp_ccm *ccm);
+void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb);
+struct sk_buff *
+nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb,
+ enum nfp_ccm_type type, unsigned int reply_size);
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index be37c2d6151c..df9aff2684ed 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -539,12 +539,17 @@ struct nfp_net_dp {
* @shared_handler: Handler for shared interrupts
* @shared_name: Name for shared interrupt
* @me_freq_mhz: ME clock_freq (MHz)
- * @reconfig_lock: Protects HW reconfiguration request regs/machinery
+ * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active,
+ * @reconfig_sync_present and HW reconfiguration request
+ * regs/machinery from async requests (sync must take
+ * @bar_lock)
* @reconfig_posted: Pending reconfig bits coming from async sources
* @reconfig_timer_active: Timer for reading reconfiguration results is pending
* @reconfig_sync_present: Some thread is performing synchronous reconfig
* @reconfig_timer: Timer for async reading of reconfig results
* @reconfig_in_progress_update: Update FW is processing now (debug only)
+ * @bar_lock: vNIC config BAR access lock, protects: update,
+ * mailbox area
* @link_up: Is the link up?
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter
@@ -615,6 +620,8 @@ struct nfp_net {
struct timer_list reconfig_timer;
u32 reconfig_in_progress_update;
+ struct mutex bar_lock;
+
u32 rx_coalesce_usecs;
u32 rx_coalesce_max_frames;
u32 tx_coalesce_usecs;
@@ -839,6 +846,16 @@ static inline void nfp_ctrl_unlock(struct nfp_net *nn)
spin_unlock_bh(&nn->r_vecs[0].lock);
}
+static inline void nn_ctrl_bar_lock(struct nfp_net *nn)
+{
+ mutex_lock(&nn->bar_lock);
+}
+
+static inline void nn_ctrl_bar_unlock(struct nfp_net *nn)
+{
+ mutex_unlock(&nn->bar_lock);
+}
+
/* Globals */
extern const char nfp_driver_version[];
@@ -871,7 +888,9 @@ unsigned int nfp_net_rss_key_sz(struct nfp_net *nn);
void nfp_net_rss_write_itbl(struct nfp_net *nn);
void nfp_net_rss_write_key(struct nfp_net *nn);
void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
-int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd);
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size);
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd);
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd);
unsigned int
nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index bde9695b9f8a..e4be04cdef30 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -23,6 +23,7 @@
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/overflow.h>
#include <linux/page_ref.h>
@@ -137,20 +138,37 @@ static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
return false;
}
-static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
{
bool timed_out = false;
+ int i;
+
+ /* Poll the update field, waiting for the NFP to ack the config.
+ * Do an opportunistic busy-wait loop first, then sleep-poll.
+ */
+ for (i = 0; i < 50; i++) {
+ if (nfp_net_reconfig_check_done(nn, false))
+ return false;
+ udelay(4);
+ }
- /* Poll update field, waiting for NFP to ack the config */
while (!nfp_net_reconfig_check_done(nn, timed_out)) {
- msleep(1);
+ usleep_range(250, 500);
timed_out = time_is_before_eq_jiffies(deadline);
}
+ return timed_out;
+}
+
+static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+{
+ if (__nfp_net_reconfig_wait(nn, deadline))
+ return -EIO;
+
if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
return -EIO;
- return timed_out ? -EIO : 0;
+ return 0;
}
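The rewritten wait above is a two-phase poll: a short udelay() busy-wait catches configs the NFP acks almost immediately, and only then does the code fall back to sleeping until the deadline. A standalone sketch of the same shape, with a stubbed done() predicate standing in for nfp_net_reconfig_check_done() (illustrative, not driver code):

#include <stdbool.h>
#include <time.h>
#include <unistd.h>

/* Stand-in for reading the hardware's UPDATE register. */
static bool done(void)
{
	static int calls;

	return ++calls > 100;
}

/* Returns true on timeout, false once done() reports completion. */
static bool wait_two_phase(time_t deadline)
{
	int i;

	/* Phase 1: brief busy-poll for configs that complete quickly. */
	for (i = 0; i < 50; i++) {
		if (done())
			return false;
		usleep(4);	/* the kernel code uses udelay(4) here */
	}

	/* Phase 2: sleep-poll until the deadline. */
	while (!done()) {
		usleep(250);	/* the kernel uses usleep_range(250, 500) */
		if (time(NULL) > deadline)
			return true;
	}
	return false;
}

int main(void)
{
	return wait_two_phase(time(NULL) + 5) ? 1 : 0;
}
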
static void nfp_net_reconfig_timer(struct timer_list *t)
@@ -243,7 +261,7 @@ static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
}
/**
- * nfp_net_reconfig() - Reconfigure the firmware
+ * __nfp_net_reconfig() - Reconfigure the firmware
* @nn: NFP Net device to reconfigure
* @update: The value for the update field in the BAR config
*
@@ -253,10 +271,12 @@ static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
*
* Return: Negative errno on error, 0 on success
*/
-int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+static int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
{
int ret;
+ lockdep_assert_held(&nn->bar_lock);
+
nfp_net_reconfig_sync_enter(nn);
nfp_net_reconfig_start(nn, update);
@@ -274,8 +294,31 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
return ret;
}
+int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+{
+ int ret;
+
+ nn_ctrl_bar_lock(nn);
+ ret = __nfp_net_reconfig(nn, update);
+ nn_ctrl_bar_unlock(nn);
+
+ return ret;
+}
+
+int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
+{
+ if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) {
+ nn_err(nn, "mailbox too small for %u bytes of data (%u)\n",
+ data_size, nn->tlv_caps.mbox_len);
+ return -EIO;
+ }
+
+ nn_ctrl_bar_lock(nn);
+ return 0;
+}
+
/**
- * nfp_net_reconfig_mbox() - Reconfigure the firmware via the mailbox
+ * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
* @nn: NFP Net device to reconfigure
* @mbox_cmd: The value for the mailbox command
*
@@ -283,19 +326,15 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
*
* Return: Negative errno on error, 0 on success
*/
-int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
+int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
{
u32 mbox = nn->tlv_caps.mbox_off;
int ret;
- if (!nfp_net_has_mbox(&nn->tlv_caps)) {
- nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd);
- return -EIO;
- }
-
+ lockdep_assert_held(&nn->bar_lock);
nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
- ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
+ ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
if (ret) {
nn_err(nn, "Mailbox update error\n");
return ret;
@@ -304,6 +343,15 @@ int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
}
+int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
+{
+ int ret;
+
+ ret = nfp_net_mbox_reconfig(nn, mbox_cmd);
+ nn_ctrl_bar_unlock(nn);
+ return ret;
+}
+
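Taken together, the helpers above define the mailbox calling pattern: nfp_net_mbox_lock() pairs the capacity check with taking bar_lock, the caller writes its payload, and nfp_net_mbox_reconfig_and_unlock() issues the command and always drops the lock (the VLAN filter handlers below follow exactly this sequence). A standalone pthread model of that lock-in-helper/unlock-in-helper shape, with made-up sizes and a printf standing in for the hardware write:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bar_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mbox_len = 16;	/* pretend mailbox capacity */
static unsigned int mbox_val;		/* pretend mailbox value area */

/* Check the payload fits, then enter the critical section. */
static int mbox_lock(unsigned int data_size)
{
	if (mbox_len < 8 + data_size) {	/* 8 ~ the fixed cmd/ret header */
		fprintf(stderr, "mailbox too small for %u bytes\n", data_size);
		return -1;
	}
	pthread_mutex_lock(&bar_lock);
	return 0;
}

/* Issue the command, then always drop the lock - the caller never
 * touches the mutex directly.
 */
static int mbox_reconfig_and_unlock(unsigned int cmd)
{
	printf("cmd %u, val %u\n", cmd, mbox_val);
	pthread_mutex_unlock(&bar_lock);
	return 0;
}

int main(void)
{
	if (mbox_lock(4))
		return 1;
	mbox_val = 42;	/* fill the payload while holding the lock */
	return mbox_reconfig_and_unlock(1);
}
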
/* Interrupt configuration and handling
*/
@@ -3111,7 +3159,9 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
static int
nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
struct nfp_net *nn = netdev_priv(netdev);
+ int err;
/* Priority tagged packets with vlan id 0 are processed by the
* NFP as untagged packets
@@ -3119,17 +3169,23 @@ nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
if (!vid)
return 0;
+ err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+ if (err)
+ return err;
+
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
ETH_P_8021Q);
- return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD);
+ return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}
static int
nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
{
+ const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
struct nfp_net *nn = netdev_priv(netdev);
+ int err;
/* Priority tagged packets with vlan id 0 are processed by the
* NFP as untagged packets
@@ -3137,11 +3193,15 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
if (!vid)
return 0;
+ err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
+ if (err)
+ return err;
+
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
ETH_P_8021Q);
- return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
+ return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
}
static void nfp_net_stat64(struct net_device *netdev,
@@ -3633,6 +3693,8 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
+ mutex_init(&nn->bar_lock);
+
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->link_status_lock);
@@ -3660,6 +3722,9 @@ err_free_nn:
void nfp_net_free(struct nfp_net *nn)
{
WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
+
+ mutex_destroy(&nn->bar_lock);
+
if (nn->dp.netdev)
free_netdev(nn->dp.netdev);
else
@@ -3921,9 +3986,6 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
- if (nn->dp.netdev)
- nfp_net_netdev_init(nn);
-
/* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
@@ -3936,6 +3998,9 @@ int nfp_net_init(struct nfp_net *nn)
if (err)
return err;
+ if (nn->dp.netdev)
+ nfp_net_netdev_init(nn);
+
nfp_net_vecs_init(nn);
if (!nn->dp.netdev)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index f5d564bbb55a..25919e338071 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -389,7 +389,6 @@
#define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0
#define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4
#define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8
-#define NFP_NET_CFG_MBOX_SIMPLE_LEN 12
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2
@@ -495,10 +494,4 @@ struct nfp_net_tlv_caps {
int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
struct nfp_net_tlv_caps *caps);
-
-static inline bool nfp_net_has_mbox(struct nfp_net_tlv_caps *caps)
-{
- return caps->mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_LEN;
-}
-
#endif /* _NFP_NET_CTRL_H_ */
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 0c443ea98479..374a4d4371f9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -497,7 +497,7 @@ struct qlcnic_hardware_context {
u16 board_type;
u16 supported_type;
- u16 link_speed;
+ u32 link_speed;
u16 link_duplex;
u16 link_autoneg;
u16 module_type;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 71f848e6e1ec..efaea1a0ad64 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -28,6 +28,7 @@
#include <linux/pm_runtime.h>
#include <linux/firmware.h>
#include <linux/prefetch.h>
+#include <linux/pci-aspm.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
@@ -698,6 +699,8 @@ struct rtl8169_private {
u32 ocp_base;
};
+typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
+
MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
module_param_named(debug, debug.msg_enable, int, 0);
@@ -3987,131 +3990,65 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
static void rtl_hw_phy_config(struct net_device *dev)
{
+ static const rtl_generic_fct phy_configs[] = {
+ /* PCI devices. */
+ [RTL_GIGA_MAC_VER_01] = NULL,
+ [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config,
+ [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config,
+ /* PCI-E devices. */
+ [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_10] = NULL,
+ [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_13] = NULL,
+ [RTL_GIGA_MAC_VER_14] = NULL,
+ [RTL_GIGA_MAC_VER_15] = NULL,
+ [RTL_GIGA_MAC_VER_16] = NULL,
+ [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_22] = rtl8168c_4_hw_phy_config,
+ [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config,
+ [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_31] = NULL,
+ [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config,
+ [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
+ [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_41] = NULL,
+ [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
+ };
struct rtl8169_private *tp = netdev_priv(dev);
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01:
- break;
- case RTL_GIGA_MAC_VER_02:
- case RTL_GIGA_MAC_VER_03:
- rtl8169s_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_04:
- rtl8169sb_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_05:
- rtl8169scd_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_06:
- rtl8169sce_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_07:
- case RTL_GIGA_MAC_VER_08:
- case RTL_GIGA_MAC_VER_09:
- rtl8102e_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_11:
- rtl8168bb_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_12:
- rtl8168bef_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_17:
- rtl8168bef_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_18:
- rtl8168cp_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_19:
- rtl8168c_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_20:
- rtl8168c_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_21:
- rtl8168c_3_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_22:
- rtl8168c_4_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_23:
- case RTL_GIGA_MAC_VER_24:
- rtl8168cp_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_25:
- rtl8168d_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_26:
- rtl8168d_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_27:
- rtl8168d_3_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_28:
- rtl8168d_4_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_29:
- case RTL_GIGA_MAC_VER_30:
- rtl8105e_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_31:
- /* None. */
- break;
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
- rtl8168e_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_34:
- rtl8168e_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_35:
- rtl8168f_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_36:
- rtl8168f_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_37:
- rtl8402_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_38:
- rtl8411_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_39:
- rtl8106e_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_40:
- rtl8168g_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_42:
- case RTL_GIGA_MAC_VER_43:
- case RTL_GIGA_MAC_VER_44:
- rtl8168g_2_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_47:
- rtl8168h_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_46:
- case RTL_GIGA_MAC_VER_48:
- rtl8168h_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_49:
- rtl8168ep_1_hw_phy_config(tp);
- break;
- case RTL_GIGA_MAC_VER_50:
- case RTL_GIGA_MAC_VER_51:
- rtl8168ep_2_hw_phy_config(tp);
- break;
-
- case RTL_GIGA_MAC_VER_41:
- default:
- break;
- }
+ if (phy_configs[tp->mac_version])
+ phy_configs[tp->mac_version](tp);
}
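The switch statement is replaced above by a designated-initializer function table indexed by mac_version, where a NULL slot means "no PHY setup needed". The same idiom in a minimal standalone form (chip names invented for illustration):

#include <stdio.h>

enum chip { CHIP_A, CHIP_B, CHIP_C, CHIP_MAX };

typedef void (*cfg_fct)(void);

static void cfg_a(void) { puts("configure A"); }
static void cfg_c(void) { puts("configure C"); }

int main(void)
{
	/* Designated initializers leave CHIP_B as NULL automatically. */
	static const cfg_fct configs[CHIP_MAX] = {
		[CHIP_A] = cfg_a,
		[CHIP_C] = cfg_c,
	};
	enum chip version = CHIP_C;

	if (configs[version])	/* NULL means "nothing to do", as in the driver */
		configs[version]();
	return 0;
}
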
static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
@@ -5444,122 +5381,6 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
rtl_hw_aspm_clkreq_enable(tp, true);
}
-static void rtl_hw_start_8168(struct rtl8169_private *tp)
-{
- RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
-
- /* Work around for RxFIFO overflow. */
- if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
- tp->irq_mask |= RxFIFOOver;
- tp->irq_mask &= ~RxOverflow;
- }
-
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_11:
- rtl_hw_start_8168bb(tp);
- break;
-
- case RTL_GIGA_MAC_VER_12:
- case RTL_GIGA_MAC_VER_17:
- rtl_hw_start_8168bef(tp);
- break;
-
- case RTL_GIGA_MAC_VER_18:
- rtl_hw_start_8168cp_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_19:
- rtl_hw_start_8168c_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_20:
- rtl_hw_start_8168c_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_21:
- rtl_hw_start_8168c_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_22:
- rtl_hw_start_8168c_4(tp);
- break;
-
- case RTL_GIGA_MAC_VER_23:
- rtl_hw_start_8168cp_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_24:
- rtl_hw_start_8168cp_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_25:
- case RTL_GIGA_MAC_VER_26:
- case RTL_GIGA_MAC_VER_27:
- rtl_hw_start_8168d(tp);
- break;
-
- case RTL_GIGA_MAC_VER_28:
- rtl_hw_start_8168d_4(tp);
- break;
-
- case RTL_GIGA_MAC_VER_31:
- rtl_hw_start_8168dp(tp);
- break;
-
- case RTL_GIGA_MAC_VER_32:
- case RTL_GIGA_MAC_VER_33:
- rtl_hw_start_8168e_1(tp);
- break;
- case RTL_GIGA_MAC_VER_34:
- rtl_hw_start_8168e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_35:
- case RTL_GIGA_MAC_VER_36:
- rtl_hw_start_8168f_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_38:
- rtl_hw_start_8411(tp);
- break;
-
- case RTL_GIGA_MAC_VER_40:
- case RTL_GIGA_MAC_VER_41:
- rtl_hw_start_8168g_1(tp);
- break;
- case RTL_GIGA_MAC_VER_42:
- rtl_hw_start_8168g_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_44:
- rtl_hw_start_8411_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_45:
- case RTL_GIGA_MAC_VER_46:
- rtl_hw_start_8168h_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_49:
- rtl_hw_start_8168ep_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_50:
- rtl_hw_start_8168ep_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_51:
- rtl_hw_start_8168ep_3(tp);
- break;
-
- default:
- netif_err(tp, drv, tp->dev,
- "unknown chipset (mac_version = %d)\n",
- tp->mac_version);
- break;
- }
-}
-
static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
{
static const struct ephy_info e_info_8102e_1[] = {
@@ -5685,6 +5506,73 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
rtl_hw_aspm_clkreq_enable(tp, true);
}
+static void rtl_hw_config(struct rtl8169_private *tp)
+{
+ static const rtl_generic_fct hw_configs[] = {
+ [RTL_GIGA_MAC_VER_07] = rtl_hw_start_8102e_1,
+ [RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3,
+ [RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2,
+ [RTL_GIGA_MAC_VER_10] = NULL,
+ [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168bb,
+ [RTL_GIGA_MAC_VER_12] = rtl_hw_start_8168bef,
+ [RTL_GIGA_MAC_VER_13] = NULL,
+ [RTL_GIGA_MAC_VER_14] = NULL,
+ [RTL_GIGA_MAC_VER_15] = NULL,
+ [RTL_GIGA_MAC_VER_16] = NULL,
+ [RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168bef,
+ [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
+ [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
+ [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
+ [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3,
+ [RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4,
+ [RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2,
+ [RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3,
+ [RTL_GIGA_MAC_VER_25] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_26] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_27] = rtl_hw_start_8168d,
+ [RTL_GIGA_MAC_VER_28] = rtl_hw_start_8168d_4,
+ [RTL_GIGA_MAC_VER_29] = rtl_hw_start_8105e_1,
+ [RTL_GIGA_MAC_VER_30] = rtl_hw_start_8105e_2,
+ [RTL_GIGA_MAC_VER_31] = rtl_hw_start_8168dp,
+ [RTL_GIGA_MAC_VER_32] = rtl_hw_start_8168e_1,
+ [RTL_GIGA_MAC_VER_33] = rtl_hw_start_8168e_1,
+ [RTL_GIGA_MAC_VER_34] = rtl_hw_start_8168e_2,
+ [RTL_GIGA_MAC_VER_35] = rtl_hw_start_8168f_1,
+ [RTL_GIGA_MAC_VER_36] = rtl_hw_start_8168f_1,
+ [RTL_GIGA_MAC_VER_37] = rtl_hw_start_8402,
+ [RTL_GIGA_MAC_VER_38] = rtl_hw_start_8411,
+ [RTL_GIGA_MAC_VER_39] = rtl_hw_start_8106,
+ [RTL_GIGA_MAC_VER_40] = rtl_hw_start_8168g_1,
+ [RTL_GIGA_MAC_VER_41] = rtl_hw_start_8168g_1,
+ [RTL_GIGA_MAC_VER_42] = rtl_hw_start_8168g_2,
+ [RTL_GIGA_MAC_VER_43] = rtl_hw_start_8168g_2,
+ [RTL_GIGA_MAC_VER_44] = rtl_hw_start_8411_2,
+ [RTL_GIGA_MAC_VER_45] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_46] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_47] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_48] = rtl_hw_start_8168h_1,
+ [RTL_GIGA_MAC_VER_49] = rtl_hw_start_8168ep_1,
+ [RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2,
+ [RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3,
+ };
+
+ if (hw_configs[tp->mac_version])
+ hw_configs[tp->mac_version](tp);
+}
+
+static void rtl_hw_start_8168(struct rtl8169_private *tp)
+{
+ RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
+
+ /* Workaround for RxFIFO overflow. */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
+ tp->irq_mask |= RxFIFOOver;
+ tp->irq_mask &= ~RxOverflow;
+ }
+
+ rtl_hw_config(tp);
+}
+
static void rtl_hw_start_8101(struct rtl8169_private *tp)
{
if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -5700,41 +5588,7 @@ static void rtl_hw_start_8101(struct rtl8169_private *tp)
tp->cp_cmd &= CPCMD_QUIRK_MASK;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_07:
- rtl_hw_start_8102e_1(tp);
- break;
-
- case RTL_GIGA_MAC_VER_08:
- rtl_hw_start_8102e_3(tp);
- break;
-
- case RTL_GIGA_MAC_VER_09:
- rtl_hw_start_8102e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_29:
- rtl_hw_start_8105e_1(tp);
- break;
- case RTL_GIGA_MAC_VER_30:
- rtl_hw_start_8105e_2(tp);
- break;
-
- case RTL_GIGA_MAC_VER_37:
- rtl_hw_start_8402(tp);
- break;
-
- case RTL_GIGA_MAC_VER_39:
- rtl_hw_start_8106(tp);
- break;
- case RTL_GIGA_MAC_VER_43:
- rtl_hw_start_8168g_2(tp);
- break;
- case RTL_GIGA_MAC_VER_47:
- case RTL_GIGA_MAC_VER_48:
- rtl_hw_start_8168h_1(tp);
- break;
- }
+ rtl_hw_config(tp);
}
static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6526,10 +6380,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
}
- if (status & (RTL_EVENT_NAPI | LinkChg)) {
- rtl_irq_disable(tp);
- napi_schedule_irqoff(&tp->napi);
- }
+ rtl_irq_disable(tp);
+ napi_schedule_irqoff(&tp->napi);
out:
rtl_ack_events(tp, status);
@@ -7336,6 +7188,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
+ /* Disable ASPM completely as it causes random devices to stop
+ * working and full system hangs for some users of PCIe devices.
+ */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
+
/* enable device (incl. PCI PM wakeup and hotplug setup) */
rc = pcim_enable_device(pdev);
if (rc < 0) {
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index a71c900ca04f..7ae6c124bfe9 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2207,6 +2207,15 @@ static int rocker_router_fib_event(struct notifier_block *nb,
switch (event) {
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
+ if (info->family == AF_INET) {
+ struct fib_entry_notifier_info *fen_info = ptr;
+
+ if (fen_info->fi->fib_nh_is_v6) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+
memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
/* Take reference on fib_info to prevent it from being
* freed while work is queued. Release it afterwards.
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 2df7f60fe052..857e4bf99883 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -128,21 +128,9 @@ static u32 always_on(struct net_device *dev)
return 1;
}
-static int loopback_get_ts_info(struct net_device *netdev,
- struct ethtool_ts_info *ts_info)
-{
- ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
-
- ts_info->phc_index = -1;
-
- return 0;
-};
-
static const struct ethtool_ops loopback_ethtool_ops = {
.get_link = always_on,
- .get_ts_info = loopback_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
static int loopback_dev_init(struct net_device *dev)
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index 0fee1d06c084..cdf8611d2811 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -3,17 +3,13 @@
obj-$(CONFIG_NETDEVSIM) += netdevsim.o
netdevsim-objs := \
- netdev.o \
+ netdev.o devlink.o fib.o sdev.o \
ifeq ($(CONFIG_BPF_SYSCALL),y)
netdevsim-objs += \
bpf.o
endif
-ifneq ($(CONFIG_NET_DEVLINK),)
-netdevsim-objs += devlink.o fib.o
-endif
-
ifneq ($(CONFIG_XFRM_OFFLOAD),)
netdevsim-objs += ipsec.o
endif
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index f92c43453ec6..a93aafe87db3 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -27,7 +27,7 @@
bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__)
struct nsim_bpf_bound_prog {
- struct netdevsim *ns;
+ struct netdevsim_shared_dev *sdev;
struct bpf_prog *prog;
struct dentry *ddir;
const char *state;
@@ -65,8 +65,8 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
struct nsim_bpf_bound_prog *state;
state = env->prog->aux->offload->dev_priv;
- if (state->ns->bpf_bind_verifier_delay && !insn_idx)
- msleep(state->ns->bpf_bind_verifier_delay);
+ if (state->sdev->bpf_bind_verifier_delay && !insn_idx)
+ msleep(state->sdev->bpf_bind_verifier_delay);
if (insn_idx == env->prog->len - 1)
pr_vlog(env, "Hello from netdevsim!\n");
@@ -213,7 +213,8 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf,
return 0;
}
-static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
+static int nsim_bpf_create_prog(struct netdevsim_shared_dev *sdev,
+ struct bpf_prog *prog)
{
struct nsim_bpf_bound_prog *state;
char name[16];
@@ -222,13 +223,13 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
if (!state)
return -ENOMEM;
- state->ns = ns;
+ state->sdev = sdev;
state->prog = prog;
state->state = "verify";
/* Program id is not populated yet when we create the state. */
- sprintf(name, "%u", ns->sdev->prog_id_gen++);
- state->ddir = debugfs_create_dir(name, ns->sdev->ddir_bpf_bound_progs);
+ sprintf(name, "%u", sdev->prog_id_gen++);
+ state->ddir = debugfs_create_dir(name, sdev->ddir_bpf_bound_progs);
if (IS_ERR_OR_NULL(state->ddir)) {
kfree(state);
return -ENOMEM;
@@ -239,7 +240,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
&state->state, &nsim_bpf_string_fops);
debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
- list_add_tail(&state->l, &ns->sdev->bpf_bound_progs);
+ list_add_tail(&state->l, &sdev->bpf_bound_progs);
prog->aux->offload->dev_priv = state;
@@ -248,12 +249,13 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
{
- struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
+ struct netdevsim_shared_dev *sdev =
+ bpf_offload_dev_priv(prog->aux->offload->offdev);
- if (!ns->bpf_bind_accept)
+ if (!sdev->bpf_bind_accept)
return -EOPNOTSUPP;
- return nsim_bpf_create_prog(ns, prog);
+ return nsim_bpf_create_prog(sdev, prog);
}
static int nsim_bpf_translate(struct bpf_prog *prog)
@@ -576,39 +578,55 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
}
}
-int nsim_bpf_init(struct netdevsim *ns)
+static int nsim_bpf_sdev_init(struct netdevsim_shared_dev *sdev)
{
int err;
- if (ns->sdev->refcnt == 1) {
- INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
- INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
+ INIT_LIST_HEAD(&sdev->bpf_bound_progs);
+ INIT_LIST_HEAD(&sdev->bpf_bound_maps);
- ns->sdev->ddir_bpf_bound_progs =
- debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir);
- if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
- return -ENOMEM;
+ sdev->ddir_bpf_bound_progs =
+ debugfs_create_dir("bpf_bound_progs", sdev->ddir);
+ if (IS_ERR_OR_NULL(sdev->ddir_bpf_bound_progs))
+ return -ENOMEM;
+
+ sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, sdev);
+ err = PTR_ERR_OR_ZERO(sdev->bpf_dev);
+ if (err)
+ return err;
- ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops,
- ns);
- err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
+ sdev->bpf_bind_accept = true;
+ debugfs_create_bool("bpf_bind_accept", 0600, sdev->ddir,
+ &sdev->bpf_bind_accept);
+ debugfs_create_u32("bpf_bind_verifier_delay", 0600, sdev->ddir,
+ &sdev->bpf_bind_verifier_delay);
+ return 0;
+}
+
+static void nsim_bpf_sdev_uninit(struct netdevsim_shared_dev *sdev)
+{
+ WARN_ON(!list_empty(&sdev->bpf_bound_progs));
+ WARN_ON(!list_empty(&sdev->bpf_bound_maps));
+ bpf_offload_dev_destroy(sdev->bpf_dev);
+}
+
+int nsim_bpf_init(struct netdevsim *ns)
+{
+ int err;
+
+ if (ns->sdev->refcnt == 1) {
+ err = nsim_bpf_sdev_init(ns->sdev);
if (err)
return err;
}
err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
if (err)
- goto err_destroy_bdev;
+ goto err_bpf_sdev_uninit;
debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
&ns->bpf_offloaded_id);
- ns->bpf_bind_accept = true;
- debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir,
- &ns->bpf_bind_accept);
- debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
- &ns->bpf_bind_verifier_delay);
-
ns->bpf_tc_accept = true;
debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
&ns->bpf_tc_accept);
@@ -627,9 +645,9 @@ int nsim_bpf_init(struct netdevsim *ns)
return 0;
-err_destroy_bdev:
+err_bpf_sdev_uninit:
if (ns->sdev->refcnt == 1)
- bpf_offload_dev_destroy(ns->sdev->bpf_dev);
+ nsim_bpf_sdev_uninit(ns->sdev);
return err;
}
@@ -640,9 +658,6 @@ void nsim_bpf_uninit(struct netdevsim *ns)
WARN_ON(ns->bpf_offloaded);
bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
- if (ns->sdev->refcnt == 1) {
- WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
- WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps));
- bpf_offload_dev_destroy(ns->sdev->bpf_dev);
- }
+ if (ns->sdev->refcnt == 1)
+ nsim_bpf_sdev_uninit(ns->sdev);
}
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 75a50b59cb8f..7805fa840383 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -25,6 +25,8 @@
#include "netdevsim.h"
+static u32 nsim_dev_id;
+
struct nsim_vf_config {
int link_state;
u16 min_tx_rate;
@@ -38,10 +40,7 @@ struct nsim_vf_config {
bool rss_query_enabled;
};
-static u32 nsim_dev_id;
-
static struct dentry *nsim_ddir;
-static struct dentry *nsim_sdev_ddir;
static int nsim_num_vf(struct device *dev)
{
@@ -139,7 +138,6 @@ static void nsim_dev_release(struct device *dev)
struct netdevsim *ns = to_nsim(dev);
nsim_vfs_disable(ns);
- free_netdev(ns->netdev);
}
static struct device_type nsim_dev_type = {
@@ -159,8 +157,8 @@ static int nsim_get_port_parent_id(struct net_device *dev,
static int nsim_init(struct net_device *dev)
{
- char sdev_ddir_name[10], sdev_link_name[32];
struct netdevsim *ns = netdev_priv(dev);
+ char sdev_link_name[32];
int err;
ns->netdev = dev;
@@ -168,32 +166,13 @@ static int nsim_init(struct net_device *dev)
if (IS_ERR_OR_NULL(ns->ddir))
return -ENOMEM;
- if (!ns->sdev) {
- ns->sdev = kzalloc(sizeof(*ns->sdev), GFP_KERNEL);
- if (!ns->sdev) {
- err = -ENOMEM;
- goto err_debugfs_destroy;
- }
- ns->sdev->refcnt = 1;
- ns->sdev->switch_id = nsim_dev_id;
- sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
- ns->sdev->ddir = debugfs_create_dir(sdev_ddir_name,
- nsim_sdev_ddir);
- if (IS_ERR_OR_NULL(ns->sdev->ddir)) {
- err = PTR_ERR_OR_ZERO(ns->sdev->ddir) ?: -EINVAL;
- goto err_sdev_free;
- }
- } else {
- sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
- ns->sdev->refcnt++;
- }
-
- sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%s", sdev_ddir_name);
+ sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%u",
+ ns->sdev->switch_id);
debugfs_create_symlink("sdev", ns->ddir, sdev_link_name);
err = nsim_bpf_init(ns);
if (err)
- goto err_sdev_destroy;
+ goto err_debugfs_destroy;
ns->dev.id = nsim_dev_id++;
ns->dev.bus = &nsim_bus;
@@ -216,12 +195,6 @@ err_unreg_dev:
device_unregister(&ns->dev);
err_bpf_uninit:
nsim_bpf_uninit(ns);
-err_sdev_destroy:
- if (!--ns->sdev->refcnt) {
- debugfs_remove_recursive(ns->sdev->ddir);
-err_sdev_free:
- kfree(ns->sdev);
- }
err_debugfs_destroy:
debugfs_remove_recursive(ns->ddir);
return err;
@@ -235,10 +208,6 @@ static void nsim_uninit(struct net_device *dev)
nsim_devlink_teardown(ns);
debugfs_remove_recursive(ns->ddir);
nsim_bpf_uninit(ns);
- if (!--ns->sdev->refcnt) {
- debugfs_remove_recursive(ns->sdev->ddir);
- kfree(ns->sdev);
- }
}
static void nsim_free(struct net_device *dev)
@@ -247,6 +216,7 @@ static void nsim_free(struct net_device *dev)
device_unregister(&ns->dev);
/* netdev and vf state will be freed out of device_release() */
+ nsim_sdev_put(ns->sdev);
}
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -490,6 +460,7 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->netdev_ops = &nsim_netdev_ops;
+ dev->needs_free_netdev = true;
dev->priv_destructor = nsim_free;
dev->tx_queue_len = 0;
@@ -523,10 +494,11 @@ static int nsim_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct netdevsim *joinns = NULL;
+ int err;
if (tb[IFLA_LINK]) {
struct net_device *joindev;
- struct netdevsim *joinns;
joindev = __dev_get_by_index(src_net,
nla_get_u32(tb[IFLA_LINK]));
@@ -536,17 +508,20 @@ static int nsim_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
joinns = netdev_priv(joindev);
- if (!joinns->sdev || !joinns->sdev->refcnt)
- return -EINVAL;
- ns->sdev = joinns->sdev;
}
- return register_netdevice(dev);
-}
+ ns->sdev = nsim_sdev_get(joinns);
+ if (IS_ERR(ns->sdev))
+ return PTR_ERR(ns->sdev);
-static void nsim_dellink(struct net_device *dev, struct list_head *head)
-{
- unregister_netdevice_queue(dev, head);
+ err = register_netdevice(dev);
+ if (err)
+ goto err_sdev_put;
+ return 0;
+
+err_sdev_put:
+ nsim_sdev_put(ns->sdev);
+ return err;
}
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
@@ -555,7 +530,6 @@ static struct rtnl_link_ops nsim_link_ops __read_mostly = {
.setup = nsim_setup,
.validate = nsim_validate,
.newlink = nsim_newlink,
- .dellink = nsim_dellink,
};
static int __init nsim_module_init(void)
@@ -566,15 +540,13 @@ static int __init nsim_module_init(void)
if (IS_ERR_OR_NULL(nsim_ddir))
return -ENOMEM;
- nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL);
- if (IS_ERR_OR_NULL(nsim_sdev_ddir)) {
- err = -ENOMEM;
+ err = nsim_sdev_init();
+ if (err)
goto err_debugfs_destroy;
- }
err = bus_register(&nsim_bus);
if (err)
- goto err_sdir_destroy;
+ goto err_sdev_exit;
err = nsim_devlink_init();
if (err)
@@ -590,8 +562,8 @@ err_dl_fini:
nsim_devlink_exit();
err_unreg_bus:
bus_unregister(&nsim_bus);
-err_sdir_destroy:
- debugfs_remove_recursive(nsim_sdev_ddir);
+err_sdev_exit:
+ nsim_sdev_exit();
err_debugfs_destroy:
debugfs_remove_recursive(nsim_ddir);
return err;
@@ -602,7 +574,7 @@ static void __exit nsim_module_exit(void)
rtnl_link_unregister(&nsim_link_ops);
nsim_devlink_exit();
bus_unregister(&nsim_bus);
- debugfs_remove_recursive(nsim_sdev_ddir);
+ nsim_sdev_exit();
debugfs_remove_recursive(nsim_ddir);
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 384c254fafc5..2667f9b0e1f9 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -39,6 +39,9 @@ struct netdevsim_shared_dev {
struct bpf_offload_dev *bpf_dev;
+ bool bpf_bind_accept;
+ u32 bpf_bind_verifier_delay;
+
struct dentry *ddir_bpf_bound_progs;
u32 prog_id_gen;
@@ -46,6 +49,13 @@ struct netdevsim_shared_dev {
struct list_head bpf_bound_maps;
};
+struct netdevsim;
+
+struct netdevsim_shared_dev *nsim_sdev_get(struct netdevsim *joinns);
+void nsim_sdev_put(struct netdevsim_shared_dev *sdev);
+int nsim_sdev_init(void);
+void nsim_sdev_exit(void);
+
#define NSIM_IPSEC_MAX_SA_COUNT 33
#define NSIM_IPSEC_VALID BIT(31)
@@ -88,18 +98,13 @@ struct netdevsim {
struct xdp_attachment_info xdp;
struct xdp_attachment_info xdp_hw;
- bool bpf_bind_accept;
- u32 bpf_bind_verifier_delay;
-
bool bpf_tc_accept;
bool bpf_tc_non_bound_accept;
bool bpf_xdpdrv_accept;
bool bpf_xdpoffload_accept;
bool bpf_map_accept;
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
struct devlink *devlink;
-#endif
struct nsim_ipsec ipsec;
};
@@ -138,7 +143,6 @@ nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
}
#endif
-#if IS_ENABLED(CONFIG_NET_DEVLINK)
enum nsim_resource_id {
NSIM_RESOURCE_NONE, /* DEVLINK_RESOURCE_ID_PARENT_TOP */
NSIM_RESOURCE_IPV4,
@@ -160,25 +164,6 @@ void nsim_fib_exit(void);
u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val,
struct netlink_ext_ack *extack);
-#else
-static inline int nsim_devlink_setup(struct netdevsim *ns)
-{
- return 0;
-}
-
-static inline void nsim_devlink_teardown(struct netdevsim *ns)
-{
-}
-
-static inline int nsim_devlink_init(void)
-{
- return 0;
-}
-
-static inline void nsim_devlink_exit(void)
-{
-}
-#endif
#if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
void nsim_ipsec_init(struct netdevsim *ns);
diff --git a/drivers/net/netdevsim/sdev.c b/drivers/net/netdevsim/sdev.c
new file mode 100644
index 000000000000..6712da3340d6
--- /dev/null
+++ b/drivers/net/netdevsim/sdev.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "netdevsim.h"
+
+static struct dentry *nsim_sdev_ddir;
+
+static u32 nsim_sdev_id;
+
+struct netdevsim_shared_dev *nsim_sdev_get(struct netdevsim *joinns)
+{
+ struct netdevsim_shared_dev *sdev;
+ char sdev_ddir_name[11]; /* up to 10 digits of u32 + NUL */
+ int err;
+
+ if (joinns) {
+ if (WARN_ON(!joinns->sdev))
+ return ERR_PTR(-EINVAL);
+ sdev = joinns->sdev;
+ sdev->refcnt++;
+ return sdev;
+ }
+
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev)
+ return ERR_PTR(-ENOMEM);
+ sdev->refcnt = 1;
+ sdev->switch_id = nsim_sdev_id++;
+
+ sprintf(sdev_ddir_name, "%u", sdev->switch_id);
+ sdev->ddir = debugfs_create_dir(sdev_ddir_name, nsim_sdev_ddir);
+ if (IS_ERR_OR_NULL(sdev->ddir)) {
+ err = PTR_ERR_OR_ZERO(sdev->ddir) ?: -EINVAL;
+ goto err_sdev_free;
+ }
+
+ return sdev;
+
+err_sdev_free:
+ nsim_sdev_id--;
+ kfree(sdev);
+ return ERR_PTR(err);
+}
+
+void nsim_sdev_put(struct netdevsim_shared_dev *sdev)
+{
+ if (--sdev->refcnt)
+ return;
+ debugfs_remove_recursive(sdev->ddir);
+ kfree(sdev);
+}
+
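nsim_sdev_get()/nsim_sdev_put() above centralize the shared-device lifetime that nsim_init()/nsim_uninit() used to open-code: the first get allocates, joiners bump the refcount, and the last put frees. A standalone model of that get/put shape, without the debugfs parts (illustrative names only):

#include <assert.h>
#include <stdlib.h>

struct shared {
	int refcnt;
};

static struct shared *shared_get(struct shared *join)
{
	struct shared *s;

	if (join) {	/* joining an existing device: just take a reference */
		join->refcnt++;
		return join;
	}

	s = calloc(1, sizeof(*s));
	if (!s)
		return NULL;
	s->refcnt = 1;	/* the first user owns the initial reference */
	return s;
}

static void shared_put(struct shared *s)
{
	if (--s->refcnt)	/* other users remain */
		return;
	free(s);		/* last reference gone: tear down */
}

int main(void)
{
	struct shared *a = shared_get(NULL);
	struct shared *b = shared_get(a);	/* a second netdev joins */

	assert(a == b && a->refcnt == 2);
	shared_put(b);
	shared_put(a);	/* frees here */
	return 0;
}
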
+int nsim_sdev_init(void)
+{
+ nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL);
+ if (IS_ERR_OR_NULL(nsim_sdev_ddir))
+ return -ENOMEM;
+ return 0;
+}
+
+void nsim_sdev_exit(void)
+{
+ debugfs_remove_recursive(nsim_sdev_ddir);
+}
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index b7133c3f7437..eed4fe3d871f 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -598,8 +598,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQ1202),
.name = "Aquantia AQ1202",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
@@ -608,8 +606,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQ2104),
.name = "Aquantia AQ2104",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
@@ -618,8 +614,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR105),
.name = "Aquantia AQR105",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
@@ -628,8 +622,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
.name = "Aquantia AQR106",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
@@ -638,8 +630,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR107),
.name = "Aquantia AQR107",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.probe = aqr107_probe,
.config_init = aqr107_config_init,
.config_aneg = aqr_config_aneg,
@@ -658,8 +648,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQCS109),
.name = "Aquantia AQCS109",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.probe = aqr107_probe,
.config_init = aqcs109_config_init,
.config_aneg = aqr_config_aneg,
@@ -678,8 +666,6 @@ static struct phy_driver aqr_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR405),
.name = "Aquantia AQR405",
- .aneg_done = genphy_c45_aneg_done,
- .get_features = genphy_c45_pma_read_abilities,
.config_aneg = aqr_config_aneg,
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index fc06e8c9a64b..238a20e13d6a 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -482,7 +482,6 @@ static struct phy_driver mv3310_drivers[] = {
.phy_id = MARVELL_PHY_ID_88E2110,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "mv88x2110",
- .get_features = genphy_c45_pma_read_abilities,
.probe = mv3310_probe,
.suspend = mv3310_suspend,
.resume = mv3310_resume,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 05c3e87ffc2d..abe13dfe50ad 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -516,21 +516,10 @@ int gen10g_config_aneg(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
-{
- /* For now just lie and say it's 10G all the time */
- phydev->speed = SPEED_10000;
- phydev->duplex = DUPLEX_FULL;
-
- return genphy_c45_read_link(phydev);
-}
-
-struct phy_driver genphy_10g_driver = {
+struct phy_driver genphy_c45_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
- .name = "Generic 10G PHY",
+ .name = "Generic Clause 45 PHY",
.soft_reset = genphy_no_soft_reset,
- .features = PHY_10GBIT_FEATURES,
- .config_aneg = gen10g_config_aneg,
- .read_status = gen10g_read_status,
+ .read_status = genphy_c45_read_status,
};
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 5016cd5fd7c7..12ce671020a5 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -8,6 +8,11 @@
const char *phy_speed_to_str(int speed)
{
+ BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 67,
+ "Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
+ "If a speed or mode has been added please update phy_speed_to_str "
+ "and the PHY settings array.\n");
+
switch (speed) {
case SPEED_10:
return "10Mbps";
@@ -35,6 +40,8 @@ const char *phy_speed_to_str(int speed)
return "56Gbps";
case SPEED_100000:
return "100Gbps";
+ case SPEED_200000:
+ return "200Gbps";
case SPEED_UNKNOWN:
return "Unknown";
default:
@@ -58,222 +65,81 @@ EXPORT_SYMBOL_GPL(phy_duplex_to_str);
/* A mapping of all SUPPORTED settings to speed/duplex. This table
* must be grouped by speed and sorted in descending match priority
* - iow, descending speed. */
+
+#define PHY_SETTING(s, d, b) { .speed = SPEED_ ## s, .duplex = DUPLEX_ ## d, \
+ .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT}
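The PHY_SETTING() macro above relies on token pasting to build the SPEED_*/DUPLEX_* constants and the ETHTOOL_LINK_MODE_*_BIT name, so each row of the table that follows shrinks to one line. A self-contained demo of the expansion, with stand-in values for the kernel constants (the real definitions live in uapi ethtool.h):

#include <stdio.h>

/* Stand-in definitions so the expansion can be compiled outside the
 * kernel; the real values come from <uapi/linux/ethtool.h>.
 */
#define SPEED_10000				10000
#define DUPLEX_FULL				1
#define ETHTOOL_LINK_MODE_10000baseT_Full_BIT	12

struct phy_setting {
	unsigned int speed;
	unsigned int duplex;
	unsigned int bit;
};

/* Copied verbatim from the patch above. */
#define PHY_SETTING(s, d, b) { .speed = SPEED_ ## s, .duplex = DUPLEX_ ## d, \
			       .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT}

int main(void)
{
	static const struct phy_setting set = PHY_SETTING(10000, FULL, 10000baseT_Full);

	/* Expands to { .speed = SPEED_10000, .duplex = DUPLEX_FULL,
	 *              .bit = ETHTOOL_LINK_MODE_10000baseT_Full_BIT }
	 */
	printf("speed=%u duplex=%u bit=%u\n", set.speed, set.duplex, set.bit);
	return 0;
}
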
+
static const struct phy_setting settings[] = {
+ /* 200G */
+ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseKR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseLR4_ER4_FR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseDR4_Full ),
+ PHY_SETTING( 200000, FULL, 200000baseSR4_Full ),
/* 100G */
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
- },
- {
- .speed = SPEED_100000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 100000, FULL, 100000baseCR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseKR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseLR4_ER4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseSR4_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseCR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseKR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseLR2_ER2_FR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseDR2_Full ),
+ PHY_SETTING( 100000, FULL, 100000baseSR2_Full ),
/* 56G */
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
- },
- {
- .speed = SPEED_56000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 56000, FULL, 56000baseCR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseKR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseLR4_Full ),
+ PHY_SETTING( 56000, FULL, 56000baseSR4_Full ),
/* 50G */
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
- },
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
- },
- {
- .speed = SPEED_50000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
- },
+ PHY_SETTING( 50000, FULL, 50000baseCR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseKR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseSR2_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseCR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseKR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseLR_ER_FR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseDR_Full ),
+ PHY_SETTING( 50000, FULL, 50000baseSR_Full ),
/* 40G */
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
- },
- {
- .speed = SPEED_40000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
- },
+ PHY_SETTING( 40000, FULL, 40000baseCR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseKR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseLR4_Full ),
+ PHY_SETTING( 40000, FULL, 40000baseSR4_Full ),
/* 25G */
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
- },
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
- },
- {
- .speed = SPEED_25000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
- },
-
+ PHY_SETTING( 25000, FULL, 25000baseCR_Full ),
+ PHY_SETTING( 25000, FULL, 25000baseKR_Full ),
+ PHY_SETTING( 25000, FULL, 25000baseSR_Full ),
/* 20G */
- {
- .speed = SPEED_20000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
- },
- {
- .speed = SPEED_20000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT,
- },
+ PHY_SETTING( 20000, FULL, 20000baseKR2_Full ),
+ PHY_SETTING( 20000, FULL, 20000baseMLD2_Full ),
/* 10G */
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
- },
+ PHY_SETTING( 10000, FULL, 10000baseCR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseER_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseKR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseKX4_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseLR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseLRM_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseR_FEC ),
+ PHY_SETTING( 10000, FULL, 10000baseSR_Full ),
+ PHY_SETTING( 10000, FULL, 10000baseT_Full ),
/* 5G */
- {
- .speed = SPEED_5000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
- },
-
+ PHY_SETTING( 5000, FULL, 5000baseT_Full ),
/* 2.5G */
- {
- .speed = SPEED_2500,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
- },
- {
- .speed = SPEED_2500,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
- },
+ PHY_SETTING( 2500, FULL, 2500baseT_Full ),
+ PHY_SETTING( 2500, FULL, 2500baseX_Full ),
/* 1G */
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
- },
+ PHY_SETTING( 1000, FULL, 1000baseKX_Full ),
+ PHY_SETTING( 1000, FULL, 1000baseT_Full ),
+ PHY_SETTING( 1000, HALF, 1000baseT_Half ),
+ PHY_SETTING( 1000, FULL, 1000baseX_Full ),
/* 100M */
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
- },
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_100baseT_Half_BIT,
- },
+ PHY_SETTING( 100, FULL, 100baseT_Full ),
+ PHY_SETTING( 100, HALF, 100baseT_Half ),
/* 10M */
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_FULL,
- .bit = ETHTOOL_LINK_MODE_10baseT_Full_BIT,
- },
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_HALF,
- .bit = ETHTOOL_LINK_MODE_10baseT_Half_BIT,
- },
+ PHY_SETTING( 10, FULL, 10baseT_Full ),
+ PHY_SETTING( 10, HALF, 10baseT_Half ),
};
+#undef PHY_SETTING
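Each PHY_SETTING() line token-pastes its three arguments into the same designated initializer the table used before the conversion; for example:

	/* PHY_SETTING( 10000, FULL, 10000baseT_Full ) expands to: */
	{
		.speed  = SPEED_10000,
		.duplex = DUPLEX_FULL,
		.bit    = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
	},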
/**
* phy_lookup_setting - lookup a PHY setting
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index f7a6d0ffb1ac..48adb3d1f1ee 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -225,7 +225,7 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev)
}
static struct phy_driver genphy_driver;
-extern struct phy_driver genphy_10g_driver;
+extern struct phy_driver genphy_c45_driver;
static LIST_HEAD(phy_fixup_list);
static DEFINE_MUTEX(phy_fixup_lock);
@@ -1174,7 +1174,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
*/
if (!d->driver) {
if (phydev->is_c45)
- d->driver = &genphy_10g_driver.mdiodrv.driver;
+ d->driver = &genphy_c45_driver.mdiodrv.driver;
else
d->driver = &genphy_driver.mdiodrv.driver;
@@ -1335,7 +1335,7 @@ EXPORT_SYMBOL_GPL(phy_driver_is_genphy);
bool phy_driver_is_genphy_10g(struct phy_device *phydev)
{
return phy_driver_is_genphy_kind(phydev,
- &genphy_10g_driver.mdiodrv.driver);
+ &genphy_c45_driver.mdiodrv.driver);
}
EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
@@ -1710,19 +1710,17 @@ int genphy_update_link(struct phy_device *phydev)
*/
if (!phy_polling_mode(phydev)) {
status = phy_read(phydev, MII_BMSR);
- if (status < 0) {
+ if (status < 0)
return status;
- } else if (status & BMSR_LSTATUS) {
- phydev->link = 1;
- return 0;
- }
+ else if (status & BMSR_LSTATUS)
+ goto done;
}
/* Read link and autonegotiation status */
status = phy_read(phydev, MII_BMSR);
if (status < 0)
return status;
-
+done:
phydev->link = status & BMSR_LSTATUS ? 1 : 0;
phydev->autoneg_complete = status & BMSR_ANEGCOMPLETE ? 1 : 0;
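Pieced together from the hunk, the helper now reads as below (a sketch; the prologue and trailing return are assumed from the unchanged context). Note the small behavioral change: the early link-up exit now also updates autoneg_complete from the first BMSR read instead of returning immediately.

	int genphy_update_link(struct phy_device *phydev)
	{
		int status;

		if (!phy_polling_mode(phydev)) {
			status = phy_read(phydev, MII_BMSR);
			if (status < 0)
				return status;
			else if (status & BMSR_LSTATUS)
				goto done;	/* link already up, reuse this read */
		}

		/* Read link and autonegotiation status */
		status = phy_read(phydev, MII_BMSR);
		if (status < 0)
			return status;
	done:
		phydev->link = status & BMSR_LSTATUS ? 1 : 0;
		phydev->autoneg_complete = status & BMSR_ANEGCOMPLETE ? 1 : 0;

		return 0;
	}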
@@ -1756,10 +1754,7 @@ int genphy_read_status(struct phy_device *phydev)
linkmode_zero(phydev->lp_advertising);
if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- phydev->supported) ||
- linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- phydev->supported)) {
+ if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
if (lpagb < 0)
return lpagb;
@@ -2154,6 +2149,13 @@ static int phy_probe(struct device *dev)
if (err)
goto out;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+ phydev->supported))
+ phydev->is_gigabit_capable = 1;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ phydev->supported))
+ phydev->is_gigabit_capable = 1;
+
of_set_phy_supported(phydev);
linkmode_copy(phydev->advertising, phydev->supported);
@@ -2304,14 +2306,14 @@ static int __init phy_init(void)
features_init();
- rc = phy_driver_register(&genphy_10g_driver, THIS_MODULE);
+ rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
if (rc)
- goto err_10g;
+ goto err_c45;
rc = phy_driver_register(&genphy_driver, THIS_MODULE);
if (rc) {
- phy_driver_unregister(&genphy_10g_driver);
-err_10g:
+ phy_driver_unregister(&genphy_c45_driver);
+err_c45:
mdio_bus_exit();
}
@@ -2320,7 +2322,7 @@ err_10g:
static void __exit phy_exit(void)
{
- phy_driver_unregister(&genphy_10g_driver);
+ phy_driver_unregister(&genphy_c45_driver);
phy_driver_unregister(&genphy_driver);
mdio_bus_exit();
}
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9195f3476b1d..18c4e5d17b05 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -63,6 +63,7 @@ enum qmi_wwan_flags {
enum qmi_wwan_quirks {
QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */
+ QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */
};
struct qmimux_hdr {
@@ -845,6 +846,16 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = {
.data = QMI_WWAN_QUIRK_DTR,
};
+static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = {
+ .description = "WWAN/QMI device",
+ .flags = FLAG_WWAN | FLAG_SEND_ZLP,
+ .bind = qmi_wwan_bind,
+ .unbind = qmi_wwan_unbind,
+ .manage_power = qmi_wwan_manage_power,
+ .rx_fixup = qmi_wwan_rx_fixup,
+ .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG,
+};
+
#define HUAWEI_VENDOR_ID 0x12D1
/* map QMI/wwan function by a fixed interface number */
@@ -865,6 +876,15 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = {
#define QMI_GOBI_DEVICE(vend, prod) \
QMI_FIXED_INTF(vend, prod, 0)
+/* Quectel does not use fixed interface numbers on at least some of their
+ * devices. We need to check the number of endpoints to ensure that we bind to
+ * the correct interface.
+ */
+#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \
+ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \
+ USB_SUBCLASS_VENDOR_SPEC, 0xff), \
+ .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg
+
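Each device entry built with this macro expands to a class/subclass/protocol match carrying the new quirk info; for example:

	/* {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)} expands to: */
	{
		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306, USB_CLASS_VENDOR_SPEC,
					      USB_SUBCLASS_VENDOR_SPEC, 0xff),
		.driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg,
	},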
static const struct usb_device_id products[] = {
/* 1. CDC ECM like devices match on the control interface */
{ /* Huawei E392, E398 and possibly others sharing both device id and more... */
@@ -969,20 +989,9 @@ static const struct usb_device_id products[] = {
USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
.driver_info = (unsigned long)&qmi_wwan_info,
},
- { /* Quectel EP06/EG06/EM06 */
- USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306,
- USB_CLASS_VENDOR_SPEC,
- USB_SUBCLASS_VENDOR_SPEC,
- 0xff),
- .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr,
- },
- { /* Quectel EG12/EM12 */
- USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0512,
- USB_CLASS_VENDOR_SPEC,
- USB_SUBCLASS_VENDOR_SPEC,
- 0xff),
- .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr,
- },
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */
+ {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */
/* 3. Combined interface devices matching on interface number */
{QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
@@ -1271,7 +1280,6 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
- {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
@@ -1351,27 +1359,12 @@ static bool quectel_ec20_detected(struct usb_interface *intf)
return false;
}
-static bool quectel_diag_detected(struct usb_interface *intf)
-{
- struct usb_device *dev = interface_to_usbdev(intf);
- struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc;
- u16 id_vendor = le16_to_cpu(dev->descriptor.idVendor);
- u16 id_product = le16_to_cpu(dev->descriptor.idProduct);
-
- if (id_vendor != 0x2c7c || intf_desc.bNumEndpoints != 2)
- return false;
-
- if (id_product == 0x0306 || id_product == 0x0512)
- return true;
- else
- return false;
-}
-
static int qmi_wwan_probe(struct usb_interface *intf,
const struct usb_device_id *prod)
{
struct usb_device_id *id = (struct usb_device_id *)prod;
struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
+ const struct driver_info *info;
/* Workaround to enable dynamic IDs. This disables usbnet
* blacklisting functionality. Which, if required, can be
@@ -1405,10 +1398,14 @@ static int qmi_wwan_probe(struct usb_interface *intf,
* we need to match on class/subclass/protocol. These values are
* identical for the diagnostic- and QMI-interface, but bNumEndpoints is
* different. Ignore the current interface if the number of endpoints
- * the number for the diag interface (two).
+ * equals the number for the diag interface (two).
*/
- if (quectel_diag_detected(intf))
- return -ENODEV;
+ info = (void *)&id->driver_info;
+
+ if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
+ if (desc->bNumEndpoints == 2)
+ return -ENODEV;
+ }
return usbnet_probe(intf, id);
}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 569e87a51a33..09a1433b0833 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -162,18 +162,6 @@ static void veth_get_ethtool_stats(struct net_device *dev,
}
}
-static int veth_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *info)
-{
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
-
- return 0;
-}
-
static const struct ethtool_ops veth_ethtool_ops = {
.get_drvinfo = veth_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -181,7 +169,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_sset_count = veth_get_sset_count,
.get_ethtool_stats = veth_get_ethtool_stats,
.get_link_ksettings = veth_get_link_ksettings,
- .get_ts_info = veth_get_ts_info,
+ .get_ts_info = ethtool_op_get_ts_info,
};
/* general routines */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 6d1a1abbed27..ce711e5f8c5d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -370,7 +370,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
@@ -549,7 +549,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
- u32 nexthop;
+ bool is_v6gw = false;
int ret = -EINVAL;
nf_reset(skb);
@@ -572,13 +572,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
rcu_read_lock_bh();
- nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
- neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ /* if crossing protocols, can not use the cached header */
+ ret = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return ret;
}
@@ -1275,8 +1273,12 @@ static void vrf_setup(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_NO_RX_HANDLER;
- dev->min_mtu = 0;
- dev->max_mtu = 0;
+ /* VRF devices do not care about MTU, but if the MTU is set
+	 * too low then the ipv4 and ipv6 protocols are disabled,
+	 * which breaks networking.
+ */
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 936c0b3e0ba2..05847eb91a1b 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -248,6 +248,22 @@ struct xenvif_hash {
struct xenvif_hash_cache cache;
};
+struct backend_info {
+ struct xenbus_device *dev;
+ struct xenvif *vif;
+
+ /* This is the state that will be reflected in xenstore when any
+ * active hotplug script completes.
+ */
+ enum xenbus_state state;
+
+ enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ u8 have_hotplug_status_watch:1;
+
+ const char *hotplug_script;
+};
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -283,6 +299,8 @@ struct xenvif {
struct xenbus_watch credit_watch;
struct xenbus_watch mcast_ctrl_watch;
+ struct backend_info *be;
+
spinlock_t lock;
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 330ddb64930f..41c9e8f2e520 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -22,22 +22,6 @@
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
-struct backend_info {
- struct xenbus_device *dev;
- struct xenvif *vif;
-
- /* This is the state that will be reflected in xenstore when any
- * active hotplug script completes.
- */
- enum xenbus_state state;
-
- enum xenbus_state frontend_state;
- struct xenbus_watch hotplug_status_watch;
- u8 have_hotplug_status_watch:1;
-
- const char *hotplug_script;
-};
-
static int connect_data_rings(struct backend_info *be,
struct xenvif_queue *queue);
static void connect(struct backend_info *be);
@@ -472,6 +456,7 @@ static int backend_create_xenvif(struct backend_info *be)
return err;
}
be->vif = vif;
+ vif->be = be;
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
return 0;
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 1be571c20062..6bad04cbb1d3 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -157,8 +157,12 @@
#define DBG_IRT(x...)
#endif
+#ifdef CONFIG_64BIT
+#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa))
+#else
#define COMPARE_IRTE_ADDR(irte, hpa) \
- ((irte)->dest_iosapic_addr == F_EXTEND(hpa))
+ ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL))
+#endif
#define IOSAPIC_REG_SELECT 0x00
#define IOSAPIC_REG_WINDOW 0x10
diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c
index 91751617b37a..c53a2185a039 100644
--- a/drivers/reset/reset-meson-audio-arb.c
+++ b/drivers/reset/reset-meson-audio-arb.c
@@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev)
arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits);
arb->rstc.ops = &meson_audio_arb_rstc_ops;
arb->rstc.of_node = dev->of_node;
+ arb->rstc.owner = THIS_MODULE;
/*
* Enable general :
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index a71734c41693..f933c06bff4f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -667,9 +667,9 @@ config RTC_DRV_S5M
will be called rtc-s5m.
config RTC_DRV_SD3078
- tristate "ZXW Crystal SD3078"
+ tristate "ZXW Shenzhen whwave SD3078"
help
- If you say yes here you get support for the ZXW Crystal
+ If you say yes here you get support for the ZXW Shenzhen whwave
SD3078 RTC chips.
This driver can also be built as a module. If so, the module
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index e5444296075e..4d6bf9304ceb 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev)
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- enable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return enable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
@@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev)
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- disable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return disable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index b4e054c64bad..69b54e5556c0 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev)
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
rtc->rtc_sync = false;
+ /*
+ * TODO: some models have alarms on a minute boundary but still support
+ * real hardware interrupts. Add this once the core supports it.
+ */
+ if (config->rtc_data_start != RTC_SEC)
+ rtc->rtc_dev->uie_unsupported = 1;
+
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL,
da9063_alarm_event,
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index d417b203cbc5..1d3de2a3d1a4 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
{
unsigned int byte;
- int value = 0xff; /* return 0xff for ignored values */
+ int value = -1; /* return -1 for ignored values */
byte = readb(rtc->regbase + reg_off);
if (byte & AR_ENB) {
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index c98f264f1d83..a497b2c0cb79 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3878,10 +3878,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
* wake up the thread.
*/
spin_lock(&lpfc_cmd->buf_lock);
- if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) {
- lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
- if (lpfc_cmd->waitq)
- wake_up(lpfc_cmd->waitq);
+ lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
+ if (lpfc_cmd->waitq) {
+ wake_up(lpfc_cmd->waitq);
lpfc_cmd->waitq = NULL;
}
spin_unlock(&lpfc_cmd->buf_lock);
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index e74a62448ba4..e5db9a9954dc 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1392,10 +1392,8 @@ static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi)
static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi)
{
- struct qedi_nvm_iscsi_image nvm_image;
-
qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev,
- sizeof(nvm_image),
+ sizeof(struct qedi_nvm_iscsi_image),
&qedi->nvm_buf_dma, GFP_KERNEL);
if (!qedi->iscsi_image) {
QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n");
@@ -2236,14 +2234,13 @@ static void qedi_boot_release(void *data)
static int qedi_get_boot_info(struct qedi_ctx *qedi)
{
int ret = 1;
- struct qedi_nvm_iscsi_image nvm_image;
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Get NVM iSCSI CFG image\n");
ret = qedi_ops->common->nvm_get_image(qedi->cdev,
QED_NVM_IMAGE_ISCSI_CFG,
(char *)qedi->iscsi_image,
- sizeof(nvm_image));
+ sizeof(struct qedi_nvm_iscsi_image));
if (ret)
QEDI_ERR(&qedi->dbg_ctx,
"Could not get NVM image. ret = %d\n", ret);
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index c4cbfd07b916..a08ff3bd6310 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -238,6 +238,7 @@ static struct {
{"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
{"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */
diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c
index 5a58cbf3a75d..c14006ac98f9 100644
--- a/drivers/scsi/scsi_dh.c
+++ b/drivers/scsi/scsi_dh.c
@@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = {
{"NETAPP", "INF-01-00", "rdac", },
{"LSI", "INF-01-00", "rdac", },
{"ENGENIO", "INF-01-00", "rdac", },
+ {"LENOVO", "DE_Series", "rdac", },
{NULL, NULL, NULL },
};
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 84380bae20f1..8472de1007ff 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -385,7 +385,7 @@ enum storvsc_request_type {
* This is the end of Protocol specific defines.
*/
-static int storvsc_ringbuffer_size = (256 * PAGE_SIZE);
+static int storvsc_ringbuffer_size = (128 * 1024);
static u32 max_outstanding_req_per_channel;
static int storvsc_vcpus_per_sub_channel = 4;
@@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
{
struct device *dev = &device->device;
struct storvsc_device *stor_device;
- int num_cpus = num_online_cpus();
int num_sc;
struct storvsc_cmd_request *request;
struct vstor_packet *vstor_packet;
int ret, t;
- num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns);
+ /*
+ * If the number of CPUs is artificially restricted, such as
+ * with maxcpus=1 on the kernel boot line, Hyper-V could offer
+ * sub-channels >= the number of CPUs. These sub-channels
+ * should not be created. The primary channel is already created
+ * and assigned to one CPU, so check against # CPUs - 1.
+ */
+ num_sc = min((int)(num_online_cpus() - 1), max_chns);
+ if (!num_sc)
+ return;
+
stor_device = get_out_stor_device(device);
if (!stor_device)
return;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a25659b5a5d1..3fa20e95a6bb 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1661,11 +1661,11 @@ static void __init vfio_pci_fill_ids(void)
rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
subvendor, subdevice, class, class_mask, 0);
if (rc)
- pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
+ pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
vendor, device, subvendor, subdevice,
class, class_mask, rc);
else
- pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
+ pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
vendor, device, subvendor, subdevice,
class, class_mask);
}
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 8dbb270998f4..6b64e45a5269 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1398,7 +1398,7 @@ unlock_exit:
mutex_unlock(&container->lock);
}
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
.name = "iommu-vfio-powerpc",
.owner = THIS_MODULE,
.open = tce_iommu_open,
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 73652e21efec..d0f731c9920a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -58,12 +58,18 @@ module_param_named(disable_hugepages,
MODULE_PARM_DESC(disable_hugepages,
"Disable VFIO IOMMU support for IOMMU hugepages.");
+static unsigned int dma_entry_limit __read_mostly = U16_MAX;
+module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
+MODULE_PARM_DESC(dma_entry_limit,
+		 "Maximum number of user DMA mappings per container (default 65535).");
+
struct vfio_iommu {
struct list_head domain_list;
struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock;
struct rb_root dma_list;
struct blocking_notifier_head notifier;
+ unsigned int dma_avail;
bool v2;
bool nesting;
};
@@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
kfree(dma);
+ iommu->dma_avail++;
}
static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
@@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
goto out_unlock;
}
+ if (!iommu->dma_avail) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
ret = -ENOMEM;
goto out_unlock;
}
+ iommu->dma_avail--;
dma->iova = iova;
dma->vaddr = vaddr;
dma->prot = prot;
@@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
INIT_LIST_HEAD(&iommu->domain_list);
iommu->dma_list = RB_ROOT;
+ iommu->dma_avail = dma_entry_limit;
mutex_init(&iommu->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
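The mapping cap is a plain credit counter under the iommu lock: each container starts with dma_entry_limit credits, vfio_dma_do_map() consumes one (or fails with -ENOSPC), and vfio_remove_dma() returns it. Since the parameter is mode 0644, it can also be raised at runtime via /sys/module/vfio_iommu_type1/parameters/dma_entry_limit. A condensed sketch of the pattern (helper names hypothetical):

	static int charge_dma_entry(struct vfio_iommu *iommu)
	{
		if (!iommu->dma_avail)		/* credits exhausted */
			return -ENOSPC;
		iommu->dma_avail--;		/* done in vfio_dma_do_map() */
		return 0;
	}

	static void uncharge_dma_entry(struct vfio_iommu *iommu)
	{
		iommu->dma_avail++;		/* done in vfio_remove_dma() */
	}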
diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
index de01a6d0059d..a1c61e351d3f 100644
--- a/drivers/xen/privcmd-buf.c
+++ b/drivers/xen/privcmd-buf.c
@@ -140,8 +140,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
- GFP_KERNEL);
+ vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL);
if (!vma_priv)
return -ENOMEM;
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index c3e201025ef0..0782ff3c2273 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
if (xen_store_evtchn == 0)
return -ENOENT;
- nonseekable_open(inode, filp);
-
- filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */
+ stream_open(inode, filp);
u = kzalloc(sizeof(*u), GFP_KERNEL);
if (u == NULL)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec32fece5e1e..9285dd4f4b1c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -755,11 +755,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
umode_t mode, dev_t dev)
{
struct inode *inode;
- struct resv_map *resv_map;
+ struct resv_map *resv_map = NULL;
- resv_map = resv_map_alloc();
- if (!resv_map)
- return NULL;
+ /*
+ * Reserve maps are only needed for inodes that can have associated
+ * page allocations.
+ */
+ if (S_ISREG(mode) || S_ISLNK(mode)) {
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+ return NULL;
+ }
inode = new_inode(sb);
if (inode) {
@@ -794,8 +800,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
- } else
- kref_put(&resv_map->refs, resv_map_release);
+ } else {
+ if (resv_map)
+ kref_put(&resv_map->refs, resv_map_release);
+ }
return inode;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bbdbd56cf2ac..07d6ef195d05 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2215,6 +2215,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
fput(ctx->user_files[i]);
kfree(ctx->user_files);
+ ctx->user_files = NULL;
ctx->nr_user_files = 0;
return ret;
}
diff --git a/fs/open.c b/fs/open.c
index f1c2f855fd43..a00350018a47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and have no notion of position
+ * (file.f_pos is always 0). In contrast to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it can be
+ * directly used as file_operations.open.
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode |= FMODE_STREAM;
+ return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
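A driver opting into stream semantics can point .open at the helper directly, since the signature matches (a minimal sketch; the mydrv_* names are hypothetical):

	static ssize_t mydrv_read(struct file *, char __user *, size_t, loff_t *);
	static ssize_t mydrv_write(struct file *, const char __user *, size_t, loff_t *);

	static const struct file_operations mydrv_fops = {
		.owner = THIS_MODULE,
		.open  = stream_open,	/* usable directly, as noted above */
		.read  = mydrv_read,	/* may run concurrently with mydrv_write() */
		.write = mydrv_write,
	};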
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ddef482f1334..6a803a0b75df 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- long nr;
- unsigned long args[6], sp, pc;
+ struct syscall_info info;
+ u64 *args = &info.data.args[0];
int res;
res = lock_trace(task);
if (res)
return res;
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+ if (task_current_syscall(task, &info))
seq_puts(m, "running\n");
- else if (nr < 0)
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ else if (info.data.nr < 0)
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
+ info.data.nr, info.sp, info.data.instruction_pointer);
else
seq_printf(m,
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
- nr,
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ info.data.nr,
args[0], args[1], args[2], args[3], args[4], args[5],
- sp, pc);
+ info.sp, info.data.instruction_pointer);
unlock_trace(task);
return 0;
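The six ad-hoc out-parameters collapse into one struct; callers now pull everything from struct syscall_info (a sketch of the mapping, mirroring the code above):

	struct syscall_info info;

	if (!task_current_syscall(task, &info)) {	/* 0: stopped in a syscall */
		int nr = info.data.nr;			/* was *callno */
		u64 sp = info.sp;			/* was *sp */
		u64 pc = info.data.instruction_pointer;	/* was *pc */
		u64 a0 = info.data.args[0];		/* was args[0] */
	}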
diff --git a/fs/read_write.c b/fs/read_write.c
index 177ccc3d405a..61b43ad7608e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
static inline loff_t file_pos_read(struct file *file)
{
- return file->f_pos;
+ return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
}
static inline void file_pos_write(struct file *file, loff_t pos)
{
- file->f_pos = pos;
+ if ((file->f_mode & FMODE_STREAM) == 0)
+ file->f_pos = pos;
}
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 0c938a4354f6..b88239e9efe4 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -105,41 +105,30 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call. First argument is stored in
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args);
+ unsigned long *args);
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args);
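With @i and @n gone, a typical arch implementation shrinks to a single copy of all six registers (illustrative only; the register layout is hypothetical):

	static inline void syscall_get_arguments(struct task_struct *task,
						 struct pt_regs *regs,
						 unsigned long *args)
	{
		memcpy(args, &regs->regs[0], 6 * sizeof(args[0]));
	}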
/**
diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
index 8063e8314eef..6d487c5eba2c 100644
--- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
+++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
@@ -51,7 +51,10 @@
#define RESET_SD_EMMC_A 44
#define RESET_SD_EMMC_B 45
#define RESET_SD_EMMC_C 46
-/* 47-60 */
+/* 47 */
+#define RESET_USB_PHY20 48
+#define RESET_USB_PHY21 49
+/* 50-60 */
#define RESET_AUDIO_CODEC 61
/* 62-63 */
/* RESET2 */
diff --git a/include/keys/trusted.h b/include/keys/trusted.h
index adbcb6817826..0071298b9b28 100644
--- a/include/keys/trusted.h
+++ b/include/keys/trusted.h
@@ -38,7 +38,7 @@ enum {
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...);
+ unsigned char *h2, unsigned int h3, ...);
int TSS_checkhmac1(unsigned char *buffer,
const uint32_t command,
const unsigned char *ononce,
diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h
index 50fb0dee23e8..d35b8ec1c485 100644
--- a/include/linux/bitrev.h
+++ b/include/linux/bitrev.h
@@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x)
#define __constant_bitrev32(x) \
({ \
- u32 __x = x; \
- __x = (__x >> 16) | (__x << 16); \
- __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = (___x >> 16) | (___x << 16); \
+ ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev16(x) \
({ \
- u16 __x = x; \
- __x = (__x >> 8) | (__x << 8); \
- __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \
- __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \
- __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \
- __x; \
+ u16 ___x = x; \
+ ___x = (___x >> 8) | (___x << 8); \
+ ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \
+ ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \
+ ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \
+ ___x; \
})
#define __constant_bitrev8x4(x) \
({ \
- u32 __x = x; \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev8(x) \
({ \
- u8 __x = x; \
- __x = (__x >> 4) | (__x << 4); \
- __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \
- __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \
- __x; \
+ u8 ___x = x; \
+ ___x = (___x >> 4) | (___x << 4); \
+ ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \
+ ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \
+ ___x; \
})
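The extra underscore avoids a shadowing hazard in these statement expressions; an illustration (not from the kernel) of what the old name allowed:

	u32 __x = 0x12345678;
	u32 y = __constant_bitrev32(__x);
	/* Old macro: the expansion began with "u32 __x = __x;", a
	 * self-initialization from an indeterminate value, so y was
	 * garbage. With ___x as the internal name, the caller's __x
	 * is read before the internal variable exists.
	 */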
#define bitrev32(x) \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f62897198844..e4d4c1771ab0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -57,6 +57,12 @@ struct bpf_map_ops {
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
+
+ /* Direct value access helpers. */
+ int (*map_direct_value_addr)(const struct bpf_map *map,
+ u64 *imm, u32 off);
+ int (*map_direct_value_meta)(const struct bpf_map *map,
+ u64 imm, u32 *off);
};
struct bpf_map {
@@ -81,7 +87,8 @@ struct bpf_map {
struct btf *btf;
u32 pages;
bool unpriv_array;
- /* 51 bytes hole */
+ bool frozen; /* write-once */
+ /* 48 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
@@ -421,8 +428,38 @@ struct bpf_array {
};
};
+#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32
+#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
+ BPF_F_RDONLY_PROG | \
+ BPF_F_WRONLY | \
+ BPF_F_WRONLY_PROG)
+
+#define BPF_MAP_CAN_READ BIT(0)
+#define BPF_MAP_CAN_WRITE BIT(1)
+
+static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
+{
+ u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+
+ /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
+ * not possible.
+ */
+ if (access_flags & BPF_F_RDONLY_PROG)
+ return BPF_MAP_CAN_READ;
+ else if (access_flags & BPF_F_WRONLY_PROG)
+ return BPF_MAP_CAN_WRITE;
+ else
+ return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
+}
+
+static inline bool bpf_map_flags_access_ok(u32 access_flags)
+{
+ return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
+ (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
+}
+
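Typical call sites would look like this (context hypothetical): reject the contradictory flag pair at map creation, then gate program access on the derived capability:

	if (!bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;	/* both BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG set */

	if (!(bpf_map_flags_to_cap(map) & BPF_MAP_CAN_WRITE))
		return -EACCES;	/* program-side writes to this map are denied */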
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
@@ -446,14 +483,6 @@ typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
-int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
- const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
-
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
@@ -644,6 +673,13 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
int array_map_alloc_check(union bpf_attr *attr);
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -755,6 +791,27 @@ static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
{
return ERR_PTR(-EOPNOTSUPP);
}
+
+static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
+
+static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP;
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7d8228d1c898..b3ab61fe1932 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -207,6 +207,7 @@ struct bpf_verifier_state {
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
+ int miss_cnt, hit_cnt;
};
/* Possible states for alu_state member. */
@@ -223,6 +224,10 @@ struct bpf_insn_aux_data {
unsigned long map_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
u32 alu_limit; /* limit for add/sub register with pointer */
+ struct {
+ u32 map_index; /* index into used_maps[] */
+ u32 map_off; /* offset from value base address */
+ };
};
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
@@ -248,6 +253,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
return log->len_used >= log->len_total - 1;
}
+#define BPF_LOG_LEVEL1 1
+#define BPF_LOG_LEVEL2 2
+#define BPF_LOG_STATS 4
+#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
+#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return log->level && log->ubuf && !bpf_verifier_log_full(log);
@@ -274,6 +285,7 @@ struct bpf_verifier_env {
bool strict_alignment; /* perform strict pointer alignment checks */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+ struct bpf_verifier_state_list *free_list;
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
@@ -284,6 +296,21 @@ struct bpf_verifier_env {
struct bpf_verifier_log log;
struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
u32 subprog_cnt;
+ /* number of instructions analyzed by the verifier */
+ u32 insn_processed;
+ /* total verification time */
+ u64 verification_time;
+ /* maximum number of verifier states kept in 'branching' instructions */
+ u32 max_states_per_insn;
+ /* total number of allocated verifier states */
+ u32 total_states;
+ /* some states are freed during program analysis.
+ * this is peak number of states. this number dominates kernel
+ * memory consumption during verification
+ */
+ u32 peak_states;
+ /* longest register parentage chain walked for liveness marking */
+ u32 longest_mark_read_walk;
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 455d31b55828..64cdf2a23d42 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -51,6 +51,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
+bool btf_type_is_void(const struct btf_type *t);
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b42df09b04c..dd28e7679089 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_OPENED ((__force fmode_t)0x80000)
#define FMODE_CREATED ((__force fmode_t)0x100000)
+/* File is stream-like */
+#define FMODE_STREAM ((__force fmode_t)0x200000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
@@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
+extern int stream_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f3d880b7ca1..dbb6118370c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page);
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 612c8c2f2466..769326ea1d9b 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -170,7 +170,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
doorbell[1] = cpu_to_be32(cq->cqn);
- mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
+ mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL);
}
static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f93a5598b942..db7dca75d726 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -361,6 +361,7 @@ enum {
enum {
MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
+ MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
};
enum {
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
index 0787de28f2fc..5c267707e1df 100644
--- a/include/linux/mlx5/doorbell.h
+++ b/include/linux/mlx5/doorbell.h
@@ -36,46 +36,25 @@
#define MLX5_BF_OFFSET 0x800
#define MLX5_CQ_DOORBELL 0x20
-#if BITS_PER_LONG == 64
/* Assume that we can just write a 64-bit doorbell atomically. s390
* actually doesn't have writeq() but S/390 systems don't even have
* PCI so we won't worry about it.
+ *
+ * Note that the write is not atomic on 32-bit systems! In contrast to 64-bit
+ * ones, it requires proper locking. mlx5_write64 doesn't do any locking, so on
+ * 32-bit systems use it at your own discretion, protected by some kind of lock.
+ *
+ * TODO: use write{q,l}_relaxed()
*/
-#define MLX5_DECLARE_DOORBELL_LOCK(name)
-#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
+static inline void mlx5_write64(__be32 val[2], void __iomem *dest)
{
+#if BITS_PER_LONG == 64
__raw_writeq(*(u64 *)val, dest);
-}
-
#else
-
-/* Just fall back to a spinlock to protect the doorbell if
- * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
- * MMIO writes.
- */
-
-#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
-#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
-#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr)
-
-static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
- spinlock_t *doorbell_lock)
-{
- unsigned long flags;
-
- if (doorbell_lock)
- spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
__raw_writel((__force u32) val[1], dest + 4);
- if (doorbell_lock)
- spin_unlock_irqrestore(doorbell_lock, flags);
-}
-
#endif
+}
#endif /* MLX5_DOORBELL_H */
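On 32-bit kernels the two writel()s can interleave with another CPU's doorbell write, so callers serialize themselves, e.g. (sketch; db_lock and the surrounding variables are hypothetical):

	unsigned long flags;

	spin_lock_irqsave(&priv->db_lock, flags);
	mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL);
	spin_unlock_irqrestore(&priv->db_lock, flags);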
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0d0729648844..d2d380d5e415 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -133,6 +133,7 @@ enum {
MLX5_REG_MTRC_CONF = 0x9041,
MLX5_REG_MTRC_STDB = 0x9042,
MLX5_REG_MTRC_CTRL = 0x9043,
+ MLX5_REG_MPEIN = 0x9050,
MLX5_REG_MPCNT = 0x9051,
MLX5_REG_MTPPS = 0x9053,
MLX5_REG_MTPPSE = 0x9054,
@@ -662,6 +663,7 @@ struct mlx5_core_dev {
u64 sys_image_guid;
phys_addr_t iseg_base;
struct mlx5_init_seg __iomem *iseg;
+ phys_addr_t bar_addr;
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
@@ -887,6 +889,7 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
+void mlx5_health_flush(struct mlx5_core_dev *dev);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b0e17c94566c..0e0e63d4d7aa 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8027,6 +8027,52 @@ struct mlx5_ifc_ppcnt_reg_bits {
union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
};
+struct mlx5_ifc_mpein_reg_bits {
+ u8 reserved_at_0[0x2];
+ u8 depth[0x6];
+ u8 pcie_index[0x8];
+ u8 node[0x8];
+ u8 reserved_at_18[0x8];
+
+ u8 capability_mask[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 link_width_enabled[0x8];
+ u8 link_speed_enabled[0x10];
+
+ u8 lane0_physical_position[0x8];
+ u8 link_width_active[0x8];
+ u8 link_speed_active[0x10];
+
+ u8 num_of_pfs[0x10];
+ u8 num_of_vfs[0x10];
+
+ u8 bdf0[0x10];
+ u8 reserved_at_b0[0x10];
+
+ u8 max_read_request_size[0x4];
+ u8 max_payload_size[0x4];
+ u8 reserved_at_c8[0x5];
+ u8 pwr_status[0x3];
+ u8 port_type[0x4];
+ u8 reserved_at_d4[0xb];
+ u8 lane_reversal[0x1];
+
+ u8 reserved_at_e0[0x14];
+ u8 pci_power[0xc];
+
+ u8 reserved_at_100[0x20];
+
+ u8 device_status[0x10];
+ u8 port_state[0x8];
+ u8 reserved_at_138[0x8];
+
+ u8 reserved_at_140[0x10];
+ u8 receiver_detect_result[0x10];
+
+ u8 reserved_at_160[0x20];
+};
+
struct mlx5_ifc_mpcnt_reg_bits {
u8 reserved_at_0[0x8];
u8 pcie_index[0x8];
@@ -8346,7 +8392,9 @@ struct mlx5_ifc_pcam_reg_bits {
};
struct mlx5_ifc_mcam_enhanced_features_bits {
- u8 reserved_at_0[0x74];
+ u8 reserved_at_0[0x6e];
+ u8 pci_status_and_power[0x1];
+ u8 reserved_at_6f[0x5];
u8 mark_tx_action_cnp[0x1];
u8 mark_tx_action_cqe[0x1];
u8 dynamic_tx_overflow[0x1];
@@ -8954,6 +9002,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
struct mlx5_ifc_ppad_reg_bits ppad_reg;
struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+ struct mlx5_ifc_mpein_reg_bits mpein_reg;
struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
struct mlx5_ifc_pplm_reg_bits pplm_reg;
struct mlx5_ifc_pplr_reg_bits pplr_reg;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7eade9132f02..4ef4bbe78a1d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -671,7 +671,7 @@ enum vm_fault_reason {
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf)
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
diff --git a/include/linux/phy.h b/include/linux/phy.h
index ab7439b3da2b..0f9552b17ee7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -345,6 +345,7 @@ struct phy_c45_device_ids {
* is_c45: Set to true if this phy uses clause 45 addressing.
* is_internal: Set to true if this phy is internal to a MAC.
* is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
+ * is_gigabit_capable: Set to true if PHY supports 1000Mbps
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
@@ -382,6 +383,7 @@ struct phy_device {
unsigned is_c45:1;
unsigned is_internal:1;
unsigned is_pseudo_fixed_link:1;
+ unsigned is_gigabit_capable:1;
unsigned has_fixups:1;
unsigned suspended:1;
unsigned sysfs_links:1;
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index edb9b040c94c..d5084ebd9f03 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -9,6 +9,13 @@
#include <linux/bug.h> /* For BUG_ON. */
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
+#include <linux/seccomp.h>
+
+/* Add sp to seccomp_data; as seccomp is a user API, we don't want to modify it */
+struct syscall_info {
+ __u64 sp;
+ struct seccomp_data data;
+};
extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
@@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
-extern int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc);
+extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
#endif
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 460c0eaf6b96..f7714d3b46bd 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -35,12 +35,12 @@
* the least significant bit set but otherwise stores the address of
 * the hash bucket. This allows us to be sure we've found the end
* of the right list.
- * The value stored in the hash bucket has BIT(2) used as a lock bit.
+ * The value stored in the hash bucket has BIT(0) used as a lock bit.
* This bit must be atomically set before any changes are made to
* the chain. To avoid dereferencing this pointer without clearing
* the bit first, we use an opaque 'struct rhash_lock_head *' for the
* pointer stored in the bucket. This struct needs to be defined so
- * that rcu_derefernce() works on it, but it has no content so a
+ * that rcu_dereference() works on it, but it has no content so a
* cast is needed for it to be useful. This ensures it isn't
* used by mistake with clearing the lock bit first.
*/
@@ -88,89 +88,22 @@ struct bucket_table {
};
/*
- * We lock a bucket by setting BIT(1) in the pointer - this is always
- * zero in real pointers and in the nulls marker.
- * bit_spin_locks do not handle contention well, but the whole point
- * of the hashtable design is to achieve minimum per-bucket contention.
- * A nested hash table might not have a bucket pointer. In that case
- * we cannot get a lock. For remove and replace the bucket cannot be
- * interesting and doesn't need locking.
- * For insert we allocate the bucket if this is the last bucket_table,
- * and then take the lock.
- * Sometimes we unlock a bucket by writing a new pointer there. In that
- * case we don't need to unlock, but we do need to reset state such as
- * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
- * provides the same release semantics that bit_spin_unlock() provides,
- * this is safe.
- */
-
-static inline void rht_lock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
-{
- local_bh_disable();
- bit_spin_lock(1, (unsigned long *)bkt);
- lock_map_acquire(&tbl->dep_map);
-}
-
-static inline void rht_lock_nested(struct bucket_table *tbl,
- struct rhash_lock_head **bucket,
- unsigned int subclass)
-{
- local_bh_disable();
- bit_spin_lock(1, (unsigned long *)bucket);
- lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
-}
-
-static inline void rht_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
-{
- lock_map_release(&tbl->dep_map);
- bit_spin_unlock(1, (unsigned long *)bkt);
- local_bh_enable();
-}
-
-static inline void rht_assign_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt,
- struct rhash_head *obj)
-{
- struct rhash_head **p = (struct rhash_head **)bkt;
-
- lock_map_release(&tbl->dep_map);
- rcu_assign_pointer(*p, obj);
- preempt_enable();
- __release(bitlock);
- local_bh_enable();
-}
-
-/*
- * If 'p' is a bucket head and might be locked:
- * rht_ptr() returns the address without the lock bit.
- * rht_ptr_locked() returns the address WITH the lock bit.
- */
-static inline struct rhash_head __rcu *rht_ptr(const struct rhash_lock_head *p)
-{
- return (void *)(((unsigned long)p) & ~BIT(1));
-}
-
-static inline struct rhash_lock_head __rcu *rht_ptr_locked(const
- struct rhash_head *p)
-{
- return (void *)(((unsigned long)p) | BIT(1));
-}
-
-/*
* NULLS_MARKER() expects a hash value with the low
* bits most likely to be significant, and it discards
* the msb.
- * We git it an address, in which the bottom 2 bits are
+ * We give it an address, in which the bottom bit is
* always 0, and the msb might be significant.
* So we shift the address down one bit to align with
* expectations and avoid losing a significant bit.
+ *
+ * We never store the NULLS_MARKER in the hash table
+ * itself as we need the lsb for locking.
+ * Instead we store NULL.
*/
#define RHT_NULLS_MARKER(ptr) \
((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr) \
- ((ptr) = RHT_NULLS_MARKER(&(ptr)))
+ ((ptr) = NULL)
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
@@ -372,6 +305,105 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
&tbl->buckets[hash];
}
+/*
+ * We lock a bucket by setting BIT(0) in the pointer - this is always
+ * zero in real pointers. The NULLS mark is never stored in the bucket,
+ * rather we store NULL if the bucket is empty.
+ * bit_spin_locks do not handle contention well, but the whole point
+ * of the hashtable design is to achieve minimum per-bucket contention.
+ * A nested hash table might not have a bucket pointer. In that case
+ * we cannot get a lock. For remove and replace the bucket cannot be
+ * interesting and doesn't need locking.
+ * For insert we allocate the bucket if this is the last bucket_table,
+ * and then take the lock.
+ * Sometimes we unlock a bucket by writing a new pointer there. In that
+ * case we don't need to unlock, but we do need to reset state such as
+ * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer()
+ * provides the same release semantics that bit_spin_unlock() provides,
+ * this is safe.
+ * When we write to a bucket without unlocking, we use rht_assign_locked().
+ */
+
+static inline void rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bkt);
+ lock_map_acquire(&tbl->dep_map);
+}
+
+static inline void rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head **bucket,
+ unsigned int subclass)
+{
+ local_bh_disable();
+ bit_spin_lock(0, (unsigned long *)bucket);
+ lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
+}
+
+static inline void rht_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head **bkt)
+{
+ lock_map_release(&tbl->dep_map);
+ bit_spin_unlock(0, (unsigned long *)bkt);
+ local_bh_enable();
+}
+
+/*
+ * Where 'bkt' is a bucket and might be locked:
+ * rht_ptr() dereferences that pointer and clears the lock bit.
+ * rht_ptr_exclusive() dereferences in a context where exclusive
+ * access is guaranteed, such as when destroying the table.
+ */
+static inline struct rhash_head *rht_ptr(
+ struct rhash_lock_head __rcu * const *bkt,
+ struct bucket_table *tbl,
+ unsigned int hash)
+{
+ const struct rhash_lock_head *p =
+ rht_dereference_bucket_rcu(*bkt, tbl, hash);
+
+ if ((((unsigned long)p) & ~BIT(0)) == 0)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline struct rhash_head *rht_ptr_exclusive(
+ struct rhash_lock_head __rcu * const *bkt)
+{
+ const struct rhash_lock_head *p =
+ rcu_dereference_protected(*bkt, 1);
+
+ if (!p)
+ return RHT_NULLS_MARKER(bkt);
+ return (void *)(((unsigned long)p) & ~BIT(0));
+}
+
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
+}
+
+static inline void rht_assign_unlock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt,
+ struct rhash_head *obj)
+{
+ struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
+
+ if (rht_is_a_nulls(obj))
+ obj = NULL;
+ lock_map_release(&tbl->dep_map);
+ rcu_assign_pointer(*p, obj);
+ preempt_enable();
+ __release(bitlock);
+ local_bh_enable();
+}
+
/**
* rht_for_each_from - iterate over hash chain from given head
* @pos: the &struct rhash_head to use as a loop cursor.
@@ -380,8 +412,8 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @hash: the hash value / bucket index
*/
#define rht_for_each_from(pos, head, tbl, hash) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
- !rht_is_a_nulls(pos); \
+ for (pos = head; \
+ !rht_is_a_nulls(pos); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
/**
@@ -391,7 +423,8 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @hash: the hash value / bucket index
*/
#define rht_for_each(pos, tbl, hash) \
- rht_for_each_from(pos, rht_ptr(*rht_bucket(tbl, hash)), tbl, hash)
+ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash)
/**
* rht_for_each_entry_from - iterate over hash chain from given head
@@ -403,7 +436,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \
- for (pos = rht_dereference_bucket(head, tbl, hash); \
+ for (pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket((pos)->next, tbl, hash))
@@ -416,8 +449,9 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* @member: name of the &struct rhash_head within the hashable struct.
*/
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \
- rht_for_each_entry_from(tpos, pos, rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+ rht_for_each_entry_from(tpos, pos, \
+ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
+ tbl, hash, member)
/**
* rht_for_each_entry_safe - safely iterate over hash chain of given type
@@ -432,8 +466,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* remove the loop cursor from the list.
*/
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
- for (pos = rht_dereference_bucket(rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash), \
+ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
@@ -454,7 +487,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
*/
#define rht_for_each_rcu_from(pos, head, tbl, hash) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
@@ -469,10 +502,9 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* traversal is guarded by rcu_read_lock().
*/
#define rht_for_each_rcu(pos, tbl, hash) \
- for (({barrier(); }), \
- pos = rht_ptr(rht_dereference_bucket_rcu( \
- *rht_bucket(tbl, hash), tbl, hash)); \
- !rht_is_a_nulls(pos); \
+ for (({barrier(); }), \
+ pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \
+ !rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
/**
@@ -490,7 +522,7 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
*/
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
for (({barrier(); }), \
- pos = rht_dereference_bucket_rcu(head, tbl, hash); \
+ pos = head; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
@@ -508,8 +540,9 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
*/
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_from(tpos, pos, \
- rht_ptr(*rht_bucket(tbl, hash)), \
- tbl, hash, member)
+ rht_ptr(rht_bucket(tbl, hash), \
+ tbl, hash), \
+ tbl, hash, member)
/**
* rhl_for_each_rcu - iterate over rcu hash table list
@@ -564,8 +597,7 @@ restart:
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
- he = rht_ptr(rht_dereference_bucket_rcu(*bkt, tbl, hash));
- rht_for_each_rcu_from(he, he, tbl, hash) {
+ rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
@@ -698,7 +730,7 @@ slow_path:
return rhashtable_insert_slow(ht, key, obj);
}
- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *plist;
struct rhlist_head *list;
@@ -743,7 +775,7 @@ slow_path:
goto slow_path;
/* Inserting at head of list makes unlocking free. */
- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -970,7 +1002,7 @@ static inline int __rhashtable_remove_fast_one(
pprev = NULL;
rht_lock(tbl, bkt);
- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
list = container_of(he, struct rhlist_head, rhead);
@@ -1129,7 +1161,7 @@ static inline int __rhashtable_replace_fast(
pprev = NULL;
rht_lock(tbl, bkt);
- rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
pprev = &he->next;
continue;
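The recurring pattern in the hunks above is that rht_ptr() now takes the bucket address and performs the RCU dereference itself, instead of callers dereferencing first and stripping the bit afterwards. The underlying trick is plain low-bit pointer tagging; a minimal userspace analogue, assuming at least 2-byte-aligned allocations:

	#include <stdint.h>

	/* Structs are at least 2-byte aligned, so BIT(0) of a valid
	 * pointer is always zero and can carry the lock state. */
	static inline void *bucket_lock(void *p)   { return (void *)((uintptr_t)p | 1UL); }
	static inline void *bucket_clear(void *p)  { return (void *)((uintptr_t)p & ~1UL); }
	static inline int   bucket_locked(void *p) { return (int)((uintptr_t)p & 1UL); }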
diff --git a/include/linux/string.h b/include/linux/string.h
index 7927b875f80c..6ab0a6fa512e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCMP
extern int memcmp(const void *,const void *,__kernel_size_t);
#endif
+#ifndef __HAVE_ARCH_BCMP
+extern int bcmp(const void *,const void *,__kernel_size_t);
+#endif
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
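bcmp() has weaker semantics than memcmp(): it only has to distinguish equal from unequal, not report an ordering, which is what lets compilers emit calls to it for == comparisons. A minimal conforming fallback (a sketch; the actual lib/string.c body is not shown in this diff) can simply delegate:

	#ifndef __HAVE_ARCH_BCMP
	/* Only the zero/nonzero result is specified, so memcmp() is a
	 * valid, if unoptimized, implementation. */
	int bcmp(const void *a, const void *b, __kernel_size_t len)
	{
		return memcmp(a, b, len);
	}
	#endif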
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 58dbb4e82908..2e9235adfa0d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -127,6 +127,10 @@ struct rt6_exception {
struct fib6_nh {
struct fib_nh_common nh_common;
+
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ unsigned long last_probe;
+#endif
};
struct fib6_info {
@@ -155,10 +159,6 @@ struct fib6_info {
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-#ifdef CONFIG_IPV6_ROUTER_PREF
- unsigned long last_probe;
-#endif
-
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 342180a7285c..5909fc421305 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -69,7 +69,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) &&
- f6i->fib6_nh.fib_nh_has_gw;
+ f6i->fib6_nh.fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3ce07841dc3b..d8195c77e247 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -32,10 +32,14 @@ struct fib_config {
u8 fc_protocol;
u8 fc_scope;
u8 fc_type;
- /* 3 bytes unused */
+ u8 fc_gw_family;
+ /* 2 bytes unused */
u32 fc_table;
__be32 fc_dst;
- __be32 fc_gw;
+ union {
+ __be32 fc_gw4;
+ struct in6_addr fc_gw6;
+ };
int fc_oif;
u32 fc_flags;
u32 fc_priority;
@@ -83,8 +87,8 @@ struct fib_nh_common {
struct lwtunnel_state *nhc_lwtstate;
unsigned char nhc_scope;
u8 nhc_family;
- u8 nhc_has_gw:1,
- unused:7;
+ u8 nhc_gw_family;
+
union {
__be32 ipv4;
struct in6_addr ipv6;
@@ -112,8 +116,7 @@ struct fib_nh {
#define fib_nh_flags nh_common.nhc_flags
#define fib_nh_lws nh_common.nhc_lwtstate
#define fib_nh_scope nh_common.nhc_scope
-#define fib_nh_family nh_common.nhc_family
-#define fib_nh_has_gw nh_common.nhc_has_gw
+#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw4 nh_common.nhc_gw.ipv4
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
#define fib_nh_weight nh_common.nhc_weight
@@ -144,6 +147,7 @@ struct fib_info {
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
+ bool fib_nh_is_v6;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].fib_nh_dev
@@ -397,6 +401,8 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+ struct netlink_ext_ack *extack);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
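Since fc_gw is now a tagged union, any writer must set fc_gw_family to match the member it fills; readers switch on the family instead of testing a nonzero __be32. A hedged sketch of the writer side (the nla_gw4/nla_gw6 attribute variables are hypothetical):

	if (nla_gw4) {
		cfg->fc_gw_family = AF_INET;
		cfg->fc_gw4 = nla_get_be32(nla_gw4);
	} else if (nla_gw6) {
		cfg->fc_gw_family = AF_INET6;
		cfg->fc_gw6 = nla_get_in6_addr(nla_gw6);
	}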
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index d8d9c0b0e8c0..453b55bf6723 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -12,6 +12,8 @@
/* structs from net/ip6_fib.h */
struct fib6_info;
+struct fib6_nh;
+struct fib6_config;
/* This is ugly, ideally these symbols should be built
* into the core kernel.
@@ -40,6 +42,10 @@ struct ipv6_stub {
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
+ int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack);
+ void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index ddfbb591e2c5..366150053043 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -2,6 +2,8 @@
#ifndef _NDISC_H
#define _NDISC_H
+#include <net/ipv6_stubs.h>
+
/*
* ICMP codes for neighbour discovery messages
*/
@@ -379,6 +381,14 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev
return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
}
+static inline
+struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev,
+ const void *pkey)
+{
+ return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+ ndisc_hashfn, pkey, dev);
+}
+
static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
{
struct neighbour *n;
@@ -409,6 +419,36 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
rcu_read_unlock_bh();
}
+static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
+ const void *pkey)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
+ if (n) {
+ unsigned long now = jiffies;
+
+ /* avoid dirtying neighbour */
+ if (n->confirmed != now)
+ n->confirmed = now;
+ }
+ rcu_read_unlock_bh();
+}
+
+/* uses ipv6_stub and is meant for use outside of IPv6 core */
+static inline struct neighbour *ip_neigh_gw6(struct net_device *dev,
+ const void *addr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, addr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false);
+
+ return neigh;
+}
+
int ndisc_init(void);
int ndisc_late_init(void);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 7c1ab9edba03..3e5438bd0101 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -498,11 +498,12 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
return dev_queue_xmit(skb);
}
-static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
+static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
+ bool skip_cache)
{
const struct hh_cache *hh = &n->hh;
- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
+ if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
return neigh_hh_output(hh, skb);
else
return n->output(n, skb);
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 87499b6b35d6..df5c69db68af 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -166,7 +166,7 @@ struct nci_conn_info {
* According to specification 102 622 chapter 4.4 Pipes,
* the pipe identifier is 7 bits long.
*/
-#define NCI_HCI_MAX_PIPES 127
+#define NCI_HCI_MAX_PIPES 128
struct nci_hci_gate {
u8 gate;
diff --git a/include/net/route.h b/include/net/route.h
index 9883dc82f723..96f6c9ae33c2 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -29,6 +29,8 @@
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -55,12 +57,15 @@ struct rtable {
unsigned int rt_flags;
__u16 rt_type;
__u8 rt_is_input;
- __u8 rt_uses_gateway;
+ u8 rt_gw_family;
int rt_iif;
/* Info on neighbour */
- __be32 rt_gateway;
+ union {
+ __be32 rt_gw4;
+ struct in6_addr rt_gw6;
+ };
/* Miscellaneous cached information */
u32 rt_mtu_locked:1,
@@ -82,8 +87,8 @@ static inline bool rt_is_output_route(const struct rtable *rt)
static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
{
- if (rt->rt_gateway)
- return rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ return rt->rt_gw4;
return daddr;
}
@@ -347,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
return hoplimit;
}
+static inline struct neighbour *ip_neigh_gw4(struct net_device *dev,
+ __be32 daddr)
+{
+ struct neighbour *neigh;
+
+ neigh = __ipv4_neigh_lookup_noref(dev, daddr);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &daddr, dev, false);
+
+ return neigh;
+}
+
+static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
+ struct sk_buff *skb,
+ bool *is_v6gw)
+{
+ struct net_device *dev = rt->dst.dev;
+ struct neighbour *neigh;
+
+ if (likely(rt->rt_gw_family == AF_INET)) {
+ neigh = ip_neigh_gw4(dev, rt->rt_gw4);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
+ *is_v6gw = true;
+ } else {
+ neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
+ }
+ return neigh;
+}
+
#endif /* _ROUTE_H */
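ip_neigh_for_gw() is what lets the IPv4 output path resolve an IPv6 gateway; the is_v6gw flag it reports is also why neigh_output() grew a skip_cache argument in the neighbour.h hunk above, since a cached IPv4 L2 header must not be reused when the next hop is IPv6. A hedged sketch of the caller pattern (modelled on what ip_finish_output2 would look like after this change; not quoted from this diff):

	bool is_v6gw = false;
	struct neighbour *neigh;

	rcu_read_lock_bh();
	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
	if (!IS_ERR(neigh)) {
		int res = neigh_output(neigh, skb, is_v6gw);

		rcu_read_unlock_bh();
		return res;
	}
	rcu_read_unlock_bh();
	return -EINVAL;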
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0aea0e262452..e8f85cd2afce 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -52,10 +52,7 @@ struct qdisc_size_table {
struct qdisc_skb_head {
struct sk_buff *head;
struct sk_buff *tail;
- union {
- u32 qlen;
- atomic_t atomic_qlen;
- };
+ __u32 qlen;
spinlock_t lock;
};
@@ -146,9 +143,14 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+ return q->flags & TCQ_F_CPUSTATS;
+}
+
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
- if (qdisc->flags & TCQ_F_NOLOCK)
+ if (qdisc_is_percpu_stats(qdisc))
return qdisc->empty;
return !qdisc->q.qlen;
}
@@ -481,19 +483,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
BUILD_BUG_ON(sizeof(qcb->data) < sz);
}
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+ return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
static inline int qdisc_qlen(const struct Qdisc *q)
{
return q->q.qlen;
}
-static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
- u32 qlen = q->qstats.qlen;
+ __u32 qlen = q->qstats.qlen;
+ int i;
- if (q->flags & TCQ_F_NOLOCK)
- qlen += atomic_read(&q->q.atomic_qlen);
- else
+ if (qdisc_is_percpu_stats(q)) {
+ for_each_possible_cpu(i)
+ qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+ } else {
qlen += q->q.qlen;
+ }
return qlen;
}
@@ -747,7 +757,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
const struct Qdisc *q = rcu_dereference(txq->qdisc);
- if (q->q.qlen) {
+ if (!qdisc_is_empty(q)) {
rcu_read_unlock();
return false;
}
@@ -817,11 +827,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return sch->enqueue(skb, sch, to_free);
}
-static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
-{
- return q->flags & TCQ_F_CPUSTATS;
-}
-
static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
__u64 bytes, __u32 packets)
{
@@ -889,14 +894,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}
-static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
- atomic_inc(&sch->q.atomic_qlen);
+ this_cpu_inc(sch->cpu_qstats->qlen);
}
-static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
- atomic_dec(&sch->q.atomic_qlen);
+ this_cpu_dec(sch->cpu_qstats->qlen);
}
static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
@@ -1106,6 +1111,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
return skb;
}
+static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
+ struct sk_buff *skb)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_bstats_cpu_update(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+ }
+}
+
+static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
+ unsigned int pkt_len)
+{
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_qlen_inc(sch);
+ this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
+ } else {
+ sch->qstats.backlog += pkt_len;
+ sch->q.qlen++;
+ }
+}
+
/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
@@ -1113,8 +1144,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
if (skb) {
skb = __skb_dequeue(&sch->gso_skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
+ if (qdisc_is_percpu_stats(sch)) {
+ qdisc_qstats_cpu_backlog_dec(sch, skb);
+ qdisc_qstats_cpu_qlen_dec(sch);
+ } else {
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+ }
} else {
skb = sch->dequeue(sch);
}
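With qlen moved into the per-cpu qstats for TCQ_F_CPUSTATS qdiscs, enqueue and dequeue paths pick the per-cpu or locked counters through a single predicate. A hedged sketch of a lockless enqueue using the new helper (example_try_queue is hypothetical):

	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				   struct sk_buff **to_free)
	{
		if (!example_try_queue(sch, skb))	/* hypothetical */
			return qdisc_drop(skb, sch, to_free);

		qdisc_update_stats_at_enqueue(sch, qdisc_pkt_len(skb));
		return NET_XMIT_SUCCESS;
	}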
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index bb0ecba3db2b..f4ac7117ff29 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -59,7 +59,7 @@ void sctp_ulpq_free(struct sctp_ulpq *);
int sctp_ulpq_tail_data(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
/* Add a new event for propagation to the ULP. */
-int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev);
+int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sk_buff_head *skb_list);
/* Renege previously received chunks. */
void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 7f83b6eafc5c..6f2a4dc35e37 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -69,13 +69,13 @@ TRACE_EVENT(fib_table_lookup,
__assign_str(name, dev ? dev->name : "-");
if (nhc) {
- if (nhc->nhc_family == AF_INET) {
+ if (nhc->nhc_gw_family == AF_INET) {
p32 = (__be32 *) __entry->gw4;
*p32 = nhc->nhc_gw.ipv4;
in6 = (struct in6_addr *)__entry->gw6;
*in6 = in6_zero;
- } else if (nhc->nhc_family == AF_INET6) {
+ } else if (nhc->nhc_gw_family == AF_INET6) {
p32 = (__be32 *) __entry->gw4;
*p32 = 0;
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index 44a3259ed4a5..b6e0cbc2c71f 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter,
TP_fast_assign(
__entry->id = id;
- syscall_get_arguments(current, regs, 0, 6, __entry->args);
+ syscall_get_arguments(current, regs, __entry->args);
),
TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)",
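The offset/count pair is gone from syscall_get_arguments(); the helper now always copies all six arguments, which is what every remaining caller wanted anyway. A sketch of the new convention:

	unsigned long args[6];

	syscall_get_arguments(current, regs, args);	/* all six, always */
	pr_debug("arg0=%lx arg1=%lx\n", args[0], args[1]);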
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 837024512baf..2e96d0b4bf65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -105,6 +105,7 @@ enum bpf_cmd {
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
};
enum bpf_map_type {
@@ -255,8 +256,19 @@ enum bpf_attach_type {
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -283,7 +295,7 @@ enum bpf_attach_type {
#define BPF_OBJ_NAME_LEN 16U
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
@@ -293,6 +305,10 @@ enum bpf_attach_type {
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -396,6 +412,13 @@ union bpf_attr {
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -1500,6 +1523,10 @@ union bpf_attr {
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2641,10 +2668,16 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
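BPF_F_ADJ_ROOM_ENCAP_L2() shifts the inner MAC header length into the top byte of the 64-bit flags word, leaving the low bits for the existing flag bits. A hedged usage sketch from the BPF program side (len_diff is a precomputed headroom delta, an assumption here):

	__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_GRE |
		      BPF_F_ADJ_ROOM_ENCAP_L2(14);	/* inner ETH_HLEN */

	if (bpf_skb_adjust_room(skb, len_diff, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;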
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 7b7475ef2f17..9310652ca4f9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -39,11 +39,11 @@ struct btf_type {
* struct, union and fwd
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC and FUNC_PROTO.
+ * FUNC, FUNC_PROTO and VAR.
* "type" is a type_id referring to another type.
*/
union {
@@ -70,8 +70,10 @@ struct btf_type {
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
-#define BTF_KIND_MAX 13
-#define NR_BTF_KINDS 14
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
@@ -138,4 +140,26 @@ struct btf_param {
__u32 type;
};
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains, along with their
+ * in-section offsets and sizes.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
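Together, VAR and DATASEC let BTF describe global data: each DATASEC corresponds to an ELF section and lists the VARs it contains with their offsets and sizes. A hedged illustration of what a two-variable .bss description amounts to (the type IDs are made up):

	struct btf_var_secinfo bss_vars[2] = {
		{ .type = 4, .offset = 0, .size = 4 },	/* int counter   */
		{ .type = 7, .offset = 8, .size = 8 },	/* __u64 tstamp  */
	};
	/* The owning btf_type is BTF_KIND_DATASEC with vlen = 2 and
	 * size = the section size (16 here, including padding). */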
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 50c76f4fa402..818ad368b586 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1599,7 +1599,7 @@ enum ethtool_link_mode_bit_indices {
static inline int ethtool_validate_speed(__u32 speed)
{
- return speed <= INT_MAX || speed == SPEED_UNKNOWN;
+ return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
}
/* Duplex, half or full. */
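The cast matters because SPEED_UNKNOWN is defined as -1: converted to __u32 it becomes 0xffffffff, which fails the speed <= INT_MAX test, so the sentinel has to be matched explicitly in the same unsigned domain. A one-line illustration:

	__u32 speed = (__u32)SPEED_UNKNOWN;	/* 0xffffffff, > INT_MAX */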
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c72e0d8e1e65..584636c9e2eb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -22,7 +22,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -63,6 +63,7 @@ int array_map_alloc_check(union bpf_attr *attr)
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
@@ -160,6 +161,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + array->elem_size * (index & array->index_mask);
}
+static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
+ u32 off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ if (map->max_entries != 1)
+ return -ENOTSUPP;
+ if (off >= map->value_size)
+ return -EINVAL;
+
+ *imm = (unsigned long)array->value;
+ return 0;
+}
+
+static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
+ u32 *off)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ u64 base = (unsigned long)array->value;
+ u64 range = array->elem_size;
+
+ if (map->max_entries != 1)
+ return -ENOTSUPP;
+ if (imm < base || imm >= base + range)
+ return -ENOENT;
+
+ *off = imm - base;
+ return 0;
+}
+
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
@@ -360,7 +391,8 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
return;
}
- seq_printf(m, "%u: ", *(u32 *)key);
+ if (map->btf_key_type_id)
+ seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
@@ -397,6 +429,18 @@ static int array_map_check_btf(const struct bpf_map *map,
{
u32 int_data;
+ /* One exception for keyless BTF: .bss/.data/.rodata map */
+ if (btf_type_is_void(key_type)) {
+ if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+ map->max_entries != 1)
+ return -EINVAL;
+
+ if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
+ return -EINVAL;
+
+ return 0;
+ }
+
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;
@@ -419,6 +463,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
+ .map_direct_value_addr = array_map_direct_value_addr,
+ .map_direct_value_meta = array_map_direct_value_meta,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
@@ -440,6 +486,9 @@ static int fd_array_map_alloc_check(union bpf_attr *attr)
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return -EINVAL;
+ /* Program read-only/write-only not supported for special maps yet. */
+ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
+ return -EINVAL;
return array_map_alloc_check(attr);
}
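bpf_map_flags_access_ok() itself lands in include/linux/bpf.h, outside this diff; presumably it rejects the one contradictory combination, both program-side flags at once, which would leave the map unusable from programs. A hedged sketch of what it likely looks like:

	static inline bool bpf_map_flags_access_ok(u32 access_flags)
	{
		/* RDONLY_PROG and WRONLY_PROG together make no sense. */
		return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
		       (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
	}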
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index bd3921b1514b..cad09858a5f2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -185,6 +185,16 @@
i < btf_type_vlen(struct_type); \
i++, member++)
+#define for_each_vsi(i, struct_type, member) \
+ for (i = 0, member = btf_type_var_secinfo(struct_type); \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
+#define for_each_vsi_from(i, from, struct_type, member) \
+ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
+ i < btf_type_vlen(struct_type); \
+ i++, member++)
+
static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
@@ -262,6 +272,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = "RESTRICT",
[BTF_KIND_FUNC] = "FUNC",
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
+ [BTF_KIND_VAR] = "VAR",
+ [BTF_KIND_DATASEC] = "DATASEC",
};
struct btf_kind_operations {
@@ -314,7 +326,7 @@ static bool btf_type_is_modifier(const struct btf_type *t)
return false;
}
-static bool btf_type_is_void(const struct btf_type *t)
+bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void;
}
@@ -375,13 +387,36 @@ static bool btf_type_is_int(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
}
+static bool btf_type_is_var(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
+}
+
+static bool btf_type_is_datasec(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+}
+
+/* Types that act only as a source, not as a sink or intermediate
+ * type when resolving.
+ */
+static bool btf_type_is_resolve_source_only(const struct btf_type *t)
+{
+ return btf_type_is_var(t) ||
+ btf_type_is_datasec(t);
+}
+
/* What types need to be resolved?
*
* btf_type_is_modifier() is an obvious one.
*
* btf_type_is_struct() because its member refers to
* another type (through member->type).
-
+ *
+ * btf_type_is_var() because the variable refers to
+ * another type. btf_type_is_datasec() holds multiple
+ * btf_type_is_var() types that need resolving.
+ *
* btf_type_is_array() because its element (array->type)
* refers to another type. Array can be thought of as a
* special case of struct while array just has the same
@@ -390,9 +425,11 @@ static bool btf_type_is_int(const struct btf_type *t)
static bool btf_type_needs_resolve(const struct btf_type *t)
{
return btf_type_is_modifier(t) ||
- btf_type_is_ptr(t) ||
- btf_type_is_struct(t) ||
- btf_type_is_array(t);
+ btf_type_is_ptr(t) ||
+ btf_type_is_struct(t) ||
+ btf_type_is_array(t) ||
+ btf_type_is_var(t) ||
+ btf_type_is_datasec(t);
}
/* t->size can be used */
@@ -403,6 +440,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
return true;
}
@@ -467,6 +505,16 @@ static const struct btf_enum *btf_type_enum(const struct btf_type *t)
return (const struct btf_enum *)(t + 1);
}
+static const struct btf_var *btf_type_var(const struct btf_type *t)
+{
+ return (const struct btf_var *)(t + 1);
+}
+
+static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
+{
+ return (const struct btf_var_secinfo *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -478,23 +526,31 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
offset < btf->hdr.str_len;
}
-/* Only C-style identifier is permitted. This can be relaxed if
- * necessary.
- */
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
+{
+ if ((first ? !isalpha(c) :
+ !isalnum(c)) &&
+ c != '_' &&
+ ((c == '.' && !dot_ok) ||
+ c != '.'))
+ return false;
+ return true;
+}
+
+static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
/* offset must be valid */
const char *src = &btf->strings[offset];
const char *src_limit;
- if (!isalpha(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, true, dot_ok))
return false;
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
src++;
while (*src && src < src_limit) {
- if (!isalnum(*src) && *src != '_')
+ if (!__btf_name_char_ok(*src, false, dot_ok))
return false;
src++;
}
@@ -502,6 +558,19 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
return !*src;
}
+/* Only C-style identifier is permitted. This can be relaxed if
+ * necessary.
+ */
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, false);
+}
+
+static bool btf_name_valid_section(const struct btf *btf, u32 offset)
+{
+ return __btf_name_valid(btf, offset, true);
+}
+
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!offset)
@@ -697,6 +766,32 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
+__printf(4, 5)
+static void btf_verifier_log_vsi(struct btf_verifier_env *env,
+ const struct btf_type *datasec_type,
+ const struct btf_var_secinfo *vsi,
+ const char *fmt, ...)
+{
+ struct bpf_verifier_log *log = &env->log;
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+ if (env->phase != CHECK_META)
+ btf_verifier_log_type(env, datasec_type, NULL);
+
+ __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
+ vsi->type, vsi->offset, vsi->size);
+ if (fmt && *fmt) {
+ __btf_verifier_log(log, " ");
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+ }
+
+ __btf_verifier_log(log, "\n");
+}
+
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
@@ -974,7 +1069,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
} else if (btf_type_is_ptr(size_type)) {
size = sizeof(void *);
} else {
- if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
+ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
+ !btf_type_is_var(size_type)))
return NULL;
size = btf->resolved_sizes[size_type_id];
@@ -1509,7 +1605,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1542,6 +1638,53 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
return 0;
}
+static int btf_var_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ u32 next_type_size;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ if (btf_type_is_modifier(next_type)) {
+ const struct btf_type *resolved_type;
+ u32 resolved_type_id;
+
+ resolved_type_id = next_type_id;
+ resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
+
+ if (btf_type_is_ptr(resolved_type) &&
+ !env_type_is_resolve_sink(env, resolved_type) &&
+ !env_type_is_resolved(env, resolved_type_id))
+ return env_stack_push(env, resolved_type,
+ resolved_type_id);
+ }
+
+ /* We must resolve to something concrete at this point, no
+ * forward types or similar that would resolve to a size of
+ * zero are allowed.
+ */
+ if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ env_stack_pop_resolved(env, next_type_id, next_type_size);
+
+ return 0;
+}
+
static int btf_ptr_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
@@ -1551,7 +1694,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
struct btf *btf = env->btf;
next_type = btf_type_by_id(btf, next_type_id);
- if (!next_type) {
+ if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1609,6 +1752,15 @@ static void btf_modifier_seq_show(const struct btf *btf,
btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
}
+static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
+ u32 type_id, void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ t = btf_type_id_resolve(btf, &type_id);
+
+ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
+}
+
static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct seq_file *m)
@@ -1776,7 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
- if (btf_type_nosize_or_null(index_type)) {
+ if (btf_type_is_resolve_source_only(index_type) ||
+ btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1795,7 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
- if (btf_type_nosize_or_null(elem_type)) {
+ if (btf_type_is_resolve_source_only(elem_type) ||
+ btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
"Invalid elem");
return -EINVAL;
@@ -2016,7 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
- if (btf_type_nosize_or_null(member_type)) {
+ if (btf_type_is_resolve_source_only(member_type) ||
+ btf_type_nosize_or_null(member_type)) {
btf_verifier_log_member(env, v->t, member,
"Invalid member");
return -EINVAL;
@@ -2411,6 +2566,222 @@ static struct btf_kind_operations func_ops = {
.seq_show = btf_df_seq_show,
};
+static s32 btf_var_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_var *var;
+ u32 meta_needed = sizeof(*var);
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ if (!t->name_off ||
+ !__btf_name_valid(env->btf, t->name_off, true)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ /* A var cannot be in type void */
+ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
+ btf_verifier_log_type(env, t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ var = btf_type_var(t);
+ if (var->linkage != BTF_VAR_STATIC &&
+ var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+ btf_verifier_log_type(env, t, "Linkage not supported");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
+{
+ const struct btf_var *var = btf_type_var(t);
+
+ btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
+}
+
+static const struct btf_kind_operations var_ops = {
+ .check_meta = btf_var_check_meta,
+ .resolve = btf_var_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_var_log,
+ .seq_show = btf_var_seq_show,
+};
+
+static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_var_secinfo *vsi;
+ u64 last_vsi_end_off = 0, sum = 0;
+ u32 i, meta_needed;
+
+ meta_needed = btf_type_vlen(t) * sizeof(*vsi);
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ if (!btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen == 0");
+ return -EINVAL;
+ }
+
+ if (!t->size) {
+ btf_verifier_log_type(env, t, "size == 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ if (!t->name_off ||
+ !btf_name_valid_section(env->btf, t->name_off)) {
+ btf_verifier_log_type(env, t, "Invalid name");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ for_each_vsi(i, t, vsi) {
+ /* A var cannot be in type void */
+ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid offset");
+ return -EINVAL;
+ }
+
+ if (!vsi->size || vsi->size > t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid size");
+ return -EINVAL;
+ }
+
+ last_vsi_end_off = vsi->offset + vsi->size;
+ if (last_vsi_end_off > t->size) {
+ btf_verifier_log_vsi(env, t, vsi,
+ "Invalid offset+size");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_vsi(env, t, vsi, NULL);
+ sum += vsi->size;
+ }
+
+ if (t->size < sum) {
+ btf_verifier_log_type(env, t, "Invalid btf_info size");
+ return -EINVAL;
+ }
+
+ return meta_needed;
+}
+
+static int btf_datasec_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_var_secinfo *vsi;
+ struct btf *btf = env->btf;
+ u16 i;
+
+ for_each_vsi_from(i, v->next_member, v->t, vsi) {
+ u32 var_type_id = vsi->type, type_id, type_size = 0;
+ const struct btf_type *var_type = btf_type_by_id(env->btf,
+ var_type_id);
+ if (!var_type || !btf_type_is_var(var_type)) {
+ btf_verifier_log_vsi(env, v->t, vsi,
+ "Not a VAR kind member");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, var_type) &&
+ !env_type_is_resolved(env, var_type_id)) {
+ env_stack_set_next_member(env, i + 1);
+ return env_stack_push(env, var_type, var_type_id);
+ }
+
+ type_id = var_type->type;
+ if (!btf_type_id_size(btf, &type_id, &type_size)) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
+ return -EINVAL;
+ }
+
+ if (vsi->size < type_size) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
+ return -EINVAL;
+ }
+ }
+
+ env_stack_pop_resolved(env, 0, 0);
+ return 0;
+}
+
+static void btf_datasec_log(struct btf_verifier_env *env,
+ const struct btf_type *t)
+{
+ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
+}
+
+static void btf_datasec_seq_show(const struct btf *btf,
+ const struct btf_type *t, u32 type_id,
+ void *data, u8 bits_offset,
+ struct seq_file *m)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ u32 i;
+
+ seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
+ for_each_vsi(i, t, vsi) {
+ var = btf_type_by_id(btf, vsi->type);
+ if (i)
+ seq_puts(m, ",");
+ btf_type_ops(var)->seq_show(btf, var, vsi->type,
+ data + vsi->offset, bits_offset, m);
+ }
+ seq_puts(m, "}");
+}
+
+static const struct btf_kind_operations datasec_ops = {
+ .check_meta = btf_datasec_check_meta,
+ .resolve = btf_datasec_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_datasec_log,
+ .seq_show = btf_datasec_seq_show,
+};
+
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@@ -2542,6 +2913,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = &modifier_ops,
[BTF_KIND_FUNC] = &func_ops,
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
+ [BTF_KIND_VAR] = &var_ops,
+ [BTF_KIND_DATASEC] = &datasec_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -2622,13 +2995,17 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
if (!env_type_is_resolved(env, type_id))
return false;
- if (btf_type_is_struct(t))
+ if (btf_type_is_struct(t) || btf_type_is_datasec(t))
return !btf->resolved_ids[type_id] &&
- !btf->resolved_sizes[type_id];
+ !btf->resolved_sizes[type_id];
- if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
+ if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
+ btf_type_is_var(t)) {
t = btf_type_id_resolve(btf, &type_id);
- return t && !btf_type_is_modifier(t);
+ return t &&
+ !btf_type_is_modifier(t) &&
+ !btf_type_is_var(t) &&
+ !btf_type_is_datasec(t);
}
if (btf_type_is_array(t)) {
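__btf_name_char_ok() boils down to: a character is acceptable if it is alphabetic (alphanumeric after the first position), an underscore, or a dot when dot_ok is set, which is how section names like ".bss" pass btf_name_valid_section() while still failing btf_name_valid_identifier(). A userspace mirror of the predicate, for illustration:

	#include <ctype.h>
	#include <stdbool.h>

	static bool name_char_ok(char c, bool first, bool dot_ok)
	{
		if (first)
			return isalpha(c) || c == '_' || (c == '.' && dot_ok);
		return isalnum(c) || c == '_' || (c == '.' && dot_ok);
	}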
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ff09d32a8a1b..ace8c22c8b0e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
dst[i] = fp->insnsi[i];
if (!was_ld_map &&
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
- dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
+ (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
was_ld_map = true;
dst[i].imm = 0;
} else if (was_ld_map &&
@@ -438,6 +439,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
const u32 cnt_max = S16_MAX;
struct bpf_prog *prog_adj;
+ int err;
/* Since our patchlet doesn't expand the image, we're done. */
if (insn_delta == 0) {
@@ -453,8 +455,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* we afterwards may not fail anymore.
*/
if (insn_adj_cnt > cnt_max &&
- bpf_adj_branches(prog, off, off + 1, off + len, true))
- return NULL;
+ (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
+ return ERR_PTR(err);
/* Several new instructions need to be inserted. Make room
* for them. Likely, there's no need for a new allocation as
@@ -463,7 +465,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
GFP_USER);
if (!prog_adj)
- return NULL;
+ return ERR_PTR(-ENOMEM);
prog_adj->len = insn_adj_cnt;
@@ -1096,13 +1098,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
continue;
tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
- if (!tmp) {
+ if (IS_ERR(tmp)) {
/* Patching may have repointed aux->prog during
* realloc from the original one, so we need to
* fix it up here on error.
*/
bpf_jit_prog_release_other(prog, clone);
- return ERR_PTR(-ENOMEM);
+ return tmp;
}
clone = tmp;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index de73f55e42fd..d9ce383c0f9c 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
* part of the ldimm64 insn is accessible.
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+ bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
+ insn->src_reg == BPF_PSEUDO_MAP_VALUE;
char tmp[64];
- if (map_ptr && !allow_ptr_leaks)
+ if (is_ptr && !allow_ptr_leaks)
imm = 0;
verbose(cbs->private_data, "(%02x) r%d = %s\n",
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fed15cf94dca..192d32e77db3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,7 +23,7 @@
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
- BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
+ BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
struct bucket {
struct hlist_nulls_head head;
@@ -262,8 +262,8 @@ static int htab_map_alloc_check(union bpf_attr *attr)
/* Guard against local DoS, and discourage production use. */
return -EPERM;
- if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
- /* reserved bits should not be used */
+ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (!lru && percpu_lru)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 6b572e2de7fb..980e8f1f6cb5 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -14,7 +14,7 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO
#ifdef CONFIG_CGROUP_BPF
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_cgroup_storage_map {
struct bpf_map map;
@@ -282,8 +282,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);
- if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
- /* reserved bits should not be used */
+ if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return ERR_PTR(-EINVAL);
if (attr->max_entries)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 93a5cbbde421..e61630c2e50b 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -538,7 +538,7 @@ out:
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
- BPF_F_RDONLY | BPF_F_WRONLY)
+ BPF_F_ACCESS_MASK)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
@@ -553,6 +553,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
if (attr->max_entries == 0 ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
attr->key_size < LPM_KEY_SIZE_MIN ||
attr->key_size > LPM_KEY_SIZE_MAX ||
attr->value_size < LPM_VAL_SIZE_MIN ||
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index b384ea9f3254..0b140d236889 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -11,8 +11,7 @@
#include "percpu_freelist.h"
#define QUEUE_STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
-
+ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_queue_stack {
struct bpf_map map;
@@ -52,7 +51,8 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 0 ||
attr->value_size == 0 ||
- attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index afca36f53c49..d995eedfdd16 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -166,13 +166,25 @@ void bpf_map_area_free(void *area)
kvfree(area);
}
+static u32 bpf_map_flags_retain_permanent(u32 flags)
+{
+ /* Some map creation flags are not tied to the map object but
+ * rather to the map fd, so they have no meaning upon
+ * map object inspection since multiple file descriptors with
+ * different (access) properties can exist here. Thus, given
+ * this has zero meaning for the map itself, let's clear these
+ * from here.
+ */
+ return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
+}
+
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
map->map_type = attr->map_type;
map->key_size = attr->key_size;
map->value_size = attr->value_size;
map->max_entries = attr->max_entries;
- map->map_flags = attr->map_flags;
+ map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
map->numa_node = bpf_map_attr_numa_node(attr);
}
@@ -343,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
return 0;
}
+static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
+{
+ fmode_t mode = f.file->f_mode;
+
+ /* Our file permissions may have been overridden by global
+ * map permissions on the syscall side.
+ */
+ if (READ_ONCE(map->frozen))
+ mode &= ~FMODE_CAN_WRITE;
+ return mode;
+}
+
#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
@@ -364,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
"memlock:\t%llu\n"
- "map_id:\t%u\n",
+ "map_id:\t%u\n"
+ "frozen:\t%u\n",
map->map_type,
map->key_size,
map->value_size,
map->max_entries,
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT,
- map->id);
+ map->id,
+ READ_ONCE(map->frozen));
if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
@@ -448,10 +474,10 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
const char *end = src + BPF_OBJ_NAME_LEN;
memset(dst, 0, BPF_OBJ_NAME_LEN);
-
- /* Copy all isalnum() and '_' char */
+ /* Copy all isalnum(), '_' and '.' chars. */
while (src < end && *src) {
- if (!isalnum(*src) && *src != '_')
+ if (!isalnum(*src) &&
+ *src != '_' && *src != '.')
return -EINVAL;
*dst++ = *src++;
}
@@ -478,9 +504,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
u32 key_size, value_size;
int ret = 0;
- key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
- if (!key_type || key_size != map->key_size)
- return -EINVAL;
+ /* Some maps allow key to be unspecified. */
+ if (btf_key_id) {
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+ if (!key_type || key_size != map->key_size)
+ return -EINVAL;
+ } else {
+ key_type = btf_type_by_id(btf, 0);
+ if (!map->ops->map_check_btf)
+ return -EINVAL;
+ }
value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
if (!value_type || value_size != map->value_size)
@@ -489,6 +522,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
map->spin_lock_off = btf_find_spin_lock(btf, value_type);
if (map_value_has_spin_lock(map)) {
+ if (map->map_flags & BPF_F_RDONLY_PROG)
+ return -EACCES;
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
@@ -545,7 +580,7 @@ static int map_create(union bpf_attr *attr)
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
- if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
+ if (!attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@@ -713,8 +748,7 @@ static int map_lookup_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
@@ -843,8 +877,7 @@ static int map_update_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -955,8 +988,7 @@ static int map_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -1007,8 +1039,7 @@ static int map_get_next_key(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
@@ -1075,8 +1106,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
-
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -1118,6 +1148,36 @@ err_put:
return err;
}
+#define BPF_MAP_FREEZE_LAST_FIELD map_fd
+
+static int map_freeze(const union bpf_attr *attr)
+{
+ int err = 0, ufd = attr->map_fd;
+ struct bpf_map *map;
+ struct fd f;
+
+ if (CHECK_ATTR(BPF_MAP_FREEZE))
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ if (READ_ONCE(map->frozen)) {
+ err = -EBUSY;
+ goto err_put;
+ }
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ WRITE_ONCE(map->frozen, true);
+err_put:
+ fdput(f);
+ return err;
+}
+
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -1557,7 +1617,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
is_gpl = license_is_gpl_compatible(license);
- if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
+ if (attr->insn_cnt == 0 ||
+ attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
@@ -1948,7 +2009,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
return cgroup_bpf_prog_query(attr, uattr);
}
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1961,6 +2022,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
+ if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
+ (!attr->test.ctx_size_in && attr->test.ctx_in))
+ return -EINVAL;
+
+ if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
+ (!attr->test.ctx_size_out && attr->test.ctx_out))
+ return -EINVAL;
+
prog = bpf_prog_get(attr->test.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -2071,13 +2140,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
}
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
- unsigned long addr)
+ unsigned long addr, u32 *off,
+ u32 *type)
{
+ const struct bpf_map *map;
int i;
- for (i = 0; i < prog->aux->used_map_cnt; i++)
- if (prog->aux->used_maps[i] == (void *)addr)
- return prog->aux->used_maps[i];
+ for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
+ map = prog->aux->used_maps[i];
+ if (map == (void *)addr) {
+ *type = BPF_PSEUDO_MAP_FD;
+ return map;
+ }
+ if (!map->ops->map_direct_value_meta)
+ continue;
+ if (!map->ops->map_direct_value_meta(map, addr, off)) {
+ *type = BPF_PSEUDO_MAP_VALUE;
+ return map;
+ }
+ }
+
return NULL;
}
@@ -2085,6 +2167,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
{
const struct bpf_map *map;
struct bpf_insn *insns;
+ u32 off, type;
u64 imm;
int i;
@@ -2112,11 +2195,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
continue;
imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
- map = bpf_map_from_imm(prog, imm);
+ map = bpf_map_from_imm(prog, imm, &off, &type);
if (map) {
- insns[i].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[i].src_reg = type;
insns[i].imm = map->id;
- insns[i + 1].imm = 0;
+ insns[i + 1].imm = off;
continue;
}
}
@@ -2706,6 +2789,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_GET_NEXT_KEY:
err = map_get_next_key(&attr);
break;
+ case BPF_MAP_FREEZE:
+ err = map_freeze(&attr);
+ break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr, uattr);
break;
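
For orientation, a minimal userspace sketch of driving the new BPF_MAP_FREEZE command (hypothetical wrapper, assuming uapi headers from a kernel with this patch applied; the command consumes only attr.map_fd, matching BPF_MAP_FREEZE_LAST_FIELD above):

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Freeze @map_fd so that syscall-side writes are rejected from
     * now on; requires CAP_SYS_ADMIN and fails with EBUSY if the
     * map is already frozen.
     */
    static int bpf_map_freeze_fd(int map_fd)
    {
    	union bpf_attr attr;

    	memset(&attr, 0, sizeof(attr));
    	attr.map_fd = map_fd;
    	return syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr));
    }
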
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b7ad8003c4e6..f25b7c9c20ba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem {
struct bpf_verifier_stack_elem *next;
};
-#define BPF_COMPLEXITY_LIMIT_INSNS 131072
#define BPF_COMPLEXITY_LIMIT_STACK 1024
#define BPF_COMPLEXITY_LIMIT_STATES 64
@@ -1092,7 +1091,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
*/
subprog[env->subprog_cnt].start = insn_cnt;
- if (env->log.level > 1)
+ if (env->log.level & BPF_LOG_LEVEL2)
for (i = 0; i < env->subprog_cnt; i++)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
@@ -1139,6 +1138,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
struct bpf_reg_state *parent)
{
bool writes = parent == state->parent; /* Observe write marks */
+ int cnt = 0;
while (parent) {
/* if read wasn't screened by an earlier write ... */
@@ -1150,12 +1150,25 @@ static int mark_reg_read(struct bpf_verifier_env *env,
parent->var_off.value, parent->off);
return -EFAULT;
}
+ if (parent->live & REG_LIVE_READ)
+ /* The parentage chain never changes and
+ * this parent was already marked as LIVE_READ.
+ * There is no need to keep walking the chain again and
+ * keep re-marking all parents as LIVE_READ.
+ * This case happens when the same register is read
+ * multiple times without intervening writes to it.
+ */
+ break;
/* ... then we depend on parent's value */
parent->live |= REG_LIVE_READ;
state = parent;
parent = state->parent;
writes = true;
+ cnt++;
}
+
+ if (env->longest_mark_read_walk < cnt)
+ env->longest_mark_read_walk = cnt;
return 0;
}
@@ -1413,7 +1426,7 @@ static int check_stack_access(struct bpf_verifier_env *env,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable stack access var_off=%s off=%d size=%d",
+ verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
tn_buf, off, size);
return -EACCES;
}
@@ -1426,6 +1439,28 @@ static int check_stack_access(struct bpf_verifier_env *env,
return 0;
}
+static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
+ int off, int size, enum bpf_access_type type)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_map *map = regs[regno].map_ptr;
+ u32 cap = bpf_map_flags_to_cap(map);
+
+ if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
+ verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
+ map->value_size, off, size);
+ return -EACCES;
+ }
+
+ if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
+ verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
+ map->value_size, off, size);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, bool zero_size_allowed)
@@ -1455,7 +1490,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
* need to try adding each of min_value and max_value to off
* to make sure our theoretical access will be safe.
*/
- if (env->log.level)
+ if (env->log.level & BPF_LOG_LEVEL)
print_verifier_state(env, state);
/* The minimum value is only important with signed
@@ -2012,7 +2047,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
verbose(env, "R%d leaks addr into map\n", value_regno);
return -EACCES;
}
-
+ err = check_map_access_type(env, regno, off, size, t);
+ if (err)
+ return err;
err = check_map_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
@@ -2158,6 +2195,29 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}
+static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
+ int off, int access_size,
+ bool zero_size_allowed)
+{
+ struct bpf_reg_state *reg = reg_state(env, regno);
+
+ if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
+ access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
+ if (tnum_is_const(reg->var_off)) {
+ verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
+ regno, off, access_size);
+ } else {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
+ regno, tn_buf, access_size);
+ }
+ return -EACCES;
+ }
+ return 0;
+}
+
/* when register 'regno' is passed into function that will read 'access_size'
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized.
@@ -2170,7 +2230,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
- int off, i, slot, spi;
+ int err, min_off, max_off, i, slot, spi;
if (reg->type != PTR_TO_STACK) {
/* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2184,21 +2244,57 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return -EACCES;
}
- /* Only allow fixed-offset stack reads */
- if (!tnum_is_const(reg->var_off)) {
- char tn_buf[48];
+ if (tnum_is_const(reg->var_off)) {
+ min_off = max_off = reg->var_off.value + reg->off;
+ err = __check_stack_boundary(env, regno, min_off, access_size,
+ zero_size_allowed);
+ if (err)
+ return err;
+ } else {
+ /* Variable offset is prohibited in unprivileged mode for
+ * simplicity, since it requires corresponding support in
+ * Spectre masking for stack ALU.
+ * See also retrieve_ptr_limit().
+ */
+ if (!env->allow_ptr_leaks) {
+ char tn_buf[48];
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "invalid variable stack read R%d var_off=%s\n",
- regno, tn_buf);
- return -EACCES;
- }
- off = reg->off + reg->var_off.value;
- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
- access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
- verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
- regno, off, access_size);
- return -EACCES;
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
+ regno, tn_buf);
+ return -EACCES;
+ }
+ /* Only an initialized buffer on the stack may be accessed with
+ * a variable offset. With an uninitialized buffer it's hard to
+ * guarantee that the whole memory is marked as initialized on
+ * helper return, since the specific bounds are unknown, which
+ * may cause leaking of uninitialized stack memory.
+ */
+ if (meta && meta->raw_mode)
+ meta = NULL;
+
+ if (reg->smax_value >= BPF_MAX_VAR_OFF ||
+ reg->smax_value <= -BPF_MAX_VAR_OFF) {
+ verbose(env, "R%d unbounded indirect variable offset stack access\n",
+ regno);
+ return -EACCES;
+ }
+ min_off = reg->smin_value + reg->off;
+ max_off = reg->smax_value + reg->off;
+ err = __check_stack_boundary(env, regno, min_off, access_size,
+ zero_size_allowed);
+ if (err) {
+ verbose(env, "R%d min value is outside of stack bound\n",
+ regno);
+ return err;
+ }
+ err = __check_stack_boundary(env, regno, max_off, access_size,
+ zero_size_allowed);
+ if (err) {
+ verbose(env, "R%d max value is outside of stack bound\n",
+ regno);
+ return err;
+ }
}
if (meta && meta->raw_mode) {
@@ -2207,10 +2303,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return 0;
}
- for (i = 0; i < access_size; i++) {
+ for (i = min_off; i < max_off + access_size; i++) {
u8 *stype;
- slot = -(off + i) - 1;
+ slot = -i - 1;
spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot)
goto err;
@@ -2223,8 +2319,16 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
goto mark;
}
err:
- verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- off, i, access_size);
+ if (tnum_is_const(reg->var_off)) {
+ verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+ min_off, i - min_off, access_size);
+ } else {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
+ tn_buf, i - min_off, access_size);
+ }
return -EACCES;
mark:
/* reading any byte out of 8-byte 'spill_slot' will cause
@@ -2233,7 +2337,7 @@ mark:
mark_reg_read(env, &state->stack[spi].spilled_ptr,
state->stack[spi].spilled_ptr.parent);
}
- return update_stack_depth(env, state, off);
+ return update_stack_depth(env, state, min_off);
}
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -2248,6 +2352,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_VALUE:
+ if (check_map_access_type(env, regno, reg->off, access_size,
+ meta && meta->raw_mode ? BPF_WRITE :
+ BPF_READ))
+ return -EACCES;
return check_map_access(env, regno, reg->off, access_size,
zero_size_allowed);
default: /* scalar_value|ptr_to_stack or invalid ptr */
@@ -2906,7 +3014,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
print_verifier_state(env, caller);
verbose(env, "callee:\n");
@@ -2946,7 +3054,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return err;
*insn_idx = callee->callsite + 1;
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "returning from callee:\n");
print_verifier_state(env, callee);
verbose(env, "to caller at %d:\n", *insn_idx);
@@ -2980,6 +3088,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
int func_id, int insn_idx)
{
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_map *map = meta->map_ptr;
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -2990,11 +3099,24 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_peek_elem)
return 0;
- if (meta->map_ptr == NULL) {
+ if (map == NULL) {
verbose(env, "kernel subsystem misconfigured verifier\n");
return -EINVAL;
}
+ /* In case of read-only maps, some additional restrictions
+ * need to be applied in order to prevent altering the
+ * state of the map from the program side.
+ */
+ if ((map->map_flags & BPF_F_RDONLY_PROG) &&
+ (func_id == BPF_FUNC_map_delete_elem ||
+ func_id == BPF_FUNC_map_update_elem ||
+ func_id == BPF_FUNC_map_push_elem ||
+ func_id == BPF_FUNC_map_pop_elem)) {
+ verbose(env, "write into map forbidden\n");
+ return -EACCES;
+ }
+
if (!BPF_MAP_PTR(aux->map_state))
bpf_map_ptr_store(aux, meta->map_ptr,
meta->map_ptr->unpriv_array);
@@ -3285,6 +3407,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
switch (ptr_reg->type) {
case PTR_TO_STACK:
+ /* Indirect variable offset stack access is prohibited in
+ * unprivileged mode so it's not handled here.
+ */
off = ptr_reg->off + ptr_reg->var_off.value;
if (mask_to_left)
*ptr_limit = MAX_BPF_STACK + off;
@@ -4969,23 +5094,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
insn->dst_reg);
return -EACCES;
}
- if (env->log.level)
+ if (env->log.level & BPF_LOG_LEVEL)
print_verifier_state(env, this_branch->frame[this_branch->curframe]);
return 0;
}
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */
-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
-{
- u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
-
- return (struct bpf_map *) (unsigned long) imm64;
-}
-
/* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
+ struct bpf_insn_aux_data *aux = cur_aux(env);
struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_map *map;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -5009,11 +5128,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
- BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+ map = env->used_maps[aux->map_index];
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+ regs[insn->dst_reg].map_ptr = map;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
+ regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+ regs[insn->dst_reg].off = aux->map_off;
+ if (map_value_has_spin_lock(map))
+ regs[insn->dst_reg].id = ++env->id_gen;
+ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+ regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+ } else {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
- regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
return 0;
}
@@ -5267,13 +5397,13 @@ static int check_cfg(struct bpf_verifier_env *env)
int ret = 0;
int i, t;
- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
return -ENOMEM;
- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_stack) {
- kfree(insn_state);
+ kvfree(insn_state);
return -ENOMEM;
}
@@ -5371,8 +5501,8 @@ check_state:
ret = 0; /* cfg looks good */
err_free:
- kfree(insn_state);
- kfree(insn_stack);
+ kvfree(insn_state);
+ kvfree(insn_stack);
return ret;
}
@@ -6115,11 +6245,13 @@ static int propagate_liveness(struct bpf_verifier_env *env,
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
- struct bpf_verifier_state_list *sl;
+ struct bpf_verifier_state_list *sl, **pprev;
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
- sl = env->explored_states[insn_idx];
+ pprev = &env->explored_states[insn_idx];
+ sl = *pprev;
+
if (!sl)
/* this 'insn_idx' instruction wasn't marked, so we will not
* be doing state search here
@@ -6130,6 +6262,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
while (sl != STATE_LIST_MARK) {
if (states_equal(env, &sl->state, cur)) {
+ sl->hit_cnt++;
/* reached equivalent register/stack state,
* prune the search.
* Registers read by the continuation are read by us.
@@ -6145,10 +6278,40 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return err;
return 1;
}
- sl = sl->next;
states_cnt++;
+ sl->miss_cnt++;
+ /* heuristic to determine whether this state is beneficial to
+ * keep checking from the state-equivalence point of view.
+ * Higher numbers increase max_states_per_insn and verification time,
+ * but do not meaningfully decrease insn_processed.
+ */
+ if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+ /* the state is unlikely to be useful. Remove it to
+ * speed up verification
+ */
+ *pprev = sl->next;
+ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->peak_states--;
+ } else {
+ /* cannot free this state, since parentage chain may
+ * walk it later. Add it for free_list instead to
+ * be freed at the end of verification
+ */
+ sl->next = env->free_list;
+ env->free_list = sl;
+ }
+ sl = *pprev;
+ continue;
+ }
+ pprev = &sl->next;
+ sl = *pprev;
}
+ if (env->max_states_per_insn < states_cnt)
+ env->max_states_per_insn = states_cnt;
+
if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
return 0;
@@ -6162,6 +6325,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
if (!new_sl)
return -ENOMEM;
+ env->total_states++;
+ env->peak_states++;
/* add new state to the head of linked list */
new = &new_sl->state;
@@ -6246,8 +6411,7 @@ static int do_check(struct bpf_verifier_env *env)
struct bpf_verifier_state *state;
struct bpf_insn *insns = env->prog->insnsi;
struct bpf_reg_state *regs;
- int insn_cnt = env->prog->len, i;
- int insn_processed = 0;
+ int insn_cnt = env->prog->len;
bool do_print_state = false;
env->prev_linfo = NULL;
@@ -6282,10 +6446,10 @@ static int do_check(struct bpf_verifier_env *env)
insn = &insns[env->insn_idx];
class = BPF_CLASS(insn->code);
- if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
+ if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
verbose(env,
"BPF program is too large. Processed %d insn\n",
- insn_processed);
+ env->insn_processed);
return -E2BIG;
}
@@ -6294,7 +6458,7 @@ static int do_check(struct bpf_verifier_env *env)
return err;
if (err == 1) {
/* found equivalent state, can prune the search */
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
if (do_print_state)
verbose(env, "\nfrom %d to %d%s: safe\n",
env->prev_insn_idx, env->insn_idx,
@@ -6312,8 +6476,9 @@ static int do_check(struct bpf_verifier_env *env)
if (need_resched())
cond_resched();
- if (env->log.level > 1 || (env->log.level && do_print_state)) {
- if (env->log.level > 1)
+ if (env->log.level & BPF_LOG_LEVEL2 ||
+ (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
+ if (env->log.level & BPF_LOG_LEVEL2)
verbose(env, "%d:", env->insn_idx);
else
verbose(env, "\nfrom %d to %d%s:",
@@ -6324,7 +6489,7 @@ static int do_check(struct bpf_verifier_env *env)
do_print_state = false;
}
- if (env->log.level) {
+ if (env->log.level & BPF_LOG_LEVEL) {
const struct bpf_insn_cbs cbs = {
.cb_print = verbose,
.private_data = env,
@@ -6589,16 +6754,6 @@ process_bpf_exit:
env->insn_idx++;
}
- verbose(env, "processed %d insns (limit %d), stack depth ",
- insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
- for (i = 0; i < env->subprog_cnt; i++) {
- u32 depth = env->subprog_info[i].stack_depth;
-
- verbose(env, "%d", depth);
- if (i + 1 < env->subprog_cnt)
- verbose(env, "+");
- }
- verbose(env, "\n");
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
return 0;
}
@@ -6696,8 +6851,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
}
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+ struct bpf_insn_aux_data *aux;
struct bpf_map *map;
struct fd f;
+ u64 addr;
if (i == insn_cnt - 1 || insn[1].code != 0 ||
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -6706,13 +6863,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (insn->src_reg == 0)
+ if (insn[0].src_reg == 0)
/* valid generic load 64-bit imm */
goto next_insn;
- if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
- insn[1].imm != 0) {
- verbose(env, "unrecognized bpf_ld_imm64 insn\n");
+ /* In the final convert_pseudo_ld_imm64() step, this is
+ * converted into a regular 64-bit imm load insn.
+ */
+ if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
+ insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
+ (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
+ insn[1].imm != 0)) {
+ verbose(env,
+ "unrecognized bpf_ld_imm64 insn\n");
return -EINVAL;
}
@@ -6730,16 +6893,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
return err;
}
- /* store map pointer inside BPF_LD_IMM64 instruction */
- insn[0].imm = (u32) (unsigned long) map;
- insn[1].imm = ((u64) (unsigned long) map) >> 32;
+ aux = &env->insn_aux_data[i];
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+ addr = (unsigned long)map;
+ } else {
+ u32 off = insn[1].imm;
+
+ if (off >= BPF_MAX_VAR_OFF) {
+ verbose(env, "direct value offset of %u is not allowed\n", off);
+ fdput(f);
+ return -EINVAL;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ fdput(f);
+ return -EINVAL;
+ }
+
+ err = map->ops->map_direct_value_addr(map, &addr, off);
+ if (err) {
+ verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
+ map->value_size, off);
+ fdput(f);
+ return err;
+ }
+
+ aux->map_off = off;
+ addr += off;
+ }
+
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
/* check whether we recorded this map already */
- for (j = 0; j < env->used_map_cnt; j++)
+ for (j = 0; j < env->used_map_cnt; j++) {
if (env->used_maps[j] == map) {
+ aux->map_index = j;
fdput(f);
goto next_insn;
}
+ }
if (env->used_map_cnt >= MAX_USED_MAPS) {
fdput(f);
@@ -6756,6 +6950,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
fdput(f);
return PTR_ERR(map);
}
+
+ aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
if (bpf_map_is_cgroup_storage(map) &&
@@ -6861,8 +7057,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
struct bpf_prog *new_prog;
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
- if (!new_prog)
+ if (IS_ERR(new_prog)) {
+ if (PTR_ERR(new_prog) == -ERANGE)
+ verbose(env,
+ "insn %d cannot be patched due to 16-bit range\n",
+ env->insn_aux_data[off].orig_idx);
return NULL;
+ }
if (adjust_insn_aux_data(env, new_prog->len, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
@@ -7804,6 +8005,14 @@ static void free_states(struct bpf_verifier_env *env)
struct bpf_verifier_state_list *sl, *sln;
int i;
+ sl = env->free_list;
+ while (sl) {
+ sln = sl->next;
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ sl = sln;
+ }
+
if (!env->explored_states)
return;
@@ -7819,12 +8028,37 @@ static void free_states(struct bpf_verifier_env *env)
}
}
- kfree(env->explored_states);
+ kvfree(env->explored_states);
+}
+
+static void print_verification_stats(struct bpf_verifier_env *env)
+{
+ int i;
+
+ if (env->log.level & BPF_LOG_STATS) {
+ verbose(env, "verification time %lld usec\n",
+ div_u64(env->verification_time, 1000));
+ verbose(env, "stack depth ");
+ for (i = 0; i < env->subprog_cnt; i++) {
+ u32 depth = env->subprog_info[i].stack_depth;
+
+ verbose(env, "%d", depth);
+ if (i + 1 < env->subprog_cnt)
+ verbose(env, "+");
+ }
+ verbose(env, "\n");
+ }
+ verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
+ "total_states %d peak_states %d mark_read %d\n",
+ env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
+ env->max_states_per_insn, env->total_states,
+ env->peak_states, env->longest_mark_read_walk);
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ u64 start_time = ktime_get_ns();
struct bpf_verifier_env *env;
struct bpf_verifier_log *log;
int i, len, ret = -EINVAL;
@@ -7866,8 +8100,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
ret = -EINVAL;
/* log attributes have to be sane */
- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
- !log->level || !log->ubuf)
+ if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
+ !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
goto err_unlock;
}
@@ -7890,7 +8124,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto skip_full_check;
}
- env->explored_states = kcalloc(env->prog->len,
+ env->explored_states = kvcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
@@ -7948,6 +8182,9 @@ skip_full_check:
if (ret == 0)
ret = fixup_call_args(env);
+ env->verification_time = ktime_get_ns() - start_time;
+ print_verification_stats(env);
+
if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
if (log->level && !log->ubuf) {
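
The BPF_LOG_LEVEL, BPF_LOG_LEVEL2, BPF_LOG_STATS and BPF_LOG_MASK constants used throughout this file are introduced elsewhere in the series; a sketch of the assumed bit layout, with log.level now treated as a bit mask rather than a plain integer:

    /* Assumed definitions (e.g. in include/linux/bpf_verifier.h). */
    #define BPF_LOG_LEVEL1	1
    #define BPF_LOG_LEVEL2	2
    #define BPF_LOG_STATS	4
    #define BPF_LOG_LEVEL	(BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
    #define BPF_LOG_MASK	(BPF_LOG_LEVEL | BPF_LOG_STATS)
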
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54a0347ca812..df27e499956a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd)
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
- syscall_get_arguments(task, regs, 0, 6, args);
+ syscall_get_arguments(task, regs, args);
sd->args[0] = args[0];
sd->args[1] = args[1];
sd->args[2] = args[2];
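
This call site reflects the tree-wide API change to syscall_get_arguments(), which now always fills all six arguments instead of taking an (offset, count) window. A sketch of what the per-arch helper reduces to, assuming a hypothetical architecture that keeps the argument registers contiguously in pt_regs:

    static inline void syscall_get_arguments(struct task_struct *task,
    					 struct pt_regs *regs,
    					 unsigned long *args)
    {
    	/* Always copy exactly six arguments; callers that need
    	 * fewer, such as the tracers below, copy out a subset.
    	 */
    	memcpy(args, &regs->regs[0], 6 * sizeof(args[0]));
    }
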
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e5da394d1ca3..c9ec050bcf46 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -128,6 +128,7 @@ static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
@@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
+ .extra1 = &zero_ul,
.extra2 = &long_max,
},
{
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f93a56d2db27..fa8fbff736d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
+ unsigned long args[6];
int pc;
int syscall_nr;
int size;
@@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
@@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ unsigned long args[6];
bool valid_prog_array;
int syscall_nr;
int rctx;
@@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d9e81779e37..188fc17c2202 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -219,6 +219,14 @@ config DEBUG_INFO_DWARF4
But it significantly improves the success of resolving
variables in gdb on optimized code.
+config DEBUG_INFO_BTF
+ bool "Generate BTF typeinfo"
+ depends on DEBUG_INFO
+ help
+ Generate deduplicated BTF type information from DWARF debug info.
+ Turning this on requires the pahole tool, which converts
+ DWARF type info into equivalent deduplicated BTF type info.
+
config GDB_SCRIPTS
bool "Provide GDB scripts for kernel debugging"
depends on DEBUG_INFO
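
Enabling the new option is then just a .config fragment (pahole must be installed for the conversion step to succeed):

    CONFIG_DEBUG_INFO=y
    CONFIG_DEBUG_INFO_BTF=y
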
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 4525fb094844..a8ede77afe0d 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -291,13 +291,14 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
{
const unsigned char *ip = in;
unsigned char *op = out;
+ unsigned char *data_start;
size_t l = in_len;
size_t t = 0;
signed char state_offset = -2;
unsigned int m4_max_offset;
- // LZO v0 will never write 17 as first byte,
- // so this is used to version the bitstream
+ // LZO v0 will never write 17 as first byte (except for zero-length
+ // input), so this is used to version the bitstream
if (bitstream_version > 0) {
*op++ = 17;
*op++ = bitstream_version;
@@ -306,6 +307,8 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
m4_max_offset = M4_MAX_OFFSET_V0;
}
+ data_start = op;
+
while (l > 20) {
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
uintptr_t ll_end = (uintptr_t) ip + ll;
@@ -324,7 +327,7 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
if (t > 0) {
const unsigned char *ii = in + in_len - t;
- if (op == out && t <= 238) {
+ if (op == data_start && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[state_offset] |= t;
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 6d2600ea3b55..9e07e9ef1aad 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -54,11 +54,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
if (unlikely(in_len < 3))
goto input_overrun;
- if (likely(*ip == 17)) {
+ if (likely(in_len >= 5) && likely(*ip == 17)) {
bitstream_version = ip[1];
ip += 2;
- if (unlikely(in_len < 5))
- goto input_overrun;
} else {
bitstream_version = 0;
}
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index a8583af43b59..6529fe1b45c1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -59,7 +59,7 @@ int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
return 1;
if (unlikely(tbl->nest))
return 1;
- return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]);
+ return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
@@ -175,8 +175,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
int i;
static struct lock_class_key __key;
- size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
size = nbuckets;
@@ -224,7 +223,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
int err = -EAGAIN;
struct rhash_head *head, *next, *entry;
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;
if (new_tbl->nest)
@@ -232,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
err = -ENOENT;
- rht_for_each_from(entry, rht_ptr(*bkt), old_tbl, old_hash) {
+ rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
+ old_tbl, old_hash) {
err = 0;
next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -249,8 +249,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
- head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash],
- new_tbl, new_hash));
+ head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
RCU_INIT_POINTER(entry->next, head);
@@ -260,7 +259,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
rcu_assign_pointer(*pprev, next);
else
/* Need to preserve the bit lock. */
- rcu_assign_pointer(*bkt, rht_ptr_locked(next));
+ rht_assign_locked(bkt, next);
out:
return err;
@@ -487,12 +486,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
.ht = ht,
.key = key,
};
- struct rhash_head **pprev = NULL;
+ struct rhash_head __rcu **pprev = NULL;
struct rhash_head *head;
int elasticity;
elasticity = RHT_ELASTICITY;
- rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
+ rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
struct rhlist_head *plist;
@@ -518,7 +517,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
rcu_assign_pointer(*pprev, obj);
else
/* Need to preserve the bit lock */
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);
return NULL;
}
@@ -558,7 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
if (unlikely(rht_grow_above_100(ht, tbl)))
return ERR_PTR(-EAGAIN);
- head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));
+ head = rht_ptr(bkt, tbl, hash);
RCU_INIT_POINTER(obj->next, head);
if (ht->rhlist) {
@@ -571,7 +570,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
/* bkt is always the head of the list, so it holds
* the lock, which we need to preserve
*/
- rcu_assign_pointer(*bkt, rht_ptr_locked(obj));
+ rht_assign_locked(bkt, obj);
atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
@@ -1140,7 +1139,7 @@ restart:
struct rhash_head *pos, *next;
cond_resched();
- for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)),
+ for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL;
!rht_is_a_nulls(pos);
diff --git a/lib/string.c b/lib/string.c
index 38e4ca08e757..3ab861c1a857 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count)
EXPORT_SYMBOL(memcmp);
#endif
+#ifndef __HAVE_ARCH_BCMP
+/**
+ * bcmp - returns 0 if and only if the buffers have identical contents.
+ * @a: pointer to first buffer.
+ * @b: pointer to second buffer.
+ * @len: size of buffers.
+ *
+ * The sign or magnitude of a non-zero return value has no particular
+ * meaning, and architectures may implement their own more efficient bcmp(). So
+ * while this particular implementation is a simple (tail) call to memcmp, do
+ * not rely on anything but whether the return value is zero or non-zero.
+ */
+#undef bcmp
+int bcmp(const void *a, const void *b, size_t len)
+{
+ return memcmp(a, b, len);
+}
+EXPORT_SYMBOL(bcmp);
+#endif
+
#ifndef __HAVE_ARCH_MEMSCAN
/**
* memscan - Find a character in an area of memory.
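
As the kernel-doc above stresses, only the zero/non-zero distinction of bcmp() is stable; a short illustrative sketch of the intended use:

    #include <linux/string.h>
    #include <linux/types.h>

    static bool buffers_equal(const void *a, const void *b, size_t len)
    {
    	/* Correct: equality test only. Ordering by the sign of the
    	 * return value is only valid for memcmp(), not bcmp().
    	 */
    	return bcmp(a, b, len) == 0;
    }
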
diff --git a/lib/syscall.c b/lib/syscall.c
index 1a7077f20eae..fb328e7ccb08 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -5,16 +5,14 @@
#include <linux/export.h>
#include <asm/syscall.h>
-static int collect_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+static int collect_syscall(struct task_struct *target, struct syscall_info *info)
{
struct pt_regs *regs;
if (!try_get_task_stack(target)) {
/* Task has no stack, so the task isn't in a syscall. */
- *sp = *pc = 0;
- *callno = -1;
+ memset(info, 0, sizeof(*info));
+ info->data.nr = -1;
return 0;
}
@@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno,
return -EAGAIN;
}
- *sp = user_stack_pointer(regs);
- *pc = instruction_pointer(regs);
+ info->sp = user_stack_pointer(regs);
+ info->data.instruction_pointer = instruction_pointer(regs);
- *callno = syscall_get_nr(target, regs);
- if (*callno != -1L && maxargs > 0)
- syscall_get_arguments(target, regs, 0, maxargs, args);
+ info->data.nr = syscall_get_nr(target, regs);
+ if (info->data.nr != -1L)
+ syscall_get_arguments(target, regs,
+ (unsigned long *)&info->data.args[0]);
put_task_stack(target);
return 0;
@@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno,
/**
* task_current_syscall - Discover what a blocked task is doing.
* @target: thread to examine
- * @callno: filled with system call number or -1
- * @args: filled with @maxargs system call arguments
- * @maxargs: number of elements in @args to fill
- * @sp: filled with user stack pointer
- * @pc: filled with user PC
+ * @info: structure with the following fields:
+ * .sp - filled with user stack pointer
+ * .data.nr - filled with system call number or -1
+ * .data.args - filled with up to six system call arguments
+ * .data.instruction_pointer - filled with user PC
*
- * If @target is blocked in a system call, returns zero with *@callno
- * set to the the call's number and @args filled in with its arguments.
- * Registers not used for system call arguments may not be available and
- * it is not kosher to use &struct user_regset calls while the system
+ * If @target is blocked in a system call, returns zero with @info.data.nr
+ * set to the call's number and @info.data.args filled in with its
+ * arguments. Registers not used for system call arguments may not be available
+ * and it is not kosher to use &struct user_regset calls while the system
* call is still in progress. Note we may get this result if @target
* has finished its system call but not yet returned to user mode, such
* as when it's stopped for signal handling or syscall exit tracing.
*
* If @target is blocked in the kernel during a fault or exception,
- * returns zero with *@callno set to -1 and does not fill in @args.
- * If so, it's now safe to examine @target using &struct user_regset
- * get() calls as long as we're sure @target won't return to user mode.
+ * returns zero with @info.data.nr set to -1 and does not fill in
+ * @info.data.args. If so, it's now safe to examine @target using
+ * &struct user_regset get() calls as long as we're sure @target won't return
+ * to user mode.
*
* Returns -%EAGAIN if @target does not remain blocked.
- *
- * Returns -%EINVAL if @maxargs is too large (maximum is six).
*/
-int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+int task_current_syscall(struct task_struct *target, struct syscall_info *info)
{
long state;
unsigned long ncsw;
- if (unlikely(maxargs > 6))
- return -EINVAL;
-
if (target == current)
- return collect_syscall(target, callno, args, maxargs, sp, pc);
+ return collect_syscall(target, info);
state = target->state;
if (unlikely(!state))
@@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno,
ncsw = wait_task_inactive(target, state);
if (unlikely(!ncsw) ||
- unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+ unlikely(collect_syscall(target, info)) ||
unlikely(wait_task_inactive(target, state) != ncsw))
return -EAGAIN;
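
Callers of task_current_syscall() now pass a single struct syscall_info; a caller sketch, assuming the layout introduced earlier in this series (a __u64 sp plus an embedded struct seccomp_data):

    #include <linux/ptrace.h>
    #include <linux/sched.h>

    /* Hypothetical caller: report what a blocked task is doing. */
    static void report_blocked_syscall(struct task_struct *task)
    {
    	struct syscall_info info;

    	if (task_current_syscall(task, &info))
    		return;	/* -EAGAIN: task did not stay blocked */

    	pr_info("nr=%d sp=%#llx pc=%#llx\n", info.data.nr,
    		info.sp, info.data.instruction_pointer);
    }
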
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 02592c2a249c..084fe5a6ac57 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
struct rhash_head *pos, *next;
struct test_obj_rhl *p;
- pos = rht_ptr(rht_dereference(tbl->buckets[i], ht));
+ pos = rht_ptr_exclusive(tbl->buckets + i);
next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
if (!rht_is_a_nulls(pos)) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 404acdcd0455..165ea46bf149 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pmd_lock(mm, pmd);
+ if (!pmd_none(*pmd)) {
+ if (write) {
+ if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+ goto out_unlock;
+ }
+ entry = pmd_mkyoung(*pmd);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+ update_mmu_cache_pmd(vma, addr, pmd);
+ }
+
+ goto out_unlock;
+ }
+
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pmd_mkdevmap(entry);
@@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
+ pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
spin_unlock(ptl);
+ if (pgtable)
+ pte_free(mm, pgtable);
}
vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pud_lock(mm, pud);
+ if (!pud_none(*pud)) {
+ if (write) {
+ if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pud(*pud));
+ goto out_unlock;
+ }
+ entry = pud_mkyoung(*pud);
+ entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+ if (pudp_set_access_flags(vma, addr, pud, entry, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+ }
+ goto out_unlock;
+ }
+
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pud_mkdevmap(entry);
@@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
}
set_pud_at(mm, addr, pud, entry);
update_mmu_cache_pud(vma, addr, pud);
+
+out_unlock:
spin_unlock(ptl);
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 707fa5579f66..6c318f5ac234 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1529,11 +1529,6 @@ static void kmemleak_scan(void)
}
rcu_read_unlock();
- /* data/bss scanning */
- scan_large_block(_sdata, _edata);
- scan_large_block(__bss_start, __bss_stop);
- scan_large_block(__start_ro_after_init, __end_ro_after_init);
-
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
for_each_possible_cpu(i)
@@ -2071,6 +2066,17 @@ void __init kmemleak_init(void)
}
local_irq_restore(flags);
+ /* register the data/bss sections */
+ create_object((unsigned long)_sdata, _edata - _sdata,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ create_object((unsigned long)__bss_start, __bss_stop - __bss_start,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ /* only register .data..ro_after_init if not within .data */
+ if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata)
+ create_object((unsigned long)__start_ro_after_init,
+ __end_ro_after_init - __start_ro_after_init,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+
/*
* This is the point where tracking allocations is safe. Automatic
* scanning is started during the late initcall. Add the early logged
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 532e0e2a4817..81a0d3914ec9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
return &memcg->cgwb_domain;
}
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = atomic_long_read(&memcg->stat[idx]);
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ if (x < 0)
+ x = 0;
+ return x;
+}
+
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
@@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
(1 << LRU_ACTIVE_FILE));
*pheadroom = PAGE_COUNTER_MAX;
diff --git a/mm/slab.c b/mm/slab.c
index 329bfe67f2ca..47a380a486ee 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4308,7 +4308,8 @@ static void show_symbol(struct seq_file *m, unsigned long address)
static int leaks_show(struct seq_file *m, void *p)
{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
+ root_caches_node);
struct page *page;
struct kmem_cache_node *n;
const char *name;
diff --git a/mm/util.c b/mm/util.c
index d559bde497a9..43a2984bccaa 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user);
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
*
- * Return: newly allocated copy of @s or %NULL in case of error
+ * Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
char *strndup_user(const char __user *s, long n)
{
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d795b9c5aea4..b9e67e589a7b 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,8 +345,8 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
rt = (struct rtable *) dst;
- if (rt->rt_gateway)
- daddr = &rt->rt_gateway;
+ if (rt->rt_gw_family == AF_INET)
+ daddr = &rt->rt_gw4;
else
daddr = &ip_hdr(skb)->daddr;
n = dst_neigh_lookup(dst, daddr);
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
index 27b2992a0692..b0ca361742e4 100644
--- a/net/bpf/Makefile
+++ b/net/bpf/Makefile
@@ -1 +1 @@
-obj-y := test_run.o
+obj-$(CONFIG_BPF_SYSCALL) := test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fab142b796ef..2221573dacdb 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -123,12 +123,126 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
return data;
}
+static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
+{
+ void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
+ void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+ u32 size = kattr->test.ctx_size_in;
+ void *data;
+ int err;
+
+ if (!data_in && !data_out)
+ return NULL;
+
+ data = kzalloc(max_size, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ if (data_in) {
+ err = bpf_check_uarg_tail_zero(data_in, max_size, size);
+ if (err) {
+ kfree(data);
+ return ERR_PTR(err);
+ }
+
+ size = min_t(u32, max_size, size);
+ if (copy_from_user(data, data_in, size)) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ return data;
+}
+
+static int bpf_ctx_finish(const union bpf_attr *kattr,
+ union bpf_attr __user *uattr, const void *data,
+ u32 size)
+{
+ void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
+ int err = -EFAULT;
+ u32 copy_size = size;
+
+ if (!data || !data_out)
+ return 0;
+
+ if (copy_size > kattr->test.ctx_size_out) {
+ copy_size = kattr->test.ctx_size_out;
+ err = -ENOSPC;
+ }
+
+ if (copy_to_user(data_out, data, copy_size))
+ goto out;
+ if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
+ goto out;
+ if (err != -ENOSPC)
+ err = 0;
+out:
+ return err;
+}
+
+/**
+ * range_is_zero - test whether a buffer range is all zeroes
+ * @buf: buffer to check
+ * @from: check from this position
+ * @to: check up until (excluding) this position
+ *
+ * This function returns true if there are no non-zero bytes
+ * in @buf in the range [from, to).
+ */
+static inline bool range_is_zero(void *buf, size_t from, size_t to)
+{
+ return !memchr_inv((u8 *)buf + from, 0, to - from);
+}
+
+static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+ struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+ if (!__skb)
+ return 0;
+
+ /* make sure the fields we don't use are zeroed */
+ if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
+ return -EINVAL;
+
+ /* priority is allowed */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
+ FIELD_SIZEOF(struct __sk_buff, priority),
+ offsetof(struct __sk_buff, cb)))
+ return -EINVAL;
+
+ /* cb is allowed */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
+ FIELD_SIZEOF(struct __sk_buff, cb),
+ sizeof(struct __sk_buff)))
+ return -EINVAL;
+
+ skb->priority = __skb->priority;
+ memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
+
+ return 0;
+}
+
+static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
+{
+ struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+
+ if (!__skb)
+ return;
+
+ __skb->priority = skb->priority;
+ memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
+}
+
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
+ struct __sk_buff *ctx = NULL;
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
@@ -141,6 +255,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(data))
return PTR_ERR(data);
+ ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
+ if (IS_ERR(ctx)) {
+ kfree(data);
+ return PTR_ERR(ctx);
+ }
+
switch (prog->type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
@@ -158,6 +278,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
sk = kzalloc(sizeof(struct sock), GFP_USER);
if (!sk) {
kfree(data);
+ kfree(ctx);
return -ENOMEM;
}
sock_net_set(sk, current->nsproxy->net_ns);
@@ -166,6 +287,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
+ kfree(ctx);
kfree(sk);
return -ENOMEM;
}
@@ -180,32 +302,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
+ ret = convert___skb_to_skb(skb, ctx);
+ if (ret)
+ goto out;
ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
- if (ret) {
- kfree_skb(skb);
- kfree(sk);
- return ret;
- }
+ if (ret)
+ goto out;
if (!is_l2) {
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
- kfree_skb(skb);
- kfree(sk);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
}
memset(__skb_push(skb, hh_len), 0, hh_len);
}
+ convert_skb_to___skb(skb, ctx);
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, ctx,
+ sizeof(struct __sk_buff));
+out:
kfree_skb(skb);
kfree(sk);
+ kfree(ctx);
return ret;
}
@@ -220,6 +347,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
+ if (kattr->test.ctx_in || kattr->test.ctx_out)
+ return -EINVAL;
+
data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -263,6 +393,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
+ if (kattr->test.ctx_in || kattr->test.ctx_out)
+ return -EINVAL;
+
data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
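
For reference, a minimal userspace sketch of driving the ctx_in/ctx_out test_run fields introduced above. The bpf_attr field names follow the uapi this change extends; error handling is elided, and only priority and cb are set non-zero, matching the ranges convert___skb_to_skb() requires to be zeroed.

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int test_run_skb_ctx(int prog_fd, void *pkt, __u32 pkt_len)
	{
		struct __sk_buff ctx = {};	/* unused fields must stay zero */
		union bpf_attr attr = {};

		ctx.priority = 7;			/* copied into skb->priority */
		memset(ctx.cb, 0xab, sizeof(ctx.cb));	/* lands in qdisc_skb_cb */

		attr.test.prog_fd = prog_fd;
		attr.test.data_in = (__u64)(unsigned long)pkt;
		attr.test.data_size_in = pkt_len;
		attr.test.ctx_in = (__u64)(unsigned long)&ctx;
		attr.test.ctx_size_in = sizeof(ctx);
		attr.test.ctx_out = (__u64)(unsigned long)&ctx;
		attr.test.ctx_size_out = sizeof(ctx);

		return syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
	}
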
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 711d7156efd8..6c6e01963aac 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -186,15 +186,19 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
goto noxoff;
if (likely(!netif_queue_stopped(caifd->netdev))) {
+ struct Qdisc *sch;
+
/* If we run with a TX queue, check if the queue is too long*/
txq = netdev_get_tx_queue(skb->dev, 0);
- qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
-
- if (likely(qlen == 0))
+ sch = rcu_dereference_bh(txq->qdisc);
+ if (likely(qdisc_is_empty(sch)))
goto noxoff;
+ /* an explicit qdisc length can only be checked for !NOLOCK
+ * qdiscs; otherwise always set flow off
+ */
high = (caifd->netdev->tx_queue_len * q_high) / 100;
- if (likely(qlen < high))
+ if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
goto noxoff;
}
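
The pattern above generalizes: qdisc_is_empty() (added elsewhere in this series) is safe for any qdisc, while sch->q.qlen is only meaningful when the qdisc runs under the qdisc lock, since lockless (TCQ_F_NOLOCK) qdiscs keep per-cpu counters. A minimal sketch of the check under those assumptions, with an illustrative helper name:

	static bool caif_tx_queue_ok(const struct Qdisc *sch, unsigned int high)
	{
		if (qdisc_is_empty(sch))
			return true;		/* valid for all qdisc types */
		if (!(sch->flags & TCQ_F_NOLOCK))
			return sch->q.qlen < high;
		return false;			/* NOLOCK: qlen unreliable, back off */
	}
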
diff --git a/net/core/filter.c b/net/core/filter.c
index 8904e3407163..95a27fdf9a40 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2970,11 +2970,14 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
- BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_ENCAP_L2( \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK))
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
u16 mac_len = 0, inner_net = 0, inner_trans = 0;
unsigned int gso_type = SKB_GSO_DODGY;
@@ -3009,6 +3012,8 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
mac_len = skb->network_header - skb->mac_header;
inner_net = skb->network_header;
+ if (inner_mac_len > len_diff)
+ return -EINVAL;
inner_trans = skb->transport_header;
}
@@ -3017,8 +3022,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
return ret;
if (encap) {
- /* inner mac == inner_net on l3 encap */
- skb->inner_mac_header = inner_net;
+ skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
skb_set_inner_protocol(skb, skb->protocol);
@@ -3032,7 +3036,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
gso_type |= SKB_GSO_GRE;
else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
gso_type |= SKB_GSO_IPXIP6;
- else
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
gso_type |= SKB_GSO_IPXIP4;
if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
@@ -4639,15 +4643,26 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nhc->nhc_dev;
- if (nhc->nhc_has_gw)
- params->ipv4_dst = nhc->nhc_gw.ipv4;
params->rt_metric = res.fi->fib_priority;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
- neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+ if (likely(nhc->nhc_gw_family != AF_INET6)) {
+ if (nhc->nhc_gw_family)
+ params->ipv4_dst = nhc->nhc_gw.ipv4;
+
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ } else {
+ struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
+
+ params->family = AF_INET6;
+ *dst = nhc->nhc_gw.ipv6;
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+ }
+
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
@@ -4752,18 +4767,16 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (f6i->fib6_nh.fib_nh_lws)
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
- if (f6i->fib6_nh.fib_nh_has_gw)
+ if (f6i->fib6_nh.fib_nh_gw_family)
*dst = f6i->fib6_nh.fib_nh_gw6;
dev = f6i->fib6_nh.fib_nh_dev;
params->rt_metric = f6i->fib6_metric;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
- * not needed here. Can not use __ipv6_neigh_lookup_noref here
- * because we need to get nd_tbl via the stub
+ * not needed here.
*/
- neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
- ndisc_hashfn, dst, dev);
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
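
A hedged BPF-side sketch of the new L2 encap flag decoded by bpf_skb_net_grow() above: BPF_F_ADJ_ROOM_ENCAP_L2() packs the inner MAC length into the high flag bits that BPF_ADJ_ROOM_ENCAP_L2_SHIFT extracts. The section name and header layout are illustrative.

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/pkt_cls.h>
	#include <linux/udp.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int encap_eth_in_udp(struct __sk_buff *skb)
	{
		__u32 room = sizeof(struct iphdr) + sizeof(struct udphdr) + ETH_HLEN;
		__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
			      BPF_F_ADJ_ROOM_ENCAP_L4_UDP |
			      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN);

		/* grow headroom at the MAC layer, then write the outer
		 * IP/UDP headers and the inner Ethernet header
		 */
		if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC, flags))
			return TC_ACT_SHOT;
		return TC_ACT_OK;
	}

	char _license[] SEC("license") = "GPL";
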
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ac679f74ba47..9bf1b9ad1780 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -291,6 +291,7 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
+ qstats->qlen = 0;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
@@ -306,6 +307,7 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
if (cpu) {
__gnet_stats_copy_queue_cpu(qstats, cpu);
} else {
+ qstats->qlen = q->qlen;
qstats->backlog = q->backlog;
qstats->drops = q->drops;
qstats->requeues = q->requeues;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7e6dcc625701..ebb5b6d21a13 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -839,7 +839,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
nla = tb[NETNSA_FD];
} else if (tb[NETNSA_NSID]) {
- peer = get_net_ns_by_id(net, nla_get_u32(tb[NETNSA_NSID]));
+ peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
if (!peer)
peer = ERR_PTR(-ENOENT);
nla = tb[NETNSA_NSID];
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 76338c38738a..19aa32fc1802 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -94,8 +94,6 @@ int dns_query(const char *type, const char *name, size_t namelen,
desclen += typelen + 1;
}
- if (!namelen)
- namelen = strnlen(name, 256);
if (namelen < 3 || namelen > 255)
return -EINVAL;
desclen += namelen + 1;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 15f779bd26b3..d4b63f94f7be 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -558,7 +558,8 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (rt->rt_gateway.sa_family == AF_INET && addr) {
unsigned int addr_type;
- cfg->fc_gw = addr;
+ cfg->fc_gw4 = addr;
+ cfg->fc_gw_family = AF_INET;
addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
if (rt->rt_flags & RTF_GATEWAY &&
addr_type == RTN_UNICAST)
@@ -568,7 +569,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (cmd == SIOCDELRT)
return 0;
- if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+ if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family)
return -EINVAL;
if (cfg->fc_scope == RT_SCOPE_NOWHERE)
@@ -664,10 +665,55 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_DPORT] = { .type = NLA_U16 },
};
+int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
+ struct netlink_ext_ack *extack)
+{
+ struct rtvia *via;
+ int alen;
+
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+ NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA");
+ return -EINVAL;
+ }
+
+ via = nla_data(nla);
+ alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr);
+
+ switch (via->rtvia_family) {
+ case AF_INET:
+ if (alen != sizeof(__be32)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA");
+ return -EINVAL;
+ }
+ cfg->fc_gw_family = AF_INET;
+ cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
+ break;
+ case AF_INET6:
+#ifdef CONFIG_IPV6
+ if (alen != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
+ return -EINVAL;
+ }
+ cfg->fc_gw_family = AF_INET6;
+ cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr);
+#else
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EINVAL;
+#endif
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
struct nlmsghdr *nlh, struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
+ bool has_gw = false, has_via = false;
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
@@ -708,12 +754,17 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_oif = nla_get_u32(attr);
break;
case RTA_GATEWAY:
- cfg->fc_gw = nla_get_be32(attr);
+ has_gw = true;
+ cfg->fc_gw4 = nla_get_be32(attr);
+ if (cfg->fc_gw4)
+ cfg->fc_gw_family = AF_INET;
break;
case RTA_VIA:
- NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
- err = -EINVAL;
- goto errout;
+ has_via = true;
+ err = fib_gw_from_via(cfg, attr, extack);
+ if (err)
+ goto errout;
+ break;
case RTA_PRIORITY:
cfg->fc_priority = nla_get_u32(attr);
break;
@@ -752,6 +803,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (has_gw && has_via) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ err = -EINVAL;
+ goto errout;
+ }
+
return 0;
errout:
return err;
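
As an illustration of the attribute fib_gw_from_via() now accepts, a hypothetical userspace sketch appending an RTA_VIA carrying an IPv6 gateway to a route request; buffer bookkeeping is minimal and the helper name is invented.

	#include <stddef.h>
	#include <string.h>
	#include <linux/rtnetlink.h>
	#include <netinet/in.h>

	static int put_rta_via_inet6(struct nlmsghdr *nlh, size_t maxlen,
				     const struct in6_addr *gw)
	{
		size_t len = offsetof(struct rtvia, rtvia_addr) + sizeof(*gw);
		struct rtattr *rta;
		struct rtvia *via;

		if (NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(RTA_LENGTH(len)) > maxlen)
			return -1;

		rta = (struct rtattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));
		rta->rta_type = RTA_VIA;
		rta->rta_len = RTA_LENGTH(len);
		via = RTA_DATA(rta);
		via->rtvia_family = AF_INET6;	/* gateway family, not route family */
		memcpy(via->rtvia_addr, gw, sizeof(*gw));
		nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
		return 0;
	}
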
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8e0cb1687a74..779d2be2b135 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -41,6 +41,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
@@ -276,7 +277,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
for_nexthops(fi) {
if (nh->fib_nh_oif != onh->fib_nh_oif ||
- nh->fib_nh_gw4 != onh->fib_nh_gw4 ||
+ nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->fib_nh_weight != onh->fib_nh_weight ||
@@ -287,6 +288,15 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
+
+ if (nh->fib_nh_gw_family == AF_INET &&
+ nh->fib_nh_gw4 != onh->fib_nh_gw4)
+ return -1;
+
+ if (nh->fib_nh_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
+ return -1;
+
onh++;
} endfor_nexthops(fi);
return 0;
@@ -447,10 +457,18 @@ static int fib_detect_death(struct fib_info *fi, int order,
struct fib_info **last_resort, int *last_idx,
int dflt)
{
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
struct neighbour *n;
int state = NUD_NONE;
- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].fib_nh_gw4, fi->fib_dev);
+ if (likely(nhc->nhc_gw_family == AF_INET))
+ n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
+ else if (nhc->nhc_gw_family == AF_INET6)
+ n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
+ nhc->nhc_dev);
+ else
+ n = NULL;
+
if (n) {
state = n->nud_state;
neigh_release(n);
@@ -511,10 +529,12 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
goto init_failure;
nh->fib_nh_oif = cfg->fc_oif;
- if (cfg->fc_gw) {
- nh->fib_nh_gw4 = cfg->fc_gw;
- nh->fib_nh_has_gw = 1;
- }
+ nh->fib_nh_gw_family = cfg->fc_gw_family;
+ if (cfg->fc_gw_family == AF_INET)
+ nh->fib_nh_gw4 = cfg->fc_gw4;
+ else if (cfg->fc_gw_family == AF_INET6)
+ nh->fib_nh_gw6 = cfg->fc_gw6;
+
nh->fib_nh_flags = cfg->fc_flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -586,11 +606,24 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
- struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+ struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
- if (nla)
- fib_cfg.fc_gw = nla_get_in_addr(nla);
+ nlav = nla_find(attrs, attrlen, RTA_VIA);
+ if (nla && nlav) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ return -EINVAL;
+ }
+ if (nla) {
+ fib_cfg.fc_gw4 = nla_get_in_addr(nla);
+ if (fib_cfg.fc_gw4)
+ fib_cfg.fc_gw_family = AF_INET;
+ } else if (nlav) {
+ ret = fib_gw_from_via(&fib_cfg, nlav, extack);
+ if (ret)
+ goto errout;
+ }
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla)
@@ -616,10 +649,16 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
"Nexthop device index does not match RTA_OIF");
goto errout;
}
- if (cfg->fc_gw && fi->fib_nh->fib_nh_gw4 != cfg->fc_gw) {
- NL_SET_ERR_MSG(extack,
- "Nexthop gateway does not match RTA_GATEWAY");
- goto errout;
+ if (cfg->fc_gw_family) {
+ if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+ (cfg->fc_gw_family == AF_INET &&
+ fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+ (cfg->fc_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
+ goto errout;
+ }
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
@@ -719,7 +758,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
- if (cfg->fc_oif || cfg->fc_gw) {
+ if (cfg->fc_oif || cfg->fc_gw_family) {
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
fi->fib_nh, cfg, extack))
@@ -730,10 +769,20 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
cfg->fc_flow != fi->fib_nh->nh_tclassid)
return 1;
#endif
- if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->fib_nh_oif) &&
- (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->fib_nh_gw4))
- return 0;
- return 1;
+ if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+ (cfg->fc_gw_family &&
+ cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+ return 1;
+
+ if (cfg->fc_gw_family == AF_INET &&
+ cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+ return 1;
+
+ if (cfg->fc_gw_family == AF_INET6 &&
+ ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+ return 1;
+
+ return 0;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -754,11 +803,43 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
- struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+ struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
- if (nla && nla_get_in_addr(nla) != nh->fib_nh_gw4)
- return 1;
+ nlav = nla_find(attrs, attrlen, RTA_VIA);
+ if (nla && nlav) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop configuration can not contain both GATEWAY and VIA");
+ return -EINVAL;
+ }
+
+ if (nla) {
+ if (nh->fib_nh_gw_family != AF_INET ||
+ nla_get_in_addr(nla) != nh->fib_nh_gw4)
+ return 1;
+ } else if (nlav) {
+ struct fib_config cfg2;
+ int err;
+
+ err = fib_gw_from_via(&cfg2, nlav, extack);
+ if (err)
+ return err;
+
+ switch (nh->fib_nh_gw_family) {
+ case AF_INET:
+ if (cfg2.fc_gw_family != AF_INET ||
+ cfg2.fc_gw4 != nh->fib_nh_gw4)
+ return 1;
+ break;
+ case AF_INET6:
+ if (cfg2.fc_gw_family != AF_INET6 ||
+ ipv6_addr_cmp(&cfg2.fc_gw6,
+ &nh->fib_nh_gw6))
+ return 1;
+ break;
+ }
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla && nla_get_u32(nla) != nh->nh_tclassid)
@@ -812,6 +893,30 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
return true;
}
+static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
+ u32 table, struct netlink_ext_ack *extack)
+{
+ struct fib6_config cfg = {
+ .fc_table = table,
+ .fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
+ .fc_ifindex = nh->fib_nh_oif,
+ .fc_gateway = nh->fib_nh_gw6,
+ };
+ struct fib6_nh fib6_nh = {};
+ int err;
+
+ err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
+ if (!err) {
+ nh->fib_nh_dev = fib6_nh.fib_nh_dev;
+ dev_hold(nh->fib_nh_dev);
+ nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
+ nh->fib_nh_scope = RT_SCOPE_LINK;
+
+ ipv6_stub->fib6_nh_release(&fib6_nh);
+ }
+
+ return err;
+}
/*
* Picture
@@ -856,134 +961,152 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
* |
* |-> {local prefix} (terminal node)
*/
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
- struct netlink_ext_ack *extack)
+static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
+ u8 scope, struct netlink_ext_ack *extack)
{
- int err = 0;
- struct net *net;
struct net_device *dev;
+ struct fib_result res;
+ int err;
- net = cfg->fc_nlinfo.nl_net;
- if (nh->fib_nh_gw4) {
- struct fib_result res;
-
- if (nh->fib_nh_flags & RTNH_F_ONLINK) {
- unsigned int addr_type;
+ if (nh->fib_nh_flags & RTNH_F_ONLINK) {
+ unsigned int addr_type;
- if (cfg->fc_scope >= RT_SCOPE_LINK) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid scope");
- return -EINVAL;
- }
- dev = __dev_get_by_index(net, nh->fib_nh_oif);
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
- return -ENODEV;
- }
- if (!(dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack,
- "Nexthop device is not up");
- return -ENETDOWN;
- }
- addr_type = inet_addr_type_dev_table(net, dev,
- nh->fib_nh_gw4);
- if (addr_type != RTN_UNICAST) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid gateway");
- return -EINVAL;
- }
- if (!netif_carrier_ok(dev))
- nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- nh->fib_nh_dev = dev;
- dev_hold(dev);
- nh->fib_nh_scope = RT_SCOPE_LINK;
- return 0;
+ if (scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
+ return -EINVAL;
}
- rcu_read_lock();
- {
- struct fib_table *tbl = NULL;
- struct flowi4 fl4 = {
- .daddr = nh->fib_nh_gw4,
- .flowi4_scope = cfg->fc_scope + 1,
- .flowi4_oif = nh->fib_nh_oif,
- .flowi4_iif = LOOPBACK_IFINDEX,
- };
-
- /* It is not necessary, but requires a bit of thinking */
- if (fl4.flowi4_scope < RT_SCOPE_LINK)
- fl4.flowi4_scope = RT_SCOPE_LINK;
-
- if (cfg->fc_table)
- tbl = fib_get_table(net, cfg->fc_table);
-
- if (tbl)
- err = fib_table_lookup(tbl, &fl4, &res,
- FIB_LOOKUP_IGNORE_LINKSTATE |
- FIB_LOOKUP_NOREF);
-
- /* on error or if no table given do full lookup. This
- * is needed for example when nexthops are in the local
- * table rather than the given table
- */
- if (!tbl || err) {
- err = fib_lookup(net, &fl4, &res,
- FIB_LOOKUP_IGNORE_LINKSTATE);
- }
-
- if (err) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid gateway");
- rcu_read_unlock();
- return err;
- }
+ dev = __dev_get_by_index(net, nh->fib_nh_oif);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
+ return -ENODEV;
}
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
- NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
- goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ return -ENETDOWN;
}
- nh->fib_nh_scope = res.scope;
- nh->fib_nh_oif = FIB_RES_OIF(res);
- nh->fib_nh_dev = dev = FIB_RES_DEV(res);
- if (!dev) {
- NL_SET_ERR_MSG(extack,
- "No egress device for nexthop gateway");
- goto out;
+ addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
+ if (addr_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ return -EINVAL;
}
- dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
- } else {
- struct in_device *in_dev;
-
- if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
- NL_SET_ERR_MSG(extack,
- "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
- return -EINVAL;
+ nh->fib_nh_dev = dev;
+ dev_hold(dev);
+ nh->fib_nh_scope = RT_SCOPE_LINK;
+ return 0;
+ }
+ rcu_read_lock();
+ {
+ struct fib_table *tbl = NULL;
+ struct flowi4 fl4 = {
+ .daddr = nh->fib_nh_gw4,
+ .flowi4_scope = scope + 1,
+ .flowi4_oif = nh->fib_nh_oif,
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ };
+
+ /* It is not necessary, but requires a bit of thinking */
+ if (fl4.flowi4_scope < RT_SCOPE_LINK)
+ fl4.flowi4_scope = RT_SCOPE_LINK;
+
+ if (table)
+ tbl = fib_get_table(net, table);
+
+ if (tbl)
+ err = fib_table_lookup(tbl, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE |
+ FIB_LOOKUP_NOREF);
+
+ /* on error or if no table given do full lookup. This
+ * is needed for example when nexthops are in the local
+ * table rather than the given table
+ */
+ if (!tbl || err) {
+ err = fib_lookup(net, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE);
}
- rcu_read_lock();
- err = -ENODEV;
- in_dev = inetdev_by_index(net, nh->fib_nh_oif);
- if (!in_dev)
- goto out;
- err = -ENETDOWN;
- if (!(in_dev->dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
}
- nh->fib_nh_dev = in_dev->dev;
- dev_hold(nh->fib_nh_dev);
- nh->fib_nh_scope = RT_SCOPE_HOST;
- if (!netif_carrier_ok(nh->fib_nh_dev))
- nh->fib_nh_flags |= RTNH_F_LINKDOWN;
- err = 0;
}
+
+ err = -EINVAL;
+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ goto out;
+ }
+ nh->fib_nh_scope = res.scope;
+ nh->fib_nh_oif = FIB_RES_OIF(res);
+ nh->fib_nh_dev = dev = FIB_RES_DEV(res);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "No egress device for nexthop gateway");
+ goto out;
+ }
+ dev_hold(dev);
+ if (!netif_carrier_ok(dev))
+ nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
+{
+ struct in_device *in_dev;
+ int err;
+
+ if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+
+ err = -ENODEV;
+ in_dev = inetdev_by_index(net, nh->fib_nh_oif);
+ if (!in_dev)
+ goto out;
+ err = -ENETDOWN;
+ if (!(in_dev->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
+ goto out;
+ }
+
+ nh->fib_nh_dev = in_dev->dev;
+ dev_hold(nh->fib_nh_dev);
+ nh->fib_nh_scope = RT_SCOPE_HOST;
+ if (!netif_carrier_ok(nh->fib_nh_dev))
+ nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ err = 0;
out:
rcu_read_unlock();
return err;
}
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ u32 table = cfg->fc_table;
+ int err;
+
+ if (nh->fib_nh_gw_family == AF_INET)
+ err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
+ else if (nh->fib_nh_gw_family == AF_INET6)
+ err = fib_check_nh_v6_gw(net, nh, table, extack);
+ else
+ err = fib_check_nh_nongw(net, nh, extack);
+
+ return err;
+}
+
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
unsigned int mask = (fib_info_hash_size - 1);
@@ -1204,7 +1327,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto failure;
if (fib_props[cfg->fc_type].error) {
- if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+ if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Gateway, device and multipath can not be specified for this route type");
goto err_inval;
@@ -1238,7 +1361,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
"Route with host scope can not have multiple nexthops");
goto err_inval;
}
- if (nh->fib_nh_gw4) {
+ if (nh->fib_nh_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
goto err_inval;
@@ -1269,6 +1392,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
+ if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+ fi->fib_nh_is_v6 = true;
} endfor_nexthops(fi)
fib_rebalance(fi);
@@ -1341,18 +1466,32 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
rcu_read_unlock();
}
- if (nhc->nhc_has_gw) {
- switch (nhc->nhc_family) {
- case AF_INET:
- if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
- goto nla_put_failure;
- break;
- case AF_INET6:
- if (nla_put_in6_addr(skb, RTA_GATEWAY,
- &nhc->nhc_gw.ipv6) < 0)
+ switch (nhc->nhc_gw_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
+ goto nla_put_failure;
+ break;
+ case AF_INET6:
+ /* if the gateway family does not match the nexthop family,
+ * the gateway is encoded as RTA_VIA
+ */
+ if (nhc->nhc_gw_family != nhc->nhc_family) {
+ int alen = sizeof(struct in6_addr);
+ struct nlattr *nla;
+ struct rtvia *via;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
goto nla_put_failure;
- break;
+
+ via = nla_data(nla);
+ via->rtvia_family = AF_INET6;
+ memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
+ } else if (nla_put_in6_addr(skb, RTA_GATEWAY,
+ &nhc->nhc_gw.ipv6) < 0) {
+ goto nla_put_failure;
}
+ break;
}
*flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
@@ -1832,8 +1971,14 @@ static bool fib_good_nh(const struct fib_nh *nh)
rcu_read_lock_bh();
- n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
- (__force u32)nh->fib_nh_gw4);
+ if (likely(nh->fib_nh_gw_family == AF_INET))
+ n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+ (__force u32)nh->fib_nh_gw4);
+ else if (nh->fib_nh_gw_family == AF_INET6)
+ n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
+ &nh->fib_nh_gw6);
+ else
+ n = NULL;
if (n)
state = n->nud_state;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 100e63f57ea6..b038f563baa4 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -136,7 +136,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
break;
case 1: {
- /* Direct encasulation of IPv4 or IPv6 */
+ /* Direct encapsulation of IPv4 or IPv6 */
int prot;
@@ -170,9 +170,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
/* guehdr may change after pull */
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- hdrlen = sizeof(struct guehdr) + optlen;
-
- if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
+ if (validate_gue_flags(guehdr, optlen))
goto drop;
hdrlen = sizeof(struct guehdr) + optlen;
@@ -1137,7 +1135,7 @@ static int gue_err(struct sk_buff *skb, u32 info)
case 0: /* Full GUE header present */
break;
case 1: {
- /* Direct encasulation of IPv4 or IPv6 */
+ /* Direct encapsulation of IPv4 or IPv6 */
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
switch (((struct iphdr *)guehdr)->version) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6ea523d71947..a175e3e7ae97 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -564,7 +564,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
rcu_read_unlock();
return &rt->dst;
@@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
goto route_err;
return &rt->dst;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819f949b..06f6f280b9ff 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && rt->rt_uses_gateway)
+ if (opt->is_strictroute && rt->rt_gw_family)
goto sr_failed;
IPCB(skb)->flags |= IPSKB_FORWARDED;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fd219f7bd3ea..4b0526441476 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb,
len,
htons(ETH_P_TEB),
@@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
goto drop;
if (tunnel->collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
md->version = ver;
md2 = &md->u.md2;
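
The comment added above states the general rule this fix applies: helpers that may unclone or reallocate skb data invalidate any previously derived pointers into it. A minimal kernel-side sketch of the safe pattern, with illustrative names:

	static int parse_erspan_like(struct sk_buff *skb, int pull_len,
				     int gre_hdr_len)
	{
		unsigned char *gh;

		/* may unclone/reallocate the skb head */
		if (__iptunnel_pull_header(skb, pull_len, htons(ETH_P_TEB),
					   false, false) < 0)
			return -EINVAL;

		/* recompute from skb accessors; never reuse a pre-pull pointer */
		gh = skb_network_header(skb) + skb_network_header_len(skb);
		/* ... parse metadata at gh + gre_hdr_len ... */
		return 0;
	}
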
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 10b35328cfbc..4e42c1974ba2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
- u32 nexthop;
+ bool is_v6gw = false;
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
}
rcu_read_lock_bh();
- nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
- neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
int res;
sock_confirm_neigh(skb, neigh);
- res = neigh_output(neigh, skb);
-
+ /* if crossing protocols, can not use the cached header */
+ res = neigh_output(neigh, skb, is_v6gw);
rcu_read_unlock_bh();
return res;
}
@@ -472,7 +469,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
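
ip_neigh_for_gw() is introduced elsewhere in this series; a sketch of its expected shape, assuming the ip_neigh_gw4/ip_neigh_gw6 helpers that wrap ARP and ND table lookups respectively:

	static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt,
							struct sk_buff *skb,
							bool *is_v6gw)
	{
		struct net_device *dev = rt->dst.dev;

		if (likely(rt->rt_gw_family == AF_INET))
			return ip_neigh_gw4(dev, rt->rt_gw4);
		if (rt->rt_gw_family == AF_INET6) {
			*is_v6gw = true;	/* cached L2 header can't be reused */
			return ip_neigh_gw6(dev, &rt->rt_gw6);
		}
		/* no gateway: resolve the destination address itself */
		return ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
	}
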
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f3f2adf630d4..efa6a36cbfff 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -434,37 +434,46 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
- const __be32 *pkey = daddr;
- const struct rtable *rt;
struct neighbour *n;
- rt = (const struct rtable *) dst;
- if (rt->rt_gateway)
- pkey = (const __be32 *) &rt->rt_gateway;
- else if (skb)
- pkey = &ip_hdr(skb)->daddr;
+ rcu_read_lock_bh();
+
+ if (likely(rt->rt_gw_family == AF_INET)) {
+ n = ip_neigh_gw4(dev, rt->rt_gw4);
+ } else if (rt->rt_gw_family == AF_INET6) {
+ n = ip_neigh_gw6(dev, &rt->rt_gw6);
+ } else {
+ __be32 pkey;
+
+ pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
+ n = ip_neigh_gw4(dev, pkey);
+ }
+
+ if (n && !refcount_inc_not_zero(&n->refcnt))
+ n = NULL;
- n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
- if (n)
- return n;
- return neigh_create(&arp_tbl, pkey, dev);
+ rcu_read_unlock_bh();
+
+ return n;
}
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rtable *rt = container_of(dst, struct rtable, dst);
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
- const struct rtable *rt;
- rt = (const struct rtable *)dst;
- if (rt->rt_gateway)
- pkey = (const __be32 *)&rt->rt_gateway;
- else if (!daddr ||
+ if (rt->rt_gw_family == AF_INET) {
+ pkey = (const __be32 *)&rt->rt_gw4;
+ } else if (rt->rt_gw_family == AF_INET6) {
+ return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
+ } else if (!daddr ||
(rt->rt_flags &
- (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+ (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
return;
-
+ }
__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
@@ -629,8 +638,8 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = fnhe->fnhe_gw;
- rt->rt_uses_gateway = 1;
+ rt->rt_gw_family = AF_INET;
+ rt->rt_gw4 = fnhe->fnhe_gw;
}
}
@@ -747,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
return;
}
- if (rt->rt_gateway != old_gw)
+ if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
return;
in_dev = __in_dev_get_rcu(dev);
@@ -1282,7 +1291,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_uses_gateway && mtu > 576)
+ if (rt->rt_gw_family && mtu > 576)
mtu = 576;
}
@@ -1410,8 +1419,10 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
orig = NULL;
}
fill_route_from_fnhe(rt, fnhe);
- if (!rt->rt_gateway)
- rt->rt_gateway = daddr;
+ if (!rt->rt_gw4) {
+ rt->rt_gw4 = daddr;
+ rt->rt_gw_family = AF_INET;
+ }
if (do_cache) {
dst_hold(&rt->dst);
@@ -1535,14 +1546,20 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
+ struct fib_nh *nh;
- if (nh->fib_nh_gw4 && nh->fib_nh_scope == RT_SCOPE_LINK) {
- rt->rt_gateway = nh->fib_nh_gw4;
- rt->rt_uses_gateway = 1;
+ if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+ rt->rt_gw_family = nhc->nhc_gw_family;
+ /* only INET and INET6 are supported */
+ if (likely(nhc->nhc_gw_family == AF_INET))
+ rt->rt_gw4 = nhc->nhc_gw.ipv4;
+ else
+ rt->rt_gw6 = nhc->nhc_gw.ipv6;
}
+
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
+ nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1557,8 +1574,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
* However, if we are unsuccessful at storing this
* route into the cache we really need to set it.
*/
- if (!rt->rt_gateway)
- rt->rt_gateway = daddr;
+ if (!rt->rt_gw4) {
+ rt->rt_gw_family = AF_INET;
+ rt->rt_gw4 = daddr;
+ }
rt_add_uncached_list(rt);
}
} else
@@ -1591,8 +1610,8 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_iif = 0;
rt->rt_pmtu = 0;
rt->rt_mtu_locked = 0;
- rt->rt_gateway = 0;
- rt->rt_uses_gateway = 0;
+ rt->rt_gw_family = 0;
+ rt->rt_gw4 = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1734,8 +1753,9 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = res->fi && !itag;
if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
skb->protocol == htons(ETH_P_IP)) {
- __be32 gw = nhc->nhc_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
+ __be32 gw;
+ gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
if (IN_DEV_SHARED_MEDIA(out_dev) ||
inet_addr_onlink(out_dev, saddr, gw))
IPCB(skb)->flags |= IPSKB_DOREDIRECT;
@@ -2284,7 +2304,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
} else {
if (unlikely(fl4->flowi4_flags &
FLOWI_FLAG_KNOWN_NH &&
- !(nhc->nhc_has_gw &&
+ !(nhc->nhc_gw_family &&
nhc->nhc_scope == RT_SCOPE_LINK))) {
do_cache = false;
goto add;
@@ -2594,8 +2614,11 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
- rt->rt_gateway = ort->rt_gateway;
- rt->rt_uses_gateway = ort->rt_uses_gateway;
+ rt->rt_gw_family = ort->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ rt->rt_gw4 = ort->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ rt->rt_gw6 = ort->rt_gw6;
INIT_LIST_HEAD(&rt->rt_uncached);
}
@@ -2674,9 +2697,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_uses_gateway &&
- nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
+ if (rt->rt_gw_family == AF_INET &&
+ nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
goto nla_put_failure;
+ } else if (rt->rt_gw_family == AF_INET6) {
+ int alen = sizeof(struct in6_addr);
+ struct nlattr *nla;
+ struct rtvia *via;
+
+ nla = nla_reserve(skb, RTA_VIA, alen + 2);
+ if (!nla)
+ goto nla_put_failure;
+
+ via = nla_data(nla);
+ via->rtvia_family = AF_INET6;
+ memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+ }
expires = rt->dst.expires;
if (expires) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d73a6d6652f6..72d19b1838ed 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -97,8 +97,11 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
- xdst->u.rt.rt_gateway = rt->rt_gateway;
- xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
+ xdst->u.rt.rt_gw_family = rt->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ xdst->u.rt.rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ xdst->u.rt.rt_gw6 = rt->rt_gw6;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2e8d1d2d8d3d..340a0f06f974 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,7 +2421,7 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
continue;
- if (no_gw && rt->fib6_nh.fib_nh_has_gw)
+ if (no_gw && rt->fib6_nh.fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 945b66e3008f..e37e4c5871f7 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -173,6 +173,14 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
return 0;
}
+static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+ struct fib6_config *cfg, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel");
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
@@ -181,6 +189,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
+ .fib6_nh_init = eafnosupport_fib6_nh_init,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1789bf99c419..1dac6ea6666a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -919,6 +919,8 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
+ .fib6_nh_init = fib6_nh_init,
+ .fib6_nh_release = fib6_nh_release,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8c00609a1513..46f54a5bb1f0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2304,7 +2304,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh.fib_nh_has_gw) {
+ if (rt->fib6_nh.fib_nh_gw_family) {
flags |= RTF_GATEWAY;
seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
} else {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b32c95f02128..655e46b227f9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
}
static int ip6erspan_rcv(struct sk_buff *skb,
- struct tnl_ptk_info *tpi)
+ struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
const struct ipv6hdr *ipv6h;
struct erspan_md2 *md2;
struct ip6_tnl *tunnel;
@@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb, len,
htons(ETH_P_TEB),
false, false) < 0)
return PACKET_REJECT;
if (tunnel->parms.collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
@@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
goto out;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e51f3c648b09..adef2236abe2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -117,7 +117,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6e89151693d0..a77c004d67fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -102,7 +102,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
@@ -446,12 +447,13 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
+ const struct fib6_nh *nh = &sibling->fib6_nh;
int nh_upper_bound;
- nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
+ nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound)
continue;
- if (rt6_score_route(sibling, oif, strict) < 0)
+ if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
match = sibling;
break;
@@ -464,12 +466,34 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
* Route lookup. rcu_read_lock() should be held.
*/
+static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
+ const struct in6_addr *saddr, int oif, int flags)
+{
+ const struct net_device *dev;
+
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
+ return false;
+
+ dev = nh->fib_nh_dev;
+ if (oif) {
+ if (dev->ifindex == oif)
+ return true;
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return true;
+ }
+
+ return false;
+}
+
static inline struct fib6_info *rt6_device_match(struct net *net,
struct fib6_info *rt,
const struct in6_addr *saddr,
int oif,
int flags)
{
+ const struct fib6_nh *nh;
struct fib6_info *sprt;
if (!oif && ipv6_addr_any(saddr) &&
@@ -477,19 +501,9 @@ static inline struct fib6_info *rt6_device_match(struct net *net,
return rt;
for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
- const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;
-
- if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
- continue;
-
- if (oif) {
- if (dev->ifindex == oif)
- return sprt;
- } else {
- if (ipv6_chk_addr(net, saddr, dev,
- flags & RT6_LOOKUP_F_IFACE))
- return sprt;
- }
+ nh = &sprt->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags))
+ return sprt;
}
if (oif && flags & RT6_LOOKUP_F_IFACE)
@@ -517,7 +531,7 @@ static void rt6_probe_deferred(struct work_struct *w)
kfree(work);
}
-static void rt6_probe(struct fib6_info *rt)
+static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
@@ -533,11 +547,11 @@ static void rt6_probe(struct fib6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (!rt || !rt->fib6_nh.fib_nh_has_gw)
+ if (!fib6_nh->fib_nh_gw_family)
return;
- nh_gw = &rt->fib6_nh.fib_nh_gw6;
- dev = rt->fib6_nh.fib_nh_dev;
+ nh_gw = &fib6_nh->fib_nh_gw6;
+ dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@@ -554,13 +568,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, rt->last_probe +
+ } else if (time_after(jiffies, fib6_nh->last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
- rt->last_probe = jiffies;
+ fib6_nh->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -572,7 +586,7 @@ out:
rcu_read_unlock_bh();
}
#else
-static inline void rt6_probe(struct fib6_info *rt)
+static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif
@@ -580,27 +594,14 @@ static inline void rt6_probe(struct fib6_info *rt)
/*
* Default Router Selection (RFC 2461 6.3.6)
*/
-static inline int rt6_check_dev(struct fib6_info *rt, int oif)
-{
- const struct net_device *dev = rt->fib6_nh.fib_nh_dev;
-
- if (!oif || dev->ifindex == oif)
- return 2;
- return 0;
-}
-
-static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
+static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
- if (rt->fib6_flags & RTF_NONEXTHOP ||
- !rt->fib6_nh.fib_nh_has_gw)
- return RT6_NUD_SUCCEED;
-
rcu_read_lock_bh();
- neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
- &rt->fib6_nh.fib_nh_gw6);
+ neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
+ &fib6_nh->fib_nh_gw6);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
@@ -621,43 +622,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
return ret;
}
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
+static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
+ int strict)
{
- int m;
+ int m = 0;
+
+ if (!oif || nh->fib_nh_dev->ifindex == oif)
+ m = 2;
- m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
- m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
+ m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
- if (strict & RT6_LOOKUP_F_REACHABLE) {
- int n = rt6_check_neigh(rt);
+ if ((strict & RT6_LOOKUP_F_REACHABLE) &&
+ !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
+ int n = rt6_check_neigh(nh);
if (n < 0)
return n;
}
return m;
}
-static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
- int *mpri, struct fib6_info *match,
- bool *do_rr)
+static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
+ int oif, int strict, int *mpri, bool *do_rr)
{
- int m;
bool match_do_rr = false;
+ bool rc = false;
+ int m;
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (nh->fib_nh_flags & RTNH_F_DEAD)
goto out;
- if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
- rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
+ if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
+ nh->fib_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
- if (fib6_check_expired(rt))
- goto out;
-
- m = rt6_score_route(rt, oif, strict);
+ m = rt6_score_route(nh, fib6_flags, oif, strict);
if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true;
m = 0; /* lowest valid score */
@@ -666,53 +668,64 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
}
if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(rt);
+ rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) {
*do_rr = match_do_rr;
*mpri = m;
- match = rt;
+ rc = true;
}
out:
- return match;
+ return rc;
}
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
- struct fib6_info *leaf,
- struct fib6_info *rr_head,
- u32 metric, int oif, int strict,
- bool *do_rr)
+static void __find_rr_leaf(struct fib6_info *rt_start,
+ struct fib6_info *nomatch, u32 metric,
+ struct fib6_info **match, struct fib6_info **cont,
+ int oif, int strict, bool *do_rr, int *mpri)
{
- struct fib6_info *rt, *match, *cont;
- int mpri = -1;
+ struct fib6_info *rt;
- match = NULL;
- cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
+ for (rt = rt_start;
+ rt && rt != nomatch;
+ rt = rcu_dereference(rt->fib6_next)) {
+ struct fib6_nh *nh;
+
+ if (cont && rt->fib6_metric != metric) {
+ *cont = rt;
+ return;
}
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ if (fib6_check_expired(rt))
+ continue;
+
+ nh = &rt->fib6_nh;
+ if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
+ *match = rt;
}
+}
- for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->fib6_next)) {
- if (rt->fib6_metric != metric) {
- cont = rt;
- break;
- }
+static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
+ struct fib6_info *leaf,
+ struct fib6_info *rr_head,
+ u32 metric, int oif, int strict,
+ bool *do_rr)
+{
+ struct fib6_info *match = NULL, *cont = NULL;
+ int mpri = -1;
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
- }
+ __find_rr_leaf(rr_head, NULL, metric, &match, &cont,
+ oif, strict, do_rr, &mpri);
+
+ __find_rr_leaf(leaf, rr_head, metric, &match, &cont,
+ oif, strict, do_rr, &mpri);
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
- match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ __find_rr_leaf(cont, NULL, metric, &match, NULL,
+ oif, strict, do_rr, &mpri);
return match;
}
@@ -769,7 +782,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
{
- return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
+ return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -975,7 +988,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
rt->rt6i_dst = ort->fib6_dst;
rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
rt->rt6i_flags = ort->fib6_flags;
- if (ort->fib6_nh.fib_nh_has_gw) {
+ if (ort->fib6_nh.fib_nh_gw_family) {
rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
rt->rt6i_flags |= RTF_GATEWAY;
}
@@ -1061,36 +1074,37 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
f6i = rcu_dereference(fn->leaf);
- if (!f6i) {
+ if (!f6i)
f6i = net->ipv6.fib6_null_entry;
- } else {
+ else
f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags);
- if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
- f6i = fib6_multipath_select(net, f6i, fl6,
- fl6->flowi6_oif, skb,
- flags);
- }
+
if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
- }
- trace_fib6_table_lookup(net, f6i, table, fl6);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ goto out;
+ }
+ if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
+ f6i = fib6_multipath_select(net, f6i, fl6, fl6->flowi6_oif, skb,
+ flags);
/* Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) {
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
- } else if (f6i == net->ipv6.fib6_null_entry) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
} else {
rt = ip6_create_rt_rcu(f6i);
}
+out:
+ trace_fib6_table_lookup(net, f6i, table, fl6);
+
rcu_read_unlock();
return rt;
@@ -1841,9 +1855,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_lock();
f6i = fib6_table_lookup(net, table, oif, fl6, strict);
- if (f6i->fib6_nsiblings)
- f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
-
if (f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
@@ -1851,6 +1862,9 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
return rt;
}
+ if (f6i->fib6_nsiblings)
+ f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+
/*Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) {
@@ -1860,7 +1874,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_unlock();
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
- !f6i->fib6_nh.fib_nh_has_gw)) {
+ !f6i->fib6_nh.fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different
@@ -2393,6 +2407,35 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
NULL);
}
+static bool ip6_redirect_nh_match(struct fib6_info *f6i,
+ struct fib6_nh *nh,
+ struct flowi6 *fl6,
+ const struct in6_addr *gw,
+ struct rt6_info **ret)
+{
+ if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
+ fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
+ return false;
+
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
+ *ret = rt_cache;
+ return true;
+ }
+ return false;
+ }
+ return true;
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2406,7 +2449,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *ret = NULL, *rt_cache;
+ struct rt6_info *ret = NULL;
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2424,34 +2467,13 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
- continue;
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (!rt->fib6_nh.fib_nh_has_gw)
- continue;
- if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
- continue;
- /* rt_cache's gateway might be different from its 'parent'
- * in the case of an ip redirect.
- * So we keep searching in the exception table if the gateway
- * is different.
- */
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
- rt_cache = rt6_find_cached_rt(rt,
- &fl6->daddr,
- &fl6->saddr);
- if (rt_cache &&
- ipv6_addr_equal(&rdfl->gateway,
- &rt_cache->rt6i_gateway)) {
- ret = rt_cache;
- break;
- }
- continue;
- }
- break;
+ if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
+ &rdfl->gateway, &ret))
+ goto out;
}
if (!rt)
@@ -2964,7 +2986,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
goto out;
fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
- fib6_nh->fib_nh_has_gw = 1;
+ fib6_nh->fib_nh_gw_family = AF_INET6;
}
err = -ENODEV;
@@ -3476,7 +3498,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
- !rt->fib6_nh.fib_nh_has_gw)
+ !rt->fib6_nh.fib_nh_gw_family)
continue;
if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
continue;
@@ -3807,7 +3829,7 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- rt->fib6_nh.fib_nh_has_gw &&
+ rt->fib6_nh.fib_nh_gw_family &&
ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
return -1;
}
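
The route.c hunks above all stem from one substitution: the boolean fib_nh_has_gw becomes fib_nh_gw_family, recording which address family the gateway belongs to, while AF_UNSPEC (0) still tests false so the existing truth-value checks keep working unchanged. A minimal userland sketch of that encoding (the toy_* types and names are invented for illustration, not the kernel's):

#include <stdio.h>
#include <netinet/in.h>		/* struct in_addr, struct in6_addr */
#include <sys/socket.h>		/* AF_UNSPEC, AF_INET, AF_INET6 */

struct toy_nh {
	int gw_family;			/* AF_UNSPEC (0) == no gateway */
	union {
		struct in_addr	gw4;
		struct in6_addr	gw6;
	};
};

static const char *toy_nh_describe(const struct toy_nh *nh)
{
	if (!nh->gw_family)		/* same truth test as fib_nh_gw_family */
		return "directly connected, no gateway";
	return nh->gw_family == AF_INET ? "via IPv4 gateway"
					: "via IPv6 gateway";
}

int main(void)
{
	struct toy_nh onlink = { .gw_family = AF_UNSPEC };
	struct toy_nh v6gw   = { .gw_family = AF_INET6 };

	printf("%s\n%s\n", toy_nh_describe(&onlink), toy_nh_describe(&v6gw));
	return 0;
}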
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index f3a8557494d6..2619c2fbea93 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -137,10 +137,14 @@ static int mpls_xmit(struct sk_buff *skb)
mpls_stats_inc_outucastpkts(out_dev, skb);
- if (rt)
- err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
- skb);
- else if (rt6) {
+ if (rt) {
+ if (rt->rt_gw_family == AF_INET)
+ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
+ skb);
+ else if (rt->rt_gw_family == AF_INET6)
+ err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
+ skb);
+ } else if (rt6) {
if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
/* 6PE (RFC 4798) */
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
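
With the gateway family now stored on the route, mpls_xmit() can select the matching neighbour table: ARP for an AF_INET gateway, ND for AF_INET6. A hedged sketch of that dispatch (the enum and helper here are invented):

#include <sys/socket.h>

enum toy_neigh_tbl { TOY_ARP_TABLE, TOY_ND_TABLE, TOY_NO_TABLE };

static enum toy_neigh_tbl toy_pick_table(int gw_family)
{
	switch (gw_family) {
	case AF_INET:			/* resolve next hop via ARP */
		return TOY_ARP_TABLE;
	case AF_INET6:			/* resolve next hop via ND */
		return TOY_ND_TABLE;
	default:			/* no gateway recorded */
		return TOY_NO_TABLE;
	}
}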
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ddfc52ac1f9b..c0d323b58e73 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
create_info = (struct nci_hci_create_pipe_resp *)skb->data;
dest_gate = create_info->dest_gate;
new_pipe = create_info->pipe;
+ if (new_pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
/* Save the newly created pipe and bind with local gate,
* the description for skb->data[3] is destination gate id
@@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
goto exit;
}
delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
+ if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
ndev->hci_dev->pipes[delete_info->pipe].gate =
NCI_HCI_INVALID_GATE;
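
Both NCI hunks enforce the same rule: a pipe id arriving off the wire must be bounds-checked before it indexes the pipes[] array, otherwise a crafted create/delete notification reaches out-of-bounds memory. A toy model of the validate-before-index pattern (array size and names are invented):

#include <stdbool.h>
#include <stdint.h>

#define TOY_MAX_PIPES	128

struct toy_pipe { uint8_t gate; };
static struct toy_pipe toy_pipes[TOY_MAX_PIPES];

/* Mirrors the new checks: reject (the NCI code answers ANY_E_NOK)
 * before toy_pipes[] is touched with an attacker-controlled index.
 */
static bool toy_bind_pipe(uint8_t pipe_id, uint8_t gate)
{
	if (pipe_id >= TOY_MAX_PIPES)
		return false;
	toy_pipes[pipe_id].gate = gate;
	return true;
}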
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2763176e369c..4b5585358699 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -336,8 +336,7 @@ static void fl_mask_free_work(struct work_struct *work)
fl_mask_free(mask);
}
-static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
- bool async)
+static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask)
{
if (!refcount_dec_and_test(&mask->refcnt))
return false;
@@ -348,10 +347,7 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
list_del_rcu(&mask->list);
spin_unlock(&head->masks_lock);
- if (async)
- tcf_queue_work(&mask->rwork, fl_mask_free_work);
- else
- fl_mask_free(mask);
+ tcf_queue_work(&mask->rwork, fl_mask_free_work);
return true;
}
@@ -538,7 +534,6 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
- bool async = tcf_exts_get_net(&f->exts);
*last = false;
@@ -555,7 +550,7 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
list_del_rcu(&f->list);
spin_unlock(&tp->lock);
- *last = fl_mask_put(head, f->mask, async);
+ *last = fl_mask_put(head, f->mask);
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f, rtnl_held, extack);
tcf_unbind_filter(tp, &f->res);
@@ -1466,9 +1461,9 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
struct fl_flow_mask *mask = fnew->mask;
int err;
- err = rhashtable_insert_fast(&mask->ht,
- &fnew->ht_node,
- mask->filter_ht_params);
+ err = rhashtable_lookup_insert_fast(&mask->ht,
+ &fnew->ht_node,
+ mask->filter_ht_params);
if (err) {
*in_ht = false;
/* It is okay if filter with same key exists when
@@ -1605,11 +1600,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
spin_unlock(&tp->lock);
- fl_mask_put(head, fold->mask, true);
+ fl_mask_put(head, fold->mask);
if (!tc_skip_hw(fold->flags))
fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
tcf_unbind_filter(tp, &fold->res);
- tcf_exts_get_net(&fold->exts);
/* Caller holds reference to fold, so refcnt is always > 0
* after this.
*/
@@ -1657,8 +1651,9 @@ errout_ht:
rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
errout_mask:
- fl_mask_put(head, fnew->mask, true);
+ fl_mask_put(head, fnew->mask);
errout:
+ tcf_exts_get_net(&fnew->exts);
tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
errout_tb:
kfree(tb);
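
The switch to rhashtable_lookup_insert_fast() makes the hash insert fail with -EEXIST when the key is already present instead of silently adding a duplicate entry. A single-threaded sketch of that insert-if-absent contract (the kernel primitive does the lookup and insert atomically; everything below is a toy):

#include <stdbool.h>
#include <string.h>

#define TOY_BUCKETS	64

struct toy_entry {
	char key[16];
	struct toy_entry *next;
};

static struct toy_entry *toy_table[TOY_BUCKETS];

static unsigned toy_hash(const char *key)
{
	unsigned h = 0;

	while (*key)
		h = h * 31 + (unsigned char)*key++;
	return h % TOY_BUCKETS;
}

/* Insert-if-absent: returns false (cf. -EEXIST) when the key is taken. */
static bool toy_lookup_insert(struct toy_entry *e)
{
	struct toy_entry **head = &toy_table[toy_hash(e->key)];

	for (struct toy_entry *it = *head; it; it = it->next)
		if (!strcmp(it->key, e->key))
			return false;
	e->next = *head;
	*head = e;
	return true;
}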
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fb8f138b9776..c126b9f78d6e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -998,6 +998,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
qdisc_put(old);
}
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+ sch->flags &= ~TCQ_F_NOLOCK;
+ if (!(sch->flags & TCQ_F_CPUSTATS))
+ return;
+
+ free_percpu(sch->cpu_bstats);
+ free_percpu(sch->cpu_qstats);
+ sch->cpu_bstats = NULL;
+ sch->cpu_qstats = NULL;
+ sch->flags &= ~TCQ_F_CPUSTATS;
+}
+
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
* to device "dev".
*
@@ -1076,7 +1089,7 @@ skip:
/* Only support running class lockless if parent is lockless */
if (new && (new->flags & TCQ_F_NOLOCK) &&
parent && !(parent->flags & TCQ_F_NOLOCK))
- new->flags &= ~TCQ_F_NOLOCK;
+ qdisc_clear_nolock(new);
if (!cops || !cops->graft)
return -EOPNOTSUPP;
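
qdisc_clear_nolock() exists because dropping TCQ_F_NOLOCK alone would leave the qdisc advertising per-CPU stats via TCQ_F_CPUSTATS while the now-locked datapath updates the plain counters; the flags and the per-CPU allocations have to go away together. A flag-handling sketch (the constants are illustrative, not the kernel's):

#include <assert.h>

#define TOY_F_NOLOCK	0x100	/* illustrative values */
#define TOY_F_CPUSTATS	0x004

static unsigned toy_clear_nolock(unsigned flags)
{
	flags &= ~TOY_F_NOLOCK;
	if (flags & TOY_F_CPUSTATS) {
		/* the kernel also frees cpu_bstats/cpu_qstats here so the
		 * locked datapath and the stats dump agree on the format
		 */
		flags &= ~TOY_F_CPUSTATS;
	}
	return flags;
}

int main(void)
{
	assert(toy_clear_nolock(TOY_F_NOLOCK | TOY_F_CPUSTATS) == 0);
	return 0;
}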
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index c6a502933fe7..f68fd7a0e038 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -61,16 +61,20 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+static LIST_HEAD(cbs_list);
+static DEFINE_SPINLOCK(cbs_list_lock);
+
#define BYTES_PER_KBIT (1000LL / 8)
struct cbs_sched_data {
bool offload;
int queue;
- s64 port_rate; /* in bytes/s */
+ atomic64_t port_rate; /* in bytes/s */
s64 last; /* timestamp in ns */
s64 credits; /* in bytes */
s32 locredit; /* in bytes */
@@ -82,6 +86,7 @@ struct cbs_sched_data {
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
+ struct list_head cbs_list;
};
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -181,6 +186,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ if (atomic64_read(&q->port_rate) == -1) {
+ WARN_ONCE(1, "cbs: dequeue() called with unknown port rate.");
+ return NULL;
+ }
+
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -207,7 +217,8 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
/* As sendslope is a negative number, this will decrease the
* amount of q->credits.
*/
- credits = credits_from_len(len, q->sendslope, q->port_rate);
+ credits = credits_from_len(len, q->sendslope,
+ atomic64_read(&q->port_rate));
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
@@ -294,6 +305,50 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
return 0;
}
+static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
+{
+ struct ethtool_link_ksettings ecmd;
+ s64 port_rate = -1;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+ ecmd.base.speed != SPEED_UNKNOWN)
+ port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT;
+
+ atomic64_set(&q->port_rate, port_rate);
+ netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n",
+ dev->name, (long long)atomic64_read(&q->port_rate),
+ ecmd.base.speed);
+}
+
+static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct cbs_sched_data *q;
+ struct net_device *qdev;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ if (event != NETDEV_UP && event != NETDEV_CHANGE)
+ return NOTIFY_DONE;
+
+ spin_lock(&cbs_list_lock);
+ list_for_each_entry(q, &cbs_list, cbs_list) {
+ qdev = qdisc_dev(q->qdisc);
+ if (qdev == dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&cbs_list_lock);
+
+ if (found)
+ cbs_set_port_rate(dev, q);
+
+ return NOTIFY_DONE;
+}
+
static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -315,16 +370,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
qopt = nla_data(tb[TCA_CBS_PARMS]);
if (!qopt->offload) {
- struct ethtool_link_ksettings ecmd;
- s64 link_speed;
-
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
-
+ cbs_set_port_rate(dev, q);
cbs_disable_offload(dev, q);
} else {
err = cbs_enable_offload(dev, q, qopt, extack);
@@ -347,6 +393,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ int err;
if (!opt) {
NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory");
@@ -367,7 +414,17 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
- return cbs_change(sch, opt, extack);
+ err = cbs_change(sch, opt, extack);
+ if (err)
+ return err;
+
+ if (!q->offload) {
+ spin_lock(&cbs_list_lock);
+ list_add(&q->cbs_list, &cbs_list);
+ spin_unlock(&cbs_list_lock);
+ }
+
+ return 0;
}
static void cbs_destroy(struct Qdisc *sch)
@@ -375,8 +432,11 @@ static void cbs_destroy(struct Qdisc *sch)
struct cbs_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- qdisc_watchdog_cancel(&q->watchdog);
+ spin_lock(&cbs_list_lock);
+ list_del(&q->cbs_list);
+ spin_unlock(&cbs_list_lock);
+ qdisc_watchdog_cancel(&q->watchdog);
cbs_disable_offload(dev, q);
if (q->qdisc)
@@ -487,14 +547,24 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct notifier_block cbs_device_notifier = {
+ .notifier_call = cbs_dev_notifier,
+};
+
static int __init cbs_module_init(void)
{
+ int err = register_netdevice_notifier(&cbs_device_notifier);
+
+ if (err)
+ return err;
+
return register_qdisc(&cbs_qdisc_ops);
}
static void __exit cbs_module_exit(void)
{
unregister_qdisc(&cbs_qdisc_ops);
+ unregister_netdevice_notifier(&cbs_device_notifier);
}
module_init(cbs_module_init)
module_exit(cbs_module_exit)
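
For reference, the port_rate conversion used above: ethtool reports link speed in Mbit/s, so bytes/s = speed * 1000 (kbit/s) * BYTES_PER_KBIT (125). The product outgrows a 32-bit integer at roughly 17 Gbit/s, which is one reason the value deserves 64-bit arithmetic throughout. A standalone check of the arithmetic:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define BYTES_PER_KBIT (1000LL / 8)	/* 125, matches the qdisc's macro */

int main(void)
{
	int64_t speeds_mbit[] = { 100, 1000, 10000, 25000 };

	for (size_t i = 0; i < sizeof(speeds_mbit) / sizeof(speeds_mbit[0]); i++)
		printf("%6lld Mbit/s -> %lld bytes/s\n",
		       (long long)speeds_mbit[i],
		       (long long)(speeds_mbit[i] * 1000 * BYTES_PER_KBIT));
	return 0;
}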
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 81356ef38d1d..848aab3693bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ qdisc_qstats_cpu_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
@@ -118,52 +118,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
spin_unlock(lock);
}
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- while (skb) {
- struct sk_buff *next = skb->next;
-
- __skb_queue_tail(&q->gso_skb, skb);
- q->qstats.requeues++;
- qdisc_qstats_backlog_inc(q, skb);
- q->q.qlen++; /* it's still part of the queue */
+ spinlock_t *lock = NULL;
- skb = next;
+ if (q->flags & TCQ_F_NOLOCK) {
+ lock = qdisc_lock(q);
+ spin_lock(lock);
}
- __netif_schedule(q);
-
- return 0;
-}
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
- spinlock_t *lock = qdisc_lock(q);
-
- spin_lock(lock);
while (skb) {
struct sk_buff *next = skb->next;
__skb_queue_tail(&q->gso_skb, skb);
- qdisc_qstats_cpu_requeues_inc(q);
- qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_atomic_qlen_inc(q);
+ /* it's still part of the queue */
+ if (qdisc_is_percpu_stats(q)) {
+ qdisc_qstats_cpu_requeues_inc(q);
+ qdisc_qstats_cpu_backlog_inc(q, skb);
+ qdisc_qstats_cpu_qlen_inc(q);
+ } else {
+ q->qstats.requeues++;
+ qdisc_qstats_backlog_inc(q, skb);
+ q->q.qlen++;
+ }
skb = next;
}
- spin_unlock(lock);
-
+ if (lock)
+ spin_unlock(lock);
__netif_schedule(q);
-
- return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
- if (q->flags & TCQ_F_NOLOCK)
- return dev_requeue_skb_locked(skb, q);
- else
- return __dev_requeue_skb(skb, q);
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -252,7 +236,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_atomic_qlen_dec(q);
+ qdisc_qstats_cpu_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -645,11 +629,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_atomic_qlen_inc(qdisc);
- /* Note: skb can not be used after skb_array_produce(),
- * so we better not use qdisc_qstats_cpu_backlog_inc()
- */
- this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+ qdisc_update_stats_at_enqueue(qdisc, pkt_len);
return NET_XMIT_SUCCESS;
}
@@ -668,9 +648,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
skb = __skb_array_consume(q);
}
if (likely(skb)) {
- qdisc_qstats_cpu_backlog_dec(qdisc, skb);
- qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_atomic_qlen_dec(qdisc);
+ qdisc_update_stats_at_dequeue(qdisc, skb);
} else {
qdisc->empty = true;
}
@@ -716,6 +694,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
+ q->qlen = 0;
}
}
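
The requeue rework collapses the locked and lockless variants into a single body: take the qdisc lock only when TCQ_F_NOLOCK is set, and pick per-CPU versus plain counters per skb via qdisc_is_percpu_stats(). The conditional-lock shape, as a pthread sketch (names invented):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t toy_qdisc_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_requeue(bool nolock_qdisc, int *qlen)
{
	pthread_mutex_t *lock = NULL;

	if (nolock_qdisc) {		/* cf. q->flags & TCQ_F_NOLOCK */
		lock = &toy_qdisc_lock;
		pthread_mutex_lock(lock);
	}

	(*qlen)++;			/* "it's still part of the queue" */

	if (lock)
		pthread_mutex_unlock(lock);
}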
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c7041999eb5d..1b0fb80162e6 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -20,6 +20,9 @@
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+static LIST_HEAD(taprio_list);
+static DEFINE_SPINLOCK(taprio_list_lock);
+
#define TAPRIO_ALL_GATES_OPEN -1
struct sched_entry {
@@ -42,9 +45,9 @@ struct taprio_sched {
struct Qdisc *root;
s64 base_time;
int clockid;
- int picos_per_byte; /* Using picoseconds because for 10Gbps+
- * speeds it's sub-nanoseconds per byte
- */
+ atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
size_t num_entries;
/* Protects the update side of the RCU protected current_entry */
@@ -53,6 +56,7 @@ struct taprio_sched {
struct list_head entries;
ktime_t (*get_time)(void);
struct hrtimer advance_timer;
+ struct list_head taprio_list;
};
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -117,7 +121,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
static inline int length_to_duration(struct taprio_sched *q, int len)
{
- return (len * q->picos_per_byte) / 1000;
+ return (len * atomic64_read(&q->picos_per_byte)) / 1000;
}
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
@@ -129,6 +133,11 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
u32 gate_mask;
int i;
+ if (atomic64_read(&q->picos_per_byte) == -1) {
+ WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
+ return NULL;
+ }
+
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
/* if there's no entry, it means that the schedule didn't
@@ -233,7 +242,7 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
next->close_time = close_time;
atomic_set(&next->budget,
- (next->interval * 1000) / q->picos_per_byte);
+ (next->interval * 1000) / atomic64_read(&q->picos_per_byte));
first_run:
rcu_assign_pointer(q->current_entry, next);
@@ -567,7 +576,8 @@ static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
first->close_time = ktime_add_ns(start, first->interval);
atomic_set(&first->budget,
- (first->interval * 1000) / q->picos_per_byte);
+ (first->interval * 1000) /
+ atomic64_read(&q->picos_per_byte));
rcu_assign_pointer(q->current_entry, NULL);
spin_unlock_irqrestore(&q->current_entry_lock, flags);
@@ -575,6 +585,52 @@ static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}
+static void taprio_set_picos_per_byte(struct net_device *dev,
+ struct taprio_sched *q)
+{
+ struct ethtool_link_ksettings ecmd;
+ int picos_per_byte = -1;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+ ecmd.base.speed != SPEED_UNKNOWN)
+ picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+ (s64)ecmd.base.speed * 1000 * 1000);
+
+ atomic64_set(&q->picos_per_byte, picos_per_byte);
+ netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
+ dev->name, (long long)atomic64_read(&q->picos_per_byte),
+ ecmd.base.speed);
+}
+
+static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *qdev;
+ struct taprio_sched *q;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ if (event != NETDEV_UP && event != NETDEV_CHANGE)
+ return NOTIFY_DONE;
+
+ spin_lock(&taprio_list_lock);
+ list_for_each_entry(q, &taprio_list, taprio_list) {
+ qdev = qdisc_dev(q->root);
+ if (qdev == dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&taprio_list_lock);
+
+ if (found)
+ taprio_set_picos_per_byte(dev, q);
+
+ return NOTIFY_DONE;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -582,9 +638,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- struct ethtool_link_ksettings ecmd;
int i, err, size;
- s64 link_speed;
ktime_t start;
err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
@@ -657,14 +711,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->prio_tc_map[i]);
}
- if (!__ethtool_get_link_ksettings(dev, &ecmd))
- link_speed = ecmd.base.speed;
- else
- link_speed = SPEED_1000;
-
- q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
- link_speed * 1000 * 1000);
-
+ taprio_set_picos_per_byte(dev, q);
start = taprio_get_start_time(sch);
if (!start)
return 0;
@@ -681,6 +728,10 @@ static void taprio_destroy(struct Qdisc *sch)
struct sched_entry *entry, *n;
unsigned int i;
+ spin_lock(&taprio_list_lock);
+ list_del(&q->taprio_list);
+ spin_unlock(&taprio_list_lock);
+
hrtimer_cancel(&q->advance_timer);
if (q->qdiscs) {
@@ -735,6 +786,10 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt)
return -EINVAL;
+ spin_lock(&taprio_list_lock);
+ list_add(&q->taprio_list, &taprio_list);
+ spin_unlock(&taprio_list_lock);
+
return taprio_change(sch, opt, extack);
}
@@ -947,14 +1002,24 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct notifier_block taprio_device_notifier = {
+ .notifier_call = taprio_dev_notifier,
+};
+
static int __init taprio_module_init(void)
{
+ int err = register_netdevice_notifier(&taprio_device_notifier);
+
+ if (err)
+ return err;
+
return register_qdisc(&taprio_qdisc_ops);
}
static void __exit taprio_module_exit(void)
{
unregister_qdisc(&taprio_qdisc_ops);
+ unregister_netdevice_notifier(&taprio_device_notifier);
}
module_init(taprio_module_init);
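
To see why the field comment insists on picoseconds: ps/byte = 8 * 10^12 / (link rate in bit/s), which gives 8000 ps per byte at 1 Gbit/s but only 800 ps — sub-nanosecond — at 10 Gbit/s. A standalone check of the formula taprio_set_picos_per_byte() computes:

#include <stdio.h>
#include <stdint.h>

static int64_t toy_picos_per_byte(int64_t speed_mbit)
{
	/* 8 bits/byte * 1e12 ps/s, divided by the link rate in bit/s */
	return (8 * 1000000000000LL) / (speed_mbit * 1000 * 1000);
}

int main(void)
{
	printf("1 Gbit/s : %lld ps/byte\n", (long long)toy_picos_per_byte(1000));
	printf("10 Gbit/s: %lld ps/byte\n", (long long)toy_picos_per_byte(10000));
	return 0;
}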
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 102c6fefe38c..25e0b7e5189c 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -484,14 +484,15 @@ static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq,
}
static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
- struct sctp_ulpevent *event)
+ struct sk_buff_head *skb_list)
{
- struct sk_buff *skb = sctp_event2skb(event);
struct sock *sk = ulpq->asoc->base.sk;
struct sctp_sock *sp = sctp_sk(sk);
- struct sk_buff_head *skb_list;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
- skb_list = (struct sk_buff_head *)skb->prev;
+ skb = __skb_peek(skb_list);
+ event = sctp_skb2event(skb);
if (sk->sk_shutdown & RCV_SHUTDOWN &&
(sk->sk_shutdown & SEND_SHUTDOWN ||
@@ -858,19 +859,24 @@ static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq,
if (!(event->msg_flags & SCTP_DATA_UNORDERED)) {
event = sctp_intl_reasm(ulpq, event);
- if (event && event->msg_flags & MSG_EOR) {
+ if (event) {
skb_queue_head_init(&temp);
__skb_queue_tail(&temp, sctp_event2skb(event));
- event = sctp_intl_order(ulpq, event);
+ if (event->msg_flags & MSG_EOR)
+ event = sctp_intl_order(ulpq, event);
}
} else {
event = sctp_intl_reasm_uo(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ }
}
if (event) {
event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
- sctp_enqueue_event(ulpq, event);
+ sctp_enqueue_event(ulpq, &temp);
}
return event_eor;
@@ -944,20 +950,27 @@ out:
static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
{
struct sctp_ulpevent *event;
+ struct sk_buff_head temp;
if (!skb_queue_empty(&ulpq->reasm)) {
do {
event = sctp_intl_retrieve_first(ulpq);
- if (event)
- sctp_enqueue_event(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_enqueue_event(ulpq, &temp);
+ }
} while (event);
}
if (!skb_queue_empty(&ulpq->reasm_uo)) {
do {
event = sctp_intl_retrieve_first_uo(ulpq);
- if (event)
- sctp_enqueue_event(ulpq, event);
+ if (event) {
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_enqueue_event(ulpq, &temp);
+ }
} while (event);
}
}
@@ -1059,7 +1072,7 @@ static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
if (event) {
sctp_intl_retrieve_ordered(ulpq, event);
- sctp_enqueue_event(ulpq, event);
+ sctp_enqueue_event(ulpq, &temp);
}
}
@@ -1298,6 +1311,15 @@ static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
ntohl(skip->mid), skip->flags);
}
+static int do_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+{
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ return sctp_ulpq_tail_event(ulpq, &temp);
+}
+
static struct sctp_stream_interleave sctp_stream_interleave_0 = {
.data_chunk_len = sizeof(struct sctp_data_chunk),
.ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk),
@@ -1306,7 +1328,7 @@ static struct sctp_stream_interleave sctp_stream_interleave_0 = {
.assign_number = sctp_chunk_assign_ssn,
.validate_data = sctp_validate_data,
.ulpevent_data = sctp_ulpq_tail_data,
- .enqueue_event = sctp_ulpq_tail_event,
+ .enqueue_event = do_ulpq_tail_event,
.renege_events = sctp_ulpq_renege,
.start_pd = sctp_ulpq_partial_delivery,
.abort_pd = sctp_ulpq_abort_pd,
@@ -1317,6 +1339,16 @@ static struct sctp_stream_interleave sctp_stream_interleave_0 = {
.handle_ftsn = sctp_handle_fwdtsn,
};
+static int do_sctp_enqueue_event(struct sctp_ulpq *ulpq,
+ struct sctp_ulpevent *event)
+{
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ return sctp_enqueue_event(ulpq, &temp);
+}
+
static struct sctp_stream_interleave sctp_stream_interleave_1 = {
.data_chunk_len = sizeof(struct sctp_idata_chunk),
.ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk),
@@ -1325,7 +1357,7 @@ static struct sctp_stream_interleave sctp_stream_interleave_1 = {
.assign_number = sctp_chunk_assign_mid,
.validate_data = sctp_validate_idata,
.ulpevent_data = sctp_ulpevent_idata,
- .enqueue_event = sctp_enqueue_event,
+ .enqueue_event = do_sctp_enqueue_event,
.renege_events = sctp_renege_events,
.start_pd = sctp_intl_start_pd,
.abort_pd = sctp_intl_abort_pd,
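
The recurring pattern in these sctp hunks: rather than smuggling the list head through skb->prev, each caller wraps a single event in an on-stack sk_buff_head before passing it down, so the one-event and many-event delivery paths share a signature. A toy doubly linked list showing the wrapper shape of do_ulpq_tail_event() (all names invented):

struct toy_node { struct toy_node *next, *prev; };
struct toy_list { struct toy_node head; };

static void toy_list_init(struct toy_list *l)
{
	l->head.next = l->head.prev = &l->head;
}

static void toy_list_append(struct toy_list *l, struct toy_node *n)
{
	n->prev = l->head.prev;
	n->next = &l->head;
	l->head.prev->next = n;
	l->head.prev = n;
}

/* Mirrors do_ulpq_tail_event(): wrap one event in a temporary list so
 * the single-event and multi-event paths share one delivery signature.
 */
static void toy_deliver_one(struct toy_node *event,
			    void (*deliver)(struct toy_list *))
{
	struct toy_list temp;

	toy_list_init(&temp);
	toy_list_append(&temp, event);
	deliver(&temp);
}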
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 5dde92101743..7cdc3623fa35 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -116,12 +116,13 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
event = sctp_ulpq_reasm(ulpq, event);
/* Do ordering if needed. */
- if ((event) && (event->msg_flags & MSG_EOR)) {
+ if (event) {
/* Create a temporary list to collect chunks on. */
skb_queue_head_init(&temp);
__skb_queue_tail(&temp, sctp_event2skb(event));
- event = sctp_ulpq_order(ulpq, event);
+ if (event->msg_flags & MSG_EOR)
+ event = sctp_ulpq_order(ulpq, event);
}
/* Send event to the ULP. 'event' is the sctp_ulpevent for
@@ -129,7 +130,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
*/
if (event) {
event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
return event_eor;
@@ -193,18 +194,17 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
}
-/* If the SKB of 'event' is on a list, it is the first such member
- * of that list.
- */
-int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list)
{
struct sock *sk = ulpq->asoc->base.sk;
struct sctp_sock *sp = sctp_sk(sk);
- struct sk_buff_head *queue, *skb_list;
- struct sk_buff *skb = sctp_event2skb(event);
+ struct sctp_ulpevent *event;
+ struct sk_buff_head *queue;
+ struct sk_buff *skb;
int clear_pd = 0;
- skb_list = (struct sk_buff_head *) skb->prev;
+ skb = __skb_peek(skb_list);
+ event = sctp_skb2event(skb);
/* If the socket is just going to throw this away, do not
* even try to deliver it.
@@ -257,13 +257,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
}
}
- /* If we are harvesting multiple skbs they will be
- * collected on a list.
- */
- if (skb_list)
- skb_queue_splice_tail_init(skb_list, queue);
- else
- __skb_queue_tail(queue, skb);
+ skb_queue_splice_tail_init(skb_list, queue);
/* Did we just complete partial delivery and need to get
* rolling again? Move pending data to the receive
@@ -738,25 +732,25 @@ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn)
static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
{
struct sctp_ulpevent *event = NULL;
- struct sk_buff_head temp;
if (skb_queue_empty(&ulpq->reasm))
return;
while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) {
- /* Do ordering if needed. */
- if ((event) && (event->msg_flags & MSG_EOR)) {
- skb_queue_head_init(&temp);
- __skb_queue_tail(&temp, sctp_event2skb(event));
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ /* Do ordering if needed. */
+ if (event->msg_flags & MSG_EOR)
event = sctp_ulpq_order(ulpq, event);
- }
/* Send event to the ULP. 'event' is the
* sctp_ulpevent for the very first SKB on the 'temp' list.
*/
if (event)
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
}
@@ -956,7 +950,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
if (event) {
/* see if we have more ordered that we can deliver */
sctp_ulpq_retrieve_ordered(ulpq, event);
- sctp_ulpq_tail_event(ulpq, event);
+ sctp_ulpq_tail_event(ulpq, &temp);
}
}
@@ -1082,7 +1076,11 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
event = sctp_ulpq_retrieve_first(ulpq);
/* Send event to the ULP. */
if (event) {
- sctp_ulpq_tail_event(ulpq, event);
+ struct sk_buff_head temp;
+
+ skb_queue_head_init(&temp);
+ __skb_queue_tail(&temp, sctp_event2skb(event));
+ sctp_ulpq_tail_event(ulpq, &temp);
sctp_ulpq_set_pd(ulpq);
return;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 77ef53596d18..e066899de72d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -134,11 +134,9 @@ static int smc_release(struct socket *sock)
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
- if (smc->connect_info && sk->sk_state == SMC_INIT)
+ if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
@@ -452,6 +450,7 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
return 0;
@@ -491,46 +490,41 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
mutex_unlock(&smc_client_lgr_pending);
smc_conn_free(&smc->conn);
+ smc->connect_nonblock = 0;
return reason_code;
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
-static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
- int reason_code = 0;
-
/* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device
* used for the internal TCP socket
*/
- smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
- gid);
- if (!(*ibdev))
- reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-
- return reason_code;
+ smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
+ if (!ini->ib_dev)
+ return SMC_CLC_DECL_NOSMCRDEV;
+ return 0;
}
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
-static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
- smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
- if (!(*ismdev))
- return SMC_CLC_DECL_CNFERR; /* configuration error */
+ smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
+ if (!ini->ism_dev)
+ return SMC_CLC_DECL_NOSMCDDEV;
return 0;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
- if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
- return SMC_CLC_DECL_CNFERR;
+ if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
+ return SMC_CLC_DECL_ISMVLANERR;
return 0;
}
@@ -538,12 +532,11 @@ static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
* used, the VLAN ID will be registered again during the connection setup.
*/
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
- struct smcd_dev *ismdev,
- unsigned short vlan_id)
+ struct smc_init_info *ini)
{
if (!is_smcd)
return 0;
- if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+ if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
@@ -551,13 +544,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport,
- u8 gid[], struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
int rc = 0;
/* do inband token exchange */
- rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
+ rc = smc_clc_send_proposal(smc, smc_type, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
@@ -568,23 +560,19 @@ static int smc_connect_clc(struct smc_sock *smc, int smc_type,
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smc_ib_device *ibdev, u8 ibport)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
struct smc_link *link;
int reason_code = 0;
+ ini->is_smcd = false;
+ ini->ib_lcl = &aclc->lcl;
+ ini->ib_clcqpn = ntoh24(aclc->qpn);
+ ini->srv_first_contact = aclc->hdr.flag;
+
mutex_lock(&smc_client_lgr_pending);
- local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, ntoh24(aclc->qpn), &aclc->lcl,
- NULL, 0);
- if (local_contact < 0) {
- if (local_contact == -ENOMEM)
- reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (local_contact == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
- else
- reason_code = SMC_CLC_DECL_INTERR; /* other error */
+ reason_code = smc_conn_create(smc, ini);
+ if (reason_code) {
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
@@ -594,45 +582,48 @@ static int smc_connect_rdma(struct smc_sock *smc,
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
- if (local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
- local_contact);
+ ini->cln_first_contact);
smc_close_init(smc);
smc_rx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (smc_ib_ready_link(link))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
- local_contact);
+ ini->cln_first_contact);
} else {
if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
- local_contact);
+ ini->cln_first_contact);
}
smc_rmb_sync_sg_for_device(&smc->conn);
reason_code = smc_clc_send_confirm(smc);
if (reason_code)
- return smc_connect_abort(smc, reason_code, local_contact);
+ return smc_connect_abort(smc, reason_code,
+ ini->cln_first_contact);
smc_tx_init(smc);
- if (local_contact == SMC_FIRST_CONTACT) {
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
- local_contact);
+ ini->cln_first_contact);
}
mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
@@ -642,23 +633,26 @@ static int smc_connect_rdma(struct smc_sock *smc,
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
- int local_contact = SMC_FIRST_CONTACT;
int rc = 0;
+ ini->is_smcd = true;
+ ini->ism_gid = aclc->gid;
+ ini->srv_first_contact = aclc->hdr.flag;
+
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
- NULL, ismdev, aclc->gid);
- if (local_contact < 0) {
+ rc = smc_conn_create(smc, ini);
+ if (rc) {
mutex_unlock(&smc_server_lgr_pending);
- return SMC_CLC_DECL_MEM;
+ return rc;
}
/* Create send and receive buffers */
if (smc_buf_create(smc, true))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+ return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ ini->cln_first_contact);
smc_conn_save_peer_info(smc, aclc);
smc_close_init(smc);
@@ -667,10 +661,11 @@ static int smc_connect_ism(struct smc_sock *smc,
rc = smc_clc_send_confirm(smc);
if (rc)
- return smc_connect_abort(smc, rc, local_contact);
+ return smc_connect_abort(smc, rc, ini->cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
+ smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE;
@@ -682,13 +677,9 @@ static int __smc_connect(struct smc_sock *smc)
{
bool ism_supported = false, rdma_supported = false;
struct smc_clc_msg_accept_confirm aclc;
- struct smc_ib_device *ibdev;
- struct smcd_dev *ismdev;
- u8 gid[SMC_GID_SIZE];
- unsigned short vlan;
+ struct smc_init_info ini = {0};
int smc_type;
int rc = 0;
- u8 ibport;
sock_hold(&smc->sk); /* sock put in passive closing */
@@ -703,20 +694,21 @@ static int __smc_connect(struct smc_sock *smc)
if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
- /* check for VLAN ID */
- if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
+ return smc_connect_decline_fallback(smc,
+ SMC_CLC_DECL_GETVLANERR);
/* check if there is an ism device available */
- if (!smc_check_ism(smc, &ismdev) &&
- !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+ if (!smc_find_ism_device(smc, &ini) &&
+ !smc_connect_ism_vlan_setup(smc, &ini)) {
/* ISM is supported for this connection */
ism_supported = true;
smc_type = SMC_TYPE_D;
}
/* check if there is a rdma device available */
- if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+ if (!smc_find_rdma_device(smc, &ini)) {
/* RDMA is supported for this connection */
rdma_supported = true;
if (ism_supported)
@@ -730,25 +722,25 @@ static int __smc_connect(struct smc_sock *smc)
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
/* perform CLC handshake */
- rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+ rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
/* depending on previous steps, connect using rdma or ism */
if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
- rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+ rc = smc_connect_rdma(smc, &aclc, &ini);
else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
- rc = smc_connect_ism(smc, &aclc, ismdev);
+ rc = smc_connect_ism(smc, &aclc, &ini);
else
rc = SMC_CLC_DECL_MODEUNSUPP;
if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return smc_connect_decline_fallback(smc, rc);
}
- smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
+ smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
return 0;
}
@@ -756,17 +748,30 @@ static void smc_connect_work(struct work_struct *work)
{
struct smc_sock *smc = container_of(work, struct smc_sock,
connect_work);
- int rc;
+ long timeo = smc->sk.sk_sndtimeo;
+ int rc = 0;
- lock_sock(&smc->sk);
- rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
- smc->connect_info->alen, smc->connect_info->flags);
+ if (!timeo)
+ timeo = MAX_SCHEDULE_TIMEOUT;
+ lock_sock(smc->clcsock->sk);
if (smc->clcsock->sk->sk_err) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
- goto out;
- }
- if (rc < 0) {
- smc->sk.sk_err = -rc;
+ } else if ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
+ if ((rc == -EPIPE) &&
+ ((1 << smc->clcsock->sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
+ rc = 0;
+ }
+ release_sock(smc->clcsock->sk);
+ lock_sock(&smc->sk);
+ if (rc != 0 || smc->sk.sk_err) {
+ smc->sk.sk_state = SMC_CLOSED;
+ if (rc == -EPIPE || rc == -EAGAIN)
+ smc->sk.sk_err = EPIPE;
+ else if (signal_pending(current))
+ smc->sk.sk_err = -sock_intr_errno(timeo);
goto out;
}
@@ -779,8 +784,6 @@ out:
smc->sk.sk_state_change(&smc->sk);
else
smc->sk.sk_write_space(&smc->sk);
- kfree(smc->connect_info);
- smc->connect_info = NULL;
release_sock(&smc->sk);
}
@@ -813,26 +816,18 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
+ if (smc->connect_nonblock) {
+ rc = -EALREADY;
+ goto out;
+ }
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc && rc != -EINPROGRESS)
+ goto out;
if (flags & O_NONBLOCK) {
- if (smc->connect_info) {
- rc = -EALREADY;
- goto out;
- }
- smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
- if (!smc->connect_info) {
- rc = -ENOMEM;
- goto out;
- }
- smc->connect_info->alen = alen;
- smc->connect_info->flags = flags ^ O_NONBLOCK;
- memcpy(&smc->connect_info->addr, addr, alen);
- schedule_work(&smc->connect_work);
+ if (schedule_work(&smc->connect_work))
+ smc->connect_nonblock = 1;
rc = -EINPROGRESS;
} else {
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
- if (rc)
- goto out;
-
rc = __smc_connect(smc);
if (rc < 0)
goto out;
@@ -1099,7 +1094,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
}
/* listen worker: check prefixes */
-static int smc_listen_rdma_check(struct smc_sock *new_smc,
+static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
@@ -1107,25 +1102,21 @@ static int smc_listen_rdma_check(struct smc_sock *new_smc,
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_DIFFPREFIX;
return 0;
}
/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
- struct smc_clc_msg_proposal *pclc,
- struct smc_ib_device *ibdev, u8 ibport,
- int *local_contact)
+ struct smc_init_info *ini)
{
+ int rc;
+
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
- &pclc->lcl, NULL, 0);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* create send buffer and rmb */
if (smc_buf_create(new_smc, false))
@@ -1137,33 +1128,30 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
- struct smcd_dev *ismdev,
- int *local_contact)
+ struct smc_init_info *ini)
{
struct smc_clc_msg_smcd *pclc_smcd;
+ int rc;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
- ismdev, pclc_smcd->gid);
- if (*local_contact < 0) {
- if (*local_contact == -ENOMEM)
- return SMC_CLC_DECL_MEM;/* insufficient memory*/
- return SMC_CLC_DECL_INTERR; /* other error */
- }
+ ini->ism_gid = pclc_smcd->gid;
+ rc = smc_conn_create(new_smc, ini);
+ if (rc)
+ return rc;
/* Check if peer can be reached via ISM device */
if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
- return SMC_CLC_DECL_CNFERR;
+ return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
- if (*local_contact == SMC_FIRST_CONTACT)
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
@@ -1227,15 +1215,10 @@ static void smc_listen_work(struct work_struct *work)
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_proposal *pclc;
- struct smc_ib_device *ibdev;
+ struct smc_init_info ini = {0};
bool ism_supported = false;
- struct smcd_dev *ismdev;
u8 buf[SMC_CLC_MAX_LEN];
- int local_contact = 0;
- unsigned short vlan;
- int reason_code = 0;
int rc = 0;
- u8 ibport;
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
@@ -1254,17 +1237,26 @@ static void smc_listen_work(struct work_struct *work)
* wait for and receive SMC Proposal CLC message
*/
pclc = (struct smc_clc_msg_proposal *)&buf;
- reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
- SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
- if (reason_code) {
- smc_listen_decline(new_smc, reason_code, 0);
- return;
- }
+ rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
+ SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
+ if (rc)
+ goto out_decl;
/* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(new_smc)) {
- smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
- return;
+ rc = SMC_CLC_DECL_IPSEC;
+ goto out_decl;
+ }
+
+ /* check for matching IP prefix and subnet length */
+ rc = smc_listen_prfx_check(new_smc, pclc);
+ if (rc)
+ goto out_decl;
+
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
+ rc = SMC_CLC_DECL_GETVLANERR;
+ goto out_decl;
}
mutex_lock(&smc_server_lgr_pending);
@@ -1273,59 +1265,73 @@ static void smc_listen_work(struct work_struct *work)
smc_tx_init(new_smc);
/* check if ISM is available */
- if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
- !smc_check_ism(new_smc, &ismdev) &&
- !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
- ism_supported = true;
+ if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
+ ini.is_smcd = true; /* prepare ISM check */
+ rc = smc_find_ism_device(new_smc, &ini);
+ if (!rc)
+ rc = smc_listen_ism_init(new_smc, pclc, &ini);
+ if (!rc)
+ ism_supported = true;
+ else if (pclc->hdr.path == SMC_TYPE_D)
+ goto out_unlock; /* skip RDMA and decline */
}
/* check if RDMA is available */
- if (!ism_supported &&
- ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
- smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
- smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
- smc_listen_rdma_check(new_smc, pclc) ||
- smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
- &local_contact) ||
- smc_listen_rdma_reg(new_smc, local_contact))) {
- /* SMC not supported, decline */
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
- local_contact);
- return;
+ if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
+ /* prepare RDMA check */
+ memset(&ini, 0, sizeof(ini));
+ ini.is_smcd = false;
+ ini.ib_lcl = &pclc->lcl;
+ rc = smc_find_rdma_device(new_smc, &ini);
+ if (rc) {
+ /* no RDMA device found */
+ if (pclc->hdr.path == SMC_TYPE_B)
+ /* neither ISM nor RDMA device found */
+ rc = SMC_CLC_DECL_NOSMCDEV;
+ goto out_unlock;
+ }
+ rc = smc_listen_rdma_init(new_smc, &ini);
+ if (rc)
+ goto out_unlock;
+ rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
}
/* send SMC Accept CLC message */
- rc = smc_clc_send_accept(new_smc, local_contact);
- if (rc) {
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, rc, local_contact);
- return;
- }
+ rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
+ if (rc)
+ goto out_unlock;
/* SMC-D does not need this lock any more */
if (ism_supported)
mutex_unlock(&smc_server_lgr_pending);
/* receive SMC Confirm CLC message */
- reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
- SMC_CLC_CONFIRM, CLC_WAIT_TIME);
- if (reason_code) {
+ rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+ SMC_CLC_CONFIRM, CLC_WAIT_TIME);
+ if (rc) {
if (!ism_supported)
- mutex_unlock(&smc_server_lgr_pending);
- smc_listen_decline(new_smc, reason_code, local_contact);
- return;
+ goto out_unlock;
+ goto out_decl;
}
/* finish worker */
if (!ism_supported) {
- rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+ rc = smc_listen_rdma_finish(new_smc, &cclc,
+ ini.cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
if (rc)
return;
}
smc_conn_save_peer_info(new_smc, &cclc);
smc_listen_out_connected(new_smc);
+ return;
+
+out_unlock:
+ mutex_unlock(&smc_server_lgr_pending);
+out_decl:
+ smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}
static void smc_tcp_listen_work(struct work_struct *work)
@@ -1571,8 +1577,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- __poll_t mask = 0;
struct smc_sock *smc;
+ __poll_t mask = 0;
if (!sk)
return EPOLLNVAL;
@@ -1582,8 +1588,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
/* delegate to CLC child sock */
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err)
- mask |= EPOLLERR;
} else {
if (sk->sk_state != SMC_CLOSED)
sock_poll_wait(file, sock, wait);
@@ -1594,9 +1598,14 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
if (sk->sk_state == SMC_LISTEN) {
/* woken up by sk_data_ready in smc_listen_work() */
- mask = smc_accept_poll(sk);
+ mask |= smc_accept_poll(sk);
+ } else if (smc->use_fallback) { /* as result of connect_work() */
+ mask |= smc->clcsock->ops->poll(file, smc->clcsock,
+ wait);
+ sk->sk_err = smc->clcsock->sk->sk_err;
} else {
- if (atomic_read(&smc->conn.sndbuf_space) ||
+ if ((sk->sk_state != SMC_INIT &&
+ atomic_read(&smc->conn.sndbuf_space)) ||
sk->sk_shutdown & SEND_SHUTDOWN) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else {
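
The connect worker's state test relies on the usual "1 << sk_state" idiom: each TCPF_* flag is defined as 1 << TCP_*, so a single AND matches several states at once, while a bare TCP_* enum value mixed into such a mask would silently corrupt it — which is why the TCPF_ forms matter there. A standalone demonstration (the enum values match the kernel's TCP states):

#include <stdio.h>

enum { TCP_ESTABLISHED = 1, TCP_SYN_SENT, TCP_SYN_RECV };
#define TCPF(state)	(1 << (state))

int main(void)
{
	int sk_state = TCP_SYN_RECV;

	if ((1 << sk_state) & (TCPF(TCP_SYN_SENT) | TCPF(TCP_SYN_RECV)))
		puts("handshake in flight: wait before running __smc_connect()");
	return 0;
}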
diff --git a/net/smc/smc.h b/net/smc/smc.h
index adbdf195eb08..878313f8d6c1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -190,18 +190,11 @@ struct smc_connection {
u64 peer_token; /* SMC-D token of peer */
};
-struct smc_connect_info {
- int flags;
- int alen;
- struct sockaddr addr;
-};
-
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
- struct smc_connect_info *connect_info; /* connect address & flags */
struct work_struct connect_work; /* handle non-blocking connect*/
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -219,6 +212,10 @@ struct smc_sock { /* smc sock container */
* started, waiting for unsent
* data to be sent
*/
+ u8 connect_nonblock : 1;
+ /* non-blocking connect in
+ * flight
+ */
struct mutex clcsock_release_lock;
/* protects clcsock of a listen
* socket
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index d53fd588d1f5..745afd82f281 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -385,8 +385,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev)
+ struct smc_init_info *ini)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
@@ -416,8 +415,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
/* add SMC-R specifics */
memcpy(pclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
- memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
- memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+ memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
+ memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
+ ETH_ALEN);
pclc.iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
@@ -425,7 +425,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
memset(&pclc_smcd, 0, sizeof(pclc_smcd));
plen += sizeof(pclc_smcd);
pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
- pclc_smcd.gid = ismdev->local_gid;
+ pclc_smcd.gid = ini->ism_dev->local_gid;
}
pclc.hdr.length = htons(plen);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 24658e8c0de4..ca209272e5fa 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -34,16 +34,22 @@
#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */
#define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */
#define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */
-#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */
+#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */
+#define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */
+#define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */
+#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
+#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
+#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
+#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
-#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
-#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */
-#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */
-#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */
+#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
+#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
+#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
+#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -179,6 +185,7 @@ smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
}
struct smcd_dev;
+struct smc_init_info;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
@@ -186,8 +193,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type, unsigned long timeout);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
- struct smcd_dev *ismdev);
+ struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
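
The reorganized decline codes are structured: the top byte carries a class (0x03 configuration, 0x04 synchronization, 0x05 peer decline, 0x09 internal) and the lower bytes a variant within it, e.g. the 0x030300xx family for "no device". A small reader for that layout (the split and the naming are my interpretation, not defined by the header):

#include <stdio.h>
#include <stdint.h>

static void toy_decode_decl(uint32_t code)
{
	printf("code %#010x: class %#04x, variant %#08x\n",
	       (unsigned)code, (unsigned)(code >> 24),
	       (unsigned)(code & 0x00ffffff));
}

int main(void)
{
	toy_decode_decl(0x03030000);	/* no SMC device (R or D) */
	toy_decode_decl(0x03030001);	/* no SMC-D device found */
	toy_decode_decl(0x09990003);	/* reg rmb failed */
	return 0;
}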
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 53a17cfa61af..2d2850adc2a3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -195,10 +195,7 @@ static void smc_lgr_free_work(struct work_struct *work)
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
- struct smc_ib_device *smcibdev, u8 ibport,
- char *peer_systemid, unsigned short vlan_id,
- struct smcd_dev *smcismdev, u64 peer_gid)
+static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct smc_link *lnk;
@@ -206,20 +203,21 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
int rc = 0;
int i;
- if (is_smcd && vlan_id) {
- rc = smc_ism_get_vlan(smcismdev, vlan_id);
- if (rc)
+ if (ini->is_smcd && ini->vlan_id) {
+ if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+ rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
+ }
}
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
- rc = -ENOMEM;
+ rc = SMC_CLC_DECL_MEM;
goto out;
}
- lgr->is_smcd = is_smcd;
+ lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
- lgr->vlan_id = vlan_id;
+ lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
@@ -231,29 +229,32 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT;
- if (is_smcd) {
+ if (ini->is_smcd) {
/* SMC-D specific settings */
- lgr->peer_gid = peer_gid;
- lgr->smcd = smcismdev;
+ lgr->peer_gid = ini->ism_gid;
+ lgr->smcd = ini->ism_dev;
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+ memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+ SMC_SYSTEMID_LEN);
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
lnk->state = SMC_LNK_ACTIVATING;
lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = smcibdev;
- lnk->ibport = ibport;
- lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
- if (!smcibdev->initialized)
- smc_ib_setup_per_ibdev(smcibdev);
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lnk->path_mtu =
+ ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ if (!ini->ib_dev->initialized)
+ smc_ib_setup_per_ibdev(ini->ib_dev);
get_random_bytes(rndvec, sizeof(rndvec));
lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
(rndvec[2] << 16);
rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- vlan_id, lnk->gid, &lnk->sgid_index);
+ ini->vlan_id, lnk->gid,
+ &lnk->sgid_index);
if (rc)
goto free_lgr;
rc = smc_llc_link_init(lnk);
@@ -289,6 +290,12 @@ clear_llc_lnk:
free_lgr:
kfree(lgr);
out:
+ if (rc < 0) {
+ if (rc == -ENOMEM)
+ rc = SMC_CLC_DECL_MEM;
+ else
+ rc = SMC_CLC_DECL_INTERR;
+ }
return rc;
}
@@ -528,13 +535,13 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
/* Determine vlan of internal TCP socket.
* @ini: connect info; the determined vlan id is stored into ini->vlan_id
*/
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
struct net_device *ndev;
int i, nest_lvl, rc = 0;
- *vlan_id = 0;
+ ini->vlan_id = 0;
if (!dst) {
rc = -ENOTCONN;
goto out;
@@ -546,7 +553,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
ndev = dst->dev;
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
goto out_rel;
}
@@ -560,7 +567,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
lower = lower->next;
ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
if (is_vlan_dev(ndev)) {
- *vlan_id = vlan_dev_vlan_id(ndev);
+ ini->vlan_id = vlan_dev_vlan_id(ndev);
break;
}
}
@@ -594,24 +601,16 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid)
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
- int local_contact = SMC_FIRST_CONTACT;
struct smc_link_group *lgr;
- unsigned short vlan_id;
enum smc_lgr_role role;
int rc = 0;
+ ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
- if (rc)
- return rc;
-
- if ((role == SMC_CLNT) && srv_first_contact)
+ if (role == SMC_CLNT && ini->srv_first_contact)
/* create new link group as well */
goto create;
@@ -619,14 +618,15 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
- if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
+ if ((ini->is_smcd ?
+ smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+ smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
- lgr->vlan_id == vlan_id &&
+ lgr->vlan_id == ini->vlan_id &&
(role == SMC_CLNT ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
- local_contact = SMC_REUSE_CONTACT;
+ ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
if (delayed_work_pending(&lgr->free_work))
@@ -638,19 +638,18 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
}
spin_unlock_bh(&smc_lgr_list.lock);
- if (role == SMC_CLNT && !srv_first_contact &&
- (local_contact == SMC_FIRST_CONTACT)) {
+ if (role == SMC_CLNT && !ini->srv_first_contact &&
+ ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* Server reuses a link group, but Client wants to start
* a new one
* send out_of_sync decline, reason synchr. error
*/
- return -ENOLINK;
+ return SMC_CLC_DECL_SYNCERR;
}
create:
- if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
- lcl->id_for_peer, vlan_id, smcd, peer_gid);
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
smc_lgr_register_conn(conn); /* add smc conn to lgr */
@@ -658,7 +657,7 @@ create:
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
- if (is_smcd) {
+ if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
}
@@ -667,7 +666,7 @@ create:
#endif
out:
- return rc ? rc : local_contact;
+ return rc;
}
/* convert the RMB size into the compressed notation - minimum 16K.
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 8806d2afa6ed..c00ac61dc129 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -229,6 +229,24 @@ struct smc_link_group {
};
};
+struct smc_clc_msg_local;
+
+struct smc_init_info {
+ u8 is_smcd;
+ unsigned short vlan_id;
+ int srv_first_contact;
+ int cln_first_contact;
+ /* SMC-R */
+ struct smc_clc_msg_local *ib_lcl;
+ struct smc_ib_device *ib_dev;
+ u8 ib_gid[SMC_GID_SIZE];
+ u8 ib_port;
+ u32 ib_clcqpn;
+ /* SMC-D */
+ u64 ism_gid;
+ struct smcd_dev *ism_dev;
+};
+
/* Find the connection associated with the given alert token in the link group.
* To use rbtrees we have to implement our own search core.
* Requires @conns_lock
@@ -281,13 +299,10 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
-int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
- struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
- u64 peer_gid);
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
void smc_core_exit(void);
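The net/smc refactor above collapses the long smcibdev/ibport/vlan/gid parameter lists into a single struct smc_init_info that travels down the call chain. A minimal sketch of a hypothetical SMC-R caller under the new API (the decline-reason constant is assumed, not taken from these hunks):

	static int smc_connect_rdma_sketch(struct smc_sock *smc)
	{
		struct smc_init_info ini = { .is_smcd = 0 };	/* SMC-R path */
		int rc;

		rc = smc_vlan_by_tcpsk(smc->clcsock, &ini);	/* fills ini.vlan_id */
		if (rc)
			return rc;
		smc_pnet_find_roce_resource(smc->clcsock->sk, &ini);
		if (!ini.ib_dev)			/* no usable RoCE device */
			return SMC_CLC_DECL_CNFERR;	/* assumed decline reason */
		return smc_conn_create(smc, &ini);	/* 0 or SMC_CLC_DECL_* */
	}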
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 3cdf81cf97a3..2b246b94a3af 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -26,6 +26,7 @@
#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_ism.h"
+#include "smc_core.h"
#define SMC_ASCII_BLANK 32
@@ -755,8 +756,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
* IB device and port
*/
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id, u8 gid[])
+ struct smc_init_info *ini)
{
struct smc_ib_device *ibdev;
@@ -776,10 +776,10 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
break;
}
}
@@ -794,9 +794,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
* If nothing found, try to use handshake device
*/
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
- struct smc_ib_device **smcibdev,
- u8 *ibport, unsigned short vlan_id,
- u8 gid[])
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smc_ib_device *ibdev;
@@ -806,7 +804,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
- smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
@@ -817,10 +815,10 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
- NULL)) {
- *smcibdev = ibdev;
- *ibport = i;
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
goto out;
}
}
@@ -830,7 +828,7 @@ out:
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
- struct smcd_dev **smcismdev)
+ struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct smcd_dev *ismdev;
@@ -844,7 +842,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
- *smcismdev = ismdev;
+ ini->ism_dev = ismdev;
break;
}
}
@@ -855,21 +853,18 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
*/
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[])
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcibdev = NULL;
- *ibport = 0;
-
+ ini->ib_dev = NULL;
+ ini->ib_port = 0;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
+ smc_pnet_find_roce_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
@@ -877,17 +872,17 @@ out:
return;
}
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- *smcismdev = NULL;
+ ini->ism_dev = NULL;
if (!dst)
goto out;
if (!dst->dev)
goto out_rel;
- smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+ smc_pnet_find_ism_by_pnetid(dst->dev, ini);
out_rel:
dst_release(dst);
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 5eac42fb45d0..4564e4d69c2e 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -18,6 +18,7 @@
struct smc_ib_device;
struct smcd_dev;
+struct smc_init_info;
/**
* struct smc_pnettable - SMC PNET table anchor
@@ -43,9 +44,7 @@ int smc_pnet_init(void) __init;
int smc_pnet_net_init(struct net *net);
void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
-void smc_pnet_find_roce_resource(struct sock *sk,
- struct smc_ib_device **smcibdev, u8 *ibport,
- unsigned short vlan_id, u8 gid[]);
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
#endif
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 860dcfb95ee4..68a0885b9319 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -299,7 +299,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
break;
}
- /* Positive extra indicates ore bytes than needed for the
+ /* Positive extra indicates more bytes than needed for the
* message
*/
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3469b5d4ed32..7478e2d4ec02 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -375,14 +375,15 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
if (n->capabilities == capabilities)
goto exit;
/* Same node may come back with new capabilities */
- write_lock_bh(&n->lock);
+ tipc_node_write_lock(n);
n->capabilities = capabilities;
for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
l = n->links[bearer_id].link;
if (l)
tipc_link_update_caps(l, capabilities);
}
- write_unlock_bh(&n->lock);
+ tipc_node_write_unlock_fast(n);
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
index 5cd7c1d1a5d5..7409722727ca 100644
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -13,4 +13,5 @@
#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
#endif
+#define volatile(x...) volatile("")
#endif
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index f06063af9fcb..bb315ce1b866 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -28,6 +28,11 @@ static void print_ksym(__u64 addr)
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
if (PRINT_RAW_ADDR)
printf("%s/%llx;", sym->name, addr);
else
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 216c7ecbbbe9..23b90a45c802 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -109,6 +109,11 @@ static void print_ip_map(int fd)
for (i = 0; i < max; i++) {
if (counts[i].ip > PAGE_OFFSET) {
sym = ksym_search(counts[i].ip);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
counts[i].count);
} else {
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 8d3e9cfa1909..2556af2d9b3e 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -37,8 +37,13 @@ int main(int ac, char **argv)
bpf_map_lookup_elem(map_fd[0], &next_key, &value);
assert(next_key == value);
sym = ksym_search(value);
- printf(" %s", sym->name);
key = next_key;
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ continue;
+ }
+
+ printf(" %s", sym->name);
}
if (key)
printf("\n");
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index d08046ab81f0..d4178f60e075 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -34,6 +34,11 @@ static void print_ksym(__u64 addr)
if (!addr)
return;
sym = ksym_search(addr);
+ if (!sym) {
+ printf("ksym not found. Is kallsyms loaded?\n");
+ return;
+ }
+
printf("%s;", sym->name);
if (!strcmp(sym->name, "sys_read"))
sys_read_seen = true;
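The four samples/bpf hunks above add the same guard. The shared pattern, as a standalone sketch (ksym_search() and struct ksym as used in samples/bpf):

	static void print_ksym_checked(__u64 addr)
	{
		struct ksym *sym;

		if (!addr)
			return;
		sym = ksym_search(addr);
		if (!sym) {	/* kallsyms not loaded, or address unknown */
			printf("ksym not found. Is kallsyms loaded?\n");
			return;
		}
		printf("%s;", sym->name);
	}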
diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci
new file mode 100644
index 000000000000..350145da7669
--- /dev/null
+++ b/scripts/coccinelle/api/stream_open.cocci
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+// Author: Kirill Smelkov (kirr@nexedi.com)
+//
+// Search for stream-like files that are using nonseekable_open and convert
+// them to stream_open. A stream-like file is a file that does not use ppos in
+// its read and write. The rationale for the conversion is to avoid deadlocks
+// between read and write.
+
+virtual report
+virtual patch
+virtual explain // explain decisions in the patch (SPFLAGS="-D explain")
+
+// stream-like reader & writer - ones that do not depend on f_pos.
+@ stream_reader @
+identifier readstream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+@ stream_writer @
+identifier writestream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+
+// a function that blocks
+@ blocks @
+identifier block_f;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ block_f(...) {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+// stream_reader that can block inside.
+//
+// XXX wait_* can be called indirectly, not just from the current function (e.g. func -> f -> g -> wait())
+// XXX currently reader_blocks supports only direct and 1-level indirect cases.
+@ reader_blocks_direct @
+identifier stream_reader.readstream;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ readstream(...)
+ {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+@ reader_blocks_1 @
+identifier stream_reader.readstream;
+identifier blocks.block_f;
+@@
+ readstream(...)
+ {
+ ... when exists
+ block_f(...)
+ ... when exists
+ }
+
+@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @
+identifier stream_reader.readstream;
+@@
+ readstream(...) {
+ ...
+ }
+
+
+// file_operations + whether they have _any_ .read, .write, .llseek ... at all.
+//
+// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c)
+@ fops0 @
+identifier fops;
+@@
+ struct file_operations fops = {
+ ...
+ };
+
+@ has_read @
+identifier fops0.fops;
+identifier read_f;
+@@
+ struct file_operations fops = {
+ .read = read_f,
+ };
+
+@ has_read_iter @
+identifier fops0.fops;
+identifier read_iter_f;
+@@
+ struct file_operations fops = {
+ .read_iter = read_iter_f,
+ };
+
+@ has_write @
+identifier fops0.fops;
+identifier write_f;
+@@
+ struct file_operations fops = {
+ .write = write_f,
+ };
+
+@ has_write_iter @
+identifier fops0.fops;
+identifier write_iter_f;
+@@
+ struct file_operations fops = {
+ .write_iter = write_iter_f,
+ };
+
+@ has_llseek @
+identifier fops0.fops;
+identifier llseek_f;
+@@
+ struct file_operations fops = {
+ .llseek = llseek_f,
+ };
+
+@ has_no_llseek @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ .llseek = no_llseek,
+ };
+
+@ has_mmap @
+identifier fops0.fops;
+identifier mmap_f;
+@@
+ struct file_operations fops = {
+ .mmap = mmap_f,
+ };
+
+@ has_copy_file_range @
+identifier fops0.fops;
+identifier copy_file_range_f;
+@@
+ struct file_operations fops = {
+ .copy_file_range = copy_file_range_f,
+ };
+
+@ has_remap_file_range @
+identifier fops0.fops;
+identifier remap_file_range_f;
+@@
+ struct file_operations fops = {
+ .remap_file_range = remap_file_range_f,
+ };
+
+@ has_splice_read @
+identifier fops0.fops;
+identifier splice_read_f;
+@@
+ struct file_operations fops = {
+ .splice_read = splice_read_f,
+ };
+
+@ has_splice_write @
+identifier fops0.fops;
+identifier splice_write_f;
+@@
+ struct file_operations fops = {
+ .splice_write = splice_write_f,
+ };
+
+
+// file_operations that is a candidate for stream_open conversion - it does not
+// use mmap and other methods that assume @offset access to file.
+//
+// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now.
+// XXX maybe_stream.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops".
+@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ };
+
+
+// ---- conversions ----
+
+// XXX .open = nonseekable_open -> .open = stream_open
+// XXX .open = func -> openfunc -> nonseekable_open
+
+// read & write
+//
+// if both are used in the same file_operations together with an opener -
+// under those conditions we can use stream_open instead of nonseekable_open.
+@ fops_rw depends on maybe_stream @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ .write = writestream,
+ };
+
+@ report_rw depends on report @
+identifier fops_rw.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report && reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,))
+
+@ script:python depends on report && !reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+
+@ explain_rw_deadlocked depends on explain && reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (was deadlock) */
+ ...>
+ }
+
+
+@ explain_rw_nodeadlock depends on explain && !reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (no direct deadlock) */
+ ...>
+ }
+
+@ patch_rw depends on patch @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// read, but not write
+@ fops_r depends on maybe_stream && !has_write @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ };
+
+@ report_r depends on report @
+identifier fops_r.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_r.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_r depends on explain @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read only */
+ ...>
+ }
+
+@ patch_r depends on patch @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// write, but not read
+@ fops_w depends on maybe_stream && !has_read @
+identifier fops0.fops, openfunc;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .write = writestream,
+ };
+
+@ report_w depends on report @
+identifier fops_w.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_w.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_w depends on explain @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* write only */
+ ...>
+ }
+
+@ patch_w depends on patch @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// no read, no write - don't change anything
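What the patch rule does to a matching driver, shown on a hypothetical stream-like device (driver and helper names assumed for illustration):

	/* read ignores *ppos entirely - the file has stream semantics */
	static ssize_t foo_read(struct file *f, char *buf, size_t len, loff_t *ppos)
	{
		return foo_fetch_bytes(buf, len);	/* assumed helper; never touches ppos */
	}

	static int foo_open(struct inode *inode, struct file *file)
	{
		return stream_open(inode, file);	/* was: nonseekable_open(inode, file) */
	}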
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index dc0e8c5a1402..dd2b31ccca6a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -35,7 +35,7 @@ set -e
info()
{
if [ "${quiet}" != "silent_" ]; then
- printf " %-7s %s\n" ${1} ${2}
+ printf " %-7s %s\n" "${1}" "${2}"
fi
}
@@ -91,6 +91,20 @@ vmlinux_link()
fi
}
+# generate .BTF typeinfo from DWARF debuginfo
+gen_btf()
+{
+ local pahole_ver;
+
+ pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
+ if [ "${pahole_ver}" -lt "113" ]; then
+ info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
+ exit 0
+ fi
+
+ info "BTF" ${1}
+ LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+}
# Create ${2} .o file with all symbols from the ${1} object file
kallsyms()
@@ -248,6 +262,10 @@ fi
info LD vmlinux
vmlinux_link "${kallsymso}" vmlinux
+if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
+ gen_btf vmlinux
+fi
+
if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then
info SORTEX vmlinux
sortextable vmlinux
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index bcc9c6ead7fd..efdbf17f3915 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -125,7 +125,7 @@ out:
*/
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...)
+ unsigned char *h2, unsigned int h3, ...)
{
unsigned char paramdigest[SHA1_DIGEST_SIZE];
struct sdesc *sdesc;
@@ -135,13 +135,16 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key,
int ret;
va_list argp;
+ if (!chip)
+ return -ENODEV;
+
sdesc = init_sdesc(hashalg);
if (IS_ERR(sdesc)) {
pr_info("trusted_key: can't alloc %s\n", hash_alg);
return PTR_ERR(sdesc);
}
- c = h3;
+ c = !!h3;
ret = crypto_shash_init(&sdesc->shash);
if (ret < 0)
goto out;
@@ -196,6 +199,9 @@ int TSS_checkhmac1(unsigned char *buffer,
va_list argp;
int ret;
+ if (!chip)
+ return -ENODEV;
+
bufsize = LOAD32(buffer, TPM_SIZE_OFFSET);
tag = LOAD16(buffer, 0);
ordinal = command;
@@ -363,6 +369,9 @@ int trusted_tpm_send(unsigned char *cmd, size_t buflen)
{
int rc;
+ if (!chip)
+ return -ENODEV;
+
dump_tpm_buf(cmd);
rc = tpm_send(chip, cmd, buflen);
dump_tpm_buf(cmd);
@@ -429,6 +438,9 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
{
int ret;
+ if (!chip)
+ return -ENODEV;
+
INIT_BUF(tb);
store16(tb, TPM_TAG_RQU_COMMAND);
store32(tb, TPM_OIAP_SIZE);
@@ -1245,9 +1257,13 @@ static int __init init_trusted(void)
{
int ret;
+ /* encrypted_keys.ko depends on a successful load of this module even if
+ * a TPM is not used.
+ */
chip = tpm_default_chip();
if (!chip)
- return -ENOENT;
+ return 0;
+
ret = init_digests();
if (ret < 0)
goto err_put;
@@ -1269,10 +1285,12 @@ err_put:
static void __exit cleanup_trusted(void)
{
- put_device(&chip->dev);
- kfree(digests);
- trusted_shash_release();
- unregister_key_type(&key_type_trusted);
+ if (chip) {
+ put_device(&chip->dev);
+ kfree(digests);
+ trusted_shash_release();
+ unregister_key_type(&key_type_trusted);
+ }
}
late_initcall(init_trusted);
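The pattern these trusted.c hunks establish, as a sketch: the module now initializes successfully without a TPM, so every exported entry point must re-check the cached chip before use.

	int trusted_tpm_op_sketch(void)
	{
		if (!chip)		/* no TPM found during init_trusted() */
			return -ENODEV;
		/* ... safe to call tpm_send(chip, ...) from here on ... */
		return 0;
	}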
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 378c051fa177..3b9b41331c4f 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,6 +14,16 @@
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
+/*
+ * The kernel uses dmb variants on arm64 for the smp_*() barriers, with
+ * pretty much the same implementation as the mb()/wmb()/rmb() above,
+ * though for the latter the kernel uses dsb. In any case, should the
+ * above mb()/wmb()/rmb() change, make sure the smp_*() below don't.
+ */
+#define smp_mb() asm volatile("dmb ish" ::: "memory")
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
+#define smp_rmb() asm volatile("dmb ishld" ::: "memory")
+
#define smp_store_release(p, v) \
do { \
union { typeof(*p) __val; char __c[1]; } __u = \
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
index 58919868473c..0adf295dd5b6 100644
--- a/tools/arch/x86/include/asm/barrier.h
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -21,9 +21,12 @@
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#elif defined(__x86_64__)
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
+#define mb() asm volatile("mfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#endif
#if defined(__x86_64__)
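These smp_*() additions let user-space tooling order ring-buffer accesses the same way the kernel does. An illustrative consumer sketch (it mirrors, but is not, the real tools/include helpers):

	static inline __u64 ring_read_head(struct perf_event_mmap_page *pc)
	{
		__u64 head = pc->data_head;

		smp_rmb();	/* load head before reading any record data */
		return head;
	}

	static inline void ring_write_tail(struct perf_event_mmap_page *pc, __u64 tail)
	{
		smp_mb();	/* finish record reads before freeing the space */
		pc->data_tail = tail;
	}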
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index e63bce0755eb..8cafb9b31467 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -309,6 +309,48 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
return ret;
}
+static int btf_dumper_var(const struct btf_dumper *d, __u32 type_id,
+ __u8 bit_offset, const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+ int ret;
+
+ jsonw_start_object(d->jw);
+ jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+ ret = btf_dumper_do_type(d, t->type, bit_offset, data);
+ jsonw_end_object(d->jw);
+
+ return ret;
+}
+
+static int btf_dumper_datasec(const struct btf_dumper *d, __u32 type_id,
+ const void *data)
+{
+ struct btf_var_secinfo *vsi;
+ const struct btf_type *t;
+ int ret = 0, i, vlen;
+
+ t = btf__type_by_id(d->btf, type_id);
+ if (!t)
+ return -EINVAL;
+
+ vlen = BTF_INFO_VLEN(t->info);
+ vsi = (struct btf_var_secinfo *)(t + 1);
+
+ jsonw_start_object(d->jw);
+ jsonw_name(d->jw, btf__name_by_offset(d->btf, t->name_off));
+ jsonw_start_array(d->jw);
+ for (i = 0; i < vlen; i++) {
+ ret = btf_dumper_do_type(d, vsi[i].type, 0, data + vsi[i].offset);
+ if (ret)
+ break;
+ }
+ jsonw_end_array(d->jw);
+ jsonw_end_object(d->jw);
+
+ return ret;
+}
+
static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
__u8 bit_offset, const void *data)
{
@@ -341,6 +383,10 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
return btf_dumper_modifier(d, type_id, bit_offset, data);
+ case BTF_KIND_VAR:
+ return btf_dumper_var(d, type_id, bit_offset, data);
+ case BTF_KIND_DATASEC:
+ return btf_dumper_datasec(d, type_id, data);
default:
jsonw_printf(d->jw, "(unsupported-kind");
return -EINVAL;
@@ -377,6 +423,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
{
const struct btf_type *proto_type;
const struct btf_array *array;
+ const struct btf_var *var;
const struct btf_type *t;
if (!type_id) {
@@ -440,6 +487,18 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
if (pos == -1)
return -1;
break;
+ case BTF_KIND_VAR:
+ var = (struct btf_var *)(t + 1);
+ if (var->linkage == BTF_VAR_STATIC)
+ BTF_PRINT_ARG("static ");
+ BTF_PRINT_TYPE(t->type);
+ BTF_PRINT_ARG(" %s",
+ btf__name_by_offset(btf, t->name_off));
+ break;
+ case BTF_KIND_DATASEC:
+ BTF_PRINT_ARG("section (\"%s\") ",
+ btf__name_by_offset(btf, t->name_off));
+ break;
case BTF_KIND_UNKN:
default:
return -1;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e0c650d91784..e96903078991 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -153,11 +153,13 @@ static int do_dump_btf(const struct btf_dumper *d,
/* start of key-value pair */
jsonw_start_object(d->jw);
- jsonw_name(d->jw, "key");
+ if (map_info->btf_key_type_id) {
+ jsonw_name(d->jw, "key");
- ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
- if (ret)
- goto err_end_obj;
+ ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+ if (ret)
+ goto err_end_obj;
+ }
if (!map_is_per_cpu(map_info->type)) {
jsonw_name(d->jw, "value");
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d2be5a06c339..81067803189e 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -249,6 +249,9 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
if (info->nr_map_ids)
show_prog_maps(fd, info->nr_map_ids);
+ if (info->btf_id)
+ jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
if (!hash_empty(prog_table.table)) {
struct pinned_obj *obj;
@@ -319,6 +322,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
}
}
+ if (info->btf_id)
+ printf("\n\tbtf_id %d\n", info->btf_id);
+
printf("\n");
}
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7073dbe1ff27..0bb17bf88b18 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -195,6 +195,9 @@ static const char *print_imm(void *private_data,
if (insn->src_reg == BPF_PSEUDO_MAP_FD)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u]", insn->imm);
+ else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index cce0b02c0e28..ca28b6ab8db7 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -278,10 +278,29 @@
.off = 0, \
.imm = ((__u64) (IMM)) >> 32 })
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF1, \
+ .imm = IMM1 }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF2, \
+ .imm = IMM2 })
+
/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
- BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \
+ MAP_FD, 0)
+
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \
+ MAP_FD, VALUE_OFF)
/* Relative call */
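A sketch of the new pseudo-instruction in a raw program (map_fd assumed to be a valid map file descriptor): load a pointer to byte 4 of the map's value, then dereference it.

	struct bpf_insn prog[] = {
		BPF_LD_MAP_VALUE(BPF_REG_1, map_fd, 4),		/* r1 = &map[0].value + 4 */
		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),	/* r0 = *(u32 *)r1 */
		BPF_EXIT_INSN(),
	};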
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 837024512baf..2e96d0b4bf65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -105,6 +105,7 @@ enum bpf_cmd {
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
};
enum bpf_map_type {
@@ -255,8 +256,19 @@ enum bpf_attach_type {
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
-/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -283,7 +295,7 @@ enum bpf_attach_type {
#define BPF_OBJ_NAME_LEN 16U
-/* Flags for accessing BPF object */
+/* Flags for accessing BPF object from syscall side. */
#define BPF_F_RDONLY (1U << 3)
#define BPF_F_WRONLY (1U << 4)
@@ -293,6 +305,10 @@ enum bpf_attach_type {
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -396,6 +412,13 @@ union bpf_attr {
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -1500,6 +1523,10 @@ union bpf_attr {
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper may change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2641,10 +2668,16 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
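How the new L2 length flag composes with the existing ones (sketch; ETH_HLEN is the usual 14-byte MAC header length):

	__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_GRE |
		      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN);	/* len lands in bits 56-63 */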
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 7b7475ef2f17..9310652ca4f9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -39,11 +39,11 @@ struct btf_type {
* struct, union and fwd
*/
__u32 info;
- /* "size" is used by INT, ENUM, STRUCT and UNION.
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- * FUNC and FUNC_PROTO.
+ * FUNC, FUNC_PROTO and VAR.
* "type" is a type_id referring to another type.
*/
union {
@@ -70,8 +70,10 @@ struct btf_type {
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
-#define BTF_KIND_MAX 13
-#define NR_BTF_KINDS 14
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
@@ -138,4 +140,26 @@ struct btf_param {
__u32 type;
};
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains, along with their
+ * in-section offsets and sizes.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
#endif /* _UAPI__LINUX_BTF_H__ */
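Parsing the new kinds follows the existing trailing-record convention. A sketch of walking a DATASEC (btf__type_by_id() and BTF_INFO_VLEN() as above; datasec_id assumed):

	const struct btf_type *t = btf__type_by_id(btf, datasec_id);
	const struct btf_var_secinfo *vsi = (const void *)(t + 1);
	__u16 i, vlen = BTF_INFO_VLEN(t->info);

	for (i = 0; i < vlen; i++, vsi++)
		printf("var type=%u off=%u size=%u\n",
		       vsi->type, vsi->offset, vsi->size);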
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index 4db74758c674..7d9e182a1f51 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,3 +1,4 @@
libbpf_version.h
+libbpf.pc
FEATURE-DUMP.libbpf
test_libbpf
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8e7c56e9590f..c6c06bc6683c 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
BPF_VERSION = 0
BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 2
+BPF_EXTRAVERSION = 3
MAKEFLAGS += --no-print-directory
@@ -90,6 +90,7 @@ LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION)
LIB_FILE = libbpf.a libbpf.so*
+PC_FILE = libbpf.pc
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
@@ -134,13 +135,14 @@ VERSION_SCRIPT := libbpf.map
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE))
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
-CMD_TARGETS = $(LIB_TARGET)
+CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
CXX_TEST_TARGET = $(OUTPUT)test_libbpf
@@ -187,6 +189,12 @@ $(OUTPUT)libbpf.a: $(BPF_IN)
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
$(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
+$(OUTPUT)libbpf.pc:
+ $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \
+ -e "s|@LIBDIR@|$(libdir_SQ)|" \
+ -e "s|@VERSION@|$(LIBBPF_VERSION)|" \
+ < libbpf.pc.template > $@
+
check: check_abi
check_abi: $(OUTPUT)libbpf.so
@@ -224,7 +232,11 @@ install_headers:
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644);
-install: install_lib
+install_pkgconfig: $(PC_FILE)
+ $(call QUIET_INSTALL, $(PC_FILE)) \
+ $(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
+
+install: install_lib install_pkgconfig
### Cleaning rules
@@ -234,7 +246,7 @@ config-clean:
clean:
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
- *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS
+ *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9cd015574e83..955191c64b64 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -79,7 +79,6 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
- __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
@@ -89,8 +88,9 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
attr.value_size = create_attr->value_size;
attr.max_entries = create_attr->max_entries;
attr.map_flags = create_attr->map_flags;
- memcpy(attr.map_name, create_attr->name,
- min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (create_attr->name)
+ memcpy(attr.map_name, create_attr->name,
+ min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
attr.btf_key_type_id = create_attr->btf_key_type_id;
@@ -155,7 +155,6 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags, int node)
{
- __u32 name_len = name ? strlen(name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
@@ -166,7 +165,9 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
attr.inner_map_fd = inner_map_fd;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
- memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (name)
+ memcpy(attr.map_name, name,
+ min(strlen(name), BPF_OBJ_NAME_LEN - 1));
if (node >= 0) {
attr.map_flags |= BPF_F_NUMA_NODE;
@@ -216,18 +217,15 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
void *finfo = NULL, *linfo = NULL;
union bpf_attr attr;
__u32 log_level;
- __u32 name_len;
int fd;
if (!load_attr || !log_buf != !log_buf_sz)
return -EINVAL;
log_level = load_attr->log_level;
- if (log_level > 2 || (log_level && !log_buf))
+ if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
return -EINVAL;
- name_len = load_attr->name ? strlen(load_attr->name) : 0;
-
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
@@ -253,8 +251,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
attr.line_info_rec_size = load_attr->line_info_rec_size;
attr.line_info_cnt = load_attr->line_info_cnt;
attr.line_info = ptr_to_u64(load_attr->line_info);
- memcpy(attr.prog_name, load_attr->name,
- min(name_len, BPF_OBJ_NAME_LEN - 1));
+ if (load_attr->name)
+ memcpy(attr.prog_name, load_attr->name,
+ min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf_prog_load(&attr, sizeof(attr));
if (fd >= 0)
@@ -429,6 +428,16 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key)
return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}
+int bpf_map_freeze(int fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+
+ return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+}
+
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
@@ -545,10 +554,15 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
attr.test.data_out = ptr_to_u64(test_attr->data_out);
attr.test.data_size_in = test_attr->data_size_in;
attr.test.data_size_out = test_attr->data_size_out;
+ attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+ attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+ attr.test.ctx_size_in = test_attr->ctx_size_in;
+ attr.test.ctx_size_out = test_attr->ctx_size_out;
attr.test.repeat = test_attr->repeat;
ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
test_attr->data_size_out = attr.test.data_size_out;
+ test_attr->ctx_size_out = attr.test.ctx_size_out;
test_attr->retval = attr.test.retval;
test_attr->duration = attr.test.duration;
return ret;
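Expected use of the new bpf_map_freeze() (sketch; value_buf assumed): populate the map from user space, then freeze it so later syscall-side writes are rejected while program-side access is unchanged.

	int fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), 64, 1, 0);
	int key = 0;

	if (fd >= 0 &&
	    !bpf_map_update_elem(fd, &key, value_buf, BPF_ANY) &&
	    !bpf_map_freeze(fd)) {
		/* further bpf_map_update_elem() calls on fd now fail (frozen map) */
	}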
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6ffdd79bea89..bc30783d1403 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -92,7 +92,7 @@ struct bpf_load_program_attr {
#define MAPS_RELAX_COMPAT 0x01
/* Recommend log buffer size */
-#define BPF_LOG_BUF_SIZE (256 * 1024)
+#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */
LIBBPF_API int
bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz);
@@ -117,6 +117,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
void *value);
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
@@ -135,6 +136,11 @@ struct bpf_prog_test_run_attr {
* out: length of data_out */
__u32 retval; /* out: return code of the BPF program */
__u32 duration; /* out: average per repetition in ns */
+ const void *ctx_in; /* optional */
+ __u32 ctx_size_in;
+ void *ctx_out; /* optional */
+ __u32 ctx_size_out; /* in: max length of ctx_out
* out: length of ctx_out */
};
LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index cf119c9b6f27..701e7e28ada3 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -24,6 +24,8 @@
((k) == BTF_KIND_CONST) || \
((k) == BTF_KIND_RESTRICT))
+#define IS_VAR(k) ((k) == BTF_KIND_VAR)
+
static struct btf_type btf_void;
struct btf {
@@ -212,6 +214,10 @@ static int btf_type_size(struct btf_type *t)
return base_size + vlen * sizeof(struct btf_member);
case BTF_KIND_FUNC_PROTO:
return base_size + vlen * sizeof(struct btf_param);
+ case BTF_KIND_VAR:
+ return base_size + sizeof(struct btf_var);
+ case BTF_KIND_DATASEC:
+ return base_size + vlen * sizeof(struct btf_var_secinfo);
default:
pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info));
return -EINVAL;
@@ -283,6 +289,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
size = t->size;
goto done;
case BTF_KIND_PTR:
@@ -292,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
+ case BTF_KIND_VAR:
type_id = t->type;
break;
case BTF_KIND_ARRAY:
@@ -326,7 +334,8 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
t = btf__type_by_id(btf, type_id);
while (depth < MAX_RESOLVE_DEPTH &&
!btf_type_is_void_or_null(t) &&
- IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+ (IS_MODIFIER(BTF_INFO_KIND(t->info)) ||
+ IS_VAR(BTF_INFO_KIND(t->info)))) {
type_id = t->type;
t = btf__type_by_id(btf, type_id);
depth++;
@@ -408,6 +417,92 @@ done:
return btf;
}
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+ const struct btf_var_secinfo *a = _a;
+ const struct btf_var_secinfo *b = _b;
+
+ return a->offset - b->offset;
+}
+
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+ struct btf_type *t)
+{
+ __u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info);
+ const char *name = btf__name_by_offset(btf, t->name_off);
+ const struct btf_type *t_var;
+ struct btf_var_secinfo *vsi;
+ struct btf_var *var;
+ int ret;
+
+ if (!name) {
+ pr_debug("No name found in string section for DATASEC kind.\n");
+ return -ENOENT;
+ }
+
+ ret = bpf_object__section_size(obj, name, &size);
+ if (ret || !size || (t->size && t->size != size)) {
+ pr_debug("Invalid size for section %s: %u bytes\n", name, size);
+ return -ENOENT;
+ }
+
+ t->size = size;
+
+ for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1);
+ i < vars; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ var = (struct btf_var *)(t_var + 1);
+
+ if (BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) {
+ pr_debug("Non-VAR type seen in section %s\n", name);
+ return -EINVAL;
+ }
+
+ if (var->linkage == BTF_VAR_STATIC)
+ continue;
+
+ name = btf__name_by_offset(btf, t_var->name_off);
+ if (!name) {
+ pr_debug("No name found in string section for VAR kind\n");
+ return -ENOENT;
+ }
+
+ ret = bpf_object__variable_offset(obj, name, &off);
+ if (ret) {
+ pr_debug("No offset found in symbol table for VAR %s\n", name);
+ return -ENOENT;
+ }
+
+ vsi->offset = off;
+ }
+
+ qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+ return 0;
+}
+
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+ int err = 0;
+ __u32 i;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ struct btf_type *t = btf->types[i];
+
+ /* Loader needs to fix up some of the things compiler
+ * couldn't get its hands on while emitting BTF. These
+ * are section sizes and global variable offsets. We use
+ * the info from the ELF itself for this purpose.
+ */
+ if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) {
+ err = btf_fixup_datasec(obj, btf, t);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
+
int btf__load(struct btf *btf)
{
__u32 log_buf_size = BPF_LOG_BUF_SIZE;
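The loader-side ordering the new API implies, as a sketch: parse the raw BTF, let btf__finalize_data() patch DATASEC sizes and variable offsets from the ELF, then load it into the kernel (prog_btf_fd is an assumed consumer variable):

	struct btf *btf = btf__new(data, size);

	if (!IS_ERR(btf) &&
	    !btf__finalize_data(obj, btf) &&	/* fix up .data/.bss/.rodata info */
	    !btf__load(btf))
		prog_btf_fd = btf__fd(btf);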
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 28a1e1e59861..c7b399e81fce 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -21,6 +21,8 @@ struct btf;
struct btf_ext;
struct btf_type;
+struct bpf_object;
+
/*
* The .BTF.ext ELF section layout defined as
* struct btf_ext_header
@@ -57,6 +59,7 @@ struct btf_ext_header {
LIBBPF_API void btf__free(struct btf *btf);
LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 11c25d9ea431..67484cf32b2e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7,6 +7,7 @@
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
* Copyright (C) 2015 Huawei Inc.
* Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
*/
#ifndef _GNU_SOURCE
@@ -52,6 +53,11 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+/* vsprintf() in __base_pr() uses a nonliteral format string. It may break
+ * compilation if the user enables the corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
#define __printf(a, b) __attribute__((format(printf, a, b)))
static int __base_pr(enum libbpf_print_level level, const char *format,
@@ -144,6 +150,7 @@ struct bpf_program {
enum {
RELO_LD64,
RELO_CALL,
+ RELO_DATA,
} type;
int insn_idx;
union {
@@ -152,6 +159,7 @@ struct bpf_program {
};
} *reloc_desc;
int nr_reloc;
+ int log_level;
struct {
int nr;
@@ -176,6 +184,19 @@ struct bpf_program {
__u32 line_info_cnt;
};
+enum libbpf_map_type {
+ LIBBPF_MAP_UNSPEC,
+ LIBBPF_MAP_DATA,
+ LIBBPF_MAP_BSS,
+ LIBBPF_MAP_RODATA,
+};
+
+static const char * const libbpf_type_to_btf_name[] = {
+ [LIBBPF_MAP_DATA] = ".data",
+ [LIBBPF_MAP_BSS] = ".bss",
+ [LIBBPF_MAP_RODATA] = ".rodata",
+};
+
struct bpf_map {
int fd;
char *name;
@@ -187,11 +208,18 @@ struct bpf_map {
__u32 btf_value_type_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
+ enum libbpf_map_type libbpf_type;
+};
+
+struct bpf_secdata {
+ void *rodata;
+ void *data;
};
static LIST_HEAD(bpf_objects_list);
struct bpf_object {
+ char name[BPF_OBJ_NAME_LEN];
char license[64];
__u32 kern_version;
@@ -199,6 +227,7 @@ struct bpf_object {
size_t nr_programs;
struct bpf_map *maps;
size_t nr_maps;
+ struct bpf_secdata sections;
bool loaded;
bool has_pseudo_calls;
@@ -214,6 +243,9 @@ struct bpf_object {
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ Elf_Data *data;
+ Elf_Data *rodata;
+ Elf_Data *bss;
size_t strtabidx;
struct {
GElf_Shdr shdr;
@@ -222,6 +254,9 @@ struct bpf_object {
int nr_reloc;
int maps_shndx;
int text_shndx;
+ int data_shndx;
+ int rodata_shndx;
+ int bss_shndx;
} efile;
/*
* All loaded bpf_object is linked in a list, which is
@@ -443,6 +478,7 @@ static struct bpf_object *bpf_object__new(const char *path,
size_t obj_buf_sz)
{
struct bpf_object *obj;
+ char *end;
obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
if (!obj) {
@@ -451,8 +487,14 @@ static struct bpf_object *bpf_object__new(const char *path,
}
strcpy(obj->path, path);
- obj->efile.fd = -1;
+ /* Using basename() GNU version which doesn't modify arg. */
+ strncpy(obj->name, basename((void *)path),
+ sizeof(obj->name) - 1);
+ end = strchr(obj->name, '.');
+ if (end)
+ *end = 0;
+ obj->efile.fd = -1;
/*
* Caller of this function should also call
* bpf_object__elf_finish() after data collection to return
@@ -462,6 +504,9 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.obj_buf = obj_buf;
obj->efile.obj_buf_sz = obj_buf_sz;
obj->efile.maps_shndx = -1;
+ obj->efile.data_shndx = -1;
+ obj->efile.rodata_shndx = -1;
+ obj->efile.bss_shndx = -1;
obj->loaded = false;
@@ -480,6 +525,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.elf = NULL;
}
obj->efile.symbols = NULL;
+ obj->efile.data = NULL;
+ obj->efile.rodata = NULL;
+ obj->efile.bss = NULL;
zfree(&obj->efile.reloc);
obj->efile.nr_reloc = 0;
@@ -621,27 +669,182 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
return false;
}
+static int bpf_object_search_section_size(const struct bpf_object *obj,
+ const char *name, size_t *d_size)
+{
+ const GElf_Ehdr *ep = &obj->efile.ehdr;
+ Elf *elf = obj->efile.elf;
+ Elf_Scn *scn = NULL;
+ int idx = 0;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ const char *sec_name;
+ Elf_Data *data;
+ GElf_Shdr sh;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warning("failed to get section(%d) header from %s\n",
+ idx, obj->path);
+ return -EIO;
+ }
+
+ sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+ if (!sec_name) {
+ pr_warning("failed to get section(%d) name from %s\n",
+ idx, obj->path);
+ return -EIO;
+ }
+
+ if (strcmp(name, sec_name))
+ continue;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warning("failed to get section(%d) data from %s(%s)\n",
+ idx, name, obj->path);
+ return -EIO;
+ }
+
+ *d_size = data->d_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size)
+{
+ int ret = -ENOENT;
+ size_t d_size;
+
+ *size = 0;
+ if (!name) {
+ return -EINVAL;
+ } else if (!strcmp(name, ".data")) {
+ if (obj->efile.data)
+ *size = obj->efile.data->d_size;
+ } else if (!strcmp(name, ".bss")) {
+ if (obj->efile.bss)
+ *size = obj->efile.bss->d_size;
+ } else if (!strcmp(name, ".rodata")) {
+ if (obj->efile.rodata)
+ *size = obj->efile.rodata->d_size;
+ } else {
+ ret = bpf_object_search_section_size(obj, name, &d_size);
+ if (!ret)
+ *size = d_size;
+ }
+
+ return *size ? 0 : ret;
+}
+
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ const char *sname;
+ size_t si;
+
+ if (!name || !off)
+ return -EINVAL;
+
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, si, &sym))
+ continue;
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+ GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+ continue;
+
+ sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+ if (!sname) {
+ pr_warning("failed to get sym name string for var %s\n",
+ name);
+ return -EIO;
+ }
+ if (strcmp(name, sname) == 0) {
+ *off = sym.st_value;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static bool bpf_object__has_maps(const struct bpf_object *obj)
+{
+ return obj->efile.maps_shndx >= 0 ||
+ obj->efile.data_shndx >= 0 ||
+ obj->efile.rodata_shndx >= 0 ||
+ obj->efile.bss_shndx >= 0;
+}
+
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
+ enum libbpf_map_type type, Elf_Data *data,
+ void **data_buff)
+{
+ struct bpf_map_def *def = &map->def;
+ char map_name[BPF_OBJ_NAME_LEN];
+
+ map->libbpf_type = type;
+ map->offset = ~(typeof(map->offset))0;
+ snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
+ libbpf_type_to_btf_name[type]);
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+
+ def->type = BPF_MAP_TYPE_ARRAY;
+ def->key_size = sizeof(int);
+ def->value_size = data->d_size;
+ def->max_entries = 1;
+ def->map_flags = type == LIBBPF_MAP_RODATA ?
+ BPF_F_RDONLY_PROG : 0;
+ if (data_buff) {
+ *data_buff = malloc(data->d_size);
+ if (!*data_buff) {
+ zfree(&map->name);
+ pr_warning("failed to alloc map content buffer\n");
+ return -ENOMEM;
+ }
+ memcpy(*data_buff, data->d_buf, data->d_size);
+ }
+
+ pr_debug("map %ld is \"%s\"\n", map - obj->maps, map->name);
+ return 0;
+}
+
static int
bpf_object__init_maps(struct bpf_object *obj, int flags)
{
+ int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
bool strict = !(flags & MAPS_RELAX_COMPAT);
- int i, map_idx, map_def_sz, nr_maps = 0;
- Elf_Scn *scn;
- Elf_Data *data = NULL;
Elf_Data *symbols = obj->efile.symbols;
+ Elf_Data *data = NULL;
+ int ret = 0;
- if (obj->efile.maps_shndx < 0)
- return -EINVAL;
if (!symbols)
return -EINVAL;
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
- scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
- if (scn)
- data = elf_getdata(scn, NULL);
- if (!scn || !data) {
- pr_warning("failed to get Elf_Data from map section %d\n",
- obj->efile.maps_shndx);
- return -EINVAL;
+ if (obj->efile.maps_shndx >= 0) {
+ Elf_Scn *scn = elf_getscn(obj->efile.elf,
+ obj->efile.maps_shndx);
+
+ if (scn)
+ data = elf_getdata(scn, NULL);
+ if (!scn || !data) {
+ pr_warning("failed to get Elf_Data from map section %d\n",
+ obj->efile.maps_shndx);
+ return -EINVAL;
+ }
}
/*
@@ -651,7 +854,13 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
*
* TODO: Detect array of map and report error.
*/
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ if (obj->efile.data_shndx >= 0)
+ nr_maps_glob++;
+ if (obj->efile.rodata_shndx >= 0)
+ nr_maps_glob++;
+ if (obj->efile.bss_shndx >= 0)
+ nr_maps_glob++;
+ for (i = 0; data && i < nr_syms; i++) {
GElf_Sym sym;
if (!gelf_getsym(symbols, i, &sym))
@@ -664,19 +873,21 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
/* Alloc obj->maps and fill nr_maps. */
pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
nr_maps, data ? data->d_size : 0);
-
- if (!nr_maps)
+ if (!nr_maps && !nr_maps_glob)
return 0;
/* Assume equally sized map definitions */
- map_def_sz = data->d_size / nr_maps;
- if (!data->d_size || (data->d_size % nr_maps) != 0) {
- pr_warning("unable to determine map definition size "
- "section %s, %d maps in %zd bytes\n",
- obj->path, nr_maps, data->d_size);
- return -EINVAL;
+ if (data) {
+ map_def_sz = data->d_size / nr_maps;
+ if (!data->d_size || (data->d_size % nr_maps) != 0) {
+ pr_warning("unable to determine map definition size "
+ "section %s, %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+ return -EINVAL;
+ }
}
+ nr_maps += nr_maps_glob;
obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
if (!obj->maps) {
pr_warning("alloc maps for object failed\n");
@@ -697,7 +908,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
/*
* Fill obj->maps using data in "maps" section.
*/
- for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
GElf_Sym sym;
const char *map_name;
struct bpf_map_def *def;
@@ -710,6 +921,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
map_name = elf_strptr(obj->efile.elf,
obj->efile.strtabidx,
sym.st_name);
+
+ obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
obj->maps[map_idx].offset = sym.st_value;
if (sym.st_value + map_def_sz > data->d_size) {
pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
@@ -758,8 +971,27 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
map_idx++;
}
- qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
- return 0;
+ /*
+ * Populate rest of obj->maps with libbpf internal maps.
+ */
+ if (obj->efile.data_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_DATA,
+ obj->efile.data,
+ &obj->sections.data);
+ if (!ret && obj->efile.rodata_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_RODATA,
+ obj->efile.rodata,
+ &obj->sections.rodata);
+ if (!ret && obj->efile.bss_shndx >= 0)
+ ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
+ LIBBPF_MAP_BSS,
+ obj->efile.bss, NULL);
+ if (!ret)
+ qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
+ compare_bpf_map);
+ return ret;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -785,6 +1017,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Data *btf_ext_data = NULL;
+ Elf_Data *btf_data = NULL;
Elf_Scn *scn = NULL;
int idx = 0, err = 0;
@@ -828,32 +1061,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
(int)sh.sh_link, (unsigned long)sh.sh_flags,
(int)sh.sh_type);
- if (strcmp(name, "license") == 0)
+ if (strcmp(name, "license") == 0) {
err = bpf_object__init_license(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "version") == 0)
+ } else if (strcmp(name, "version") == 0) {
err = bpf_object__init_kversion(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "maps") == 0)
+ } else if (strcmp(name, "maps") == 0) {
obj->efile.maps_shndx = idx;
- else if (strcmp(name, BTF_ELF_SEC) == 0) {
- obj->btf = btf__new(data->d_buf, data->d_size);
- if (IS_ERR(obj->btf)) {
- pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
- BTF_ELF_SEC, PTR_ERR(obj->btf));
- obj->btf = NULL;
- continue;
- }
- err = btf__load(obj->btf);
- if (err) {
- pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
- BTF_ELF_SEC, err);
- btf__free(obj->btf);
- obj->btf = NULL;
- err = 0;
- }
+ } else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ btf_data = data;
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
@@ -865,20 +1084,28 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
obj->efile.symbols = data;
obj->efile.strtabidx = sh.sh_link;
}
- } else if ((sh.sh_type == SHT_PROGBITS) &&
- (sh.sh_flags & SHF_EXECINSTR) &&
- (data->d_size > 0)) {
- if (strcmp(name, ".text") == 0)
- obj->efile.text_shndx = idx;
- err = bpf_object__add_program(obj, data->d_buf,
- data->d_size, name, idx);
- if (err) {
- char errmsg[STRERR_BUFSIZE];
- char *cp = libbpf_strerror_r(-err, errmsg,
- sizeof(errmsg));
-
- pr_warning("failed to alloc program %s (%s): %s",
- name, obj->path, cp);
+ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+ if (sh.sh_flags & SHF_EXECINSTR) {
+ if (strcmp(name, ".text") == 0)
+ obj->efile.text_shndx = idx;
+ err = bpf_object__add_program(obj, data->d_buf,
+ data->d_size, name, idx);
+ if (err) {
+ char errmsg[STRERR_BUFSIZE];
+ char *cp = libbpf_strerror_r(-err, errmsg,
+ sizeof(errmsg));
+
+ pr_warning("failed to alloc program %s (%s): %s",
+ name, obj->path, cp);
+ }
+ } else if (strcmp(name, ".data") == 0) {
+ obj->efile.data = data;
+ obj->efile.data_shndx = idx;
+ } else if (strcmp(name, ".rodata") == 0) {
+ obj->efile.rodata = data;
+ obj->efile.rodata_shndx = idx;
+ } else {
+ pr_debug("skip section(%d) %s\n", idx, name);
}
} else if (sh.sh_type == SHT_REL) {
void *reloc = obj->efile.reloc;
@@ -906,6 +1133,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
obj->efile.reloc[n].shdr = sh;
obj->efile.reloc[n].data = data;
}
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+ obj->efile.bss = data;
+ obj->efile.bss_shndx = idx;
} else {
pr_debug("skip section(%d) %s\n", idx, name);
}
@@ -917,6 +1147,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
pr_warning("Corrupted ELF file: index of strtab invalid\n");
return LIBBPF_ERRNO__FORMAT;
}
+ if (btf_data) {
+ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+ if (IS_ERR(obj->btf)) {
+ pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+ BTF_ELF_SEC, PTR_ERR(obj->btf));
+ obj->btf = NULL;
+ } else {
+ err = btf__finalize_data(obj, obj->btf);
+ if (!err)
+ err = btf__load(obj->btf);
+ if (err) {
+ pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
+ BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ err = 0;
+ }
+ }
+ }
if (btf_ext_data) {
if (!obj->btf) {
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
@@ -932,7 +1181,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
}
}
}
- if (obj->efile.maps_shndx >= 0) {
+ if (bpf_object__has_maps(obj)) {
err = bpf_object__init_maps(obj, flags);
if (err)
goto out;
@@ -968,13 +1217,46 @@ bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
return NULL;
}
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+ int shndx)
+{
+ return shndx == obj->efile.data_shndx ||
+ shndx == obj->efile.bss_shndx ||
+ shndx == obj->efile.rodata_shndx;
+}
+
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+ int shndx)
+{
+ return shndx == obj->efile.maps_shndx;
+}
+
+static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
+ int shndx)
+{
+ return shndx == obj->efile.text_shndx ||
+ bpf_object__shndx_is_maps(obj, shndx) ||
+ bpf_object__shndx_is_data(obj, shndx);
+}
+
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+ if (shndx == obj->efile.data_shndx)
+ return LIBBPF_MAP_DATA;
+ else if (shndx == obj->efile.bss_shndx)
+ return LIBBPF_MAP_BSS;
+ else if (shndx == obj->efile.rodata_shndx)
+ return LIBBPF_MAP_RODATA;
+ else
+ return LIBBPF_MAP_UNSPEC;
+}
+
static int
bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
Elf_Data *data, struct bpf_object *obj)
{
Elf_Data *symbols = obj->efile.symbols;
- int text_shndx = obj->efile.text_shndx;
- int maps_shndx = obj->efile.maps_shndx;
struct bpf_map *maps = obj->maps;
size_t nr_maps = obj->nr_maps;
int i, nrels;
@@ -994,7 +1276,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
GElf_Sym sym;
GElf_Rel rel;
unsigned int insn_idx;
+ unsigned int shdr_idx;
struct bpf_insn *insns = prog->insns;
+ enum libbpf_map_type type;
+ const char *name;
size_t map_idx;
if (!gelf_getrel(data, i, &rel)) {
@@ -1009,13 +1294,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
GELF_R_SYM(rel.r_info));
return -LIBBPF_ERRNO__FORMAT;
}
- pr_debug("relo for %lld value %lld name %d\n",
+
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name) ? : "<?>";
+
+ pr_debug("relo for %lld value %lld name %d (\'%s\')\n",
(long long) (rel.r_info >> 32),
- (long long) sym.st_value, sym.st_name);
+ (long long) sym.st_value, sym.st_name, name);
- if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
- pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
- prog->section_name, sym.st_shndx);
+ shdr_idx = sym.st_shndx;
+ if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
+ pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
+ prog->section_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
@@ -1040,24 +1330,39 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
return -LIBBPF_ERRNO__RELOC;
}
- /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
- for (map_idx = 0; map_idx < nr_maps; map_idx++) {
- if (maps[map_idx].offset == sym.st_value) {
- pr_debug("relocation: find map %zd (%s) for insn %u\n",
- map_idx, maps[map_idx].name, insn_idx);
- break;
+ if (bpf_object__shndx_is_maps(obj, shdr_idx) ||
+ bpf_object__shndx_is_data(obj, shdr_idx)) {
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+ if (type != LIBBPF_MAP_UNSPEC &&
+ GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
+ pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
+ name, insn_idx, insns[insn_idx].code);
+ return -LIBBPF_ERRNO__RELOC;
}
- }
- if (map_idx >= nr_maps) {
- pr_warning("bpf relocation: map_idx %d large than %d\n",
- (int)map_idx, (int)nr_maps - 1);
- return -LIBBPF_ERRNO__RELOC;
- }
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ if (maps[map_idx].libbpf_type != type)
+ continue;
+ if (type != LIBBPF_MAP_UNSPEC ||
+ maps[map_idx].offset == sym.st_value) {
+ pr_debug("relocation: find map %zd (%s) for insn %u\n",
+ map_idx, maps[map_idx].name, insn_idx);
+ break;
+ }
+ }
+
+ if (map_idx >= nr_maps) {
+ pr_warning("bpf relocation: map_idx %d large than %d\n",
+ (int)map_idx, (int)nr_maps - 1);
+ return -LIBBPF_ERRNO__RELOC;
+ }
- prog->reloc_desc[i].type = RELO_LD64;
- prog->reloc_desc[i].insn_idx = insn_idx;
- prog->reloc_desc[i].map_idx = map_idx;
+ prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ?
+ RELO_DATA : RELO_LD64;
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].map_idx = map_idx;
+ }
}
return 0;
}
@@ -1065,18 +1370,27 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
{
struct bpf_map_def *def = &map->def;
- __u32 key_type_id, value_type_id;
+ __u32 key_type_id = 0, value_type_id = 0;
int ret;
- ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
- def->value_size, &key_type_id,
- &value_type_id);
- if (ret)
+ if (!bpf_map__is_internal(map)) {
+ ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+ def->value_size, &key_type_id,
+ &value_type_id);
+ } else {
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ ret = btf__find_by_name(btf,
+ libbpf_type_to_btf_name[map->libbpf_type]);
+ }
+ if (ret < 0)
return ret;
map->btf_key_type_id = key_type_id;
- map->btf_value_type_id = value_type_id;
-
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
+ ret : value_type_id;
return 0;
}
@@ -1188,6 +1502,34 @@ bpf_object__probe_caps(struct bpf_object *obj)
}
static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err, zero = 0;
+ __u8 *data;
+
+ /* Nothing to do here since kernel already zero-initializes .bss map. */
+ if (map->libbpf_type == LIBBPF_MAP_BSS)
+ return 0;
+
+ data = map->libbpf_type == LIBBPF_MAP_DATA ?
+ obj->sections.data : obj->sections.rodata;
+
+ err = bpf_map_update_elem(map->fd, &zero, data, 0);
+ /* Freeze .rodata map as read-only from syscall side. */
+ if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+ err = bpf_map_freeze(map->fd);
+ if (err) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warning("Error freezing map(%s) as read-only: %s\n",
+ map->name, cp);
+ err = 0;
+ }
+ }
+ return err;
+}
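+
+/*
+ * Userspace-visible effect of the freeze (sketch, with an assumed map
+ * fd): lookups keep working, further syscall-side writes do not:
+ *
+ *	int zero = 0;
+ *	char val[64];
+ *
+ *	bpf_map_lookup_elem(fd, &zero, val);	// ok, returns contents
+ *	bpf_map_update_elem(fd, &zero, val, 0);	// fails, errno == EPERM
+ *
+ * which is what the global_data selftest below checks for .rodata.
+ */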
+
+static int
bpf_object__create_maps(struct bpf_object *obj)
{
struct bpf_create_map_attr create_attr = {};
@@ -1244,6 +1586,7 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
+err_out:
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name, cp);
@@ -1251,6 +1594,15 @@ bpf_object__create_maps(struct bpf_object *obj)
zclose(obj->maps[j].fd);
return err;
}
+
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(*pfd);
+ goto err_out;
+ }
+ }
+
pr_debug("create map %s: fd=%d\n", map->name, *pfd);
}
@@ -1405,21 +1757,29 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
return 0;
for (i = 0; i < prog->nr_reloc; i++) {
- if (prog->reloc_desc[i].type == RELO_LD64) {
+ if (prog->reloc_desc[i].type == RELO_LD64 ||
+ prog->reloc_desc[i].type == RELO_DATA) {
+ bool relo_data = prog->reloc_desc[i].type == RELO_DATA;
struct bpf_insn *insns = prog->insns;
int insn_idx, map_idx;
insn_idx = prog->reloc_desc[i].insn_idx;
map_idx = prog->reloc_desc[i].map_idx;
- if (insn_idx >= (int)prog->insns_cnt) {
+ if (insn_idx + 1 >= (int)prog->insns_cnt) {
pr_warning("relocation out of range: '%s'\n",
prog->section_name);
return -LIBBPF_ERRNO__RELOC;
}
- insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+
+ if (!relo_data) {
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ } else {
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insns[insn_idx + 1].imm = insns[insn_idx].imm;
+ }
insns[insn_idx].imm = obj->maps[map_idx].fd;
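+ /*
+ * Net effect (illustration): the ld_imm64 pair now reads
+ *	insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ *	insn[0].imm     = map fd;
+ *	insn[1].imm     = original imm, i.e. the offset into the value;
+ * and the kernel resolves it to &map_value + offset at load time.
+ */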
- } else {
+ } else if (prog->reloc_desc[i].type == RELO_CALL) {
err = bpf_program__reloc_text(prog, obj,
&prog->reloc_desc[i]);
if (err)
@@ -1494,6 +1854,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
+ int log_buf_size = BPF_LOG_BUF_SIZE;
char *log_buf;
int ret;
@@ -1514,21 +1875,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.line_info = prog->line_info;
load_attr.line_info_rec_size = prog->line_info_rec_size;
load_attr.line_info_cnt = prog->line_info_cnt;
+ load_attr.log_level = prog->log_level;
if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
- log_buf = malloc(BPF_LOG_BUF_SIZE);
+retry_load:
+ log_buf = malloc(log_buf_size);
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
- ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
if (ret >= 0) {
+ if (load_attr.log_level)
+ pr_debug("verifier log:\n%s", log_buf);
*pfd = ret;
ret = 0;
goto out;
}
+ if (errno == ENOSPC) {
+ log_buf_size <<= 1;
+ free(log_buf);
+ goto retry_load;
+ }
ret = -LIBBPF_ERRNO__LOAD;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
@@ -2303,6 +2673,9 @@ void bpf_object__close(struct bpf_object *obj)
obj->maps[i].priv = NULL;
obj->maps[i].clear_priv = NULL;
}
+
+ zfree(&obj->sections.rodata);
+ zfree(&obj->sections.data);
zfree(&obj->maps);
obj->nr_maps = 0;
@@ -2780,6 +3153,11 @@ bool bpf_map__is_offload_neutral(struct bpf_map *map)
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
+bool bpf_map__is_internal(struct bpf_map *map)
+{
+ return map->libbpf_type != LIBBPF_MAP_UNSPEC;
+}
+
void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
map->map_ifindex = ifindex;
@@ -2938,6 +3316,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
bpf_program__set_expected_attach_type(prog,
expected_attach_type);
+ prog->log_level = attr->log_level;
if (!first_prog)
first_prog = prog;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c70785cc8ef5..c5ff00515ce7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -75,6 +75,10 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
size_t obj_buf_sz,
const char *name);
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off);
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
const char *path);
@@ -301,6 +305,7 @@ LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map);
LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
@@ -314,6 +319,7 @@ struct bpf_prog_load_attr {
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
int ifindex;
+ int log_level;
};
LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f3ce50500cf2..673001787cba 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -157,3 +157,10 @@ LIBBPF_0.0.2 {
bpf_program__bpil_addr_to_offs;
bpf_program__bpil_offs_to_addr;
} LIBBPF_0.0.1;
+
+LIBBPF_0.0.3 {
+ global:
+ bpf_map__is_internal;
+ bpf_map_freeze;
+ btf__finalize_data;
+} LIBBPF_0.0.2;
diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template
new file mode 100644
index 000000000000..ac17fcef2108
--- /dev/null
+++ b/tools/lib/bpf/libbpf.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libbpf
+Description: BPF library
+Version: @VERSION@
+Libs: -L${libdir} -lbpf
+Requires.private: libelf
+Cflags: -I${includedir}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 8d0078b65486..557ef8d1250d 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -259,7 +259,8 @@ out_umem_alloc:
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
- char bpf_log_buf[BPF_LOG_BUF_SIZE];
+ static const int log_buf_size = 16 * 1024;
+ char log_buf[log_buf_size];
int err, prog_fd;
/* This is the C-program:
@@ -308,10 +309,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
- "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf,
- BPF_LOG_BUF_SIZE);
+ "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+ log_buf_size);
if (prog_fd < 0) {
- pr_warning("BPF log buffer:\n%s", bpf_log_buf);
+ pr_warning("BPF log buffer:\n%s", log_buf);
return prog_fd;
}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 77b73b892136..078283d073b0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -209,7 +209,7 @@ ifeq ($(DWARF2BTF),y)
endif
PROG_TESTS_H := $(OUTPUT)/prog_tests/tests.h
-$(OUTPUT)/test_progs: $(PROG_TESTS_H)
+test_progs.c: $(PROG_TESTS_H)
$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS)
$(OUTPUT)/test_progs: prog_tests/*.c
@@ -232,7 +232,7 @@ $(PROG_TESTS_H): $(PROG_TESTS_DIR) $(PROG_TESTS_FILES)
) > $(PROG_TESTS_H))
VERIFIER_TESTS_H := $(OUTPUT)/verifier/tests.h
-$(OUTPUT)/test_verifier: $(VERIFIER_TESTS_H)
+test_verifier.c: $(VERIFIER_TESTS_H)
$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS)
VERIFIER_TESTS_DIR = $(OUTPUT)/verifier
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 97d140961438..e85d62cb53d0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -9,14 +9,14 @@
#define SEC(NAME) __attribute__((section(NAME), used))
/* helper functions called from eBPF programs written in C */
-static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
(void *) BPF_FUNC_map_lookup_elem;
-static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
unsigned long long flags) =
(void *) BPF_FUNC_map_update_elem;
-static int (*bpf_map_delete_elem)(void *map, void *key) =
+static int (*bpf_map_delete_elem)(void *map, const void *key) =
(void *) BPF_FUNC_map_delete_elem;
-static int (*bpf_map_push_elem)(void *map, void *value,
+static int (*bpf_map_push_elem)(void *map, const void *value,
unsigned long long flags) =
(void *) BPF_FUNC_map_push_elem;
static int (*bpf_map_pop_elem)(void *map, void *value) =
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index a42f4fc4dc11..8c976476f6fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,3 +25,11 @@ CONFIG_XDP_SOCKETS=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_NET_FOU=m
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 77cafa66d048..7136ab9ffa73 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -52,7 +52,7 @@ static void detach_program(void)
sprintf(command, "rm -r %s", cfg_pin_path);
ret = system(command);
if (ret)
- error(1, errno, command);
+ error(1, errno, "%s", command);
}
static void parse_opts(int argc, char **argv)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index a64f7a02139c..cb827383db4d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -73,7 +73,7 @@ void test_bpf_obj_id(void)
info_len != sizeof(struct bpf_map_info) ||
strcmp((char *)map_infos[i].name, expected_map_name),
"get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+ "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
err, errno,
map_infos[i].type, BPF_MAP_TYPE_ARRAY,
info_len, sizeof(struct bpf_map_info),
@@ -117,7 +117,7 @@ void test_bpf_obj_id(void)
*(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
strcmp((char *)prog_infos[i].name, expected_prog_name),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
@@ -185,7 +185,7 @@ void test_bpf_obj_id(void)
memcmp(&prog_info, &prog_infos[i], info_len) ||
*(int *)(long)prog_info.map_ids != saved_map_id,
"get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+ "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
err, errno, info_len, sizeof(struct bpf_prog_info),
memcmp(&prog_info, &prog_infos[i], info_len),
*(int *)(long)prog_info.map_ids, saved_map_id);
@@ -231,7 +231,7 @@ void test_bpf_obj_id(void)
memcmp(&map_info, &map_infos[i], info_len) ||
array_value != array_magic_value,
"check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+ "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
err, errno, info_len, sizeof(struct bpf_map_info),
memcmp(&map_info, &map_infos[i], info_len),
array_value, array_magic_value);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
new file mode 100644
index 000000000000..23b159d95c3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <test_progs.h>
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_DEBUG)
+ return 0;
+
+ if (!strstr(format, "verifier log"))
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static int check_load(const char *file)
+{
+ struct bpf_prog_load_attr attr;
+ struct bpf_object *obj;
+ int err, prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+ attr.log_level = 4;
+ err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ bpf_object__close(obj);
+ if (err)
+ error_cnt++;
+ return err;
+}
+
+void test_bpf_verif_scale(void)
+{
+ const char *file1 = "./test_verif_scale1.o";
+ const char *file2 = "./test_verif_scale2.o";
+ const char *file3 = "./test_verif_scale3.o";
+ int err;
+
+ if (verifier_stats)
+ libbpf_set_print(libbpf_debug_print);
+
+ err = check_load(file1);
+ err |= check_load(file2);
+ err |= check_load(file3);
+ if (!err)
+ printf("test_verif_scale:OK\n");
+ else
+ printf("test_verif_scale:FAIL\n");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index d7bb5beb1c57..c2a0a9d5591b 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -39,7 +39,7 @@ static int get_stack_print_output(void *data, int size)
} else {
for (i = 0; i < num_stack; i++) {
ks = ksym_search(raw_data[i]);
- if (strcmp(ks->name, nonjit_func) == 0) {
+ if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
found = true;
break;
}
@@ -56,7 +56,7 @@ static int get_stack_print_output(void *data, int size)
} else {
for (i = 0; i < num_stack; i++) {
ks = ksym_search(e->kern_stack[i]);
- if (strcmp(ks->name, nonjit_func) == 0) {
+ if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
good_kern_stack = true;
break;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
new file mode 100644
index 000000000000..d011079fb0bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static void test_global_data_number(struct bpf_object *obj, __u32 duration)
+{
+ int i, err, map_fd;
+ uint64_t num;
+
+ map_fd = bpf_find_map(__func__, obj, "result_number");
+ if (map_fd < 0) {
+ error_cnt++;
+ return;
+ }
+
+ struct {
+ char *name;
+ uint32_t key;
+ uint64_t num;
+ } tests[] = {
+ { "relocate .bss reference", 0, 0 },
+ { "relocate .data reference", 1, 42 },
+ { "relocate .rodata reference", 2, 24 },
+ { "relocate .bss reference", 3, 0 },
+ { "relocate .data reference", 4, 0xffeeff },
+ { "relocate .rodata reference", 5, 0xabab },
+ { "relocate .bss reference", 6, 1234 },
+ { "relocate .bss reference", 7, 0 },
+ { "relocate .rodata reference", 8, 0xab },
+ { "relocate .rodata reference", 9, 0x1111111111111111 },
+ { "relocate .rodata reference", 10, ~0 },
+ };
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
+ CHECK(err || num != tests[i].num, tests[i].name,
+ "err %d result %lx expected %lx\n",
+ err, num, tests[i].num);
+ }
+}
+
+static void test_global_data_string(struct bpf_object *obj, __u32 duration)
+{
+ int i, err, map_fd;
+ char str[32];
+
+ map_fd = bpf_find_map(__func__, obj, "result_string");
+ if (map_fd < 0) {
+ error_cnt++;
+ return;
+ }
+
+ struct {
+ char *name;
+ uint32_t key;
+ char str[32];
+ } tests[] = {
+ { "relocate .rodata reference", 0, "abcdefghijklmnopqrstuvwxyz" },
+ { "relocate .data reference", 1, "abcdefghijklmnopqrstuvwxyz" },
+ { "relocate .bss reference", 2, "" },
+ { "relocate .data reference", 3, "abcdexghijklmnopqrstuvwxyz" },
+ { "relocate .bss reference", 4, "\0\0hello" },
+ };
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
+ CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
+ tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
+ err, str, tests[i].str);
+ }
+}
+
+struct foo {
+ __u8 a;
+ __u32 b;
+ __u64 c;
+};
+
+static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
+{
+ int i, err, map_fd;
+ struct foo val;
+
+ map_fd = bpf_find_map(__func__, obj, "result_struct");
+ if (map_fd < 0) {
+ error_cnt++;
+ return;
+ }
+
+ struct {
+ char *name;
+ uint32_t key;
+ struct foo val;
+ } tests[] = {
+ { "relocate .rodata reference", 0, { 42, 0xfefeefef, 0x1111111111111111ULL, } },
+ { "relocate .bss reference", 1, { } },
+ { "relocate .rodata reference", 2, { } },
+ { "relocate .data reference", 3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
+ };
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
+ CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)),
+ tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
+ err, val.a, val.b, val.c, tests[i].val.a, tests[i].val.b, tests[i].val.c);
+ }
+}
+
+static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
+{
+ int err = -ENOMEM, map_fd, zero = 0;
+ struct bpf_map *map;
+ __u8 *buff;
+
+ map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+ if (!map || !bpf_map__is_internal(map)) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_map__fd(map);
+ if (map_fd < 0) {
+ error_cnt++;
+ return;
+ }
+
+ buff = malloc(bpf_map__def(map)->value_size);
+ if (buff)
+ err = bpf_map_update_elem(map_fd, &zero, buff, 0);
+ free(buff);
+ CHECK(!err || errno != EPERM, "test .rodata read-only map",
+ "err %d errno %d\n", err, errno);
+}
+
+void test_global_data(void)
+{
+ const char *file = "./test_global_data.o";
+ __u32 duration = 0, retval;
+ struct bpf_object *obj;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+ return;
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "pass global data run",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ test_global_data_number(obj, duration);
+ test_global_data_string(obj, duration);
+ test_global_data_struct(obj, duration);
+ test_global_data_rdonly(obj, duration);
+
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
new file mode 100644
index 000000000000..e95baa32e277
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_skb_ctx(void)
+{
+ struct __sk_buff skb = {
+ .cb[0] = 1,
+ .cb[1] = 2,
+ .cb[2] = 3,
+ .cb[3] = 4,
+ .cb[4] = 5,
+ .priority = 6,
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ };
+ struct bpf_object *obj;
+ int err;
+ int i;
+
+ err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+
+ /* ctx_in != NULL, ctx_size_in == 0 */
+
+ tattr.ctx_size_in = 0;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+ tattr.ctx_size_in = sizeof(skb);
+
+ /* ctx_out != NULL, ctx_size_out == 0 */
+
+ tattr.ctx_size_out = 0;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+ tattr.ctx_size_out = sizeof(skb);
+
+ /* non-zero [len, tc_index] fields should be rejected */
+
+ skb.len = 1;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+ skb.len = 0;
+
+ skb.tc_index = 1;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+ skb.tc_index = 0;
+
+ /* non-zero [hash, sk] fields should be rejected */
+
+ skb.hash = 1;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+ skb.hash = 0;
+
+ skb.sk = (struct bpf_sock *)1;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+ skb.sk = 0;
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err != 0 || tattr.retval,
+ "run",
+ "err %d errno %d retval %d\n",
+ err, errno, tattr.retval);
+
+ CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
+ "ctx_size_out",
+ "incorrect output size, want %lu have %u\n",
+ sizeof(skb), tattr.ctx_size_out);
+
+ for (i = 0; i < 5; i++)
+ CHECK_ATTR(skb.cb[i] != i + 2,
+ "ctx_out_cb",
+ "skb->cb[i] == %d, expected %d\n",
+ skb.cb[i], i + 2);
+ CHECK_ATTR(skb.priority != 7,
+ "ctx_out_priority",
+ "skb->priority == %d, expected %d\n",
+ skb.priority, 7);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
new file mode 100644
index 000000000000..5ab14e941980
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Isovalent, Inc.
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") result_number = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 11,
+};
+
+struct bpf_map_def SEC("maps") result_string = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = 32,
+ .max_entries = 5,
+};
+
+struct foo {
+ __u8 a;
+ __u32 b;
+ __u64 c;
+};
+
+struct bpf_map_def SEC("maps") result_struct = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct foo),
+ .max_entries = 5,
+};
+
+/* Relocation tests for __u64s. */
+static __u64 num0;
+static __u64 num1 = 42;
+static const __u64 num2 = 24;
+static __u64 num3 = 0;
+static __u64 num4 = 0xffeeff;
+static const __u64 num5 = 0xabab;
+static const __u64 num6 = 0xab;
+
+/* Relocation tests for strings. */
+static const char str0[32] = "abcdefghijklmnopqrstuvwxyz";
+static char str1[32] = "abcdefghijklmnopqrstuvwxyz";
+static char str2[32];
+
+/* Relocation tests for structs. */
+static const struct foo struct0 = {
+ .a = 42,
+ .b = 0xfefeefef,
+ .c = 0x1111111111111111ULL,
+};
+static struct foo struct1;
+static const struct foo struct2;
+static struct foo struct3 = {
+ .a = 41,
+ .b = 0xeeeeefef,
+ .c = 0x2111111111111111ULL,
+};
+
+#define test_reloc(map, num, var) \
+ do { \
+ __u32 key = num; \
+ bpf_map_update_elem(&result_##map, &key, var, 0); \
+ } while (0)
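+
+/* For reference, test_reloc(number, 0, &num0) expands to:
+ *
+ *	do {
+ *		__u32 key = 0;
+ *		bpf_map_update_elem(&result_number, &key, &num0, 0);
+ *	} while (0);
+ */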
+
+SEC("static_data_load")
+int load_static_data(struct __sk_buff *skb)
+{
+ static const __u64 bar = ~0;
+
+ test_reloc(number, 0, &num0);
+ test_reloc(number, 1, &num1);
+ test_reloc(number, 2, &num2);
+ test_reloc(number, 3, &num3);
+ test_reloc(number, 4, &num4);
+ test_reloc(number, 5, &num5);
+ num4 = 1234;
+ test_reloc(number, 6, &num4);
+ test_reloc(number, 7, &num0);
+ test_reloc(number, 8, &num6);
+
+ test_reloc(string, 0, str0);
+ test_reloc(string, 1, str1);
+ test_reloc(string, 2, str2);
+ str1[5] = 'x';
+ test_reloc(string, 3, str1);
+ __builtin_memcpy(&str2[2], "hello", sizeof("hello"));
+ test_reloc(string, 4, str2);
+
+ test_reloc(struct, 0, &struct0);
+ test_reloc(struct, 1, &struct1);
+ test_reloc(struct, 2, &struct2);
+ test_reloc(struct, 3, &struct3);
+
+ test_reloc(number, 9, &struct0.c);
+ test_reloc(number, 10, &bar);
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
new file mode 100644
index 000000000000..3d12c11a8d47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+typedef unsigned int u32;
+
+static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+static ATTR
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(volatile u32 *)(k);
+ b += *(volatile u32 *)(k + 4);
+ c += *(volatile u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ c ^= a;
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
new file mode 100644
index 000000000000..7a80960d7df1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+SEC("skb_ctx")
+int process(struct __sk_buff *skb)
+{
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 5; i++) {
+ if (skb->cb[i] != i + 1)
+ return 1;
+ skb->cb[i]++;
+ }
+ skb->priority++;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index f541c2de947d..bcb00d737e95 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -11,7 +11,9 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/mpls.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
@@ -20,16 +22,36 @@
static const int cfg_port = 8000;
-struct grev4hdr {
- struct iphdr ip;
+static const int cfg_udp_src = 20000;
+
+#define UDP_PORT 5555
+#define MPLS_OVER_UDP_PORT 6635
+#define ETH_OVER_UDP_PORT 7777
+
+/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
+static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
+ MPLS_LS_S_MASK | 0xff);
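+
+/* Worked out: a label stack entry is laid out as
+ * label[31:12] | TC[11:9] | S[8] | TTL[7:0] (RFC 3032), so
+ * 1000 << 12 = 0x003e8000, plus MPLS_LS_S_MASK (0x00000100) and a
+ * TTL of 0xff gives htonl(0x003e81ff).
+ */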
+
+struct gre_hdr {
__be16 flags;
__be16 protocol;
} __attribute__((packed));
-struct grev6hdr {
+union l4hdr {
+ struct udphdr udp;
+ struct gre_hdr gre;
+};
+
+struct v4hdr {
+ struct iphdr ip;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
+} __attribute__((packed));
+
+struct v6hdr {
struct ipv6hdr ip;
- __be16 flags;
- __be16 protocol;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -47,13 +69,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
- struct grev4hdr h_outer;
+ __u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
+ struct v4hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
@@ -70,13 +94,58 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
+ sizeof(h_outer.l4hdr.udp) +
+ l2_len);
+ break;
+ case IPPROTO_IPIP:
+ break;
+ default:
+ return TC_ACT_OK;
+ }
+
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
}
+ olen += l2_len;
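+
+ /* Example: for IPPROTO_UDP with ETH_P_MPLS_UC, olen is now
+ * 20 (iphdr) + 8 (udphdr) + 4 (mpls_label) = 32 bytes.
+ */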
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
@@ -85,16 +154,10 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
/* prepare new outer network header */
h_outer.ip = iph_inner;
h_outer.ip.tot_len = bpf_htons(olen +
- bpf_htons(h_outer.ip.tot_len));
- if (with_gre) {
- h_outer.ip.protocol = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IP);
- h_outer.flags = 0;
- } else {
- h_outer.ip.protocol = IPPROTO_IPIP;
- }
+ bpf_ntohs(h_outer.ip.tot_len));
+ h_outer.ip.protocol = encap_proto;
- set_ipv4_csum((void *)&h_outer.ip);
+ set_ipv4_csum(&h_outer.ip);
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -104,13 +167,16 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
+ __u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
- struct grev6hdr h_outer;
+ struct v6hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
+ __u16 tot_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
@@ -124,14 +190,58 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
+ sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(tot_len);
+ break;
+ case IPPROTO_IPV6:
+ break;
+ default:
+ return TC_ACT_OK;
}
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
+ }
+ olen += l2_len;
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
@@ -141,13 +251,8 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
h_outer.ip = iph_inner;
h_outer.ip.payload_len = bpf_htons(olen +
bpf_ntohs(h_outer.ip.payload_len));
- if (with_gre) {
- h_outer.ip.nexthdr = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IPV6);
- h_outer.flags = 0;
- } else {
- h_outer.ip.nexthdr = IPPROTO_IPV6;
- }
+
+ h_outer.ip.nexthdr = encap_proto;
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -157,54 +262,168 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
return TC_ACT_OK;
}
-SEC("encap_ipip")
-int __encap_ipip(struct __sk_buff *skb)
+SEC("encap_ipip_none")
+int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, false);
+ return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_gre")
-int __encap_gre(struct __sk_buff *skb)
+SEC("encap_gre_none")
+int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, true);
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_ip6tnl")
-int __encap_ip6tnl(struct __sk_buff *skb)
+SEC("encap_gre_mpls")
+int __encap_gre_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_gre_eth")
+int __encap_gre_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_none")
+int __encap_udp_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_mpls")
+int __encap_udp_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_eth")
+int __encap_udp_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl_none")
+int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, false);
+ return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
else
return TC_ACT_OK;
}
-SEC("encap_ip6gre")
-int __encap_ip6gre(struct __sk_buff *skb)
+SEC("encap_ip6gre_none")
+int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, true);
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_mpls")
+int __encap_ip6gre_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_eth")
+int __encap_ip6gre_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_none")
+int __encap_ip6udp_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_mpls")
+int __encap_ip6udp_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_eth")
+int __encap_ip6udp_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
else
return TC_ACT_OK;
}
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct grev6hdr)];
- int olen;
+ char buf[sizeof(struct v6hdr)];
+ struct gre_hdr greh;
+ struct udphdr udph;
+ int olen = len;
switch (proto) {
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- olen = len;
break;
case IPPROTO_GRE:
- olen = len + 4 /* gre hdr */;
+ olen += sizeof(struct gre_hdr);
+ if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(greh.protocol)) {
+ case ETH_P_MPLS_UC:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_P_TEB:
+ olen += ETH_HLEN;
+ break;
+ }
+ break;
+ case IPPROTO_UDP:
+ olen += sizeof(struct udphdr);
+ if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(udph.dest)) {
+ case MPLS_OVER_UDP_PORT:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_OVER_UDP_PORT:
+ olen += ETH_HLEN;
+ break;
+ }
break;
default:
return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
new file mode 100644
index 000000000000..f3236ce35f31
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+SEC("scale90_noinline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
new file mode 100644
index 000000000000..77830693eccb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((always_inline))
+#include "test_jhash.h"
+
+SEC("scale90_inline")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
new file mode 100644
index 000000000000..1848da04ea41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#define ATTR __attribute__((noinline))
+#include "test_jhash.h"
+
+SEC("scale90_noinline32")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int ret = 0, nh_off, i = 0;
+
+ nh_off = 32;
+
+ /* pragma unroll doesn't work on large loops */
+
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index ec5794e4205b..44cd3378d216 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -85,6 +85,11 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
#define BTF_UNION_ENC(name, nr_elems, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz)
+#define BTF_VAR_ENC(name, type, linkage) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), type), (linkage)
+#define BTF_VAR_SECINFO_ENC(type, offset, size) \
+ (type), (offset), (size)
+
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
@@ -291,7 +296,6 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 3,
.max_entries = 4,
},
-
{
.descr = "struct test #3 Invalid member offset",
.raw_types = {
@@ -319,7 +323,664 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Invalid member bits_offset",
},
-
+/*
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * };
+ */
+{
+ .descr = "global data test #1",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test1_map",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+/*
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * };
+ * static struct A t; <- in .bss
+ */
+{
+ .descr = "global data test #2",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+ BTF_VAR_SECINFO_ENC(6, 0, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #3",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #4, unsupported linkage",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_TBD, 1, 2), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Linkage not supported",
+},
+{
+ .descr = "global data test #5, invalid var type",
+ .raw_types = {
+ /* static void t */
+ BTF_VAR_ENC(NAME_TBD, 0, 0), /* [1] */
+ /* .bss section */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0t\0.bss",
+ .str_sec_size = sizeof("\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #6, invalid var type (fwd type)",
+ .raw_types = {
+ /* union A */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ /* static union A t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "global data test #7, invalid var type (fwd type)",
+ .raw_types = {
+ /* union A */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ /* static union A t */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "global data test #8, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 48),
+ BTF_VAR_SECINFO_ENC(6, 0, 47),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #9, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+ BTF_VAR_SECINFO_ENC(6, 0, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #10, invalid var size",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* .bss section */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 46),
+ BTF_VAR_SECINFO_ENC(6, 0, 46),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 0,
+ .value_type_id = 7,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid size",
+},
+{
+ .descr = "global data test #11, multiple section members",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 58, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+},
+{
+ .descr = "global data test #12, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 60, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset+size",
+},
+{
+ .descr = "global data test #13, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_VAR_SECINFO_ENC(7, 12, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset",
+},
+{
+ .descr = "global data test #14, invalid offset",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 4), 48),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ /* } */
+ /* static struct A t */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ /* static int u */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [7] */
+ /* .bss section */ /* [8] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2), 62),
+ BTF_VAR_SECINFO_ENC(7, 58, 4),
+ BTF_VAR_SECINFO_ENC(6, 10, 48),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0t\0u\0.bss",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0t\0u\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 62,
+ .key_type_id = 0,
+ .value_type_id = 8,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid offset",
+},
+{
+ .descr = "global data test #15, not var kind",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(1, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0.bss",
+ .str_sec_size = sizeof("\0A\0t\0.bss"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Not a VAR kind member",
+},
+{
+ .descr = "global data test #16, invalid var referencing sec",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */
+ /* a section */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(3, 0, 4),
+ /* a section */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(6, 0, 4),
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [6] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #17, invalid var referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [3] */
+ /* a section */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(3, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #18, invalid var loop",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [2] */
+ /* .bss section */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0aaa",
+ .str_sec_size = sizeof("\0A\0t\0aaa"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #19, invalid var referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 3, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #20, invalid ptr referencing var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* PTR type_id=3 */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "global data test #21, var included in struct",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* VAR type_id=3; */
+ /* } */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid member",
+},
+{
+ .descr = "global data test #22, array of var",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0t\0s\0a\0a",
+ .str_sec_size = sizeof("\0A\0t\0s\0a\0a"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 0,
+ .value_type_id = 4,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
/* Test member exceeds the size of struct.
*
* struct A {
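The hand-encoded raw_types of "global data test #2" correspond roughly to this C fragment in a BPF object, for which compiler and libbpf would normally generate the VAR and DATASEC records (sketch derived from the test's own comment):

struct A {
	unsigned long long m;
	int n;
	char o;			/* followed by a 3-byte hole */
	int p[8];
};

static struct A t;		/* zero-initialized, hence in .bss */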
@@ -3677,6 +4338,7 @@ struct pprint_mapv {
} aenum;
uint32_t ui32b;
uint32_t bits2c:2;
+ uint8_t si8_4[2][2];
};
#ifdef __SIZEOF_INT128__
@@ -3729,7 +4391,7 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
@@ -3740,9 +4402,12 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */
+ BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
@@ -3791,7 +4456,7 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */
@@ -3802,9 +4467,12 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 17, 264), /* si8_4 */
+ BTF_TYPE_ARRAY_ENC(18, 1, 2), /* [17] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [18] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
@@ -3855,7 +4523,7 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_ENUM_ENC(NAME_TBD, 2),
BTF_ENUM_ENC(NAME_TBD, 3),
/* struct pprint_mapv */ /* [16] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40),
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 11), 40),
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */
BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */
BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */
@@ -3866,13 +4534,16 @@ static struct btf_raw_test pprint_test_template[] = {
BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */
BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */
BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */
+ BTF_MEMBER_ENC(NAME_TBD, 20, BTF_MEMBER_OFFSET(0, 264)), /* si8_4 */
/* typedef unsigned int ___int */ /* [17] */
BTF_TYPEDEF_ENC(NAME_TBD, 18),
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */
+ BTF_TYPE_ARRAY_ENC(21, 1, 2), /* [20] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2), /* [21] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"),
+ BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int\0si8_4"),
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
@@ -4007,6 +4678,10 @@ static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
v->aenum = i & 0x03;
v->ui32b = 4;
v->bits2c = 1;
+ v->si8_4[0][0] = (cpu + i) & 0xff;
+ v->si8_4[0][1] = (cpu + i + 1) & 0xff;
+ v->si8_4[1][0] = (cpu + i + 2) & 0xff;
+ v->si8_4[1][1] = (cpu + i + 3) & 0xff;
v = (void *)v + rounded_value_size;
}
}
@@ -4040,7 +4715,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
nexpected_line = snprintf(expected_line, line_size,
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
"{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
- "%u,0x%x}\n",
+ "%u,0x%x,[[%d,%d],[%d,%d]]}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
v->ui32, v->si32,
@@ -4054,7 +4729,9 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
v->ui8a[6], v->ui8a[7],
pprint_enum_str[v->aenum],
v->ui32b,
- v->bits2c);
+ v->bits2c,
+ v->si8_4[0][0], v->si8_4[0][1],
+ v->si8_4[1][0], v->si8_4[1][1]);
}
#ifdef __SIZEOF_INT128__
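The pprint changes thread a uint8_t si8_4[2][2] member through all three templates: two chained BTF_TYPE_ARRAY_ENC entries model the 2x2 array, and the expected output line gains a nested bracket group. A standalone sketch of that rendering (illustrative values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t si8_4[2][2] = { { 1, 2 }, { 3, 4 } };

	/* mirrors the ",[[%d,%d],[%d,%d]]" tail added to the format */
	printf("[[%d,%d],[%d,%d]]\n",
	       si8_4[0][0], si8_4[0][1],
	       si8_4[1][0], si8_4[1][1]);	/* prints [[1,2],[3,4]] */
	return 0;
}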
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 84bea3985d64..a7f95106119f 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1055,7 +1055,7 @@ try:
start_test("Test if netdev removal waits for translation...")
delay_msec = 500
- sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+ sim.dfs["sdev/bpf_bind_verifier_delay"] = delay_msec
start = time.time()
cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
(sim['ifname'], obj)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5d10aee9e277..bf5c90998916 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -9,6 +9,7 @@
int error_cnt, pass_cnt;
bool jit_enabled;
+bool verifier_stats = false;
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -162,12 +163,15 @@ void *spin_lock_thread(void *arg)
#include <prog_tests/tests.h>
#undef DECLARE
-int main(void)
+int main(int ac, char **av)
{
srand(time(NULL));
jit_enabled = is_jit_enabled();
+ if (ac == 2 && strcmp(av[1], "-s") == 0)
+ verifier_stats = true;
+
#define CALL
#include <prog_tests/tests.h>
#undef CALL
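test_progs gains an optional -s switch that flips the new verifier_stats global. A sketch of a consumer, assuming the log_level field this series adds to libbpf's bpf_prog_load_attr (helper name illustrative; log_level 4 requests verifier statistics only, matching the bpf_verify_program() change in test_verifier.c below):

#include <bpf/libbpf.h>

extern bool verifier_stats;

static int load_prog(const char *file, struct bpf_object **obj, int *fd)
{
	struct bpf_prog_load_attr attr = {
		.file      = file,
		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
		.log_level = verifier_stats ? 4 : 0,
	};

	return bpf_prog_load_xattr(&attr, obj, fd);
}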
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 51a07367cd43..f095e1d4c657 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -40,6 +40,7 @@ typedef __u16 __sum16;
extern int error_cnt, pass_cnt;
extern bool jit_enabled;
+extern bool verifier_stats;
#define MAGIC_BYTES 123
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c805adb88f3a..d4d8d5d3b06e 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -15,6 +15,12 @@ readonly ns2_v4=192.168.1.2
readonly ns1_v6=fd::1
readonly ns2_v6=fd::2
+# Must match port used by bpf program
+readonly udpport=5555
+# MPLS over UDP
+readonly mplsudpport=6635
+readonly mplsproto=137
+
readonly infile="$(mktemp)"
readonly outfile="$(mktemp)"
@@ -38,8 +44,8 @@ setup() {
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
sleep 1
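The tighter clamps reserve headroom for the largest encap now exercised — UDP carrying an inner Ethernet header. The arithmetic behind the two numbers appears to be (sketch):

/* 1500-byte veth MTU minus worst-case tunnel overhead */
#define ETH_HLEN	14
#define UDP_HLEN	8
#define IPV4_HLEN	20
#define IPV6_HLEN	40

#define CLAMP_V4	(1500 - IPV4_HLEN - UDP_HLEN - ETH_HLEN)	/* 1458 */
#define CLAMP_V6	(1500 - IPV6_HLEN - UDP_HLEN - ETH_HLEN)	/* 1438 */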
@@ -86,30 +92,44 @@ set -e
# no arguments: automated test, run all
if [[ "$#" -eq "0" ]]; then
echo "ipip"
- $0 ipv4 ipip 100
+ $0 ipv4 ipip none 100
echo "ip6ip6"
- $0 ipv6 ip6tnl 100
+ $0 ipv6 ip6tnl none 100
+
+ for mac in none mpls eth ; do
+ echo "ip gre $mac"
+ $0 ipv4 gre $mac 100
+
+ echo "ip6 gre $mac"
+ $0 ipv6 ip6gre $mac 100
+
+ echo "ip gre $mac gso"
+ $0 ipv4 gre $mac 2000
- echo "ip gre"
- $0 ipv4 gre 100
+ echo "ip6 gre $mac gso"
+ $0 ipv6 ip6gre $mac 2000
- echo "ip6 gre"
- $0 ipv6 ip6gre 100
+ echo "ip udp $mac"
+ $0 ipv4 udp $mac 100
- echo "ip gre gso"
- $0 ipv4 gre 2000
+ echo "ip6 udp $mac"
+ $0 ipv6 ip6udp $mac 100
- echo "ip6 gre gso"
- $0 ipv6 ip6gre 2000
+ echo "ip udp $mac gso"
+ $0 ipv4 udp $mac 2000
+
+ echo "ip6 udp $mac gso"
+ $0 ipv6 ip6udp $mac 2000
+ done
echo "OK. All tests passed"
exit 0
fi
-if [[ "$#" -ne "3" ]]; then
+if [[ "$#" -ne "4" ]]; then
echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+ echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
exit 1
fi
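For manual runs the script now takes four arguments; per the usage string above, an invocation such as

	./test_tc_tunnel.sh ipv4 udp mpls 100

selects IPv4 outer headers, UDP (fou) encap, an inner MPLS header, and a 100-byte payload.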
@@ -117,12 +137,24 @@ case "$1" in
"ipv4")
readonly addr1="${ns1_v4}"
readonly addr2="${ns2_v4}"
- readonly netcat_opt=-4
+ readonly ipproto=4
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou
+ readonly foutype=ipip
+ readonly fouproto=4
+ readonly fouproto_mpls=${mplsproto}
+ readonly gretaptype=gretap
;;
"ipv6")
readonly addr1="${ns1_v6}"
readonly addr2="${ns2_v6}"
- readonly netcat_opt=-6
+ readonly ipproto=6
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou6
+ readonly foutype=ip6tnl
+ readonly fouproto="41 -6"
+ readonly fouproto_mpls="${mplsproto} -6"
+ readonly gretaptype=ip6gretap
;;
*)
echo "unknown arg: $1"
@@ -131,9 +163,10 @@ case "$1" in
esac
readonly tuntype=$2
-readonly datalen=$3
+readonly mac=$3
+readonly datalen=$4
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
trap cleanup EXIT
@@ -150,16 +183,63 @@ verify_data
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
bpf direct-action object-file ./test_tc_tunnel.o \
- section "encap_${tuntype}"
+ section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
! client_connect
+if [[ "$tuntype" =~ "udp" ]]; then
+ # Set up fou tunnel.
+ ttype="${foutype}"
+ targs="encap fou encap-sport auto encap-dport $udpport"
+ # fou may be a module; allow this to fail.
+ modprobe "${foumod}" || true
+ if [[ "$mac" == "mpls" ]]; then
+ dport=${mplsudpport}
+ dproto=${fouproto_mpls}
+ tmode="mode any ttl 255"
+ else
+ dport=${udpport}
+ dproto=${fouproto}
+ fi
+ ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
+ targs="encap fou encap-sport auto encap-dport $dport"
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ ttype=$gretaptype
+else
+ ttype=$tuntype
+ targs=""
+fi
+
# serverside, insert decap module
# server is still running
# client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
- remote "${addr1}" local "${addr2}"
+ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
+ ${tmode} remote "${addr1}" local "${addr2}" $targs
+
+expect_tun_fail=0
+
+if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
+ # No support for MPLS IPv6 fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
+ # No support for TEB fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ # Share ethernet address between tunnel/veth2 so L2 decap works.
+ ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
+ awk '/ether/ { print $2 }')
+ ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
+elif [[ "$mac" == "mpls" ]]; then
+ modprobe mpls_iptunnel || true
+ modprobe mpls_gso || true
+ ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
+ ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
+ ip netns exec "${ns2}" ip link set lo up
+ ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
+fi
+
# Because packets are decapped by the tunnel they arrive on testtun0 from
# the IP stack perspective. Ensure reverse path filtering is disabled
# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
@@ -169,16 +249,22 @@ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
# selected as the max of the "all" and device-specific values.
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
ip netns exec "${ns2}" ip link set dev testtun0 up
-echo "test bpf encap with tunnel device decap"
-client_connect
-verify_data
+if [[ "$expect_tun_fail" == 1 ]]; then
+ # This tunnel mode is not supported, so we expect failure.
+ echo "test bpf encap with tunnel device decap (expect failure)"
+ ! client_connect
+else
+ echo "test bpf encap with tunnel device decap"
+ client_connect
+ verify_data
+ server_listen
+fi
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc filter add dev veth2 ingress \
bpf direct-action object-file ./test_tc_tunnel.o section decap
-server_listen
echo "test bpf encap with bpf decap"
client_connect
verify_data
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 19b5d03acc2a..e2ebcaddbe78 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,8 +50,9 @@
#include "../../../include/linux/filter.h"
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 14
+#define MAX_NR_MAPS 16
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -66,6 +67,7 @@ static int skips;
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
+ struct bpf_insn *fill_insns;
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
@@ -80,9 +82,13 @@ struct bpf_test {
int fixup_cgroup_storage[MAX_FIXUPS];
int fixup_percpu_cgroup_storage[MAX_FIXUPS];
int fixup_map_spin_lock[MAX_FIXUPS];
+ int fixup_map_array_ro[MAX_FIXUPS];
+ int fixup_map_array_wo[MAX_FIXUPS];
+ int fixup_map_array_small[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval, retval_unpriv, insn_processed;
+ int prog_len;
enum {
UNDEF,
ACCEPT,
@@ -119,10 +125,11 @@ struct other_val {
static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
{
- /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+ /* test: {skb->data[0], vlan_push} x 51 + {skb->data[0], vlan_pop} x 51 */
#define PUSH_CNT 51
- unsigned int len = BPF_MAXINSNS;
- struct bpf_insn *insn = self->insns;
+ /* jump range is limited to 16 bits; the PUSH_CNT blocks of ld_abs need room */
+ unsigned int len = (1 << 15) - PUSH_CNT * 2 * 5 * 6;
+ struct bpf_insn *insn = self->fill_insns;
int i = 0, j, k = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -156,12 +163,14 @@ loop:
for (; i < len - 1; i++)
insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
insn[len - 1] = BPF_EXIT_INSN();
+ self->prog_len = len;
}
static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
{
- struct bpf_insn *insn = self->insns;
- unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn = self->fill_insns;
+ /* jump range is limited to 16 bits; every ld_abs is replaced by 6 insns */
+ unsigned int len = (1 << 15) / 6;
int i = 0;
insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -171,11 +180,12 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
while (i < len - 1)
insn[i++] = BPF_LD_ABS(BPF_B, 1);
insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
}
static void bpf_fill_rand_ld_dw(struct bpf_test *self)
{
- struct bpf_insn *insn = self->insns;
+ struct bpf_insn *insn = self->fill_insns;
uint64_t res = 0;
int i = 0;
@@ -193,6 +203,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
insn[i] = BPF_EXIT_INSN();
+ self->prog_len = i + 1;
res ^= (res >> 32);
self->retval = (uint32_t)res;
}
@@ -277,13 +288,15 @@ static bool skip_unsupported_map(enum bpf_map_type map_type)
return false;
}
-static int create_map(uint32_t type, uint32_t size_key,
- uint32_t size_value, uint32_t max_elem)
+static int __create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem,
+ uint32_t extra_flags)
{
int fd;
fd = bpf_create_map(type, size_key, size_value, max_elem,
- type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+ (type == BPF_MAP_TYPE_HASH ?
+ BPF_F_NO_PREALLOC : 0) | extra_flags);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -293,6 +306,12 @@ static int create_map(uint32_t type, uint32_t size_key,
return fd;
}
+static int create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem)
+{
+ return __create_map(type, size_key, size_value, max_elem, 0);
+}
+
static void update_map(int fd, int index)
{
struct test_val value = {
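__create_map() keeps the hash-map BPF_F_NO_PREALLOC special case in one place while letting callers OR in access-control flags; the three new fixups below use it like this (sketch mirroring the fixup_map_array_ro path):

static int create_ro_array(void)
{
	return __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
			    sizeof(struct test_val), 1, BPF_F_RDONLY_PROG);
}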
@@ -519,9 +538,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_cgroup_storage = test->fixup_cgroup_storage;
int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
int *fixup_map_spin_lock = test->fixup_map_spin_lock;
+ int *fixup_map_array_ro = test->fixup_map_array_ro;
+ int *fixup_map_array_wo = test->fixup_map_array_wo;
+ int *fixup_map_array_small = test->fixup_map_array_small;
- if (test->fill_helper)
+ if (test->fill_helper) {
+ test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
test->fill_helper(test);
+ }
/* Allocating HTs with 1 elem is fine here, since we only test
* for verifier and not do a runtime lookup, so the only thing
@@ -642,6 +666,35 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_spin_lock++;
} while (*fixup_map_spin_lock);
}
+ if (*fixup_map_array_ro) {
+ map_fds[14] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1,
+ BPF_F_RDONLY_PROG);
+ update_map(map_fds[14], 0);
+ do {
+ prog[*fixup_map_array_ro].imm = map_fds[14];
+ fixup_map_array_ro++;
+ } while (*fixup_map_array_ro);
+ }
+ if (*fixup_map_array_wo) {
+ map_fds[15] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1,
+ BPF_F_WRONLY_PROG);
+ update_map(map_fds[15], 0);
+ do {
+ prog[*fixup_map_array_wo].imm = map_fds[15];
+ fixup_map_array_wo++;
+ } while (*fixup_map_array_wo);
+ }
+ if (*fixup_map_array_small) {
+ map_fds[16] = __create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ 1, 1, 0);
+ update_map(map_fds[16], 0);
+ do {
+ prog[*fixup_map_array_small].imm = map_fds[16];
+ fixup_map_array_small++;
+ } while (*fixup_map_array_small);
+ }
}
static int set_admin(bool admin)
@@ -718,12 +771,17 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
do_test_fixup(test, prog_type, prog, map_fds);
+ if (test->fill_insns) {
+ prog = test->fill_insns;
+ prog_len = test->prog_len;
+ } else {
+ prog_len = probe_filter_length(prog);
+ }
/* If there were some map skips during fixup due to missing bpf
* features, skip this test.
*/
if (fixup_skips != skips)
return;
- prog_len = probe_filter_length(prog);
pflags = 0;
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
@@ -731,7 +789,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
pflags |= BPF_F_ANY_ALIGNMENT;
fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
@@ -830,6 +888,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto fail_log;
}
close_fds:
+ if (test->fill_insns)
+ free(test->fill_insns);
close(fd_prog);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
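Under the new convention a fill helper writes into the heap-allocated fill_insns buffer (MAX_TEST_INSNS entries, calloc'ed in do_test_fixup() and freed at close_fds) and reports the real length through prog_len, instead of being squeezed into the fixed insns[] array and measured with probe_filter_length(). A minimal sketch (helper name hypothetical):

static void bpf_fill_nop_ret(struct bpf_test *self)
{
	struct bpf_insn *insn = self->fill_insns;
	int i = 0;

	insn[i++] = BPF_MOV64_IMM(BPF_REG_0, 0);
	insn[i] = BPF_EXIT_INSN();
	self->prog_len = i + 1;	/* used instead of probe_filter_length() */
}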
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4cdb63bf0521..9a9fc6c9b70b 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -52,6 +52,10 @@ struct ksym *ksym_search(long key)
int start = 0, end = sym_cnt;
int result;
+ /* kallsyms not loaded. return NULL */
+ if (sym_cnt <= 0)
+ return NULL;
+
while (start < end) {
size_t mid = start + (end - start) / 2;
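ksym_search() can now return NULL when /proc/kallsyms was never loaded, so callers need a guard. A sketch, assuming the struct ksym layout from trace_helpers.h:

#include <stdio.h>
#include "trace_helpers.h"

static void print_ksym(long addr)
{
	struct ksym *sym = ksym_search(addr);

	if (!sym) {
		printf("no symbol for 0x%lx (kallsyms not loaded?)\n", addr);
		return;
	}
	printf("0x%lx %s\n", addr, sym->name);
}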
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 0dcecaf3ec6f..bcb83196e459 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -217,3 +217,162 @@
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "valid read map access into a read-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_ro = { 3 },
+ .result = ACCEPT,
+ .retval = 28,
+},
+{
+ "valid read map access into a read-only array 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_ro = { 3 },
+ .result = ACCEPT,
+ .retval = -29,
+},
+{
+ "invalid write map access into a read-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_ro = { 3 },
+ .result = REJECT,
+ .errstr = "write into map forbidden",
+},
+{
+ "invalid write map access into a read-only array 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_ro = { 4 },
+ .result = REJECT,
+ .errstr = "write into map forbidden",
+},
+{
+ "valid write map access into a write-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_wo = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "valid write map access into a write-only array 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_wo = { 4 },
+ .result = ACCEPT,
+ .retval = 0,
+},
+{
+ "invalid read map access into a write-only array 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_wo = { 3 },
+ .result = REJECT,
+ .errstr = "read from map forbidden",
+},
+{
+ "invalid read map access into a write-only array 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_array_wo = { 3 },
+ .result = REJECT,
+ .errstr = "read from map forbidden",
+},
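At the C level the accepted cases correspond roughly to the fragment below; BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG restrict program-side access only, so user space can still prepopulate or read the array (sketch using the selftests' bpf_map_def convention):

#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") ro_array = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(int),
	.max_entries	= 1,
	.map_flags	= BPF_F_RDONLY_PROG,
};

SEC("classifier")
int ro_demo(struct __sk_buff *skb)
{
	__u32 key = 0;
	int *val = bpf_map_lookup_elem(&ro_array, &key);

	if (!val)
		return 0;
	/* *val = 1; here would be rejected: "write into map forbidden" */
	return *val;
}

char _license[] SEC("license") = "GPL";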
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index c660deb582f1..b0fda2877119 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -705,7 +705,6 @@
.errstr = "invalid bpf_context access",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"check cb access: half, wrong type",
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
new file mode 100644
index 000000000000..b9fb28e8e224
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -0,0 +1,347 @@
+{
+ "direct map access, write test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 32),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 40),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "R1 min value is outside of the array range",
+},
+{
+ "direct map access, write test 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, -1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 4, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "direct value offset of 4294967295 is not allowed",
+},
+{
+ "direct map access, write test 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, -1, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 4242),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, write test 10",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 11",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 48),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, write test 12",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "direct value offset of 536870912 is not allowed",
+},
+{
+ "direct map access, write test 13",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, (1<<29)-1),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer, value_size=48 off=536870911",
+},
+{
+ "direct map access, write test 14",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 47),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = ACCEPT,
+ .retval = 0xff,
+},
+{
+ "direct map access, write test 15",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = ACCEPT,
+ .retval = 0xffff,
+},
+{
+ "direct map access, write test 16",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 47),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 0, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+ "direct map access, write test 17",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 46),
+ BPF_LD_MAP_VALUE(BPF_REG_2, 0, 46),
+ BPF_ST_MEM(BPF_H, BPF_REG_2, 1, 0xffff),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1, 3 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=48 off=47 size=2",
+},
+{
+ "direct map access, write test 18",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_H, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = REJECT,
+ .errstr = "R1 min value is outside of the array range",
+},
+{
+ "direct map access, write test 19",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 0),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "direct map access, write test 20",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_small = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value pointer",
+},
+{
+ "direct map access, invalid insn test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, 1, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 1, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, 0, ~0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_VALUE, ~0, ~0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_LD_IMM64 uses reserved fields",
+},
+{
+ "direct map access, invalid insn test 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, ~0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, ~0, ~0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "invalid bpf_ld_imm64 insn",
+},
+{
+ "direct map access, invalid insn test 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0, 0, 47),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result = REJECT,
+ .errstr = "unrecognized bpf_ld_imm64 insn",
+},
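The BPF_LD_MAP_VALUE() macro used throughout this file is shorthand for the two-slot ld_imm64 form that the "invalid insn" tests poke at directly. Its apparent encoding, inferred from those tests and their error strings:

/*
 * insn[0]: code = BPF_LD | BPF_DW | BPF_IMM,
 *          src_reg = BPF_PSEUDO_MAP_VALUE, off = 0,
 *          imm = map fd (patched in by the fixup)
 * insn[1]: off = 0, imm = byte offset into the map value
 *
 * Any nonzero off field trips "BPF_LD_IMM64 uses reserved fields";
 * write test 7's offset of -1 reads back as a u32, hence
 * "direct value offset of 4294967295 is not allowed".
 */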
diff --git a/tools/testing/selftests/bpf/verifier/ld_dw.c b/tools/testing/selftests/bpf/verifier/ld_dw.c
index d2c75b889598..0f18e62f0099 100644
--- a/tools/testing/selftests/bpf/verifier/ld_dw.c
+++ b/tools/testing/selftests/bpf/verifier/ld_dw.c
@@ -34,3 +34,12 @@
.result = ACCEPT,
.retval = 5,
},
+{
+ "ld_dw: xor semi-random 64 bit imms, test 5",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1000000 - 6,
+},
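Note that .retval appears to serve double duty here: bpf_fill_rand_ld_dw() (see the test_verifier.c hunk above; its fill loop falls outside that hunk) reads it as the instruction budget for generating ld_dw/xor pairs, then overwrites it with the expected 32-bit XOR fold, so 1000000 - 6 sizes the program just under MAX_TEST_INSNS while leaving room for the epilogue.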
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index 1e536ff121a5..8504ac937809 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -40,7 +40,35 @@
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
{
- "indirect variable-offset stack access",
+ "indirect variable-offset stack access, unbounded",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_3, 28),
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
+ bytes_received)),
+ /* Check the lower bound but don't check the upper one. */
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
+ /* Point the lower bound to initialized stack. Offset is now in range
+ * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_5, 8),
+ /* Dereference it indirectly. */
+ BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 unbounded indirect variable offset stack access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
+{
+ "indirect variable-offset stack access, max out of bound",
.insns = {
/* Fill the top 8 bytes of the stack */
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -60,7 +88,161 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 5 },
- .errstr = "variable stack read R2",
+ .errstr = "R2 max value is outside of stack bound",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, min out of bound",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
+ /* add it to fp. We now have either fp-516 or fp-512, but
+ * we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* dereference it indirectly */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "R2 min value is outside of stack bound",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, max_off+size > max_initialized",
+ .insns = {
+ /* Fill only the second from top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-12 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
+{
+ "indirect variable-offset stack access, min_off < min_initialized",
+ .insns = {
+ /* Fill only the top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-16 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
+{
+ "indirect variable-offset stack access, priv vs unpriv",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 6 },
+ .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "indirect variable-offset stack access, uninitialized",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_3, 28),
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_5, 8),
+ /* Dereference it indirectly. */
+ BPF_EMIT_CALL(BPF_FUNC_getsockopt),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+},
+{
+ "indirect variable-offset stack access, ok",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know
+ * which, but either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 6 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+},
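
Taken together, the var_off cases map out a single check: given a stack pointer whose offset is only known as a range, the verifier derives the lowest and highest reachable offsets and rejects the access if the top leaves the stack, the bottom undershoots it, or any reachable slot is uninitialized. A small Python model of that decision (the names and the initialized-range bookkeeping are simplifications, not the verifier's actual state):

    def check_var_off_stack_access(ptr_off, smin, smax, size,
                                   stack_depth, init_lo, init_hi):
        # ptr_off: constant fp-relative offset; [smin, smax]: tracked range of
        # the variable part; [init_lo, init_hi): fp-relative bytes known to be
        # initialized. Error strings mimic the selftest errstr values.
        min_off = ptr_off + smin
        max_off = ptr_off + smax
        if max_off + size > 0:
            return "max value is outside of stack bound"
        if min_off < -stack_depth:
            return "min value is outside of stack bound"
        if min_off < init_lo or max_off + size > init_hi:
            return "invalid indirect read from stack var_off"
        return None

    # fp + [-16, -12], size 8, with only [fp-16, fp-8) initialized:
    print(check_var_off_stack_access(0, -16, -12, 8, 512, -16, -8))
    # fp + [-516, -512], size 8, on a 512-byte stack:
    print(check_var_off_stack_access(0, -516, -512, 8, 512, -16, 0))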
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 1080ff55a788..e941024869ff 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,8 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw"
+
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
@@ -48,6 +49,7 @@ setup()
{
set -e
ip netns add ns1
+ ip netns set ns1 auto
$IP link set dev lo up
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
@@ -698,6 +700,7 @@ route_setup()
set -e
ip netns add ns2
+ ip netns set ns2 auto
ip -netns ns2 link set dev lo up
ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
@@ -1442,6 +1445,70 @@ ipv4_route_metrics_test()
route_cleanup
}
+ipv4_route_v6_gw_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 route with IPv6 gateway tests"
+
+ route_setup
+ sleep 2
+
+ #
+ # single path route
+ #
+ run_cmd "$IP ro add 172.16.104.0/24 via inet6 2001:db8:101::2"
+ rc=$?
+ log_test $rc 0 "Single path route with IPv6 gateway"
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
+ fi
+
+ run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+	log_test $? 0 "Single path route with IPv6 gateway - ping"
+
+ run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
+ rc=$?
+ log_test $rc 0 "Single path route delete"
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.112.0/24"
+ fi
+
+ #
+ # multipath - v6 then v4
+ #
+ run_cmd "$IP ro add 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+ rc=$?
+ log_test $rc 0 "Multipath route add - v6 nexthop then v4"
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ fi
+
+ run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+ log_test $? 2 " Multipath route delete - nexthops in wrong order"
+
+ run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+ log_test $? 0 " Multipath route delete exact match"
+
+ #
+ # multipath - v4 then v6
+ #
+ run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+ rc=$?
+ log_test $rc 0 "Multipath route add - v4 nexthop then v6"
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 weight 1 nexthop via inet6 2001:db8:101::2 dev veth1 weight 1"
+ fi
+
+ run_cmd "$IP ro del 172.16.104.0/24 nexthop via inet6 2001:db8:101::2 dev veth1 nexthop via 172.16.103.2 dev veth3"
+ log_test $? 2 " Multipath route delete - nexthops in wrong order"
+
+ run_cmd "$IP ro del 172.16.104.0/24 nexthop via 172.16.103.2 dev veth3 nexthop via inet6 2001:db8:101::2 dev veth1"
+ log_test $? 0 " Multipath route delete exact match"
+
+ route_cleanup
+}
################################################################################
# usage
@@ -1511,6 +1578,7 @@ do
ipv4_addr_metric) ipv4_addr_metric_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
+ ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
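
ipv4_route_v6_gw_test validates IPv4 routes whose nexthop is an IPv6 address ("via inet6 ..."), single-path and multipath, including that multipath deletion only matches nexthops given in their original order. Its check_route comparisons boil down to normalizing "ip route show" output against an expected string; a simplified Python equivalent of that helper shape (an illustration, not the script's code):

    import subprocess

    def check_route(ns, prefix, expected):
        # Normalize 'ip route show' output for <prefix> and compare it with
        # the expected string, as the script's check_route helper does.
        out = subprocess.run(['ip', '-netns', ns, 'route', 'show', 'match', prefix],
                             capture_output=True, text=True).stdout
        return ' '.join(out.split()) == expected

    ok = check_route('ns1', '172.16.104.0/24',
                     '172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1')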
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 912b2dc50be3..524b15dabb3c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -116,6 +116,10 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+PAUSE_ON_FAIL=no
+VERBOSE=0
+TRACING=0
+
# Some systems don't have a ping6 binary anymore
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
@@ -222,6 +226,23 @@ err_flush() {
err_buf=
}
+run_cmd() {
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out="$($cmd 2>&1)"
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ echo
+ fi
+
+ return $rc
+}
+
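
pmtu.sh adopts the same run_cmd convention fib_tests.sh already uses: commands run silently by default, while -v echoes each command and its captured output. The contract translates directly; a Python restatement for reference (a sketch, not part of the selftests):

    import subprocess

    def run_cmd(cmd, verbose=False):
        # Same contract as the shell helper: quiet by default, echo the command
        # and its combined stdout/stderr when verbose, hand back the exit code.
        if verbose:
            print('  COMMAND:', cmd)
        proc = subprocess.run(cmd, shell=True, text=True,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if verbose and proc.stdout:
            print(' ', proc.stdout)
        return proc.returncode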
# Find the auto-generated name for this namespace
nsname() {
eval echo \$NS_$1
@@ -258,22 +279,22 @@ setup_fou_or_gue() {
fi
fi
- ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
- ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+ run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
+ run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
- ${ns_b} ip fou add port 5556 ipproto ${ipproto}
- ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
+ run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
+ run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
if [ "${inner}" = "4" ]; then
- ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
- ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
else
- ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
- ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
fi
- ${ns_a} ip link set ${encap}_a up
- ${ns_b} ip link set ${encap}_b up
+ run_cmd ${ns_a} ip link set ${encap}_a up
+ run_cmd ${ns_b} ip link set ${encap}_b up
}
setup_fou44() {
@@ -319,17 +340,17 @@ setup_namespaces() {
}
setup_veth() {
- ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
- ${ns_a} ip link set veth_b netns ${NS_B}
+ run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ run_cmd ${ns_a} ip link set veth_b netns ${NS_B}
- ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
- ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+ run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
- ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
- ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+ run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
- ${ns_a} ip link set veth_a up
- ${ns_b} ip link set veth_b up
+ run_cmd ${ns_a} ip link set veth_a up
+ run_cmd ${ns_b} ip link set veth_b up
}
setup_vti() {
@@ -342,14 +363,14 @@ setup_vti() {
[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
- ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
- ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+ run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
- ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
- ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+ run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
- ${ns_a} ip link set vti${proto}_a up
- ${ns_b} ip link set vti${proto}_b up
+ run_cmd ${ns_a} ip link set vti${proto}_a up
+ run_cmd ${ns_b} ip link set vti${proto}_b up
}
setup_vti4() {
@@ -375,17 +396,17 @@ setup_vxlan_or_geneve() {
opts_b=""
fi
- ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
- ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
+ run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
- ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
- ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
- ${ns_a} ip link set ${type}_a up
- ${ns_b} ip link set ${type}_b up
+ run_cmd ${ns_a} ip link set ${type}_a up
+ run_cmd ${ns_b} ip link set ${type}_b up
}
setup_geneve4() {
@@ -409,15 +430,15 @@ setup_xfrm() {
veth_a_addr="${2}"
veth_b_addr="${3}"
- ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
- ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
- ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" || return 1
+ run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+ run_cmd "${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel"
+ run_cmd "${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel"
- ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+ run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
+ run_cmd "${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel"
+ run_cmd "${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel"
}
setup_xfrm4() {
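
Note why the xfrm lines, unlike the rest of this conversion, wrap the whole command in double quotes and switch the aead algorithm spec to single quotes: "rfc4106(gcm(aes))" contains characters a shell would otherwise try to interpret, so the command has to reach run_cmd as one properly quoted string. A Python illustration of the same tokenization concern, using shlex (addresses are placeholders):

    import shlex

    cmd = ("ip -6 xfrm state add src 2001:db8::1 dst 2001:db8::2 spi 0x1000 "
           "proto esp aead 'rfc4106(gcm(aes))' "
           "0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel")

    # shlex honors the embedded single quotes, so the algorithm spec survives
    # word splitting as a single argument:
    argv = shlex.split(cmd)
    print(argv[argv.index('aead') + 1])   # rfc4106(gcm(aes))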
@@ -481,7 +502,7 @@ setup() {
}
trace() {
- [ $tracing -eq 0 ] && return
+ [ $TRACING -eq 0 ] && return
for arg do
[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
@@ -597,8 +618,8 @@ test_pmtu_ipvX() {
mtu "${ns_b}" veth_B-R2 1500
# Create route exceptions
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
# Check that exceptions have been created with the correct PMTU
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
@@ -630,7 +651,7 @@ test_pmtu_ipvX() {
# Decrease remote MTU on path via R2, get new exception
mtu "${ns_r2}" veth_R2-B 400
mtu "${ns_b}" veth_B-R2 400
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
@@ -647,7 +668,7 @@ test_pmtu_ipvX() {
check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
# Get new exception
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
@@ -696,7 +717,7 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -776,7 +797,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
- ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
@@ -834,13 +855,13 @@ test_pmtu_vti4_exception() {
# Send DF packet without exceeding link layer MTU, check that no
# exception is created
- ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
# Now exceed link layer MTU by one byte, check that exception is created
# with the right PMTU value
- ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
@@ -856,7 +877,7 @@ test_pmtu_vti6_exception() {
mtu "${ns_b}" veth_b 4000
mtu "${ns_a}" vti6_a 5000
mtu "${ns_b}" vti6_b 5000
- ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null
+ run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
# Check that exception was created
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
@@ -902,9 +923,9 @@ test_pmtu_vti6_default_mtu() {
test_pmtu_vti4_link_add_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
[ $? -ne 0 ] && err " vti not supported" && return 2
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
fail=0
@@ -912,7 +933,7 @@ test_pmtu_vti4_link_add_mtu() {
max=$((65535 - 20))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
- ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
# This can fail, or MTU can be adjusted to a proper value
[ $? -ne 0 ] && continue
mtu="$(link_get_mtu "${ns_a}" vti4_a)"
@@ -920,14 +941,14 @@ test_pmtu_vti4_link_add_mtu() {
err " vti tunnel created with invalid MTU ${mtu}"
fail=1
fi
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
done
# Now check valid values
for v in ${min} 1300 ${max}; do
- ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti4_a)"
- ${ns_a} ip link del vti4_a
+ run_cmd ${ns_a} ip link del vti4_a
if [ "${mtu}" != "${v}" ]; then
err " vti MTU ${mtu} doesn't match configured value ${v}"
fail=1
@@ -940,9 +961,9 @@ test_pmtu_vti4_link_add_mtu() {
test_pmtu_vti6_link_add_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
[ $? -ne 0 ] && err " vti6 not supported" && return 2
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
fail=0
@@ -950,7 +971,7 @@ test_pmtu_vti6_link_add_mtu() {
max=$((65535 - 40))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
- ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
# This can fail, or MTU can be adjusted to a proper value
[ $? -ne 0 ] && continue
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
@@ -958,14 +979,14 @@ test_pmtu_vti6_link_add_mtu() {
err " vti6 tunnel created with invalid MTU ${v}"
fail=1
fi
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
done
# Now check valid values
for v in 68 1280 1300 $((65535 - 40)); do
- ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
- ${ns_a} ip link del vti6_a
+ run_cmd ${ns_a} ip link del vti6_a
if [ "${mtu}" != "${v}" ]; then
err " vti6 MTU ${mtu} doesn't match configured value ${v}"
fail=1
@@ -978,19 +999,19 @@ test_pmtu_vti6_link_add_mtu() {
test_pmtu_vti6_link_change_mtu() {
setup namespaces || return 2
- ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
[ $? -ne 0 ] && err " dummy not supported" && return 2
- ${ns_a} ip link add dummy1 mtu 3000 type dummy
- ${ns_a} ip link set dummy0 up
- ${ns_a} ip link set dummy1 up
+ run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ run_cmd ${ns_a} ip link set dummy0 up
+ run_cmd ${ns_a} ip link set dummy1 up
- ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
- ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+ run_cmd ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ run_cmd ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
fail=0
# Create vti6 interface bound to device, passing MTU, check it
- ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne 1300 ]; then
err " vti6 MTU ${mtu} doesn't match configured value 1300"
@@ -999,7 +1020,7 @@ test_pmtu_vti6_link_change_mtu() {
# Move to another device with different MTU, without passing MTU, check
# MTU is adjusted
- ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne $((3000 - 40)) ]; then
err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
@@ -1007,7 +1028,7 @@ test_pmtu_vti6_link_change_mtu() {
fi
# Move it back, passing MTU, check MTU is not overridden
- ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
if [ ${mtu} -ne 1280 ]; then
err " vti6 MTU ${mtu} doesn't match configured value 1280"
@@ -1052,7 +1073,7 @@ test_cleanup_vxlanX_exception() {
# Fill exception cache for multiple CPUs (2)
# we can always use inner IPv4 for that
for cpu in ${cpu_list}; do
- taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null
+ run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
done
${ns_a} ip link del dev veth_A-R1 &
@@ -1084,29 +1105,33 @@ usage() {
exit 1
}
+################################################################################
+#
exitcode=0
desc=0
+
+while getopts :ptv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ t) if which tcpdump > /dev/null 2>&1; then
+ TRACING=1
+ else
+ echo "=== tcpdump not available, tracing disabled"
+ fi
+ ;;
+ *) usage;;
+ esac
+done
+shift $(($OPTIND-1))
+
IFS="
"
-tracing=0
for arg do
- if [ "${arg}" != "${arg#--*}" ]; then
- opt="${arg#--}"
- if [ "${opt}" = "trace" ]; then
- if which tcpdump > /dev/null 2>&1; then
- tracing=1
- else
- echo "=== tcpdump not available, tracing disabled"
- fi
- else
- usage
- fi
- else
- # Check first that all requested tests are available before
- # running any
- command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
- fi
+ # Check first that all requested tests are available before running any
+ command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done
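
Option parsing moves from ad-hoc "--trace" scanning to getopts, adding -p (pause on failure) and -v (verbose) alongside -t (tracing, gated on tcpdump being present). For comparison, the same option surface in Python (a sketch, not part of the selftests):

    import argparse
    import shutil

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', dest='pause_on_fail', action='store_true',
                        help='pause when a test fails')
    parser.add_argument('-v', dest='verbose', action='store_true',
                        help='echo commands and their output')
    parser.add_argument('-t', dest='tracing', action='store_true',
                        help='capture traffic, if a capture tool exists')
    parser.add_argument('tests', nargs='*')
    args = parser.parse_args()

    # Mirror the script's fallback: tracing silently degrades to off
    # when no tcpdump binary is on PATH.
    tracing = args.tracing and shutil.which('tcpdump') is not None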
trap cleanup EXIT
@@ -1124,6 +1149,11 @@ for t in ${tests}; do
(
unset IFS
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n##########################################################################\n\n"
+ fi
+
eval test_${name}
ret=$?
cleanup
@@ -1132,6 +1162,11 @@ for t in ${tests}; do
printf "TEST: %-60s [ OK ]\n" "${t}"
elif [ $ret -eq 1 ]; then
printf "TEST: %-60s [FAIL]\n" "${t}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "Pausing. Hit enter to continue"
+ read a
+ fi
err_flush
exit 1
elif [ $ret -eq 2 ]; then
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 99a5ffca1088..e2f92cefb8d5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -19,6 +19,26 @@
]
},
{
+ "id": "2638",
+ "name": "Add matchall and try to get it",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
+ ],
+ "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
+ },
+ {
"id": "d052",
"name": "Add 1M filters with the same action",
"category": [
@@ -38,5 +58,25 @@
"$TC qdisc del dev $DEV2 ingress",
"/bin/rm $BATCH_FILE"
]
+ },
+ {
+ "id": "4cbd",
+ "name": "Try to add filter with duplicate key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]
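
Both additions follow tdc's declarative schema: run the setup commands, execute cmdUnderTest and compare its exit status with expExitCode, then run verifyCmd and require matchPattern to occur exactly matchCount times, tearing down regardless of outcome. A stripped-down Python runner showing that flow (the real tdc.py handles plugins, name substitution, and error paths far more carefully):

    import re
    import subprocess

    def run_case(case, env):
        def sh(cmd):
            for key, val in env.items():            # expand $TC, $DEV2, ...
                cmd = cmd.replace('$' + key, val)
            return subprocess.run(cmd, shell=True, capture_output=True, text=True)

        for cmd in case['setup']:
            sh(cmd)
        result = sh(case['cmdUnderTest'])
        ok = result.returncode == int(case['expExitCode'])
        verify = sh(case['verifyCmd'])
        matches = len(re.findall(case['matchPattern'], verify.stdout))
        ok = ok and matches == int(case['matchCount'])
        for cmd in case['teardown']:
            sh(cmd)
        return ok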
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index 40ea95ce2ead..828c18584624 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E
TPM2_CC_FLUSH_CONTEXT = 0x0165
TPM2_CC_START_AUTH_SESSION = 0x0176
TPM2_CC_GET_CAPABILITY = 0x017A
+TPM2_CC_GET_RANDOM = 0x017B
TPM2_CC_PCR_READ = 0x017E
TPM2_CC_POLICY_PCR = 0x017F
TPM2_CC_PCR_EXTEND = 0x0182
@@ -357,9 +358,9 @@ class Client:
self.flags = flags
if (self.flags & Client.FLAG_SPACE) == 0:
- self.tpm = open('/dev/tpm0', 'r+b')
+ self.tpm = open('/dev/tpm0', 'r+b', buffering=0)
else:
- self.tpm = open('/dev/tpmrm0', 'r+b')
+ self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0)
def close(self):
self.tpm.close()
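
The buffering=0 change matters because the TPM character devices frame commands per syscall: each write() must deliver one complete command, and each read() consumes (part of) one response. Python's default buffered binary files may coalesce or delay writes, breaking that framing. An equivalent using raw file descriptors (needs a TPM; constants taken from the test code above):

    import os
    import struct

    # TPM2_GetRandom request, header layout as in tpm2.py:
    # tag TPM2_ST_NO_SESSIONS (0x8001), total size, command code, byte count.
    cmd_blob = struct.pack('>HIIH', 0x8001, 12, 0x017B, 0x20)

    fd = os.open('/dev/tpmrm0', os.O_RDWR)  # raw fd: each os.write() is one syscall
    os.write(fd, cmd_blob)                  # one complete command per write
    hdr = os.read(fd, 10)                   # response header: tag, size, rc
    os.close(fd)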
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 3bb066fea4a0..d4973be53493 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase):
pass
self.assertEqual(rejected, True)
+ def test_read_partial_resp(self):
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+ hdr = self.client.tpm.read(10)
+ sz = struct.unpack('>I', hdr[2:6])[0]
+ rsp = self.client.tpm.read()
+ except:
+ pass
+ self.assertEqual(sz, 10 + 2 + 32)
+ self.assertEqual(len(rsp), 2 + 32)
+
+ def test_read_partial_overwrite(self):
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+		# Read part of the response
+ rsp1 = self.client.tpm.read(15)
+
+ # Send a new cmd
+ self.client.tpm.write(cmd)
+
+		# Read the whole response
+ rsp2 = self.client.tpm.read()
+ except:
+ pass
+ self.assertEqual(len(rsp1), 15)
+ self.assertEqual(len(rsp2), 10 + 2 + 32)
+
+ def test_send_two_cmds(self):
+ rejected = False
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+
+ # expect the second one to raise -EBUSY error
+ self.client.tpm.write(cmd)
+ rsp = self.client.tpm.read()
+
+		except IOError:
+ # read the response
+ rsp = self.client.tpm.read()
+ rejected = True
+ pass
+ except:
+ pass
+ self.assertEqual(rejected, True)
+
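
All three new cases send the same 12-byte GetRandom request and then probe partial-read behavior: the response is a 10-byte header (tag, total size, return code), a 2-byte count, and the random bytes, which the driver lets a caller consume in pieces or abandon by writing the next command. A small parser matching the offsets asserted above:

    import struct

    def parse_getrandom_rsp(rsp):
        # 10-byte header: tag (u16), total size (u32), return code (u32),
        # followed by a u16 count and that many random bytes.
        tag, size, rc = struct.unpack('>HII', rsp[:10])
        count = struct.unpack('>H', rsp[10:12])[0]
        rnd = rsp[12:12 + count]
        assert size == 10 + 2 + count   # the same 10 + 2 + 32 the tests assert
        return rc, rnd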
class SpaceTest(unittest.TestCase):
def setUp(self):
logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG)