733 files changed, 27299 insertions, 11718 deletions

diff --git a/CREDITS b/CREDITS
--- a/CREDITS
+++ b/CREDITS
@@ -2505,15 +2505,6 @@ W: http://www.rdrop.com/users/paulmck/ D: RCU and variants D: rcutorture module -N: Mike McLagan -E: mike.mclagan@linux.org -W: http://www.invlogic.com/~mmclagan -D: DLCI/FRAD drivers for Sangoma SDLAs -S: Innovative Logic Corp -S: Post Office Box 1068 -S: Laurel, Maryland 20732 -S: USA - N: Bradley McLean E: brad@bradpc.gaylord.com D: Device driver hacker diff --git a/Documentation/ABI/testing/sysfs-kernel-btf b/Documentation/ABI/testing/sysfs-kernel-btf index 2c9744b2cd59..fe96efdc9b6c 100644 --- a/Documentation/ABI/testing/sysfs-kernel-btf +++ b/Documentation/ABI/testing/sysfs-kernel-btf @@ -15,3 +15,11 @@ Description: information with description of all internal kernel types. See Documentation/bpf/btf.rst for detailed description of format itself. + +What: /sys/kernel/btf/<module-name> +Date: Nov 2020 +KernelVersion: 5.11 +Contact: bpf@vger.kernel.org +Description: + Read-only binary attribute exposing kernel module's BTF type + information as an add-on to the kernel's BTF (/sys/kernel/btf/vmlinux). diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 4e0c4ae44acd..5857347f6ce8 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -117,7 +117,6 @@ configure specific aspects of kernel behavior to your liking. unicode vga-softcursor video-output - wimax/index xfs .. only:: subproject and html diff --git a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml new file mode 100644 index 000000000000..5592f58fa6f0 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/hirschmann,hellcreek.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Hirschmann Hellcreek TSN Switch Device Tree Bindings + +allOf: + - $ref: dsa.yaml# + +maintainers: + - Andrew Lunn <andrew@lunn.ch> + - Florian Fainelli <f.fainelli@gmail.com> + - Vivien Didelot <vivien.didelot@gmail.com> + - Kurt Kanzenbach <kurt@linutronix.de> + +description: + The Hellcreek TSN Switch IP is a 802.1Q Ethernet compliant switch. It supports + the Precision Time Protocol, Hardware Timestamping as well the Time Aware + Shaper. 
+ +properties: + compatible: + items: + - const: hirschmann,hellcreek-de1soc-r1 + + reg: + description: + The physical base address and size of TSN and PTP memory base + minItems: 2 + maxItems: 2 + + reg-names: + items: + - const: tsn + - const: ptp + + leds: + type: object + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + patternProperties: + "^led@[01]$": + type: object + description: Hellcreek leds + $ref: ../../leds/common.yaml# + + properties: + reg: + items: + - enum: [0, 1] + description: Led number + + label: true + + default-state: true + + required: + - reg + + additionalProperties: false + + additionalProperties: false + +required: + - compatible + - reg + - reg-names + - ethernet-ports + - leds + +unevaluatedProperties: false + +examples: + - | + switch0: switch@ff240000 { + compatible = "hirschmann,hellcreek-de1soc-r1"; + reg = <0xff240000 0x1000>, + <0xff250000 0x1000>; + reg-names = "tsn", "ptp"; + dsa,member = <0 0>; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac0>; + }; + + port@2 { + reg = <2>; + label = "lan0"; + phy-handle = <&phy1>; + }; + + port@3 { + reg = <3>; + label = "lan1"; + phy-handle = <&phy2>; + }; + }; + + leds { + #address-cells = <1>; + #size-cells = <0>; + + led@0 { + reg = <0>; + label = "sync_good"; + default-state = "on"; + }; + + led@1 { + reg = <1>; + label = "is_gm"; + default-state = "off"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/ftgmac100.txt b/Documentation/devicetree/bindings/net/ftgmac100.txt index f878c1103463..29234021f601 100644 --- a/Documentation/devicetree/bindings/net/ftgmac100.txt +++ b/Documentation/devicetree/bindings/net/ftgmac100.txt @@ -15,6 +15,7 @@ Required properties: - interrupts: Should contain ethernet controller interrupt Optional properties: +- phy-handle: See ethernet.txt file in the same directory. - phy-mode: See ethernet.txt file in the same directory. If the property is absent, "rgmii" is assumed. Supported values are "rgmii*" and "rmii" for aspeed parts. Other (unknown) parts will accept any value. @@ -32,6 +33,9 @@ Optional properties: - "MACCLK": The MAC IP clock - "RCLK": Clock gate for the RMII RCLK +Optional subnodes: +- mdio: See mdio.txt file in the same directory. + Example: mac0: ethernet@1e660000 { @@ -40,3 +44,24 @@ Example: interrupts = <2>; use-ncsi; }; + +Example with phy-handle: + + mac1: ethernet@1e680000 { + compatible = "aspeed,ast2500-mac", "faraday,ftgmac100"; + reg = <0x1e680000 0x180>; + interrupts = <2>; + + phy-handle = <&phy>; + phy-mode = "rgmii"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy: ethernet-phy@1 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <1>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 2735be1a8470..bc8347c6cf56 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -441,6 +441,8 @@ patternProperties: description: HiDeep Inc. "^himax,.*": description: Himax Technologies, Inc. + "^hirschmann,.*": + description: Hirschmann Automation and Control GmbH "^hisilicon,.*": description: Hisilicon Limited. 
"^hit,.*": diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 2a266b7e7b38..02c2d20dc673 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -46,7 +46,7 @@ Resources ========= The ``netdevsim`` driver exposes resources to control the number of FIB -entries and FIB rule entries that the driver will allow. +entries, FIB rule entries and nexthops that the driver will allow. .. code:: shell @@ -54,6 +54,7 @@ entries and FIB rule entries that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64 $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16 + $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 Driver-specific Traps diff --git a/Documentation/networking/framerelay.rst b/Documentation/networking/framerelay.rst deleted file mode 100644 index 6d904399ec6d..000000000000 --- a/Documentation/networking/framerelay.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -================ -Frame Relay (FR) -================ - -Frame Relay (FR) support for linux is built into a two tiered system of device -drivers. The upper layer implements RFC1490 FR specification, and uses the -Data Link Connection Identifier (DLCI) as its hardware address. Usually these -are assigned by your network supplier, they give you the number/numbers of -the Virtual Connections (VC) assigned to you. - -Each DLCI is a point-to-point link between your machine and a remote one. -As such, a separate device is needed to accommodate the routing. Within the -net-tools archives is 'dlcicfg'. This program will communicate with the -base "DLCI" device, and create new net devices named 'dlci00', 'dlci01'... -The configuration script will ask you how many DLCIs you need, as well as -how many DLCIs you want to assign to each Frame Relay Access Device (FRAD). - -The DLCI uses a number of function calls to communicate with the FRAD, all -of which are stored in the FRAD's private data area. assoc/deassoc, -activate/deactivate and dlci_config. The DLCI supplies a receive function -to the FRAD to accept incoming packets. - -With this initial offering, only 1 FRAD driver is available. With many thanks -to Sangoma Technologies, David Mandelstam & Gene Kozin, the S502A, S502E & -S508 are supported. This driver is currently set up for only FR, but as -Sangoma makes more firmware modules available, it can be updated to provide -them as well. - -Configuration of the FRAD makes use of another net-tools program, 'fradcfg'. -This program makes use of a configuration file (which dlcicfg can also read) -to specify the types of boards to be configured as FRADs, as well as perform -any board specific configuration. The Sangoma module of fradcfg loads the -FR firmware into the card, sets the irq/port/memory information, and provides -an initial configuration. - -Additional FRAD device drivers can be added as hardware is available. - -At this time, the dlcicfg and fradcfg programs have not been incorporated into -the net-tools distribution. They can be found at ftp.invlogic.com, in -/pub/linux. Note that with OS/2 FTPD, you end up in /pub by default, so just -use 'cd linux'. v0.10 is for use on pre-2.0.3 and earlier, v0.15 is for -pre-2.0.4 and later. 
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 63ef386afd0a..bc6020c945c4 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -52,7 +52,6 @@ Contents: eql fib_trie filter - framerelay generic-hdlc generic_netlink gen_stats @@ -70,6 +69,7 @@ Contents: lapb-module mac80211-injection mpls-sysctl + mptcp-sysctl multiqueue netconsole netdev-features diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 25e6673a085a..dd2b12a32b73 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1554,6 +1554,9 @@ igmpv3_unsolicited_report_interval - INTEGER Default: 1000 (1 seconds) +ignore_routes_with_linkdown - BOOLEAN + Ignore routes whose link is down when performing a FIB lookup. + promote_secondaries - BOOLEAN When a primary IP address is removed from this interface promote a corresponding secondary IP address instead of @@ -2642,6 +2645,37 @@ addr_scope_policy - INTEGER Default: 1 +udp_port - INTEGER + The listening port for the local UDP tunneling sock. Normally it's + using the IANA-assigned UDP port number 9899 (sctp-tunneling). + + This UDP sock is used for processing the incoming UDP-encapsulated + SCTP packets (from RFC6951), and shared by all applications in the + same net namespace. This UDP sock will be closed when the value is + set to 0. + + The value will also be used to set the src port of the UDP header + for the outgoing UDP-encapsulated SCTP packets. For the dest port, + please refer to 'encap_port' below. + + Default: 0 + +encap_port - INTEGER + The default remote UDP encapsulation port. + + This value is used to set the dest port of the UDP header for the + outgoing UDP-encapsulated SCTP packets by default. Users can also + change the value for each sock/asoc/transport by using setsockopt. + For further information, please refer to RFC6951. + + Note that when connecting to a remote server, the client should set + this to the port that the UDP tunneling sock on the peer server is + listening to and the local UDP tunneling sock on the client also + must be started. On the server, it would get the encap_port from + the incoming packet's source port. + + Default: 0 + ``/proc/sys/net/core/*`` ======================== diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst index d198fa5eaacd..ea55f462cefa 100644 --- a/Documentation/networking/kapi.rst +++ b/Documentation/networking/kapi.rst @@ -83,27 +83,6 @@ SUN RPC subsystem .. kernel-doc:: net/sunrpc/clnt.c :export: -WiMAX ------ - -.. kernel-doc:: net/wimax/op-msg.c - :export: - -.. kernel-doc:: net/wimax/op-reset.c - :export: - -.. kernel-doc:: net/wimax/op-rfkill.c - :export: - -.. kernel-doc:: net/wimax/stack.c - :export: - -.. kernel-doc:: include/net/wimax.h - :internal: - -.. kernel-doc:: include/uapi/linux/wimax.h - :internal: - Network device support ====================== diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst new file mode 100644 index 000000000000..6af0196c4297 --- /dev/null +++ b/Documentation/networking/mptcp-sysctl.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +MPTCP Sysfs variables +===================== + +/proc/sys/net/mptcp/* Variables +=============================== + +enabled - INTEGER + Control whether MPTCP sockets can be created. + + MPTCP sockets can be created if the value is nonzero. 
This is + a per-namespace sysctl. + + Default: 1 + +add_addr_timeout - INTEGER (seconds) + Set the timeout after which an ADD_ADDR control message will be + resent to an MPTCP peer that has not acknowledged a previous + ADD_ADDR message. + + The default value matches TCP_RTO_MAX. This is a per-namespace + sysctl. + + Default: 120 diff --git a/Documentation/process/magic-number.rst b/Documentation/process/magic-number.rst index eee9b44553b3..e02ff5ffb653 100644 --- a/Documentation/process/magic-number.rst +++ b/Documentation/process/magic-number.rst @@ -84,7 +84,6 @@ PPP_MAGIC 0x5002 ppp ``include/linux/ SSTATE_MAGIC 0x5302 serial_state ``include/linux/serial.h`` SLIP_MAGIC 0x5302 slip ``drivers/net/slip.h`` STRIP_MAGIC 0x5303 strip ``drivers/net/strip.c`` -X25_ASY_MAGIC 0x5303 x25_asy ``drivers/net/x25_asy.h`` SIXPACK_MAGIC 0x5304 sixpack ``drivers/net/hamradio/6pack.h`` AX25_MAGIC 0x5316 ax_disp ``drivers/net/mkiss.h`` TTY_MAGIC 0x5401 tty_struct ``include/linux/tty.h`` diff --git a/Documentation/translations/it_IT/process/magic-number.rst b/Documentation/translations/it_IT/process/magic-number.rst index 783e0de314a0..0243d32a0b59 100644 --- a/Documentation/translations/it_IT/process/magic-number.rst +++ b/Documentation/translations/it_IT/process/magic-number.rst @@ -90,7 +90,6 @@ PPP_MAGIC 0x5002 ppp ``include/linux/ SSTATE_MAGIC 0x5302 serial_state ``include/linux/serial.h`` SLIP_MAGIC 0x5302 slip ``drivers/net/slip.h`` STRIP_MAGIC 0x5303 strip ``drivers/net/strip.c`` -X25_ASY_MAGIC 0x5303 x25_asy ``drivers/net/x25_asy.h`` SIXPACK_MAGIC 0x5304 sixpack ``drivers/net/hamradio/6pack.h`` AX25_MAGIC 0x5316 ax_disp ``drivers/net/mkiss.h`` TTY_MAGIC 0x5401 tty_struct ``include/linux/tty.h`` diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index ed5ab7e37f38..48bbd3ebad48 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -114,7 +114,6 @@ Todolist: unicode vga-softcursor video-output - wimax/index xfs .. 
only:: subproject and html diff --git a/Documentation/translations/zh_CN/process/magic-number.rst b/Documentation/translations/zh_CN/process/magic-number.rst index e4c225996af0..de182bf4191c 100644 --- a/Documentation/translations/zh_CN/process/magic-number.rst +++ b/Documentation/translations/zh_CN/process/magic-number.rst @@ -73,7 +73,6 @@ PPP_MAGIC 0x5002 ppp ``include/linux/ SSTATE_MAGIC 0x5302 serial_state ``include/linux/serial.h`` SLIP_MAGIC 0x5302 slip ``drivers/net/slip.h`` STRIP_MAGIC 0x5303 strip ``drivers/net/strip.c`` -X25_ASY_MAGIC 0x5303 x25_asy ``drivers/net/x25_asy.h`` SIXPACK_MAGIC 0x5304 sixpack ``drivers/net/hamradio/6pack.h`` AX25_MAGIC 0x5316 ax_disp ``drivers/net/mkiss.h`` TTY_MAGIC 0x5401 tty_struct ``include/linux/tty.h`` diff --git a/MAINTAINERS b/MAINTAINERS index c7f6924d34c7..a7e0f11def8e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6895,12 +6895,6 @@ S: Maintained W: http://floatingpoint.sourceforge.net/emulator/index.html F: arch/x86/math-emu/ -FRAME RELAY DLCI/FRAD (Sangoma drivers too) -L: netdev@vger.kernel.org -S: Orphan -F: drivers/net/wan/dlci.c -F: drivers/net/wan/sdla.c - FRAMEBUFFER LAYER M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> L: dri-devel@lists.freedesktop.org @@ -7888,6 +7882,15 @@ F: include/linux/hippidevice.h F: include/uapi/linux/if_hippi.h F: net/802/hippi.c +HIRSCHMANN HELLCREEK ETHERNET SWITCH DRIVER +M: Kurt Kanzenbach <kurt@linutronix.de> +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml +F: drivers/net/dsa/hirschmann/* +F: include/linux/platform_data/hirschmann-hellcreek.h +F: net/dsa/tag_hellcreek.c + HISILICON DMA DRIVER M: Zhou Wang <wangzhou1@hisilicon.com> L: dmaengine@vger.kernel.org @@ -9077,16 +9080,6 @@ W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git F: drivers/net/wireless/intel/iwlwifi/ -INTEL WIRELESS WIMAX CONNECTION 2400 -M: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> -M: linux-wimax@intel.com -L: wimax@linuxwimax.org (subscribers-only) -S: Supported -W: http://linuxwimax.org -F: Documentation/admin-guide/wimax/i2400m.rst -F: drivers/net/wimax/i2400m/ -F: include/uapi/linux/wimax/i2400m.h - INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER M: Jithu Joseph <jithu.joseph@intel.com> R: Maurice Ma <maurice.ma@intel.com> @@ -12239,6 +12232,7 @@ L: mptcp@lists.01.org S: Maintained W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues +F: Documentation/networking/mptcp-sysctl.rst F: include/net/mptcp.h F: include/uapi/linux/mptcp.h F: net/mptcp/ @@ -18875,18 +18869,6 @@ S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ -WIMAX STACK -M: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> -M: linux-wimax@intel.com -L: wimax@linuxwimax.org (subscribers-only) -S: Supported -W: http://linuxwimax.org -F: Documentation/admin-guide/wimax/wimax.rst -F: include/linux/wimax/debug.h -F: include/net/wimax.h -F: include/uapi/linux/wimax.h -F: net/wimax/ - WINBOND CIR DRIVER M: David Härdeman <david@hardeman.nu> S: Maintained diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 27e7c0714b96..0d6edeb27659 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -141,7 +141,6 @@ CONFIG_HDLC_CISCO=m CONFIG_HDLC_FR=m CONFIG_HDLC_PPP=m CONFIG_HDLC_X25=m -CONFIG_DLCI=m 
CONFIG_WAN_ROUTER_DRIVERS=m CONFIG_ATM_TCP=m # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 9085f4d6c698..8a921c8ac233 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -228,9 +228,7 @@ CONFIG_FARSYNC=m CONFIG_DSCC4=m CONFIG_DSCC4_PCISYNC=y CONFIG_DSCC4_PCI_RST=y -CONFIG_DLCI=m CONFIG_LAPBETHER=m -CONFIG_X25_ASY=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 914af125a7fa..30dacce94198 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -378,9 +378,7 @@ CONFIG_FARSYNC=m CONFIG_DSCC4=m CONFIG_DSCC4_PCISYNC=y CONFIG_DSCC4_PCI_RST=y -CONFIG_DLCI=m CONFIG_LAPBETHER=m -CONFIG_X25_ASY=m # CONFIG_KEYBOARD_ATKBD is not set CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ac811cfa6843..d7277c26e423 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -765,8 +765,7 @@ static void lanai_shutdown_tx_vci(struct lanai_dev *lanai, struct sk_buff *skb; unsigned long flags, timeout; int read, write, lastread = -1; - APRINTK(!in_interrupt(), - "lanai_shutdown_tx_vci called w/o process context!\n"); + if (lvcc->vbase == NULL) /* We were never bound to a VCI */ return; /* 15.2.1 - wait for queue to drain */ diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 09ad73361879..5c7e4df159b9 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -130,8 +130,9 @@ static int ns_open(struct atm_vcc *vcc); static void ns_close(struct atm_vcc *vcc); static void fill_tst(ns_dev * card, int n, vc_map * vc); static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb); +static int ns_send_bh(struct atm_vcc *vcc, struct sk_buff *skb); static int push_scqe(ns_dev * card, vc_map * vc, scq_info * scq, ns_scqe * tbd, - struct sk_buff *skb); + struct sk_buff *skb, bool may_sleep); static void process_tsq(ns_dev * card); static void drain_scq(ns_dev * card, scq_info * scq, int pos); static void process_rsq(ns_dev * card); @@ -160,6 +161,7 @@ static const struct atmdev_ops atm_ops = { .close = ns_close, .ioctl = ns_ioctl, .send = ns_send, + .send_bh = ns_send_bh, .phy_put = ns_phy_put, .phy_get = ns_phy_get, .proc_read = ns_proc_read, @@ -1620,7 +1622,7 @@ static void fill_tst(ns_dev * card, int n, vc_map * vc) card->tst_addr = new_tst; } -static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) +static int _ns_send(struct atm_vcc *vcc, struct sk_buff *skb, bool may_sleep) { ns_dev *card; vc_map *vc; @@ -1704,7 +1706,7 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) scq = card->scq0; } - if (push_scqe(card, vc, scq, &scqe, skb) != 0) { + if (push_scqe(card, vc, scq, &scqe, skb, may_sleep) != 0) { atomic_inc(&vcc->stats->tx_err); dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len, DMA_TO_DEVICE); @@ -1716,8 +1718,18 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) return 0; } +static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + return _ns_send(vcc, skb, true); +} + +static int ns_send_bh(struct atm_vcc *vcc, struct sk_buff *skb) +{ + return _ns_send(vcc, skb, false); +} + static int push_scqe(ns_dev * card, vc_map * vc, scq_info * scq, ns_scqe * tbd, - struct sk_buff *skb) + struct sk_buff *skb, bool may_sleep) { unsigned long flags; ns_scqe tsr; @@ -1728,7 +1740,7 @@ static int push_scqe(ns_dev * 
card, vc_map * vc, scq_info * scq, ns_scqe * tbd, spin_lock_irqsave(&scq->lock, flags); while (scq->tail == scq->next) { - if (in_interrupt()) { + if (!may_sleep) { spin_unlock_irqrestore(&scq->lock, flags); printk("nicstar%d: Error pushing TBD.\n", card->index); return 1; @@ -1773,7 +1785,7 @@ static int push_scqe(ns_dev * card, vc_map * vc, scq_info * scq, ns_scqe * tbd, int has_run = 0; while (scq->tail == scq->next) { - if (in_interrupt()) { + if (!may_sleep) { data = scq_virt_to_bus(scq, scq->next); ns_write_sram(card, scq->scd, &data, 1); spin_unlock_irqrestore(&scq->lock, flags); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 2cff5ddff225..ba9e721d61b7 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1165,6 +1165,17 @@ int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, } EXPORT_SYMBOL_GPL(mhi_queue_buf); +bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir) +{ + struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; + struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? + mhi_dev->ul_chan : mhi_dev->dl_chan; + struct mhi_ring *tre_ring = &mhi_chan->tre_ring; + + return mhi_is_ring_full(mhi_cntrl, tre_ring); +} +EXPORT_SYMBOL_GPL(mhi_queue_is_full); + int mhi_send_cmd(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, enum mhi_cmd_type cmd) diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index ec53528d8205..8163f5df8ebf 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -545,14 +545,10 @@ static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested) } } -static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np) +static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np, + bool sched_napi) { - /* - * In case of threaded ISR, for RT kernels in_irq() does not return - * appropriate value, so use in_serving_softirq to distinguish between - * softirq and irq contexts. 
- */ - if (unlikely(in_irq() || !in_serving_softirq())) { + if (sched_napi) { /* Disable QMan IRQ source and invoke NAPI */ qman_p_irqsource_remove(p, QM_PIRQ_DQRI); np->p = p; @@ -564,7 +560,8 @@ static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np) static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, struct qman_fq *rsp_fq, - const struct qm_dqrr_entry *dqrr) + const struct qm_dqrr_entry *dqrr, + bool sched_napi) { struct caam_napi *caam_napi = raw_cpu_ptr(&pcpu_qipriv.caam_napi); struct caam_drv_req *drv_req; @@ -573,7 +570,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, struct caam_drv_private *priv = dev_get_drvdata(qidev); u32 status; - if (caam_qi_napi_schedule(p, caam_napi)) + if (caam_qi_napi_schedule(p, caam_napi, sched_napi)) return qman_cb_dqrr_stop; fd = &dqrr->fd; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 12d29d54a081..08366e254b1d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -932,7 +932,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { char name[IB_DEVICE_NAME_MAX] = {}; - nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], + nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); if (strlen(name) == 0) { err = -EINVAL; @@ -1529,13 +1529,13 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) return -EINVAL; - nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], + nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; - nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); - nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], + nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); + nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], sizeof(ndev_name)); ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); @@ -1602,7 +1602,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) return -EINVAL; - nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], + nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], sizeof(client_name)); if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a40701a6e1b6..0b64aa87ab73 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1686,7 +1686,6 @@ static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) u32 extra_bytes; u32 tlen, qpnum; bool do_work, do_cnp; - struct hfi1_ipoib_dev_priv *priv; trace_hfi1_rcvhdr(packet); @@ -1734,8 +1733,7 @@ static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) if (unlikely(!skb)) goto drop; - priv = hfi1_ipoib_priv(netdev); - hfi1_ipoib_update_rx_netstats(priv, 1, skb->len); + dev_sw_netstats_rx_add(netdev, skb->len); skb->dev = netdev; skb->pkt_type = PACKET_HOST; diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index b8c9d0a003fb..f650cac9d424 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -110,7 +110,6 @@ struct hfi1_ipoib_dev_priv { const struct net_device_ops *netdev_ops; struct rvt_qp *qp; - struct pcpu_sw_netstats __percpu *netstats; }; /* hfi1 ipoib rdma netdev's private data structure */ @@ -126,32 +125,6 
@@ hfi1_ipoib_priv(const struct net_device *dev) return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; } -static inline void -hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv, - u64 packets, - u64 bytes) -{ - struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); - - u64_stats_update_begin(&netstats->syncp); - netstats->rx_packets += packets; - netstats->rx_bytes += bytes; - u64_stats_update_end(&netstats->syncp); -} - -static inline void -hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, - u64 packets, - u64 bytes) -{ - struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); - - u64_stats_update_begin(&netstats->syncp); - netstats->tx_packets += packets; - netstats->tx_bytes += bytes; - u64_stats_update_end(&netstats->syncp); -} - int hfi1_ipoib_send_dma(struct net_device *dev, struct sk_buff *skb, struct ib_ah *address, diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 9f71b9d706bd..3242290eb6a7 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -21,7 +21,7 @@ static int hfi1_ipoib_dev_init(struct net_device *dev) struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); int ret; - priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ret = priv->netdev_ops->ndo_init(dev); if (ret) @@ -93,21 +93,12 @@ static int hfi1_ipoib_dev_stop(struct net_device *dev) return priv->netdev_ops->ndo_stop(dev); } -static void hfi1_ipoib_dev_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) -{ - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); - - netdev_stats_to_stats64(storage, &dev->stats); - dev_fetch_sw_netstats(storage, priv->netstats); -} - static const struct net_device_ops hfi1_ipoib_netdev_ops = { .ndo_init = hfi1_ipoib_dev_init, .ndo_uninit = hfi1_ipoib_dev_uninit, .ndo_open = hfi1_ipoib_dev_open, .ndo_stop = hfi1_ipoib_dev_stop, - .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, }; static int hfi1_ipoib_send(struct net_device *dev, @@ -182,7 +173,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) hfi1_ipoib_txreq_deinit(priv); hfi1_ipoib_rxq_deinit(priv->netdev); - free_percpu(priv->netstats); + free_percpu(dev->tstats); } static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 9df292b51a05..edd4eeac8dd1 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -121,7 +121,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) struct hfi1_ipoib_dev_priv *priv = tx->priv; if (likely(!tx->sdma_status)) { - hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len); + dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len); } else { ++priv->netdev->stats.tx_errors; dd_dev_warn(priv->dd, diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index c3dbe64e628e..4ee41924cdf1 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -426,6 +426,13 @@ config VSOCKMON mostly intended for developers or support to debug vsock issues. If unsure, say N. +config MHI_NET + tristate "MHI network driver" + depends on MHI_BUS + help + This is the network driver for MHI bus. It can be used with + QCOM based WWAN modems (like SDX55). Say Y or M. 
+ endif # NET_CORE config SUNGEM_PHY @@ -489,8 +496,6 @@ source "drivers/net/usb/Kconfig" source "drivers/net/wireless/Kconfig" -source "drivers/net/wimax/Kconfig" - source "drivers/net/wan/Kconfig" source "drivers/net/ieee802154/Kconfig" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 72e18d505d1a..36e2e41ed2aa 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o +obj-$(CONFIG_MHI_NET) += mhi_net.o # # Networking Drivers @@ -66,7 +67,6 @@ obj-$(CONFIG_NET_SB1000) += sb1000.o obj-$(CONFIG_SUNGEM_PHY) += sungem_phy.o obj-$(CONFIG_WAN) += wan/ obj-$(CONFIG_WLAN) += wireless/ -obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_VMXNET3) += vmxnet3/ diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index ff0bea1554f9..28257bccec41 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -510,7 +510,7 @@ static const struct net_device_ops bareudp_netdev_ops = { .ndo_open = bareudp_open, .ndo_stop = bareudp_stop, .ndo_start_xmit = bareudp_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, }; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 84ecbc6fa0ff..71c9677d135f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1228,14 +1228,14 @@ static netdev_features_t bond_fix_features(struct net_device *dev, } #define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ NETIF_F_HIGHDMA | NETIF_F_LRO) #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_ALL_TSO) + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) #define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_ALL_TSO) + NETIF_F_GSO_SOFTWARE) static void bond_compute_features(struct bonding *bond) @@ -1291,8 +1291,7 @@ done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX | - NETIF_F_GSO_UDP_L4; + NETIF_F_HW_VLAN_STAG_TX; #ifdef CONFIG_XFRM_OFFLOAD bond_dev->hw_enc_features |= xfrm_features; #endif /* CONFIG_XFRM_OFFLOAD */ @@ -4721,7 +4720,7 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; + bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; #ifdef CONFIG_XFRM_OFFLOAD bond_dev->hw_features |= BOND_XFRM_FEATURES; #endif /* CONFIG_XFRM_OFFLOAD */ diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index d6ba9426be4d..fa47bab510bb 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -186,7 +186,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, } if (ifmp && tbp[IFLA_IFNAME]) { - nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); @@ -223,7 +223,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, /* register first device */ if (tb[IFLA_IFNAME]) - nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); diff --git 
a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 2451f61a38e4..f6a0488589fc 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -24,6 +24,8 @@ config NET_DSA_LOOP This enables support for a fake mock-up switch chip which exercises the DSA APIs. +source "drivers/net/dsa/hirschmann/Kconfig" + config NET_DSA_LANTIQ_GSWIP tristate "Lantiq / Intel GSWIP" depends on HAS_IOMEM && NET_DSA diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 4a943ccc2ca4..a84adb140a04 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX) += vitesse-vsc73xx-core.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM) += vitesse-vsc73xx-platform.o obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX_SPI) += vitesse-vsc73xx-spi.o obj-y += b53/ +obj-y += hirschmann/ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ diff --git a/drivers/net/dsa/hirschmann/Kconfig b/drivers/net/dsa/hirschmann/Kconfig new file mode 100644 index 000000000000..222dd35e2c9d --- /dev/null +++ b/drivers/net/dsa/hirschmann/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +config NET_DSA_HIRSCHMANN_HELLCREEK + tristate "Hirschmann Hellcreek TSN Switch support" + depends on HAS_IOMEM + depends on NET_DSA + depends on PTP_1588_CLOCK + select NET_DSA_TAG_HELLCREEK + help + This driver adds support for Hirschmann Hellcreek TSN switches. diff --git a/drivers/net/dsa/hirschmann/Makefile b/drivers/net/dsa/hirschmann/Makefile new file mode 100644 index 000000000000..f4075c2998b5 --- /dev/null +++ b/drivers/net/dsa/hirschmann/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_NET_DSA_HIRSCHMANN_HELLCREEK) += hellcreek_sw.o +hellcreek_sw-objs := hellcreek.o +hellcreek_sw-objs += hellcreek_ptp.o +hellcreek_sw-objs += hellcreek_hwtstamp.o diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c new file mode 100644 index 000000000000..d42f40c76ba5 --- /dev/null +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -0,0 +1,1339 @@ +// SPDX-License-Identifier: (GPL-2.0 or MIT) +/* + * DSA driver for: + * Hirschmann Hellcreek TSN switch. 
+ * + * Copyright (C) 2019,2020 Linutronix GmbH + * Author Kurt Kanzenbach <kurt@linutronix.de> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_mdio.h> +#include <linux/platform_device.h> +#include <linux/bitops.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/etherdevice.h> +#include <linux/random.h> +#include <linux/iopoll.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <net/dsa.h> + +#include "hellcreek.h" +#include "hellcreek_ptp.h" +#include "hellcreek_hwtstamp.h" + +static const struct hellcreek_counter hellcreek_counter[] = { + { 0x00, "RxFiltered", }, + { 0x01, "RxOctets1k", }, + { 0x02, "RxVTAG", }, + { 0x03, "RxL2BAD", }, + { 0x04, "RxOverloadDrop", }, + { 0x05, "RxUC", }, + { 0x06, "RxMC", }, + { 0x07, "RxBC", }, + { 0x08, "RxRS<64", }, + { 0x09, "RxRS64", }, + { 0x0a, "RxRS65_127", }, + { 0x0b, "RxRS128_255", }, + { 0x0c, "RxRS256_511", }, + { 0x0d, "RxRS512_1023", }, + { 0x0e, "RxRS1024_1518", }, + { 0x0f, "RxRS>1518", }, + { 0x10, "TxTailDropQueue0", }, + { 0x11, "TxTailDropQueue1", }, + { 0x12, "TxTailDropQueue2", }, + { 0x13, "TxTailDropQueue3", }, + { 0x14, "TxTailDropQueue4", }, + { 0x15, "TxTailDropQueue5", }, + { 0x16, "TxTailDropQueue6", }, + { 0x17, "TxTailDropQueue7", }, + { 0x18, "RxTrafficClass0", }, + { 0x19, "RxTrafficClass1", }, + { 0x1a, "RxTrafficClass2", }, + { 0x1b, "RxTrafficClass3", }, + { 0x1c, "RxTrafficClass4", }, + { 0x1d, "RxTrafficClass5", }, + { 0x1e, "RxTrafficClass6", }, + { 0x1f, "RxTrafficClass7", }, + { 0x21, "TxOctets1k", }, + { 0x22, "TxVTAG", }, + { 0x23, "TxL2BAD", }, + { 0x25, "TxUC", }, + { 0x26, "TxMC", }, + { 0x27, "TxBC", }, + { 0x28, "TxTS<64", }, + { 0x29, "TxTS64", }, + { 0x2a, "TxTS65_127", }, + { 0x2b, "TxTS128_255", }, + { 0x2c, "TxTS256_511", }, + { 0x2d, "TxTS512_1023", }, + { 0x2e, "TxTS1024_1518", }, + { 0x2f, "TxTS>1518", }, + { 0x30, "TxTrafficClassOverrun0", }, + { 0x31, "TxTrafficClassOverrun1", }, + { 0x32, "TxTrafficClassOverrun2", }, + { 0x33, "TxTrafficClassOverrun3", }, + { 0x34, "TxTrafficClassOverrun4", }, + { 0x35, "TxTrafficClassOverrun5", }, + { 0x36, "TxTrafficClassOverrun6", }, + { 0x37, "TxTrafficClassOverrun7", }, + { 0x38, "TxTrafficClass0", }, + { 0x39, "TxTrafficClass1", }, + { 0x3a, "TxTrafficClass2", }, + { 0x3b, "TxTrafficClass3", }, + { 0x3c, "TxTrafficClass4", }, + { 0x3d, "TxTrafficClass5", }, + { 0x3e, "TxTrafficClass6", }, + { 0x3f, "TxTrafficClass7", }, +}; + +static u16 hellcreek_read(struct hellcreek *hellcreek, unsigned int offset) +{ + return readw(hellcreek->base + offset); +} + +static u16 hellcreek_read_ctrl(struct hellcreek *hellcreek) +{ + return readw(hellcreek->base + HR_CTRL_C); +} + +static u16 hellcreek_read_stat(struct hellcreek *hellcreek) +{ + return readw(hellcreek->base + HR_SWSTAT); +} + +static void hellcreek_write(struct hellcreek *hellcreek, u16 data, + unsigned int offset) +{ + writew(data, hellcreek->base + offset); +} + +static void hellcreek_select_port(struct hellcreek *hellcreek, int port) +{ + u16 val = port << HR_PSEL_PTWSEL_SHIFT; + + hellcreek_write(hellcreek, val, HR_PSEL); +} + +static void hellcreek_select_prio(struct hellcreek *hellcreek, int prio) +{ + u16 val = prio << HR_PSEL_PRTCWSEL_SHIFT; + + hellcreek_write(hellcreek, val, HR_PSEL); +} + +static void hellcreek_select_counter(struct hellcreek *hellcreek, int counter) +{ + u16 val = counter << HR_CSEL_SHIFT; + + hellcreek_write(hellcreek, val, 
HR_CSEL); + + /* Data sheet states to wait at least 20 internal clock cycles */ + ndelay(200); +} + +static void hellcreek_select_vlan(struct hellcreek *hellcreek, int vid, + bool pvid) +{ + u16 val = 0; + + /* Set pvid bit first */ + if (pvid) + val |= HR_VIDCFG_PVID; + hellcreek_write(hellcreek, val, HR_VIDCFG); + + /* Set vlan */ + val |= vid << HR_VIDCFG_VID_SHIFT; + hellcreek_write(hellcreek, val, HR_VIDCFG); +} + +static int hellcreek_wait_until_ready(struct hellcreek *hellcreek) +{ + u16 val; + + /* Wait up to 1ms, although 3 us should be enough */ + return readx_poll_timeout(hellcreek_read_ctrl, hellcreek, + val, val & HR_CTRL_C_READY, + 3, 1000); +} + +static int hellcreek_wait_until_transitioned(struct hellcreek *hellcreek) +{ + u16 val; + + return readx_poll_timeout_atomic(hellcreek_read_ctrl, hellcreek, + val, !(val & HR_CTRL_C_TRANSITION), + 1, 1000); +} + +static int hellcreek_wait_fdb_ready(struct hellcreek *hellcreek) +{ + u16 val; + + return readx_poll_timeout_atomic(hellcreek_read_stat, hellcreek, + val, !(val & HR_SWSTAT_BUSY), + 1, 1000); +} + +static int hellcreek_detect(struct hellcreek *hellcreek) +{ + u16 id, rel_low, rel_high, date_low, date_high, tgd_ver; + u8 tgd_maj, tgd_min; + u32 rel, date; + + id = hellcreek_read(hellcreek, HR_MODID_C); + rel_low = hellcreek_read(hellcreek, HR_REL_L_C); + rel_high = hellcreek_read(hellcreek, HR_REL_H_C); + date_low = hellcreek_read(hellcreek, HR_BLD_L_C); + date_high = hellcreek_read(hellcreek, HR_BLD_H_C); + tgd_ver = hellcreek_read(hellcreek, TR_TGDVER); + + if (id != hellcreek->pdata->module_id) + return -ENODEV; + + rel = rel_low | (rel_high << 16); + date = date_low | (date_high << 16); + tgd_maj = (tgd_ver & TR_TGDVER_REV_MAJ_MASK) >> TR_TGDVER_REV_MAJ_SHIFT; + tgd_min = (tgd_ver & TR_TGDVER_REV_MIN_MASK) >> TR_TGDVER_REV_MIN_SHIFT; + + dev_info(hellcreek->dev, "Module ID=%02x Release=%04x Date=%04x TGD Version=%02x.%02x\n", + id, rel, date, tgd_maj, tgd_min); + + return 0; +} + +static void hellcreek_feature_detect(struct hellcreek *hellcreek) +{ + u16 features; + + features = hellcreek_read(hellcreek, HR_FEABITS0); + + /* Currently we only detect the size of the FDB table */ + hellcreek->fdb_entries = ((features & HR_FEABITS0_FDBBINS_MASK) >> + HR_FEABITS0_FDBBINS_SHIFT) * 32; + + dev_info(hellcreek->dev, "Feature detect: FDB entries=%zu\n", + hellcreek->fdb_entries); +} + +static enum dsa_tag_protocol hellcreek_get_tag_protocol(struct dsa_switch *ds, + int port, + enum dsa_tag_protocol mp) +{ + return DSA_TAG_PROTO_HELLCREEK; +} + +static int hellcreek_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + u16 val; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Enable port %d\n", port); + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_port(hellcreek, port); + val = hellcreek_port->ptcfg; + val |= HR_PTCFG_ADMIN_EN; + hellcreek_write(hellcreek, val, HR_PTCFG); + hellcreek_port->ptcfg = val; + + mutex_unlock(&hellcreek->reg_lock); + + return 0; +} + +static void hellcreek_port_disable(struct dsa_switch *ds, int port) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + u16 val; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Disable port %d\n", port); + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_port(hellcreek, port); + val = hellcreek_port->ptcfg; + val &= ~HR_PTCFG_ADMIN_EN; + hellcreek_write(hellcreek, 
val, HR_PTCFG); + hellcreek_port->ptcfg = val; + + mutex_unlock(&hellcreek->reg_lock); +} + +static void hellcreek_get_strings(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hellcreek_counter); ++i) { + const struct hellcreek_counter *counter = &hellcreek_counter[i]; + + strlcpy(data + i * ETH_GSTRING_LEN, + counter->name, ETH_GSTRING_LEN); + } +} + +static int hellcreek_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return 0; + + return ARRAY_SIZE(hellcreek_counter); +} + +static void hellcreek_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + int i; + + hellcreek_port = &hellcreek->ports[port]; + + for (i = 0; i < ARRAY_SIZE(hellcreek_counter); ++i) { + const struct hellcreek_counter *counter = &hellcreek_counter[i]; + u8 offset = counter->offset + port * 64; + u16 high, low; + u64 value; + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_counter(hellcreek, offset); + + /* The registers are locked internally by selecting the + * counter. So low and high can be read without reading high + * again. + */ + high = hellcreek_read(hellcreek, HR_CRDH); + low = hellcreek_read(hellcreek, HR_CRDL); + value = ((u64)high << 16) | low; + + hellcreek_port->counter_values[i] += value; + data[i] = hellcreek_port->counter_values[i]; + + mutex_unlock(&hellcreek->reg_lock); + } +} + +static u16 hellcreek_private_vid(int port) +{ + return VLAN_N_VID - port + 1; +} + +static int hellcreek_vlan_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct hellcreek *hellcreek = ds->priv; + int i; + + dev_dbg(hellcreek->dev, "VLAN prepare for port %d\n", port); + + /* Restriction: Make sure that nobody uses the "private" VLANs. These + * VLANs are internally used by the driver to ensure port + * separation. Thus, they cannot be used by someone else. 
+ */ + for (i = 0; i < hellcreek->pdata->num_ports; ++i) { + const u16 restricted_vid = hellcreek_private_vid(i); + u16 vid; + + if (!dsa_is_user_port(ds, i)) + continue; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) + if (vid == restricted_vid) + return -EBUSY; + } + + return 0; +} + +static void hellcreek_select_vlan_params(struct hellcreek *hellcreek, int port, + int *shift, int *mask) +{ + switch (port) { + case 0: + *shift = HR_VIDMBRCFG_P0MBR_SHIFT; + *mask = HR_VIDMBRCFG_P0MBR_MASK; + break; + case 1: + *shift = HR_VIDMBRCFG_P1MBR_SHIFT; + *mask = HR_VIDMBRCFG_P1MBR_MASK; + break; + case 2: + *shift = HR_VIDMBRCFG_P2MBR_SHIFT; + *mask = HR_VIDMBRCFG_P2MBR_MASK; + break; + case 3: + *shift = HR_VIDMBRCFG_P3MBR_SHIFT; + *mask = HR_VIDMBRCFG_P3MBR_MASK; + break; + default: + *shift = *mask = 0; + dev_err(hellcreek->dev, "Unknown port %d selected!\n", port); + } +} + +static void hellcreek_apply_vlan(struct hellcreek *hellcreek, int port, u16 vid, + bool pvid, bool untagged) +{ + int shift, mask; + u16 val; + + dev_dbg(hellcreek->dev, "Apply VLAN: port=%d vid=%u pvid=%d untagged=%d", + port, vid, pvid, untagged); + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_port(hellcreek, port); + hellcreek_select_vlan(hellcreek, vid, pvid); + + /* Setup port vlan membership */ + hellcreek_select_vlan_params(hellcreek, port, &shift, &mask); + val = hellcreek->vidmbrcfg[vid]; + val &= ~mask; + if (untagged) + val |= HELLCREEK_VLAN_UNTAGGED_MEMBER << shift; + else + val |= HELLCREEK_VLAN_TAGGED_MEMBER << shift; + + hellcreek_write(hellcreek, val, HR_VIDMBRCFG); + hellcreek->vidmbrcfg[vid] = val; + + mutex_unlock(&hellcreek->reg_lock); +} + +static void hellcreek_unapply_vlan(struct hellcreek *hellcreek, int port, + u16 vid) +{ + int shift, mask; + u16 val; + + dev_dbg(hellcreek->dev, "Unapply VLAN: port=%d vid=%u\n", port, vid); + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_vlan(hellcreek, vid, 0); + + /* Setup port vlan membership */ + hellcreek_select_vlan_params(hellcreek, port, &shift, &mask); + val = hellcreek->vidmbrcfg[vid]; + val &= ~mask; + val |= HELLCREEK_VLAN_NO_MEMBER << shift; + + hellcreek_write(hellcreek, val, HR_VIDMBRCFG); + hellcreek->vidmbrcfg[vid] = val; + + mutex_unlock(&hellcreek->reg_lock); +} + +static void hellcreek_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + struct hellcreek *hellcreek = ds->priv; + u16 vid; + + dev_dbg(hellcreek->dev, "Add VLANs (%d -- %d) on port %d, %s, %s\n", + vlan->vid_begin, vlan->vid_end, port, + untagged ? "untagged" : "tagged", + pvid ? 
"PVID" : "no PVID"); + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) + hellcreek_apply_vlan(hellcreek, port, vid, pvid, untagged); +} + +static int hellcreek_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct hellcreek *hellcreek = ds->priv; + u16 vid; + + dev_dbg(hellcreek->dev, "Remove VLANs (%d -- %d) on port %d\n", + vlan->vid_begin, vlan->vid_end, port); + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) + hellcreek_unapply_vlan(hellcreek, port, vid); + + return 0; +} + +static void hellcreek_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + const char *new_state; + u16 val; + + mutex_lock(&hellcreek->reg_lock); + + hellcreek_port = &hellcreek->ports[port]; + val = hellcreek_port->ptcfg; + + switch (state) { + case BR_STATE_DISABLED: + new_state = "DISABLED"; + val |= HR_PTCFG_BLOCKED; + val &= ~HR_PTCFG_LEARNING_EN; + break; + case BR_STATE_BLOCKING: + new_state = "BLOCKING"; + val |= HR_PTCFG_BLOCKED; + val &= ~HR_PTCFG_LEARNING_EN; + break; + case BR_STATE_LISTENING: + new_state = "LISTENING"; + val |= HR_PTCFG_BLOCKED; + val &= ~HR_PTCFG_LEARNING_EN; + break; + case BR_STATE_LEARNING: + new_state = "LEARNING"; + val |= HR_PTCFG_BLOCKED; + val |= HR_PTCFG_LEARNING_EN; + break; + case BR_STATE_FORWARDING: + new_state = "FORWARDING"; + val &= ~HR_PTCFG_BLOCKED; + val |= HR_PTCFG_LEARNING_EN; + break; + default: + new_state = "UNKNOWN"; + } + + hellcreek_select_port(hellcreek, port); + hellcreek_write(hellcreek, val, HR_PTCFG); + hellcreek_port->ptcfg = val; + + mutex_unlock(&hellcreek->reg_lock); + + dev_dbg(hellcreek->dev, "Configured STP state for port %d: %s\n", + port, new_state); +} + +static void hellcreek_setup_ingressflt(struct hellcreek *hellcreek, int port, + bool enable) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + u16 ptcfg; + + mutex_lock(&hellcreek->reg_lock); + + ptcfg = hellcreek_port->ptcfg; + + if (enable) + ptcfg |= HR_PTCFG_INGRESSFLT; + else + ptcfg &= ~HR_PTCFG_INGRESSFLT; + + hellcreek_select_port(hellcreek, port); + hellcreek_write(hellcreek, ptcfg, HR_PTCFG); + hellcreek_port->ptcfg = ptcfg; + + mutex_unlock(&hellcreek->reg_lock); +} + +static void hellcreek_setup_vlan_awareness(struct hellcreek *hellcreek, + bool enable) +{ + u16 swcfg; + + mutex_lock(&hellcreek->reg_lock); + + swcfg = hellcreek->swcfg; + + if (enable) + swcfg |= HR_SWCFG_VLAN_UNAWARE; + else + swcfg &= ~HR_SWCFG_VLAN_UNAWARE; + + hellcreek_write(hellcreek, swcfg, HR_SWCFG); + + mutex_unlock(&hellcreek->reg_lock); +} + +/* Default setup for DSA: VLAN <X>: CPU and Port <X> egress untagged. 
*/ +static void hellcreek_setup_vlan_membership(struct dsa_switch *ds, int port, + bool enabled) +{ + const u16 vid = hellcreek_private_vid(port); + int upstream = dsa_upstream_port(ds, port); + struct hellcreek *hellcreek = ds->priv; + + /* Apply vid to port as egress untagged and port vlan id */ + if (enabled) + hellcreek_apply_vlan(hellcreek, port, vid, true, true); + else + hellcreek_unapply_vlan(hellcreek, port, vid); + + /* Apply vid to cpu port as well */ + if (enabled) + hellcreek_apply_vlan(hellcreek, upstream, vid, false, true); + else + hellcreek_unapply_vlan(hellcreek, upstream, vid); +} + +static int hellcreek_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *br) +{ + struct hellcreek *hellcreek = ds->priv; + + dev_dbg(hellcreek->dev, "Port %d joins a bridge\n", port); + + /* When joining a vlan_filtering bridge, keep the switch VLAN aware */ + if (!ds->vlan_filtering) + hellcreek_setup_vlan_awareness(hellcreek, false); + + /* Drop private vlans */ + hellcreek_setup_vlan_membership(ds, port, false); + + return 0; +} + +static void hellcreek_port_bridge_leave(struct dsa_switch *ds, int port, + struct net_device *br) +{ + struct hellcreek *hellcreek = ds->priv; + + dev_dbg(hellcreek->dev, "Port %d leaves a bridge\n", port); + + /* Enable VLAN awareness */ + hellcreek_setup_vlan_awareness(hellcreek, true); + + /* Enable private vlans */ + hellcreek_setup_vlan_membership(ds, port, true); +} + +static int __hellcreek_fdb_add(struct hellcreek *hellcreek, + const struct hellcreek_fdb_entry *entry) +{ + u16 meta = 0; + + dev_dbg(hellcreek->dev, "Add static FDB entry: MAC=%pM, MASK=0x%02x, " + "OBT=%d, REPRIO_EN=%d, PRIO=%d\n", entry->mac, entry->portmask, + entry->is_obt, entry->reprio_en, entry->reprio_tc); + + /* Add mac address */ + hellcreek_write(hellcreek, entry->mac[1] | (entry->mac[0] << 8), HR_FDBWDH); + hellcreek_write(hellcreek, entry->mac[3] | (entry->mac[2] << 8), HR_FDBWDM); + hellcreek_write(hellcreek, entry->mac[5] | (entry->mac[4] << 8), HR_FDBWDL); + + /* Meta data */ + meta |= entry->portmask << HR_FDBWRM0_PORTMASK_SHIFT; + if (entry->is_obt) + meta |= HR_FDBWRM0_OBT; + if (entry->reprio_en) { + meta |= HR_FDBWRM0_REPRIO_EN; + meta |= entry->reprio_tc << HR_FDBWRM0_REPRIO_TC_SHIFT; + } + hellcreek_write(hellcreek, meta, HR_FDBWRM0); + + /* Commit */ + hellcreek_write(hellcreek, 0x00, HR_FDBWRCMD); + + /* Wait until done */ + return hellcreek_wait_fdb_ready(hellcreek); +} + +static int __hellcreek_fdb_del(struct hellcreek *hellcreek, + const struct hellcreek_fdb_entry *entry) +{ + dev_dbg(hellcreek->dev, "Delete FDB entry: MAC=%pM!\n", entry->mac); + + /* Delete by matching idx */ + hellcreek_write(hellcreek, entry->idx | HR_FDBWRCMD_FDBDEL, HR_FDBWRCMD); + + /* Wait until done */ + return hellcreek_wait_fdb_ready(hellcreek); +} + +/* Retrieve the index of a FDB entry by mac address. Currently we search through + * the complete table in hardware. If that's too slow, we might have to cache + * the complete FDB table in software. + */ +static int hellcreek_fdb_get(struct hellcreek *hellcreek, + const unsigned char *dest, + struct hellcreek_fdb_entry *entry) +{ + size_t i; + + /* Set read pointer to zero: The read of HR_FDBMAX (read-only register) + * should reset the internal pointer. But, that doesn't work. The vendor + * suggested a subsequent write as workaround. Same for HR_FDBRDH below. 
+ */ + hellcreek_read(hellcreek, HR_FDBMAX); + hellcreek_write(hellcreek, 0x00, HR_FDBMAX); + + /* We have to read the complete table, because the switch/driver might + * enter new entries anywhere. + */ + for (i = 0; i < hellcreek->fdb_entries; ++i) { + unsigned char addr[ETH_ALEN]; + u16 meta, mac; + + meta = hellcreek_read(hellcreek, HR_FDBMDRD); + mac = hellcreek_read(hellcreek, HR_FDBRDL); + addr[5] = mac & 0xff; + addr[4] = (mac & 0xff00) >> 8; + mac = hellcreek_read(hellcreek, HR_FDBRDM); + addr[3] = mac & 0xff; + addr[2] = (mac & 0xff00) >> 8; + mac = hellcreek_read(hellcreek, HR_FDBRDH); + addr[1] = mac & 0xff; + addr[0] = (mac & 0xff00) >> 8; + + /* Force next entry */ + hellcreek_write(hellcreek, 0x00, HR_FDBRDH); + + if (memcmp(addr, dest, ETH_ALEN)) + continue; + + /* Match found */ + entry->idx = i; + entry->portmask = (meta & HR_FDBMDRD_PORTMASK_MASK) >> + HR_FDBMDRD_PORTMASK_SHIFT; + entry->age = (meta & HR_FDBMDRD_AGE_MASK) >> + HR_FDBMDRD_AGE_SHIFT; + entry->is_obt = !!(meta & HR_FDBMDRD_OBT); + entry->pass_blocked = !!(meta & HR_FDBMDRD_PASS_BLOCKED); + entry->is_static = !!(meta & HR_FDBMDRD_STATIC); + entry->reprio_tc = (meta & HR_FDBMDRD_REPRIO_TC_MASK) >> + HR_FDBMDRD_REPRIO_TC_SHIFT; + entry->reprio_en = !!(meta & HR_FDBMDRD_REPRIO_EN); + memcpy(entry->mac, addr, sizeof(addr)); + + return 0; + } + + return -ENOENT; +} + +static int hellcreek_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) +{ + struct hellcreek_fdb_entry entry = { 0 }; + struct hellcreek *hellcreek = ds->priv; + int ret; + + dev_dbg(hellcreek->dev, "Add FDB entry for MAC=%pM\n", addr); + + mutex_lock(&hellcreek->reg_lock); + + ret = hellcreek_fdb_get(hellcreek, addr, &entry); + if (ret) { + /* Not found */ + memcpy(entry.mac, addr, sizeof(entry.mac)); + entry.portmask = BIT(port); + + ret = __hellcreek_fdb_add(hellcreek, &entry); + if (ret) { + dev_err(hellcreek->dev, "Failed to add FDB entry!\n"); + goto out; + } + } else { + /* Found */ + ret = __hellcreek_fdb_del(hellcreek, &entry); + if (ret) { + dev_err(hellcreek->dev, "Failed to delete FDB entry!\n"); + goto out; + } + + entry.portmask |= BIT(port); + + ret = __hellcreek_fdb_add(hellcreek, &entry); + if (ret) { + dev_err(hellcreek->dev, "Failed to add FDB entry!\n"); + goto out; + } + } + +out: + mutex_unlock(&hellcreek->reg_lock); + + return ret; +} + +static int hellcreek_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) +{ + struct hellcreek_fdb_entry entry = { 0 }; + struct hellcreek *hellcreek = ds->priv; + int ret; + + dev_dbg(hellcreek->dev, "Delete FDB entry for MAC=%pM\n", addr); + + mutex_lock(&hellcreek->reg_lock); + + ret = hellcreek_fdb_get(hellcreek, addr, &entry); + if (ret) { + /* Not found */ + dev_err(hellcreek->dev, "FDB entry for deletion not found!\n"); + } else { + /* Found */ + ret = __hellcreek_fdb_del(hellcreek, &entry); + if (ret) { + dev_err(hellcreek->dev, "Failed to delete FDB entry!\n"); + goto out; + } + + entry.portmask &= ~BIT(port); + + if (entry.portmask != 0x00) { + ret = __hellcreek_fdb_add(hellcreek, &entry); + if (ret) { + dev_err(hellcreek->dev, "Failed to add FDB entry!\n"); + goto out; + } + } + } + +out: + mutex_unlock(&hellcreek->reg_lock); + + return ret; +} + +static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct hellcreek *hellcreek = ds->priv; + u16 entries; + size_t i; + + mutex_lock(&hellcreek->reg_lock); + + /* Set read pointer to zero: The read of HR_FDBMAX (read-only register) 
+	 * should reset the internal pointer. But that doesn't work. The vendor
+	 * suggested a subsequent write as a workaround. Same for HR_FDBRDH below.
+	 */
+	entries = hellcreek_read(hellcreek, HR_FDBMAX);
+	hellcreek_write(hellcreek, 0x00, HR_FDBMAX);
+
+	dev_dbg(hellcreek->dev, "FDB dump for port %d, entries=%d!\n", port, entries);
+
+	/* Read table */
+	for (i = 0; i < hellcreek->fdb_entries; ++i) {
+		unsigned char null_addr[ETH_ALEN] = { 0 };
+		struct hellcreek_fdb_entry entry = { 0 };
+		u16 meta, mac;
+
+		meta = hellcreek_read(hellcreek, HR_FDBMDRD);
+		mac = hellcreek_read(hellcreek, HR_FDBRDL);
+		entry.mac[5] = mac & 0xff;
+		entry.mac[4] = (mac & 0xff00) >> 8;
+		mac = hellcreek_read(hellcreek, HR_FDBRDM);
+		entry.mac[3] = mac & 0xff;
+		entry.mac[2] = (mac & 0xff00) >> 8;
+		mac = hellcreek_read(hellcreek, HR_FDBRDH);
+		entry.mac[1] = mac & 0xff;
+		entry.mac[0] = (mac & 0xff00) >> 8;
+
+		/* Force next entry */
+		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
+
+		/* Check valid */
+		if (!memcmp(entry.mac, null_addr, ETH_ALEN))
+			continue;
+
+		entry.portmask = (meta & HR_FDBMDRD_PORTMASK_MASK) >>
+			HR_FDBMDRD_PORTMASK_SHIFT;
+		entry.is_static = !!(meta & HR_FDBMDRD_STATIC);
+
+		/* Check port mask */
+		if (!(entry.portmask & BIT(port)))
+			continue;
+
+		cb(entry.mac, 0, entry.is_static, data);
+	}
+
+	mutex_unlock(&hellcreek->reg_lock);
+
+	return 0;
+}
+
+static int hellcreek_vlan_filtering(struct dsa_switch *ds, int port,
+				    bool vlan_filtering,
+				    struct switchdev_trans *trans)
+{
+	struct hellcreek *hellcreek = ds->priv;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	dev_dbg(hellcreek->dev, "%s VLAN filtering on port %d\n",
+		vlan_filtering ? "Enable" : "Disable", port);
+
+	/* Configure port to drop packets with unknown VIDs */
+	hellcreek_setup_ingressflt(hellcreek, port, vlan_filtering);
+
+	/* Enable VLAN awareness on the switch. This is safe due to
+	 * ds->vlan_filtering_is_global.
+	 */
+	hellcreek_setup_vlan_awareness(hellcreek, vlan_filtering);
+
+	return 0;
+}
+
+static int hellcreek_enable_ip_core(struct hellcreek *hellcreek)
+{
+	int ret;
+	u16 val;
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	val = hellcreek_read(hellcreek, HR_CTRL_C);
+	val |= HR_CTRL_C_ENABLE;
+	hellcreek_write(hellcreek, val, HR_CTRL_C);
+	ret = hellcreek_wait_until_transitioned(hellcreek);
+
+	mutex_unlock(&hellcreek->reg_lock);
+
+	return ret;
+}
+
+static void hellcreek_setup_cpu_and_tunnel_port(struct hellcreek *hellcreek)
+{
+	struct hellcreek_port *tunnel_port = &hellcreek->ports[TUNNEL_PORT];
+	struct hellcreek_port *cpu_port = &hellcreek->ports[CPU_PORT];
+	u16 ptcfg = 0;
+
+	ptcfg |= HR_PTCFG_LEARNING_EN | HR_PTCFG_ADMIN_EN;
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	hellcreek_select_port(hellcreek, CPU_PORT);
+	hellcreek_write(hellcreek, ptcfg, HR_PTCFG);
+
+	hellcreek_select_port(hellcreek, TUNNEL_PORT);
+	hellcreek_write(hellcreek, ptcfg, HR_PTCFG);
+
+	cpu_port->ptcfg = ptcfg;
+	tunnel_port->ptcfg = ptcfg;
+
+	mutex_unlock(&hellcreek->reg_lock);
+}
+
+static void hellcreek_setup_tc_identity_mapping(struct hellcreek *hellcreek)
+{
+	int i;
+
+	/* The switch has multiple egress queues per port. The queue is selected
+	 * via the PCP field in the VLAN header. The switch internally deals
+	 * with traffic classes instead of PCP values and this mapping is
+	 * configurable.
+	 *
+	 * The default mapping is (PCP - TC):
+	 *	7 - 7
+	 *	6 - 6
+	 *	5 - 5
+	 *	4 - 4
+	 *	3 - 3
+	 *	2 - 1
+	 *	1 - 0
+	 *	0 - 2
+	 *
+	 * Since this is not an identity mapping, configure one explicitly below.
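+	 *
+	 * As a sketch, programming a single identity entry, e.g. for PCP 5,
+	 * amounts to:
+	 *
+	 *	hellcreek_select_prio(hellcreek, 5);
+	 *	hellcreek_write(hellcreek, 5 << HR_PRTCCFG_PCP_TC_MAP_SHIFT,
+	 *			HR_PRTCCFG);
+	 *
+	 * The loop below does exactly this for all eight PCP values.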
+ */ + + for (i = 0; i < 8; ++i) { + mutex_lock(&hellcreek->reg_lock); + + hellcreek_select_prio(hellcreek, i); + hellcreek_write(hellcreek, + i << HR_PRTCCFG_PCP_TC_MAP_SHIFT, + HR_PRTCCFG); + + mutex_unlock(&hellcreek->reg_lock); + } +} + +static int hellcreek_setup_fdb(struct hellcreek *hellcreek) +{ + static struct hellcreek_fdb_entry ptp = { + /* MAC: 01-1B-19-00-00-00 */ + .mac = { 0x01, 0x1b, 0x19, 0x00, 0x00, 0x00 }, + .portmask = 0x03, /* Management ports */ + .age = 0, + .is_obt = 0, + .pass_blocked = 0, + .is_static = 1, + .reprio_tc = 6, /* TC: 6 as per IEEE 802.1AS */ + .reprio_en = 1, + }; + static struct hellcreek_fdb_entry p2p = { + /* MAC: 01-80-C2-00-00-0E */ + .mac = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e }, + .portmask = 0x03, /* Management ports */ + .age = 0, + .is_obt = 0, + .pass_blocked = 0, + .is_static = 1, + .reprio_tc = 6, /* TC: 6 as per IEEE 802.1AS */ + .reprio_en = 1, + }; + int ret; + + mutex_lock(&hellcreek->reg_lock); + ret = __hellcreek_fdb_add(hellcreek, &ptp); + if (ret) + goto out; + ret = __hellcreek_fdb_add(hellcreek, &p2p); +out: + mutex_unlock(&hellcreek->reg_lock); + + return ret; +} + +static int hellcreek_setup(struct dsa_switch *ds) +{ + struct hellcreek *hellcreek = ds->priv; + u16 swcfg = 0; + int ret, i; + + dev_dbg(hellcreek->dev, "Set up the switch\n"); + + /* Let's go */ + ret = hellcreek_enable_ip_core(hellcreek); + if (ret) { + dev_err(hellcreek->dev, "Failed to enable IP core!\n"); + return ret; + } + + /* Enable CPU/Tunnel ports */ + hellcreek_setup_cpu_and_tunnel_port(hellcreek); + + /* Switch config: Keep defaults, enable FDB aging and learning and tag + * each frame from/to cpu port for DSA tagging. Also enable the length + * aware shaping mode. This eliminates the need for Qbv guard bands. + */ + swcfg |= HR_SWCFG_FDBAGE_EN | + HR_SWCFG_FDBLRN_EN | + HR_SWCFG_ALWAYS_OBT | + (HR_SWCFG_LAS_ON << HR_SWCFG_LAS_MODE_SHIFT); + hellcreek->swcfg = swcfg; + hellcreek_write(hellcreek, swcfg, HR_SWCFG); + + /* Initial vlan membership to reflect port separation */ + for (i = 0; i < ds->num_ports; ++i) { + if (!dsa_is_user_port(ds, i)) + continue; + + hellcreek_setup_vlan_membership(ds, i, true); + } + + /* Configure PCP <-> TC mapping */ + hellcreek_setup_tc_identity_mapping(hellcreek); + + /* Allow VLAN configurations while not filtering which is the default + * for new DSA drivers. + */ + ds->configure_vlan_while_not_filtering = true; + + /* The VLAN awareness is a global switch setting. Therefore, mixed vlan + * filtering setups are not supported. + */ + ds->vlan_filtering_is_global = true; + + /* Intercept _all_ PTP multicast traffic */ + ret = hellcreek_setup_fdb(hellcreek); + if (ret) { + dev_err(hellcreek->dev, + "Failed to insert static PTP FDB entries\n"); + return ret; + } + + return 0; +} + +static void hellcreek_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + struct hellcreek *hellcreek = ds->priv; + + dev_dbg(hellcreek->dev, "Phylink validate for port %d\n", port); + + /* The MAC settings are a hardware configuration option and cannot be + * changed at run time or by strapping. Therefore the attached PHYs + * should be programmed to only advertise settings which are supported + * by the hardware. 
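+	 *
+	 * A minimal sketch of the effect for the 100 Mbit/s variant
+	 * (pdata->is_100_mbits set):
+	 *
+	 *	phylink_set(mask, 100baseT_Full);
+	 *	bitmap_and(supported, supported, mask,
+	 *		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	 *
+	 * which leaves 100baseT_Full as the only advertisable link mode.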
+ */ + if (hellcreek->pdata->is_100_mbits) + phylink_set(mask, 100baseT_Full); + else + phylink_set(mask, 1000baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static int +hellcreek_port_prechangeupper(struct dsa_switch *ds, int port, + struct netdev_notifier_changeupper_info *info) +{ + struct hellcreek *hellcreek = ds->priv; + bool used = true; + int ret = -EBUSY; + u16 vid; + int i; + + dev_dbg(hellcreek->dev, "Pre change upper for port %d\n", port); + + /* + * Deny VLAN devices on top of lan ports with the same VLAN ids, because + * it breaks the port separation due to the private VLANs. Example: + * + * lan0.100 *and* lan1.100 cannot be used in parallel. However, lan0.99 + * and lan1.100 works. + */ + + if (!is_vlan_dev(info->upper_dev)) + return 0; + + vid = vlan_dev_vlan_id(info->upper_dev); + + /* For all ports, check bitmaps */ + mutex_lock(&hellcreek->vlan_lock); + for (i = 0; i < hellcreek->pdata->num_ports; ++i) { + if (!dsa_is_user_port(ds, i)) + continue; + + if (port == i) + continue; + + used = used && test_bit(vid, hellcreek->ports[i].vlan_dev_bitmap); + } + + if (used) + goto out; + + /* Update bitmap */ + set_bit(vid, hellcreek->ports[port].vlan_dev_bitmap); + + ret = 0; + +out: + mutex_unlock(&hellcreek->vlan_lock); + + return ret; +} + +static const struct dsa_switch_ops hellcreek_ds_ops = { + .get_ethtool_stats = hellcreek_get_ethtool_stats, + .get_sset_count = hellcreek_get_sset_count, + .get_strings = hellcreek_get_strings, + .get_tag_protocol = hellcreek_get_tag_protocol, + .get_ts_info = hellcreek_get_ts_info, + .phylink_validate = hellcreek_phylink_validate, + .port_bridge_join = hellcreek_port_bridge_join, + .port_bridge_leave = hellcreek_port_bridge_leave, + .port_disable = hellcreek_port_disable, + .port_enable = hellcreek_port_enable, + .port_fdb_add = hellcreek_fdb_add, + .port_fdb_del = hellcreek_fdb_del, + .port_fdb_dump = hellcreek_fdb_dump, + .port_hwtstamp_set = hellcreek_port_hwtstamp_set, + .port_hwtstamp_get = hellcreek_port_hwtstamp_get, + .port_prechangeupper = hellcreek_port_prechangeupper, + .port_rxtstamp = hellcreek_port_rxtstamp, + .port_stp_state_set = hellcreek_port_stp_state_set, + .port_txtstamp = hellcreek_port_txtstamp, + .port_vlan_add = hellcreek_vlan_add, + .port_vlan_del = hellcreek_vlan_del, + .port_vlan_filtering = hellcreek_vlan_filtering, + .port_vlan_prepare = hellcreek_vlan_prepare, + .setup = hellcreek_setup, +}; + +static int hellcreek_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct hellcreek *hellcreek; + struct resource *res; + int ret, i; + + hellcreek = devm_kzalloc(dev, sizeof(*hellcreek), GFP_KERNEL); + if (!hellcreek) + return -ENOMEM; + + hellcreek->vidmbrcfg = devm_kcalloc(dev, VLAN_N_VID, + sizeof(*hellcreek->vidmbrcfg), + GFP_KERNEL); + if (!hellcreek->vidmbrcfg) + return -ENOMEM; + + hellcreek->pdata = of_device_get_match_data(dev); + + hellcreek->ports = devm_kcalloc(dev, hellcreek->pdata->num_ports, + sizeof(*hellcreek->ports), + GFP_KERNEL); + if (!hellcreek->ports) + return -ENOMEM; + + for (i = 0; i < hellcreek->pdata->num_ports; ++i) { + struct hellcreek_port *port = &hellcreek->ports[i]; + + port->counter_values = + devm_kcalloc(dev, + ARRAY_SIZE(hellcreek_counter), + sizeof(*port->counter_values), + GFP_KERNEL); + if (!port->counter_values) + return -ENOMEM; + + port->vlan_dev_bitmap = + devm_kcalloc(dev, + BITS_TO_LONGS(VLAN_N_VID), 
+ sizeof(unsigned long), + GFP_KERNEL); + if (!port->vlan_dev_bitmap) + return -ENOMEM; + + port->hellcreek = hellcreek; + port->port = i; + } + + mutex_init(&hellcreek->reg_lock); + mutex_init(&hellcreek->vlan_lock); + mutex_init(&hellcreek->ptp_lock); + + hellcreek->dev = dev; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tsn"); + if (!res) { + dev_err(dev, "No memory region provided!\n"); + return -ENODEV; + } + + hellcreek->base = devm_ioremap_resource(dev, res); + if (IS_ERR(hellcreek->base)) { + dev_err(dev, "No memory available!\n"); + return PTR_ERR(hellcreek->base); + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ptp"); + if (!res) { + dev_err(dev, "No PTP memory region provided!\n"); + return -ENODEV; + } + + hellcreek->ptp_base = devm_ioremap_resource(dev, res); + if (IS_ERR(hellcreek->ptp_base)) { + dev_err(dev, "No memory available!\n"); + return PTR_ERR(hellcreek->ptp_base); + } + + ret = hellcreek_detect(hellcreek); + if (ret) { + dev_err(dev, "No (known) chip found!\n"); + return ret; + } + + ret = hellcreek_wait_until_ready(hellcreek); + if (ret) { + dev_err(dev, "Switch didn't become ready!\n"); + return ret; + } + + hellcreek_feature_detect(hellcreek); + + hellcreek->ds = devm_kzalloc(dev, sizeof(*hellcreek->ds), GFP_KERNEL); + if (!hellcreek->ds) + return -ENOMEM; + + hellcreek->ds->dev = dev; + hellcreek->ds->priv = hellcreek; + hellcreek->ds->ops = &hellcreek_ds_ops; + hellcreek->ds->num_ports = hellcreek->pdata->num_ports; + hellcreek->ds->num_tx_queues = HELLCREEK_NUM_EGRESS_QUEUES; + + ret = dsa_register_switch(hellcreek->ds); + if (ret) { + dev_err(dev, "Unable to register switch\n"); + return ret; + } + + ret = hellcreek_ptp_setup(hellcreek); + if (ret) { + dev_err(dev, "Failed to setup PTP!\n"); + goto err_ptp_setup; + } + + ret = hellcreek_hwtstamp_setup(hellcreek); + if (ret) { + dev_err(dev, "Failed to setup hardware timestamping!\n"); + goto err_tstamp_setup; + } + + platform_set_drvdata(pdev, hellcreek); + + return 0; + +err_tstamp_setup: + hellcreek_ptp_free(hellcreek); +err_ptp_setup: + dsa_unregister_switch(hellcreek->ds); + + return ret; +} + +static int hellcreek_remove(struct platform_device *pdev) +{ + struct hellcreek *hellcreek = platform_get_drvdata(pdev); + + hellcreek_hwtstamp_free(hellcreek); + hellcreek_ptp_free(hellcreek); + dsa_unregister_switch(hellcreek->ds); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const struct hellcreek_platform_data de1soc_r1_pdata = { + .num_ports = 4, + .is_100_mbits = 1, + .qbv_support = 1, + .qbv_on_cpu_port = 1, + .qbu_support = 0, + .module_id = 0x4c30, +}; + +static const struct of_device_id hellcreek_of_match[] = { + { + .compatible = "hirschmann,hellcreek-de1soc-r1", + .data = &de1soc_r1_pdata, + }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, hellcreek_of_match); + +static struct platform_driver hellcreek_driver = { + .probe = hellcreek_probe, + .remove = hellcreek_remove, + .driver = { + .name = "hellcreek", + .of_match_table = hellcreek_of_match, + }, +}; +module_platform_driver(hellcreek_driver); + +MODULE_AUTHOR("Kurt Kanzenbach <kurt@linutronix.de>"); +MODULE_DESCRIPTION("Hirschmann Hellcreek driver"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h new file mode 100644 index 000000000000..e81781ebc31c --- /dev/null +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -0,0 +1,286 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * DSA driver for: + * Hirschmann 
Hellcreek TSN switch. + * + * Copyright (C) 2019,2020 Linutronix GmbH + * Author Kurt Kanzenbach <kurt@linutronix.de> + */ + +#ifndef _HELLCREEK_H_ +#define _HELLCREEK_H_ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/leds.h> +#include <linux/platform_data/hirschmann-hellcreek.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> +#include <net/dsa.h> + +/* Ports: + * - 0: CPU + * - 1: Tunnel + * - 2: TSN front port 1 + * - 3: TSN front port 2 + * - ... + */ +#define CPU_PORT 0 +#define TUNNEL_PORT 1 + +#define HELLCREEK_VLAN_NO_MEMBER 0x0 +#define HELLCREEK_VLAN_UNTAGGED_MEMBER 0x1 +#define HELLCREEK_VLAN_TAGGED_MEMBER 0x3 +#define HELLCREEK_NUM_EGRESS_QUEUES 8 + +/* Register definitions */ +#define HR_MODID_C (0 * 2) +#define HR_REL_L_C (1 * 2) +#define HR_REL_H_C (2 * 2) +#define HR_BLD_L_C (3 * 2) +#define HR_BLD_H_C (4 * 2) +#define HR_CTRL_C (5 * 2) +#define HR_CTRL_C_READY BIT(14) +#define HR_CTRL_C_TRANSITION BIT(13) +#define HR_CTRL_C_ENABLE BIT(0) + +#define HR_PSEL (0xa6 * 2) +#define HR_PSEL_PTWSEL_SHIFT 4 +#define HR_PSEL_PTWSEL_MASK GENMASK(5, 4) +#define HR_PSEL_PRTCWSEL_SHIFT 0 +#define HR_PSEL_PRTCWSEL_MASK GENMASK(2, 0) + +#define HR_PTCFG (0xa7 * 2) +#define HR_PTCFG_MLIMIT_EN BIT(13) +#define HR_PTCFG_UMC_FLT BIT(10) +#define HR_PTCFG_UUC_FLT BIT(9) +#define HR_PTCFG_UNTRUST BIT(8) +#define HR_PTCFG_TAG_REQUIRED BIT(7) +#define HR_PTCFG_PPRIO_SHIFT 4 +#define HR_PTCFG_PPRIO_MASK GENMASK(6, 4) +#define HR_PTCFG_INGRESSFLT BIT(3) +#define HR_PTCFG_BLOCKED BIT(2) +#define HR_PTCFG_LEARNING_EN BIT(1) +#define HR_PTCFG_ADMIN_EN BIT(0) + +#define HR_PRTCCFG (0xa8 * 2) +#define HR_PRTCCFG_PCP_TC_MAP_SHIFT 0 +#define HR_PRTCCFG_PCP_TC_MAP_MASK GENMASK(2, 0) + +#define HR_CSEL (0x8d * 2) +#define HR_CSEL_SHIFT 0 +#define HR_CSEL_MASK GENMASK(7, 0) +#define HR_CRDL (0x8e * 2) +#define HR_CRDH (0x8f * 2) + +#define HR_SWTRC_CFG (0x90 * 2) +#define HR_SWTRC0 (0x91 * 2) +#define HR_SWTRC1 (0x92 * 2) +#define HR_PFREE (0x93 * 2) +#define HR_MFREE (0x94 * 2) + +#define HR_FDBAGE (0x97 * 2) +#define HR_FDBMAX (0x98 * 2) +#define HR_FDBRDL (0x99 * 2) +#define HR_FDBRDM (0x9a * 2) +#define HR_FDBRDH (0x9b * 2) + +#define HR_FDBMDRD (0x9c * 2) +#define HR_FDBMDRD_PORTMASK_SHIFT 0 +#define HR_FDBMDRD_PORTMASK_MASK GENMASK(3, 0) +#define HR_FDBMDRD_AGE_SHIFT 4 +#define HR_FDBMDRD_AGE_MASK GENMASK(7, 4) +#define HR_FDBMDRD_OBT BIT(8) +#define HR_FDBMDRD_PASS_BLOCKED BIT(9) +#define HR_FDBMDRD_STATIC BIT(11) +#define HR_FDBMDRD_REPRIO_TC_SHIFT 12 +#define HR_FDBMDRD_REPRIO_TC_MASK GENMASK(14, 12) +#define HR_FDBMDRD_REPRIO_EN BIT(15) + +#define HR_FDBWDL (0x9d * 2) +#define HR_FDBWDM (0x9e * 2) +#define HR_FDBWDH (0x9f * 2) +#define HR_FDBWRM0 (0xa0 * 2) +#define HR_FDBWRM0_PORTMASK_SHIFT 0 +#define HR_FDBWRM0_PORTMASK_MASK GENMASK(3, 0) +#define HR_FDBWRM0_OBT BIT(8) +#define HR_FDBWRM0_PASS_BLOCKED BIT(9) +#define HR_FDBWRM0_REPRIO_TC_SHIFT 12 +#define HR_FDBWRM0_REPRIO_TC_MASK GENMASK(14, 12) +#define HR_FDBWRM0_REPRIO_EN BIT(15) +#define HR_FDBWRM1 (0xa1 * 2) + +#define HR_FDBWRCMD (0xa2 * 2) +#define HR_FDBWRCMD_FDBDEL BIT(9) + +#define HR_SWCFG (0xa3 * 2) +#define HR_SWCFG_GM_STATEMD BIT(15) +#define HR_SWCFG_LAS_MODE_SHIFT 12 +#define HR_SWCFG_LAS_MODE_MASK GENMASK(13, 12) +#define HR_SWCFG_LAS_OFF (0x00) +#define HR_SWCFG_LAS_ON (0x01) +#define HR_SWCFG_LAS_STATIC (0x10) +#define HR_SWCFG_CT_EN BIT(11) +#define HR_SWCFG_VLAN_UNAWARE 
BIT(10)
+#define HR_SWCFG_ALWAYS_OBT			BIT(9)
+#define HR_SWCFG_FDBAGE_EN			BIT(5)
+#define HR_SWCFG_FDBLRN_EN			BIT(4)
+
+#define HR_SWSTAT				(0xa4 * 2)
+#define HR_SWSTAT_FAIL				BIT(4)
+#define HR_SWSTAT_BUSY				BIT(0)
+
+#define HR_SWCMD				(0xa5 * 2)
+#define HW_SWCMD_FLUSH				BIT(0)
+
+#define HR_VIDCFG				(0xaa * 2)
+#define HR_VIDCFG_VID_SHIFT			0
+#define HR_VIDCFG_VID_MASK			GENMASK(11, 0)
+#define HR_VIDCFG_PVID				BIT(12)
+
+#define HR_VIDMBRCFG				(0xab * 2)
+#define HR_VIDMBRCFG_P0MBR_SHIFT		0
+#define HR_VIDMBRCFG_P0MBR_MASK			GENMASK(1, 0)
+#define HR_VIDMBRCFG_P1MBR_SHIFT		2
+#define HR_VIDMBRCFG_P1MBR_MASK			GENMASK(3, 2)
+#define HR_VIDMBRCFG_P2MBR_SHIFT		4
+#define HR_VIDMBRCFG_P2MBR_MASK			GENMASK(5, 4)
+#define HR_VIDMBRCFG_P3MBR_SHIFT		6
+#define HR_VIDMBRCFG_P3MBR_MASK			GENMASK(7, 6)
+
+#define HR_FEABITS0				(0xac * 2)
+#define HR_FEABITS0_FDBBINS_SHIFT		4
+#define HR_FEABITS0_FDBBINS_MASK		GENMASK(7, 4)
+#define HR_FEABITS0_PCNT_SHIFT			8
+#define HR_FEABITS0_PCNT_MASK			GENMASK(11, 8)
+#define HR_FEABITS0_MCNT_SHIFT			12
+#define HR_FEABITS0_MCNT_MASK			GENMASK(15, 12)
+
+#define TR_QTRACK				(0xb1 * 2)
+#define TR_TGDVER				(0xb3 * 2)
+#define TR_TGDVER_REV_MIN_MASK			GENMASK(7, 0)
+#define TR_TGDVER_REV_MIN_SHIFT			0
+#define TR_TGDVER_REV_MAJ_MASK			GENMASK(15, 8)
+#define TR_TGDVER_REV_MAJ_SHIFT			8
+#define TR_TGDSEL				(0xb4 * 2)
+#define TR_TGDSEL_TDGSEL_MASK			GENMASK(1, 0)
+#define TR_TGDSEL_TDGSEL_SHIFT			0
+#define TR_TGDCTRL				(0xb5 * 2)
+#define TR_TGDCTRL_GATE_EN			BIT(0)
+#define TR_TGDCTRL_CYC_SNAP			BIT(4)
+#define TR_TGDCTRL_SNAP_EST			BIT(5)
+#define TR_TGDCTRL_ADMINGATESTATES_MASK		GENMASK(15, 8)
+#define TR_TGDCTRL_ADMINGATESTATES_SHIFT	8
+#define TR_TGDSTAT0				(0xb6 * 2)
+#define TR_TGDSTAT1				(0xb7 * 2)
+#define TR_ESTWRL				(0xb8 * 2)
+#define TR_ESTWRH				(0xb9 * 2)
+#define TR_ESTCMD				(0xba * 2)
+#define TR_ESTCMD_ESTSEC_MASK			GENMASK(2, 0)
+#define TR_ESTCMD_ESTSEC_SHIFT			0
+#define TR_ESTCMD_ESTARM			BIT(4)
+#define TR_ESTCMD_ESTSWCFG			BIT(5)
+#define TR_EETWRL				(0xbb * 2)
+#define TR_EETWRH				(0xbc * 2)
+#define TR_EETCMD				(0xbd * 2)
+#define TR_EETCMD_EETSEC_MASK			GENMASK(2, 0)
+#define TR_EETCMD_EETSEC_SHIFT			0
+#define TR_EETCMD_EETARM			BIT(4)
+#define TR_CTWRL				(0xbe * 2)
+#define TR_CTWRH				(0xbf * 2)
+#define TR_LCNSL				(0xc1 * 2)
+#define TR_LCNSH				(0xc2 * 2)
+#define TR_LCS					(0xc3 * 2)
+#define TR_GCLDAT				(0xc4 * 2)
+#define TR_GCLDAT_GCLWRGATES_MASK		GENMASK(7, 0)
+#define TR_GCLDAT_GCLWRGATES_SHIFT		0
+#define TR_GCLDAT_GCLWRLAST			BIT(8)
+#define TR_GCLDAT_GCLOVRI			BIT(9)
+#define TR_GCLTIL				(0xc5 * 2)
+#define TR_GCLTIH				(0xc6 * 2)
+#define TR_GCLCMD				(0xc7 * 2)
+#define TR_GCLCMD_GCLWRADR_MASK			GENMASK(7, 0)
+#define TR_GCLCMD_GCLWRADR_SHIFT		0
+#define TR_GCLCMD_INIT_GATE_STATES_MASK		GENMASK(15, 8)
+#define TR_GCLCMD_INIT_GATE_STATES_SHIFT	8
+
+struct hellcreek_counter {
+	u8 offset;
+	const char *name;
+};
+
+struct hellcreek;
+
+/* State flags for hellcreek_port_hwtstamp::state */
+enum {
+	HELLCREEK_HWTSTAMP_ENABLED,
+	HELLCREEK_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+/* A structure to hold hardware timestamping information per port */
+struct hellcreek_port_hwtstamp {
+	/* Timestamping state */
+	unsigned long state;
+
+	/* Resources for receive timestamping */
+	struct sk_buff_head rx_queue; /* For synchronization messages */
+
+	/* Resources for transmit timestamping */
+	unsigned long tx_tstamp_start;
+	struct sk_buff *tx_skb;
+
+	/* Current timestamp configuration */
+	struct hwtstamp_config tstamp_config;
+};
+
+struct hellcreek_port {
+	struct hellcreek *hellcreek;
+	unsigned long *vlan_dev_bitmap;
+	int port;
+	u16 ptcfg;	/* ptcfg shadow */
+	
u64 *counter_values; + + /* Per-port timestamping resources */ + struct hellcreek_port_hwtstamp port_hwtstamp; +}; + +struct hellcreek_fdb_entry { + size_t idx; + unsigned char mac[ETH_ALEN]; + u8 portmask; + u8 age; + u8 is_obt; + u8 pass_blocked; + u8 is_static; + u8 reprio_tc; + u8 reprio_en; +}; + +struct hellcreek { + const struct hellcreek_platform_data *pdata; + struct device *dev; + struct dsa_switch *ds; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + struct hellcreek_port *ports; + struct delayed_work overflow_work; + struct led_classdev led_is_gm; + struct led_classdev led_sync_good; + struct mutex reg_lock; /* Switch IP register lock */ + struct mutex vlan_lock; /* VLAN bitmaps lock */ + struct mutex ptp_lock; /* PTP IP register lock */ + void __iomem *base; + void __iomem *ptp_base; + u16 swcfg; /* swcfg shadow */ + u8 *vidmbrcfg; /* vidmbrcfg shadow */ + u64 seconds; /* PTP seconds */ + u64 last_ts; /* Used for overflow detection */ + u16 status_out; /* ptp.status_out shadow */ + size_t fdb_entries; +}; + +#endif /* _HELLCREEK_H_ */ diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c new file mode 100644 index 000000000000..69dd9a2e8bb6 --- /dev/null +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * DSA driver for: + * Hirschmann Hellcreek TSN switch. + * + * Copyright (C) 2019,2020 Hochschule Offenburg + * Copyright (C) 2019,2020 Linutronix GmbH + * Authors: Kamil Alkhouri <kamil.alkhouri@hs-offenburg.de> + * Kurt Kanzenbach <kurt@linutronix.de> + */ + +#include <linux/ptp_classify.h> + +#include "hellcreek.h" +#include "hellcreek_hwtstamp.h" +#include "hellcreek_ptp.h" + +int hellcreek_get_ts_info(struct dsa_switch *ds, int port, + struct ethtool_ts_info *info) +{ + struct hellcreek *hellcreek = ds->priv; + + info->phc_index = hellcreek->ptp_clock ? + ptp_clock_index(hellcreek->ptp_clock) : -1; + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + /* enabled tx timestamping */ + info->tx_types = BIT(HWTSTAMP_TX_ON); + + /* L2 & L4 PTPv2 event rx messages are timestamped */ + info->rx_filters = BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + +/* Enabling/disabling TX and RX HW timestamping for different PTP messages is + * not available in the switch. Thus, this function only serves as a check if + * the user requested what is actually available or not + */ +static int hellcreek_set_hwtstamp_config(struct hellcreek *hellcreek, int port, + struct hwtstamp_config *config) +{ + struct hellcreek_port_hwtstamp *ps = + &hellcreek->ports[port].port_hwtstamp; + bool tx_tstamp_enable = false; + bool rx_tstamp_enable = false; + + /* Interaction with the timestamp hardware is prevented here. 
It is + * enabled when this config function ends successfully + */ + clear_bit_unlock(HELLCREEK_HWTSTAMP_ENABLED, &ps->state); + + /* Reserved for future extensions */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_ON: + tx_tstamp_enable = true; + break; + + /* TX HW timestamping can't be disabled on the switch */ + case HWTSTAMP_TX_OFF: + config->tx_type = HWTSTAMP_TX_ON; + break; + + default: + return -ERANGE; + } + + switch (config->rx_filter) { + /* RX HW timestamping can't be disabled on the switch */ + case HWTSTAMP_FILTER_NONE: + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + break; + + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + rx_tstamp_enable = true; + break; + + /* RX HW timestamping can't be enabled for all messages on the switch */ + case HWTSTAMP_FILTER_ALL: + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + break; + + default: + return -ERANGE; + } + + if (!tx_tstamp_enable) + return -ERANGE; + + if (!rx_tstamp_enable) + return -ERANGE; + + /* If this point is reached, then the requested hwtstamp config is + * compatible with the hwtstamp offered by the switch. Therefore, + * enable the interaction with the HW timestamping + */ + set_bit(HELLCREEK_HWTSTAMP_ENABLED, &ps->state); + + return 0; +} + +int hellcreek_port_hwtstamp_set(struct dsa_switch *ds, int port, + struct ifreq *ifr) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port_hwtstamp *ps; + struct hwtstamp_config config; + int err; + + ps = &hellcreek->ports[port].port_hwtstamp; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = hellcreek_set_hwtstamp_config(hellcreek, port, &config); + if (err) + return err; + + /* Save the chosen configuration to be returned later */ + memcpy(&ps->tstamp_config, &config, sizeof(config)); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port, + struct ifreq *ifr) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port_hwtstamp *ps; + struct hwtstamp_config *config; + + ps = &hellcreek->ports[port].port_hwtstamp; + config = &ps->tstamp_config; + + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? + -EFAULT : 0; +} + +/* Returns a pointer to the PTP header if the caller should time stamp, or NULL + * if the caller should not. 
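+ *
+ * Intended usage in the timestamp hooks below:
+ *
+ *	hdr = hellcreek_should_tstamp(hellcreek, port, skb, type);
+ *	if (!hdr)
+ *		return false;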
+ */ +static struct ptp_header *hellcreek_should_tstamp(struct hellcreek *hellcreek, + int port, struct sk_buff *skb, + unsigned int type) +{ + struct hellcreek_port_hwtstamp *ps = + &hellcreek->ports[port].port_hwtstamp; + struct ptp_header *hdr; + + hdr = ptp_parse_header(skb, type); + if (!hdr) + return NULL; + + if (!test_bit(HELLCREEK_HWTSTAMP_ENABLED, &ps->state)) + return NULL; + + return hdr; +} + +static u64 hellcreek_get_reserved_field(const struct ptp_header *hdr) +{ + return be32_to_cpu(hdr->reserved2); +} + +static void hellcreek_clear_reserved_field(struct ptp_header *hdr) +{ + hdr->reserved2 = 0; +} + +static int hellcreek_ptp_hwtstamp_available(struct hellcreek *hellcreek, + unsigned int ts_reg) +{ + u16 status; + + status = hellcreek_ptp_read(hellcreek, ts_reg); + + if (status & PR_TS_STATUS_TS_LOST) + dev_err(hellcreek->dev, + "Tx time stamp lost! This should never happen!\n"); + + /* If hwtstamp is not available, this means the previous hwtstamp was + * successfully read, and the one we need is not yet available + */ + return (status & PR_TS_STATUS_TS_AVAIL) ? 1 : 0; +} + +/* Get nanoseconds timestamp from timestamping unit */ +static u64 hellcreek_ptp_hwtstamp_read(struct hellcreek *hellcreek, + unsigned int ts_reg) +{ + u16 nsl, nsh; + + nsh = hellcreek_ptp_read(hellcreek, ts_reg); + nsh = hellcreek_ptp_read(hellcreek, ts_reg); + nsh = hellcreek_ptp_read(hellcreek, ts_reg); + nsh = hellcreek_ptp_read(hellcreek, ts_reg); + nsl = hellcreek_ptp_read(hellcreek, ts_reg); + + return (u64)nsl | ((u64)nsh << 16); +} + +static int hellcreek_txtstamp_work(struct hellcreek *hellcreek, + struct hellcreek_port_hwtstamp *ps, int port) +{ + struct skb_shared_hwtstamps shhwtstamps; + unsigned int status_reg, data_reg; + struct sk_buff *tmp_skb; + int ts_status; + u64 ns = 0; + + if (!ps->tx_skb) + return 0; + + switch (port) { + case 2: + status_reg = PR_TS_TX_P1_STATUS_C; + data_reg = PR_TS_TX_P1_DATA_C; + break; + case 3: + status_reg = PR_TS_TX_P2_STATUS_C; + data_reg = PR_TS_TX_P2_DATA_C; + break; + default: + dev_err(hellcreek->dev, "Wrong port for timestamping!\n"); + return 0; + } + + ts_status = hellcreek_ptp_hwtstamp_available(hellcreek, status_reg); + + /* Not available yet? */ + if (ts_status == 0) { + /* Check whether the operation of reading the tx timestamp has + * exceeded its allowed period + */ + if (time_is_before_jiffies(ps->tx_tstamp_start + + TX_TSTAMP_TIMEOUT)) { + dev_err(hellcreek->dev, + "Timeout while waiting for Tx timestamp!\n"); + goto free_and_clear_skb; + } + + /* The timestamp should be available quickly, while getting it + * in high priority. Restart the work + */ + return 1; + } + + mutex_lock(&hellcreek->ptp_lock); + ns = hellcreek_ptp_hwtstamp_read(hellcreek, data_reg); + ns += hellcreek_ptp_gettime_seconds(hellcreek, ns); + mutex_unlock(&hellcreek->ptp_lock); + + /* Now we have the timestamp in nanoseconds, store it in the correct + * structure in order to send it to the user + */ + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + + tmp_skb = ps->tx_skb; + ps->tx_skb = NULL; + + /* skb_complete_tx_timestamp() frees up the client to make another + * timestampable transmit. 
We have to be ready for it by clearing the
+	 * ps->tx_skb "flag" beforehand
+	 */
+	clear_bit_unlock(HELLCREEK_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+	/* Deliver a clone of the original outgoing tx_skb with tx hwtstamp */
+	skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+	return 0;
+
+free_and_clear_skb:
+	dev_kfree_skb_any(ps->tx_skb);
+	ps->tx_skb = NULL;
+	clear_bit_unlock(HELLCREEK_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+	return 0;
+}
+
+static void hellcreek_get_rxts(struct hellcreek *hellcreek,
+			       struct hellcreek_port_hwtstamp *ps,
+			       struct sk_buff *skb, struct sk_buff_head *rxq,
+			       int port)
+{
+	struct skb_shared_hwtstamps *shwt;
+	struct sk_buff_head received;
+	unsigned long flags;
+
+	/* The latched timestamp belongs to one of the received frames. */
+	__skb_queue_head_init(&received);
+
+	/* Lock & disable interrupts */
+	spin_lock_irqsave(&rxq->lock, flags);
+
+	/* Add the reception queue "rxq" to the "received" queue and
+	 * reinitialize "rxq". From now on, we deal with "received" not with
+	 * "rxq"
+	 */
+	skb_queue_splice_tail_init(rxq, &received);
+
+	spin_unlock_irqrestore(&rxq->lock, flags);
+
+	for (; skb; skb = __skb_dequeue(&received)) {
+		struct ptp_header *hdr;
+		unsigned int type;
+		u64 ns;
+
+		/* Get nanoseconds from ptp packet */
+		type = SKB_PTP_TYPE(skb);
+		hdr = ptp_parse_header(skb, type);
+		ns = hellcreek_get_reserved_field(hdr);
+		hellcreek_clear_reserved_field(hdr);
+
+		/* Add seconds part */
+		mutex_lock(&hellcreek->ptp_lock);
+		ns += hellcreek_ptp_gettime_seconds(hellcreek, ns);
+		mutex_unlock(&hellcreek->ptp_lock);
+
+		/* Save time stamp */
+		shwt = skb_hwtstamps(skb);
+		memset(shwt, 0, sizeof(*shwt));
+		shwt->hwtstamp = ns_to_ktime(ns);
+		netif_rx_ni(skb);
+	}
+}
+
+static void hellcreek_rxtstamp_work(struct hellcreek *hellcreek,
+				    struct hellcreek_port_hwtstamp *ps,
+				    int port)
+{
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&ps->rx_queue);
+	if (skb)
+		hellcreek_get_rxts(hellcreek, ps, skb, &ps->rx_queue, port);
+}
+
+long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+	struct hellcreek *hellcreek = ptp_to_hellcreek(ptp);
+	struct dsa_switch *ds = hellcreek->ds;
+	int i, restart = 0;
+
+	for (i = 0; i < ds->num_ports; i++) {
+		struct hellcreek_port_hwtstamp *ps;
+
+		if (!dsa_is_user_port(ds, i))
+			continue;
+
+		ps = &hellcreek->ports[i].port_hwtstamp;
+
+		if (test_bit(HELLCREEK_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+			restart |= hellcreek_txtstamp_work(hellcreek, ps, i);
+
+		hellcreek_rxtstamp_work(hellcreek, ps, i);
+	}
+
+	return restart ? 1 : -1;
+}
+
+bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	struct hellcreek_port_hwtstamp *ps;
+	struct ptp_header *hdr;
+
+	ps = &hellcreek->ports[port].port_hwtstamp;
+
+	/* Check if the driver is expected to do HW timestamping */
+	if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+		return false;
+
+	/* Make sure the message is a PTP message that needs to be timestamped
+	 * and the interaction with the HW timestamping is enabled. If not,
+	 * stop here.
+	 */
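+	/* Note: only one TX timestamp request may be in flight per port. If
+	 * HELLCREEK_HWTSTAMP_TX_IN_PROGRESS is still set from a previous
+	 * frame, the clone is rejected below and the stack falls back to a
+	 * software timestamp.
+	 */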
+	hdr = hellcreek_should_tstamp(hellcreek, port, clone, type);
+	if (!hdr)
+		return false;
+
+	if (test_and_set_bit_lock(HELLCREEK_HWTSTAMP_TX_IN_PROGRESS,
+				  &ps->state))
+		return false;
+
+	ps->tx_skb = clone;
+
+	/* Store the start of the TX timestamp polling period in jiffies, so
+	 * the worker can detect a timeout
+	 */
+	ps->tx_tstamp_start = jiffies;
+
+	ptp_schedule_worker(hellcreek->ptp_clock, 0);
+
+	return true;
+}
+
+bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb, unsigned int type)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	struct hellcreek_port_hwtstamp *ps;
+	struct ptp_header *hdr;
+
+	ps = &hellcreek->ports[port].port_hwtstamp;
+
+	/* This check only fails if the user did not initialize hardware
+	 * timestamping beforehand.
+	 */
+	if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+		return false;
+
+	/* Make sure the message is a PTP message that needs to be timestamped
+	 * and the interaction with the HW timestamping is enabled. If not,
+	 * stop here.
+	 */
+	hdr = hellcreek_should_tstamp(hellcreek, port, skb, type);
+	if (!hdr)
+		return false;
+
+	SKB_PTP_TYPE(skb) = type;
+
+	skb_queue_tail(&ps->rx_queue, skb);
+
+	ptp_schedule_worker(hellcreek->ptp_clock, 0);
+
+	return true;
+}
+
+static void hellcreek_hwtstamp_port_setup(struct hellcreek *hellcreek, int port)
+{
+	struct hellcreek_port_hwtstamp *ps =
+		&hellcreek->ports[port].port_hwtstamp;
+
+	skb_queue_head_init(&ps->rx_queue);
+}
+
+int hellcreek_hwtstamp_setup(struct hellcreek *hellcreek)
+{
+	struct dsa_switch *ds = hellcreek->ds;
+	int i;
+
+	/* Initialize timestamping ports. */
+	for (i = 0; i < ds->num_ports; ++i) {
+		if (!dsa_is_user_port(ds, i))
+			continue;
+
+		hellcreek_hwtstamp_port_setup(hellcreek, i);
+	}
+
+	/* Select the synchronized clock as the source timekeeper for the
+	 * timestamps and enable inline timestamping.
+	 */
+	hellcreek_ptp_write(hellcreek, PR_SETTINGS_C_TS_SRC_TK_MASK |
+			    PR_SETTINGS_C_RES3TS,
+			    PR_SETTINGS_C);
+
+	return 0;
+}
+
+void hellcreek_hwtstamp_free(struct hellcreek *hellcreek)
+{
+	/* Nothing to do */
+}
diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
new file mode 100644
index 000000000000..c0745ffa1ebb
--- /dev/null
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * DSA driver for:
+ * Hirschmann Hellcreek TSN switch.
+ *
+ * Copyright (C) 2019,2020 Hochschule Offenburg
+ * Copyright (C) 2019,2020 Linutronix GmbH
+ * Authors: Kurt Kanzenbach <kurt@linutronix.de>
+ *	    Kamil Alkhouri <kamil.alkhouri@hs-offenburg.de>
+ */
+
+#ifndef _HELLCREEK_HWTSTAMP_H_
+#define _HELLCREEK_HWTSTAMP_H_
+
+#include <net/dsa.h>
+#include "hellcreek.h"
+
+/* Timestamp Register */
+#define PR_TS_RX_P1_STATUS_C	(0x1d * 2)
+#define PR_TS_RX_P1_DATA_C	(0x1e * 2)
+#define PR_TS_TX_P1_STATUS_C	(0x1f * 2)
+#define PR_TS_TX_P1_DATA_C	(0x20 * 2)
+#define PR_TS_RX_P2_STATUS_C	(0x25 * 2)
+#define PR_TS_RX_P2_DATA_C	(0x26 * 2)
+#define PR_TS_TX_P2_STATUS_C	(0x27 * 2)
+#define PR_TS_TX_P2_DATA_C	(0x28 * 2)
+
+#define PR_TS_STATUS_TS_AVAIL	BIT(2)
+#define PR_TS_STATUS_TS_LOST	BIT(3)
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may encounter delays, so the timeout is set
+ * accordingly.
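+ *
+ * A sketch of the check done by hellcreek_txtstamp_work():
+ *
+ *	if (time_is_before_jiffies(ps->tx_tstamp_start + TX_TSTAMP_TIMEOUT))
+ *		goto free_and_clear_skb;
+ *
+ * after which the clone is freed and the in-progress flag is cleared.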
+ */
+#define TX_TSTAMP_TIMEOUT	msecs_to_jiffies(40)
+
+int hellcreek_port_hwtstamp_set(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+
+bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+
+int hellcreek_get_ts_info(struct dsa_switch *ds, int port,
+			  struct ethtool_ts_info *info);
+
+long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp);
+
+int hellcreek_hwtstamp_setup(struct hellcreek *chip);
+void hellcreek_hwtstamp_free(struct hellcreek *chip);
+
+#endif /* _HELLCREEK_HWTSTAMP_H_ */
diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
new file mode 100644
index 000000000000..2572c6087bb5
--- /dev/null
+++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * DSA driver for:
+ * Hirschmann Hellcreek TSN switch.
+ *
+ * Copyright (C) 2019,2020 Hochschule Offenburg
+ * Copyright (C) 2019,2020 Linutronix GmbH
+ * Authors: Kamil Alkhouri <kamil.alkhouri@hs-offenburg.de>
+ *	    Kurt Kanzenbach <kurt@linutronix.de>
+ */
+
+#include <linux/ptp_clock_kernel.h>
+#include "hellcreek.h"
+#include "hellcreek_ptp.h"
+#include "hellcreek_hwtstamp.h"
+
+u16 hellcreek_ptp_read(struct hellcreek *hellcreek, unsigned int offset)
+{
+	return readw(hellcreek->ptp_base + offset);
+}
+
+void hellcreek_ptp_write(struct hellcreek *hellcreek, u16 data,
+			 unsigned int offset)
+{
+	writew(data, hellcreek->ptp_base + offset);
+}
+
+/* Get nanoseconds from PTP clock */
+static u64 hellcreek_ptp_clock_read(struct hellcreek *hellcreek)
+{
+	u16 nsl, nsh;
+
+	/* Take a snapshot */
+	hellcreek_ptp_write(hellcreek, PR_COMMAND_C_SS, PR_COMMAND_C);
+
+	/* The time of the day is saved as 96 bits. However, due to hardware
+	 * limitations the seconds are only partly kept in the PTP core.
+	 * Currently only three bits for the seconds are available. That's why
+	 * only the nanoseconds are used and the seconds are tracked in
+	 * software. In any case, due to internal locking, all five registers
+	 * should be read.
+	 */
+	nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C);
+	nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C);
+	nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C);
+	nsh = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C);
+	nsl = hellcreek_ptp_read(hellcreek, PR_SS_SYNC_DATA_C);
+
+	return (u64)nsl | ((u64)nsh << 16);
+}
+
+static u64 __hellcreek_ptp_gettime(struct hellcreek *hellcreek)
+{
+	u64 ns;
+
+	ns = hellcreek_ptp_clock_read(hellcreek);
+	if (ns < hellcreek->last_ts)
+		hellcreek->seconds++;
+	hellcreek->last_ts = ns;
+	ns += hellcreek->seconds * NSEC_PER_SEC;
+
+	return ns;
+}
+
+/* Retrieve the seconds part in nanoseconds for a packet timestamped with @ns.
+ * There has to be a check whether an overflow occurred between the packet
+ * arrival and now. If so, use the correct seconds (-1) for calculating the
+ * packet arrival time.
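+ *
+ * Worked example with assumed values: a packet was stamped with
+ * ns = 999999900 just before a nanoseconds wraparound. By the time the
+ * driver reads the clock, last_ts = 150 and the software seconds counter
+ * was already bumped to 42. Since last_ts < ns, the packet belongs to the
+ * previous second and (42 - 1) * NSEC_PER_SEC is returned.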
+ */
+u64 hellcreek_ptp_gettime_seconds(struct hellcreek *hellcreek, u64 ns)
+{
+	u64 s;
+
+	__hellcreek_ptp_gettime(hellcreek);
+	if (hellcreek->last_ts > ns)
+		s = hellcreek->seconds * NSEC_PER_SEC;
+	else
+		s = (hellcreek->seconds - 1) * NSEC_PER_SEC;
+
+	return s;
+}
+
+static int hellcreek_ptp_gettime(struct ptp_clock_info *ptp,
+				 struct timespec64 *ts)
+{
+	struct hellcreek *hellcreek = ptp_to_hellcreek(ptp);
+	u64 ns;
+
+	mutex_lock(&hellcreek->ptp_lock);
+	ns = __hellcreek_ptp_gettime(hellcreek);
+	mutex_unlock(&hellcreek->ptp_lock);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int hellcreek_ptp_settime(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct hellcreek *hellcreek = ptp_to_hellcreek(ptp);
+	u16 secl, nsh, nsl;
+
+	secl = ts->tv_sec & 0xffff;
+	nsh = ((u32)ts->tv_nsec & 0xffff0000) >> 16;
+	nsl = ts->tv_nsec & 0xffff;
+
+	mutex_lock(&hellcreek->ptp_lock);
+
+	/* Update overflow data structure */
+	hellcreek->seconds = ts->tv_sec;
+	hellcreek->last_ts = ts->tv_nsec;
+
+	/* Set time in clock */
+	hellcreek_ptp_write(hellcreek, 0x00, PR_CLOCK_WRITE_C);
+	hellcreek_ptp_write(hellcreek, 0x00, PR_CLOCK_WRITE_C);
+	hellcreek_ptp_write(hellcreek, secl, PR_CLOCK_WRITE_C);
+	hellcreek_ptp_write(hellcreek, nsh, PR_CLOCK_WRITE_C);
+	hellcreek_ptp_write(hellcreek, nsl, PR_CLOCK_WRITE_C);
+
+	mutex_unlock(&hellcreek->ptp_lock);
+
+	return 0;
+}
+
+static int hellcreek_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct hellcreek *hellcreek = ptp_to_hellcreek(ptp);
+	u16 negative = 0, addendh, addendl;
+	u32 addend;
+	u64 adj;
+
+	if (scaled_ppm < 0) {
+		negative = 1;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	/* IP-Core adjusts the nominal frequency by adding or subtracting 1 ns
+	 * from the 8 ns (period of the oscillator) every time the accumulator
+	 * register overflows. The value stored in the addend register is added
+	 * to the accumulator register every 8 ns.
+	 *
+	 * addend value = (2^30 * accumulator_overflow_rate) /
+	 *		  oscillator_frequency
+	 * where:
+	 *
+	 * oscillator_frequency	= 125 MHz
+	 * accumulator_overflow_rate = 125 MHz * scaled_ppm * 2^-16 * 10^-6 * 8
+	 */
+	adj = scaled_ppm;
+	adj <<= 11;
+	addend = (u32)div_u64(adj, 15625);
+
+	addendh = (addend & 0xffff0000) >> 16;
+	addendl = addend & 0xffff;
+
+	negative = (negative << 15) & 0x8000;
+
+	mutex_lock(&hellcreek->ptp_lock);
+
+	/* Set drift register */
+	hellcreek_ptp_write(hellcreek, negative, PR_CLOCK_DRIFT_C);
+	hellcreek_ptp_write(hellcreek, 0x00, PR_CLOCK_DRIFT_C);
+	hellcreek_ptp_write(hellcreek, 0x00, PR_CLOCK_DRIFT_C);
+	hellcreek_ptp_write(hellcreek, addendh, PR_CLOCK_DRIFT_C);
+	hellcreek_ptp_write(hellcreek, addendl, PR_CLOCK_DRIFT_C);
+
+	mutex_unlock(&hellcreek->ptp_lock);
+
+	return 0;
+}
+
+static int hellcreek_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct hellcreek *hellcreek = ptp_to_hellcreek(ptp);
+	u16 negative = 0, counth, countl;
+	u32 count_val;
+
+	/* If the offset is larger than the IP core's slow offset resources,
+	 * don't use slow adjustment. Rather, add the offset directly to the
+	 * current time.
+	 */
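+	/* Rough numbers: MAX_SLOW_OFFSET_ADJ is (2^30 - 1) * MAX_NS_PER_STEP,
+	 * i.e. roughly 7.5 seconds with MAX_NS_PER_STEP = 7 ns. A delta of,
+	 * say, 10 seconds therefore takes the gettime/settime path below.
+	 */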
+	if (abs(delta) > MAX_SLOW_OFFSET_ADJ) {
+		struct timespec64 now, then = ns_to_timespec64(delta);
+
+		hellcreek_ptp_gettime(ptp, &now);
+		now = timespec64_add(now, then);
+		hellcreek_ptp_settime(ptp, &now);
+
+		return 0;
+	}
+
+	if (delta < 0) {
+		negative = 1;
+		delta = -delta;
+	}
+
+	/* 'count_val' does not exceed the maximum register size (2^30) */
+	count_val = div_s64(delta, MAX_NS_PER_STEP);
+
+	counth = (count_val & 0xffff0000) >> 16;
+	countl = count_val & 0xffff;
+
+	negative = (negative << 15) & 0x8000;
+
+	mutex_lock(&hellcreek->ptp_lock);
+
+	/* Set offset write register */
+	hellcreek_ptp_write(hellcreek, negative, PR_CLOCK_OFFSET_C);
+	hellcreek_ptp_write(hellcreek, MAX_NS_PER_STEP, PR_CLOCK_OFFSET_C);
+	hellcreek_ptp_write(hellcreek, MIN_CLK_CYCLES_BETWEEN_STEPS,
+			    PR_CLOCK_OFFSET_C);
+	hellcreek_ptp_write(hellcreek, countl, PR_CLOCK_OFFSET_C);
+	hellcreek_ptp_write(hellcreek, counth, PR_CLOCK_OFFSET_C);
+
+	mutex_unlock(&hellcreek->ptp_lock);
+
+	return 0;
+}
+
+static int hellcreek_ptp_enable(struct ptp_clock_info *ptp,
+				struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static void hellcreek_ptp_overflow_check(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct hellcreek *hellcreek;
+
+	hellcreek = dw_overflow_to_hellcreek(dw);
+
+	mutex_lock(&hellcreek->ptp_lock);
+	__hellcreek_ptp_gettime(hellcreek);
+	mutex_unlock(&hellcreek->ptp_lock);
+
+	schedule_delayed_work(&hellcreek->overflow_work,
+			      HELLCREEK_OVERFLOW_PERIOD);
+}
+
+static enum led_brightness hellcreek_get_brightness(struct hellcreek *hellcreek,
+						    int led)
+{
+	return (hellcreek->status_out & led) ? 1 : 0;
+}
+
+static void hellcreek_set_brightness(struct hellcreek *hellcreek, int led,
+				     enum led_brightness b)
+{
+	mutex_lock(&hellcreek->ptp_lock);
+
+	if (b)
+		hellcreek->status_out |= led;
+	else
+		hellcreek->status_out &= ~led;
+
+	hellcreek_ptp_write(hellcreek, hellcreek->status_out, STATUS_OUT);
+
+	mutex_unlock(&hellcreek->ptp_lock);
+}
+
+static void hellcreek_led_sync_good_set(struct led_classdev *ldev,
+					enum led_brightness b)
+{
+	struct hellcreek *hellcreek = led_to_hellcreek(ldev, led_sync_good);
+
+	hellcreek_set_brightness(hellcreek, STATUS_OUT_SYNC_GOOD, b);
+}
+
+static enum led_brightness hellcreek_led_sync_good_get(struct led_classdev *ldev)
+{
+	struct hellcreek *hellcreek = led_to_hellcreek(ldev, led_sync_good);
+
+	return hellcreek_get_brightness(hellcreek, STATUS_OUT_SYNC_GOOD);
+}
+
+static void hellcreek_led_is_gm_set(struct led_classdev *ldev,
+				    enum led_brightness b)
+{
+	struct hellcreek *hellcreek = led_to_hellcreek(ldev, led_is_gm);
+
+	hellcreek_set_brightness(hellcreek, STATUS_OUT_IS_GM, b);
+}
+
+static enum led_brightness hellcreek_led_is_gm_get(struct led_classdev *ldev)
+{
+	struct hellcreek *hellcreek = led_to_hellcreek(ldev, led_is_gm);
+
+	return hellcreek_get_brightness(hellcreek, STATUS_OUT_IS_GM);
+}
+
+/* There are two available LEDs internally called sync_good and is_gm. However,
+ * the user might want to use a different label and specify the default state.
+ * Take those properties from the device tree.
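+ *
+ * Note that the lookup below is purely order based: the first available
+ * child of the "leds" node becomes sync_good, the second becomes is_gm.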
+ */
+static int hellcreek_led_setup(struct hellcreek *hellcreek)
+{
+	struct device_node *leds, *led = NULL;
+	const char *label, *state;
+	int ret = -EINVAL;
+
+	leds = of_find_node_by_name(hellcreek->dev->of_node, "leds");
+	if (!leds) {
+		dev_err(hellcreek->dev, "No LEDs specified in device tree!\n");
+		return ret;
+	}
+
+	hellcreek->status_out = 0;
+
+	led = of_get_next_available_child(leds, led);
+	if (!led) {
+		dev_err(hellcreek->dev, "First LED not specified!\n");
+		goto out;
+	}
+
+	ret = of_property_read_string(led, "label", &label);
+	hellcreek->led_sync_good.name = ret ? "sync_good" : label;
+
+	ret = of_property_read_string(led, "default-state", &state);
+	if (!ret) {
+		if (!strcmp(state, "on"))
+			hellcreek->led_sync_good.brightness = 1;
+		else if (!strcmp(state, "off"))
+			hellcreek->led_sync_good.brightness = 0;
+		else if (!strcmp(state, "keep"))
+			hellcreek->led_sync_good.brightness =
+				hellcreek_get_brightness(hellcreek,
+							 STATUS_OUT_SYNC_GOOD);
+	}
+
+	hellcreek->led_sync_good.max_brightness = 1;
+	hellcreek->led_sync_good.brightness_set = hellcreek_led_sync_good_set;
+	hellcreek->led_sync_good.brightness_get = hellcreek_led_sync_good_get;
+
+	led = of_get_next_available_child(leds, led);
+	if (!led) {
+		dev_err(hellcreek->dev, "Second LED not specified!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = of_property_read_string(led, "label", &label);
+	hellcreek->led_is_gm.name = ret ? "is_gm" : label;
+
+	ret = of_property_read_string(led, "default-state", &state);
+	if (!ret) {
+		if (!strcmp(state, "on"))
+			hellcreek->led_is_gm.brightness = 1;
+		else if (!strcmp(state, "off"))
+			hellcreek->led_is_gm.brightness = 0;
+		else if (!strcmp(state, "keep"))
+			hellcreek->led_is_gm.brightness =
+				hellcreek_get_brightness(hellcreek,
+							 STATUS_OUT_IS_GM);
+	}
+
+	hellcreek->led_is_gm.max_brightness = 1;
+	hellcreek->led_is_gm.brightness_set = hellcreek_led_is_gm_set;
+	hellcreek->led_is_gm.brightness_get = hellcreek_led_is_gm_get;
+
+	/* Set initial state */
+	if (hellcreek->led_sync_good.brightness == 1)
+		hellcreek_set_brightness(hellcreek, STATUS_OUT_SYNC_GOOD, 1);
+	if (hellcreek->led_is_gm.brightness == 1)
+		hellcreek_set_brightness(hellcreek, STATUS_OUT_IS_GM, 1);
+
+	/* Register both LEDs */
+	led_classdev_register(hellcreek->dev, &hellcreek->led_sync_good);
+	led_classdev_register(hellcreek->dev, &hellcreek->led_is_gm);
+
+	ret = 0;
+
+out:
+	of_node_put(leds);
+
+	return ret;
+}
+
+int hellcreek_ptp_setup(struct hellcreek *hellcreek)
+{
+	u16 status;
+	int ret;
+
+	/* Set up the overflow work */
+	INIT_DELAYED_WORK(&hellcreek->overflow_work,
+			  hellcreek_ptp_overflow_check);
+
+	/* Setup PTP clock */
+	hellcreek->ptp_clock_info.owner = THIS_MODULE;
+	snprintf(hellcreek->ptp_clock_info.name,
+		 sizeof(hellcreek->ptp_clock_info.name),
+		 "%s", dev_name(hellcreek->dev));
+
+	/* IP-Core can add up to 0.5 ns per 8 ns cycle, which means
+	 * accumulator_overflow_rate shall not exceed 62.5 MHz (which adjusts
+	 * the nominal frequency by 6.25%)
+	 */
+	hellcreek->ptp_clock_info.max_adj = 62500000;
+	hellcreek->ptp_clock_info.n_alarm = 0;
+	hellcreek->ptp_clock_info.n_pins = 0;
+	hellcreek->ptp_clock_info.n_ext_ts = 0;
+	hellcreek->ptp_clock_info.n_per_out = 0;
+	hellcreek->ptp_clock_info.pps = 0;
+	hellcreek->ptp_clock_info.adjfine = hellcreek_ptp_adjfine;
+	hellcreek->ptp_clock_info.adjtime = hellcreek_ptp_adjtime;
+	hellcreek->ptp_clock_info.gettime64 = hellcreek_ptp_gettime;
+	hellcreek->ptp_clock_info.settime64 = hellcreek_ptp_settime;
+	hellcreek->ptp_clock_info.enable = 
hellcreek_ptp_enable; + hellcreek->ptp_clock_info.do_aux_work = hellcreek_hwtstamp_work; + + hellcreek->ptp_clock = ptp_clock_register(&hellcreek->ptp_clock_info, + hellcreek->dev); + if (IS_ERR(hellcreek->ptp_clock)) + return PTR_ERR(hellcreek->ptp_clock); + + /* Enable the offset correction process, if no offset correction is + * already taking place + */ + status = hellcreek_ptp_read(hellcreek, PR_CLOCK_STATUS_C); + if (!(status & PR_CLOCK_STATUS_C_OFS_ACT)) + hellcreek_ptp_write(hellcreek, + status | PR_CLOCK_STATUS_C_ENA_OFS, + PR_CLOCK_STATUS_C); + + /* Enable the drift correction process */ + hellcreek_ptp_write(hellcreek, status | PR_CLOCK_STATUS_C_ENA_DRIFT, + PR_CLOCK_STATUS_C); + + /* LED setup */ + ret = hellcreek_led_setup(hellcreek); + if (ret) { + if (hellcreek->ptp_clock) + ptp_clock_unregister(hellcreek->ptp_clock); + return ret; + } + + schedule_delayed_work(&hellcreek->overflow_work, + HELLCREEK_OVERFLOW_PERIOD); + + return 0; +} + +void hellcreek_ptp_free(struct hellcreek *hellcreek) +{ + led_classdev_unregister(&hellcreek->led_is_gm); + led_classdev_unregister(&hellcreek->led_sync_good); + cancel_delayed_work_sync(&hellcreek->overflow_work); + if (hellcreek->ptp_clock) + ptp_clock_unregister(hellcreek->ptp_clock); + hellcreek->ptp_clock = NULL; +} diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.h b/drivers/net/dsa/hirschmann/hellcreek_ptp.h new file mode 100644 index 000000000000..0b51392c7e56 --- /dev/null +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * DSA driver for: + * Hirschmann Hellcreek TSN switch. + * + * Copyright (C) 2019,2020 Hochschule Offenburg + * Copyright (C) 2019,2020 Linutronix GmbH + * Authors: Kurt Kanzenbach <kurt@linutronix.de> + * Kamil Alkhouri <kamil.alkhouri@hs-offenburg.de> + */ + +#ifndef _HELLCREEK_PTP_H_ +#define _HELLCREEK_PTP_H_ + +#include <linux/bitops.h> +#include <linux/ptp_clock_kernel.h> + +#include "hellcreek.h" + +/* Every jump in time is 7 ns */ +#define MAX_NS_PER_STEP 7L + +/* Correct offset at every clock cycle */ +#define MIN_CLK_CYCLES_BETWEEN_STEPS 0 + +/* Maximum available slow offset resources */ +#define MAX_SLOW_OFFSET_ADJ \ + ((unsigned long long)((1 << 30) - 1) * MAX_NS_PER_STEP) + +/* four times a second overflow check */ +#define HELLCREEK_OVERFLOW_PERIOD (HZ / 4) + +/* PTP Register */ +#define PR_SETTINGS_C (0x09 * 2) +#define PR_SETTINGS_C_RES3TS BIT(4) +#define PR_SETTINGS_C_TS_SRC_TK_SHIFT 8 +#define PR_SETTINGS_C_TS_SRC_TK_MASK GENMASK(9, 8) +#define PR_COMMAND_C (0x0a * 2) +#define PR_COMMAND_C_SS BIT(0) + +#define PR_CLOCK_STATUS_C (0x0c * 2) +#define PR_CLOCK_STATUS_C_ENA_DRIFT BIT(12) +#define PR_CLOCK_STATUS_C_OFS_ACT BIT(13) +#define PR_CLOCK_STATUS_C_ENA_OFS BIT(14) + +#define PR_CLOCK_READ_C (0x0d * 2) +#define PR_CLOCK_WRITE_C (0x0e * 2) +#define PR_CLOCK_OFFSET_C (0x0f * 2) +#define PR_CLOCK_DRIFT_C (0x10 * 2) + +#define PR_SS_FREE_DATA_C (0x12 * 2) +#define PR_SS_SYNT_DATA_C (0x14 * 2) +#define PR_SS_SYNC_DATA_C (0x16 * 2) +#define PR_SS_DRAC_DATA_C (0x18 * 2) + +#define STATUS_OUT (0x60 * 2) +#define STATUS_OUT_SYNC_GOOD BIT(0) +#define STATUS_OUT_IS_GM BIT(1) + +int hellcreek_ptp_setup(struct hellcreek *hellcreek); +void hellcreek_ptp_free(struct hellcreek *hellcreek); +u16 hellcreek_ptp_read(struct hellcreek *hellcreek, unsigned int offset); +void hellcreek_ptp_write(struct hellcreek *hellcreek, u16 data, + unsigned int offset); +u64 hellcreek_ptp_gettime_seconds(struct hellcreek *hellcreek, u64 ns); + +#define 
ptp_to_hellcreek(ptp) \ + container_of(ptp, struct hellcreek, ptp_clock_info) + +#define dw_overflow_to_hellcreek(dw) \ + container_of(dw, struct hellcreek, overflow_work) + +#define led_to_hellcreek(ldev, led) \ + container_of(ldev, struct hellcreek, led) + +#endif /* _HELLCREEK_PTP_H_ */ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index de7692b763d8..6408402a44f5 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -558,7 +558,7 @@ mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface) val |= 0x190000 << RG_COREPLL_SDM_PCW_S; mt7530_write(priv, MT7531_PLLGP_CR0, val); break; - }; + } /* Set feedback divide ratio update signal to high */ val = mt7530_read(priv, MT7531_PLLGP_CR0); @@ -1021,6 +1021,53 @@ mt7530_port_disable(struct dsa_switch *ds, int port) mutex_unlock(&priv->reg_mutex); } +static int +mt7530_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +{ + struct mt7530_priv *priv = ds->priv; + struct mii_bus *bus = priv->bus; + int length; + u32 val; + + /* When a new MTU is set, DSA always set the CPU port's MTU to the + * largest MTU of the slave ports. Because the switch only has a global + * RX length register, only allowing CPU port here is enough. + */ + if (!dsa_is_cpu_port(ds, port)) + return 0; + + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); + + val = mt7530_mii_read(priv, MT7530_GMACCR); + val &= ~MAX_RX_PKT_LEN_MASK; + + /* RX length also includes Ethernet header, MTK tag, and FCS length */ + length = new_mtu + ETH_HLEN + MTK_HDR_LEN + ETH_FCS_LEN; + if (length <= 1522) { + val |= MAX_RX_PKT_LEN_1522; + } else if (length <= 1536) { + val |= MAX_RX_PKT_LEN_1536; + } else if (length <= 1552) { + val |= MAX_RX_PKT_LEN_1552; + } else { + val &= ~MAX_RX_JUMBO_MASK; + val |= MAX_RX_JUMBO(DIV_ROUND_UP(length, 1024)); + val |= MAX_RX_PKT_LEN_JUMBO; + } + + mt7530_mii_write(priv, MT7530_GMACCR, val); + + mutex_unlock(&bus->mdio_lock); + + return 0; +} + +static int +mt7530_port_max_mtu(struct dsa_switch *ds, int port) +{ + return MT7530_MAX_MTU; +} + static void mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state) { @@ -2519,6 +2566,8 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .get_sset_count = mt7530_get_sset_count, .port_enable = mt7530_port_enable, .port_disable = mt7530_port_disable, + .port_change_mtu = mt7530_port_change_mtu, + .port_max_mtu = mt7530_port_max_mtu, .port_stp_state_set = mt7530_stp_state_set, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 9278a8e3d04e..ee3523a7537e 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -11,6 +11,9 @@ #define MT7530_NUM_FDB_RECORDS 2048 #define MT7530_ALL_MEMBERS 0xff +#define MTK_HDR_LEN 4 +#define MT7530_MAX_MTU (15 * 1024 - ETH_HLEN - ETH_FCS_LEN - MTK_HDR_LEN) + enum mt753x_id { ID_MT7530 = 0, ID_MT7621 = 1, @@ -289,6 +292,15 @@ enum mt7530_vlan_port_attr { #define MT7531_DBG_CNT(x) (0x3018 + (x) * 0x100) #define MT7531_DIS_CLR BIT(31) +#define MT7530_GMACCR 0x30e0 +#define MAX_RX_JUMBO(x) ((x) << 2) +#define MAX_RX_JUMBO_MASK GENMASK(5, 2) +#define MAX_RX_PKT_LEN_MASK GENMASK(1, 0) +#define MAX_RX_PKT_LEN_1522 0x0 +#define MAX_RX_PKT_LEN_1536 0x1 +#define MAX_RX_PKT_LEN_1552 0x2 +#define MAX_RX_PKT_LEN_JUMBO 0x3 + /* Register for MIB */ #define MT7530_PORT_MIB_COUNTER(x) (0x4000 + (x) * 0x100) #define MT7530_MIB_CCR 0x4fe0 diff --git a/drivers/net/dsa/mv88e6xxx/chip.c 
b/drivers/net/dsa/mv88e6xxx/chip.c index 34cca0a4b31c..e8258db8c21e 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1442,7 +1442,7 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) static int mv88e6xxx_vtu_setup(struct mv88e6xxx_chip *chip) { - if (!chip->info->max_vid) + if (!mv88e6xxx_max_vid(chip)) return 0; return mv88e6xxx_g1_vtu_flush(chip); @@ -1484,7 +1484,7 @@ int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap) } /* Set every FID bit used by the VLAN entries */ - vlan.vid = chip->info->max_vid; + vlan.vid = mv88e6xxx_max_vid(chip); vlan.valid = false; do { @@ -1496,7 +1496,7 @@ int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap) break; set_bit(vlan.fid, fid_bitmap); - } while (vlan.vid < chip->info->max_vid); + } while (vlan.vid < mv88e6xxx_max_vid(chip)); return 0; } @@ -1587,7 +1587,7 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, int err; if (switchdev_trans_ph_prepare(trans)) - return chip->info->max_vid ? 0 : -EOPNOTSUPP; + return mv88e6xxx_max_vid(chip) ? 0 : -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_set_8021q_mode(chip, port, mode); @@ -1603,7 +1603,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds->priv; int err; - if (!chip->info->max_vid) + if (!mv88e6xxx_max_vid(chip)) return -EOPNOTSUPP; /* If the requested port doesn't belong to the same bridge as the VLAN @@ -1973,7 +1973,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, u8 member; u16 vid; - if (!chip->info->max_vid) + if (!mv88e6xxx_max_vid(chip)) return; if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) @@ -2051,7 +2051,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, u16 pvid, vid; int err = 0; - if (!chip->info->max_vid) + if (!mv88e6xxx_max_vid(chip)) return -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); @@ -2157,7 +2157,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, return err; /* Dump VLANs' Filtering Information Databases */ - vlan.vid = chip->info->max_vid; + vlan.vid = mv88e6xxx_max_vid(chip); vlan.valid = false; do { @@ -2172,7 +2172,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, cb, data); if (err) return err; - } while (vlan.vid < chip->info->max_vid); + } while (vlan.vid < mv88e6xxx_max_vid(chip)); return err; } @@ -2853,6 +2853,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + ds->configure_vlan_while_not_filtering = true; mv88e6xxx_reg_lock(chip); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 81c244fc0419..7faa61b7f8f8 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -245,6 +245,7 @@ enum mv88e6xxx_region_id { MV88E6XXX_REGION_GLOBAL1 = 0, MV88E6XXX_REGION_GLOBAL2, MV88E6XXX_REGION_ATU, + MV88E6XXX_REGION_VTU, _MV88E6XXX_REGION_MAX, }; @@ -672,6 +673,11 @@ static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip) return chip->info->num_ports; } +static inline unsigned int mv88e6xxx_max_vid(struct mv88e6xxx_chip *chip) +{ + return chip->info->max_vid; +} + static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip) { return GENMASK((s32)mv88e6xxx_num_ports(chip) - 1, 0); diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index ade04c036fd9..21953d6d484c 100644 --- 
a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -417,6 +417,92 @@ out: return err; } +/** + * struct mv88e6xxx_devlink_vtu_entry - Devlink VTU entry + * @fid: Global1/2: FID and VLAN policy. + * @sid: Global1/3: SID, unknown filters and learning. + * @op: Global1/5: FID (old chipsets). + * @vid: Global1/6: VID, valid, and page. + * @data: Global1/7-9: Membership data and priority override. + * @resvd: Reserved. Also happens to align the size to 16B. + * + * The VTU entry format varies between chipset generations, the + * descriptions above represent the superset of all possible + * information, not all fields are valid on all devices. Since this is + * a low-level debug interface, copy all data verbatim and defer + * parsing to the consumer. + */ +struct mv88e6xxx_devlink_vtu_entry { + u16 fid; + u16 sid; + u16 op; + u16 vid; + u16 data[3]; + u16 resvd; +}; + +static int mv88e6xxx_region_vtu_snapshot(struct devlink *dl, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u8 **data) +{ + struct mv88e6xxx_devlink_vtu_entry *table, *entry; + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_vtu_entry vlan; + int err; + + table = kcalloc(mv88e6xxx_max_vid(chip) + 1, + sizeof(struct mv88e6xxx_devlink_vtu_entry), + GFP_KERNEL); + if (!table) + return -ENOMEM; + + entry = table; + vlan.vid = mv88e6xxx_max_vid(chip); + vlan.valid = false; + + mv88e6xxx_reg_lock(chip); + + do { + err = mv88e6xxx_g1_vtu_getnext(chip, &vlan); + if (err) + break; + + if (!vlan.valid) + break; + + err = err ? : mv88e6xxx_g1_read(chip, MV88E6352_G1_VTU_FID, + &entry->fid); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6352_G1_VTU_SID, + &entry->sid); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, + &entry->op); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_VID, + &entry->vid); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA1, + &entry->data[0]); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA2, + &entry->data[1]); + err = err ? 
: mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA3, + &entry->data[2]); + if (err) + break; + + entry++; + } while (vlan.vid < mv88e6xxx_max_vid(chip)); + + mv88e6xxx_reg_unlock(chip); + + if (err) { + kfree(table); + return err; + } + + *data = (u8 *)table; + return 0; +} + static int mv88e6xxx_region_port_snapshot(struct devlink_port *devlink_port, const struct devlink_port_region_ops *ops, struct netlink_ext_ack *extack, @@ -475,6 +561,12 @@ static struct devlink_region_ops mv88e6xxx_region_atu_ops = { .destructor = kfree, }; +static struct devlink_region_ops mv88e6xxx_region_vtu_ops = { + .name = "vtu", + .snapshot = mv88e6xxx_region_vtu_snapshot, + .destructor = kfree, +}; + static const struct devlink_port_region_ops mv88e6xxx_region_port_ops = { .name = "port", .snapshot = mv88e6xxx_region_port_snapshot, @@ -498,6 +590,10 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = { .ops = &mv88e6xxx_region_atu_ops /* calculated at runtime */ }, + [MV88E6XXX_REGION_VTU] = { + .ops = &mv88e6xxx_region_vtu_ops + /* calculated at runtime */ + }, }; static void @@ -576,9 +672,16 @@ static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds, ops = mv88e6xxx_regions[i].ops; size = mv88e6xxx_regions[i].size; - if (i == MV88E6XXX_REGION_ATU) + switch (i) { + case MV88E6XXX_REGION_ATU: size = mv88e6xxx_num_databases(chip) * sizeof(struct mv88e6xxx_devlink_atu_entry); + break; + case MV88E6XXX_REGION_VTU: + size = mv88e6xxx_max_vid(chip) * + sizeof(struct mv88e6xxx_devlink_vtu_entry); + break; + } region = dsa_devlink_region_create(ds, ops, 1, size); if (IS_ERR(region)) diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index e05abe61fa11..80a182c5b98a 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -330,6 +330,8 @@ void mv88e6xxx_g1_atu_prob_irq_free(struct mv88e6xxx_chip *chip); int mv88e6165_g1_atu_get_hash(struct mv88e6xxx_chip *chip, u8 *hash); int mv88e6165_g1_atu_set_hash(struct mv88e6xxx_chip *chip, u8 hash); +int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 1048509a849b..66ddf67b8737 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -307,8 +307,8 @@ static int mv88e6xxx_g1_vtu_stu_get(struct mv88e6xxx_chip *chip, return 0; } -static int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) { int err; diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index f791860d495f..ada75fa15861 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -112,10 +112,32 @@ static void felix_bridge_leave(struct dsa_switch *ds, int port, ocelot_port_bridge_leave(ocelot, port, br); } -/* This callback needs to be present */ static int felix_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { + struct ocelot *ocelot = ds->priv; + u16 vid, flags = vlan->flags; + int err; + + /* Ocelot switches copy frames as-is to the CPU, so the flags: + * egress-untagged or not, pvid or not, make no difference. 
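+	 * For instance, a frame tagged for VLAN 5 on a user port reaches
+	 * the CPU with that tag intact, no matter how the port's
+	 * untagged/pvid bits are set.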
This + * behavior is already better than what DSA just tries to approximate + * when it installs the VLAN with the same flags on the CPU port. + * Just accept any configuration, and don't let ocelot deny installing + * multiple native VLANs on the NPI port, because the switch doesn't + * look at the port tag settings towards the NPI interface anyway. + */ + if (port == ocelot->npi) + return 0; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { + err = ocelot_vlan_prepare(ocelot, port, vid, + flags & BRIDGE_VLAN_INFO_PVID, + flags & BRIDGE_VLAN_INFO_UNTAGGED); + if (err) + return err; + } + return 0; } @@ -135,9 +157,6 @@ static void felix_vlan_add(struct dsa_switch *ds, int port, u16 vid; int err; - if (dsa_is_cpu_port(ds, port)) - flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { err = ocelot_vlan_add(ocelot, port, vid, flags & BRIDGE_VLAN_INFO_PVID, diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index bab3a9bb5e6f..f82ad7419508 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -124,7 +124,7 @@ static void dummy_setup(struct net_device *dev) dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; - dev->features |= NETIF_F_ALL_TSO; + dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index d60a86aa8aa8..9aac7119d382 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -175,7 +175,6 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) default: return MAC8390_APPLE; } - break; case NUBUS_DRSW_APPLE: switch (fres->dr_hw) { @@ -186,11 +185,9 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) default: return MAC8390_APPLE; } - break; case NUBUS_DRSW_ASANTE: return MAC8390_ASANTE; - break; case NUBUS_DRSW_TECHWORKS: case NUBUS_DRSW_DAYNA2: @@ -199,11 +196,9 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) return MAC8390_CABLETRON; else return MAC8390_APPLE; - break; case NUBUS_DRSW_FARALLON: return MAC8390_FARALLON; - break; case NUBUS_DRSW_KINETICS: switch (fres->dr_hw) { @@ -212,7 +207,6 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) default: return MAC8390_KINETICS; } - break; case NUBUS_DRSW_DAYNA: /* @@ -224,7 +218,6 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) return MAC8390_NONE; else return MAC8390_DAYNA; - break; } return MAC8390_NONE; } diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 1c97e39b478e..e9756d0ea5b8 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -710,7 +710,7 @@ static void ne_block_output(struct net_device *dev, int count, retry: #endif -#ifdef NE8390_RW_BUGFIX +#ifdef NE_RW_BUGFIX /* Handle the read-before-write bug the same way as the Crynwr packet driver -- the NatSemi method doesn't work. Actually this doesn't always work either, but if you have diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c index bc6edb3f1af3..d6715008e04d 100644 --- a/drivers/net/ethernet/8390/ne2k-pci.c +++ b/drivers/net/ethernet/8390/ne2k-pci.c @@ -610,7 +610,7 @@ static void ne2k_pci_block_output(struct net_device *dev, int count, /* We should already be in page 0, but to be safe... 
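	 * (Note: the guard below was spelled NE8390_RW_BUGFIX before this
	 * rename; neither spelling appears to be defined anywhere, so the
	 * read-before-write workaround stays compiled out either way.)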
*/ outb(E8390_PAGE0+E8390_START+E8390_NODMA, nic_base + NE_CMD); -#ifdef NE8390_RW_BUGFIX +#ifdef NE_RW_BUGFIX /* Handle the read-before-write bug the same way as the * Crynwr packet driver -- the NatSemi method doesn't work. * Actually this doesn't always work either, but if you have diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index bf5e0e9bd0e2..6c049864dac0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1474,7 +1474,7 @@ int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map) for (i = 0; i < sizeof(cfg->prio_tc_map); i++) cfg->prio_tc_map[i] = cfg->tcs * i / 8; - cfg->is_qos = (tcs != 0 ? true : false); + cfg->is_qos = !!tcs; cfg->is_ptp = (cfg->tcs <= AQ_HW_PTP_TC); if (!cfg->is_ptp) netdev_warn(self->ndev, "%s\n", diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 5de47f6fde5a..1f5da4e4f4b2 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -77,10 +77,12 @@ #define MACB_RBQPH 0x04D4 /* GEM register offsets. */ +#define GEM_NCR 0x0000 /* Network Control */ #define GEM_NCFGR 0x0004 /* Network Config */ #define GEM_USRIO 0x000c /* User IO */ #define GEM_DMACFG 0x0010 /* DMA Configuration */ #define GEM_JML 0x0048 /* Jumbo Max Length */ +#define GEM_HS_MAC_CONFIG 0x0050 /* GEM high speed config */ #define GEM_HRB 0x0080 /* Hash Bottom */ #define GEM_HRT 0x0084 /* Hash Top */ #define GEM_SA1B 0x0088 /* Specific1 Bottom */ @@ -166,6 +168,9 @@ #define GEM_DCFG7 0x0298 /* Design Config 7 */ #define GEM_DCFG8 0x029C /* Design Config 8 */ #define GEM_DCFG10 0x02A4 /* Design Config 10 */ +#define GEM_DCFG12 0x02AC /* Design Config 12 */ +#define GEM_USX_CONTROL 0x0A80 /* High speed PCS control register */ +#define GEM_USX_STATUS 0x0A88 /* High speed PCS status register */ #define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ #define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ @@ -272,11 +277,19 @@ #define MACB_IRXFCS_OFFSET 19 #define MACB_IRXFCS_SIZE 1 +/* GEM specific NCR bitfields. */ +#define GEM_ENABLE_HS_MAC_OFFSET 31 +#define GEM_ENABLE_HS_MAC_SIZE 1 + /* GEM specific NCFGR bitfields. */ +#define GEM_FD_OFFSET 1 /* Full duplex */ +#define GEM_FD_SIZE 1 #define GEM_GBE_OFFSET 10 /* Gigabit mode enable */ #define GEM_GBE_SIZE 1 #define GEM_PCSSEL_OFFSET 11 #define GEM_PCSSEL_SIZE 1 +#define GEM_PAE_OFFSET 13 /* Pause enable */ +#define GEM_PAE_SIZE 1 #define GEM_CLK_OFFSET 18 /* MDC clock division */ #define GEM_CLK_SIZE 3 #define GEM_DBW_OFFSET 21 /* Data bus width */ @@ -461,11 +474,17 @@ #define MACB_REV_OFFSET 0 #define MACB_REV_SIZE 16 +/* Bitfield in HS_MAC_CONFIG */ +#define GEM_HS_MAC_SPEED_OFFSET 0 +#define GEM_HS_MAC_SPEED_SIZE 3 + /* Bitfields in DCFG1. */ #define GEM_IRQCOR_OFFSET 23 #define GEM_IRQCOR_SIZE 1 #define GEM_DBWDEF_OFFSET 25 #define GEM_DBWDEF_SIZE 3 +#define GEM_NO_PCS_OFFSET 0 +#define GEM_NO_PCS_SIZE 1 /* Bitfields in DCFG2. */ #define GEM_RX_PKT_BUFF_OFFSET 20 @@ -500,6 +519,28 @@ #define GEM_RXBD_RDBUFF_OFFSET 8 #define GEM_RXBD_RDBUFF_SIZE 4 +/* Bitfields in DCFG12. */ +#define GEM_HIGH_SPEED_OFFSET 26 +#define GEM_HIGH_SPEED_SIZE 1 + +/* Bitfields in USX_CONTROL. 
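+ * These fields drive the GEM high-speed (USX) PCS; macb_usx_pcs_link_up()
+ * in macb_main.c programs SERDES_RATE and USX_CTRL_SPEED and sets TX_EN,
+ * while macb_usx_pcs_config() asserts SIGNAL_OK.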
*/ +#define GEM_USX_CTRL_SPEED_OFFSET 14 +#define GEM_USX_CTRL_SPEED_SIZE 3 +#define GEM_SERDES_RATE_OFFSET 12 +#define GEM_SERDES_RATE_SIZE 2 +#define GEM_RX_SCR_BYPASS_OFFSET 9 +#define GEM_RX_SCR_BYPASS_SIZE 1 +#define GEM_TX_SCR_BYPASS_OFFSET 8 +#define GEM_TX_SCR_BYPASS_SIZE 1 +#define GEM_TX_EN_OFFSET 1 +#define GEM_TX_EN_SIZE 1 +#define GEM_SIGNAL_OK_OFFSET 0 +#define GEM_SIGNAL_OK_SIZE 1 + +/* Bitfields in USX_STATUS. */ +#define GEM_USX_BLOCK_LOCK_OFFSET 0 +#define GEM_USX_BLOCK_LOCK_SIZE 1 + /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 #define GEM_SUBNSINCRL_OFFSET 24 @@ -663,6 +704,8 @@ #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 #define MACB_CAPS_MACB_IS_GEM 0x80000000 +#define MACB_CAPS_PCS 0x01000000 +#define MACB_CAPS_HIGH_SPEED 0x02000000 /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 @@ -1201,6 +1244,7 @@ struct macb { struct mii_bus *mii_bus; struct phylink *phylink; struct phylink_config phylink_config; + struct phylink_pcs phylink_pcs; u32 caps; unsigned int dma_burst_length; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 286f0341bdf8..7b1d195787dc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -84,6 +84,9 @@ struct sifive_fu540_macb_mgmt { #define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0) #define MACB_WOL_ENABLED (0x1 << 1) +#define HS_SPEED_10000M 4 +#define MACB_SERDES_RATE_10G 1 + /* Graceful stop timeouts in us. We should allow up to * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) */ @@ -513,6 +516,7 @@ static void macb_validate(struct phylink_config *config, state->interface != PHY_INTERFACE_MODE_RMII && state->interface != PHY_INTERFACE_MODE_GMII && state->interface != PHY_INTERFACE_MODE_SGMII && + state->interface != PHY_INTERFACE_MODE_10GBASER && !phy_interface_mode_is_rgmii(state->interface)) { bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); return; @@ -525,10 +529,31 @@ static void macb_validate(struct phylink_config *config, return; } + if (state->interface == PHY_INTERFACE_MODE_10GBASER && + !(bp->caps & MACB_CAPS_HIGH_SPEED && + bp->caps & MACB_CAPS_PCS)) { + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + return; + } + phylink_set_port_modes(mask); phylink_set(mask, Autoneg); phylink_set(mask, Asym_Pause); + if (bp->caps & MACB_CAPS_GIGABIT_MODE_AVAILABLE && + (state->interface == PHY_INTERFACE_MODE_NA || + state->interface == PHY_INTERFACE_MODE_10GBASER)) { + phylink_set(mask, 10000baseCR_Full); + phylink_set(mask, 10000baseER_Full); + phylink_set(mask, 10000baseKR_Full); + phylink_set(mask, 10000baseLR_Full); + phylink_set(mask, 10000baseLRM_Full); + phylink_set(mask, 10000baseSR_Full); + phylink_set(mask, 10000baseT_Full); + if (state->interface != PHY_INTERFACE_MODE_NA) + goto out; + } + phylink_set(mask, 10baseT_Half); phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); @@ -545,23 +570,90 @@ static void macb_validate(struct phylink_config *config, if (!(bp->caps & MACB_CAPS_NO_GIGABIT_HALF)) phylink_set(mask, 1000baseT_Half); } - +out: bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void macb_mac_pcs_get_state(struct phylink_config *config, +static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, + int duplex) +{ + struct macb *bp = container_of(pcs, 
struct macb, phylink_pcs); + u32 config; + + config = gem_readl(bp, USX_CONTROL); + config = GEM_BFINS(SERDES_RATE, MACB_SERDES_RATE_10G, config); + config = GEM_BFINS(USX_CTRL_SPEED, HS_SPEED_10000M, config); + config &= ~(GEM_BIT(TX_SCR_BYPASS) | GEM_BIT(RX_SCR_BYPASS)); + config |= GEM_BIT(TX_EN); + gem_writel(bp, USX_CONTROL, config); +} + +static void macb_usx_pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state) { + struct macb *bp = container_of(pcs, struct macb, phylink_pcs); + u32 val; + + state->speed = SPEED_10000; + state->duplex = 1; + state->an_complete = 1; + + val = gem_readl(bp, USX_STATUS); + state->link = !!(val & GEM_BIT(USX_BLOCK_LOCK)); + val = gem_readl(bp, NCFGR); + if (val & GEM_BIT(PAE)) + state->pause = MLO_PAUSE_RX; +} + +static int macb_usx_pcs_config(struct phylink_pcs *pcs, + unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + struct macb *bp = container_of(pcs, struct macb, phylink_pcs); + + gem_writel(bp, USX_CONTROL, gem_readl(bp, USX_CONTROL) | + GEM_BIT(SIGNAL_OK)); + + return 0; +} + +static void macb_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ state->link = 0; } -static void macb_mac_an_restart(struct phylink_config *config) +static void macb_pcs_an_restart(struct phylink_pcs *pcs) { /* Not supported */ } +static int macb_pcs_config(struct phylink_pcs *pcs, + unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) +{ + return 0; +} + +static const struct phylink_pcs_ops macb_phylink_usx_pcs_ops = { + .pcs_get_state = macb_usx_pcs_get_state, + .pcs_config = macb_usx_pcs_config, + .pcs_link_up = macb_usx_pcs_link_up, +}; + +static const struct phylink_pcs_ops macb_phylink_pcs_ops = { + .pcs_get_state = macb_pcs_get_state, + .pcs_an_restart = macb_pcs_an_restart, + .pcs_config = macb_pcs_config, +}; + static void macb_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -569,25 +661,35 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, struct macb *bp = netdev_priv(ndev); unsigned long flags; u32 old_ctrl, ctrl; + u32 old_ncr, ncr; spin_lock_irqsave(&bp->lock, flags); old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR); + old_ncr = ncr = macb_or_gem_readl(bp, NCR); if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { if (state->interface == PHY_INTERFACE_MODE_RMII) ctrl |= MACB_BIT(RM9200_RMII); } else if (macb_is_gem(bp)) { ctrl &= ~(GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + ncr &= ~GEM_BIT(ENABLE_HS_MAC); - if (state->interface == PHY_INTERFACE_MODE_SGMII) + if (state->interface == PHY_INTERFACE_MODE_SGMII) { ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); + } else if (state->interface == PHY_INTERFACE_MODE_10GBASER) { + ctrl |= GEM_BIT(PCSSEL); + ncr |= GEM_BIT(ENABLE_HS_MAC); + } } /* Apply the new configuration, if any */ if (old_ctrl ^ ctrl) macb_or_gem_writel(bp, NCFGR, ctrl); + if (old_ncr ^ ncr) + macb_or_gem_writel(bp, NCR, ncr); + spin_unlock_irqrestore(&bp->lock, flags); } @@ -664,6 +766,10 @@ static void macb_mac_link_up(struct phylink_config *config, macb_or_gem_writel(bp, NCFGR, ctrl); + if (bp->phy_interface == PHY_INTERFACE_MODE_10GBASER) + gem_writel(bp, HS_MAC_CONFIG, GEM_BFINS(HS_MAC_SPEED, HS_SPEED_10000M, + gem_readl(bp, HS_MAC_CONFIG))); + spin_unlock_irqrestore(&bp->lock, flags); /* Enable Rx and Tx */ @@ -672,10 +778,28 @@ static void macb_mac_link_up(struct phylink_config *config, 
netif_tx_wake_all_queues(ndev); } +static int macb_mac_prepare(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + + if (interface == PHY_INTERFACE_MODE_10GBASER) + bp->phylink_pcs.ops = &macb_phylink_usx_pcs_ops; + else if (interface == PHY_INTERFACE_MODE_SGMII) + bp->phylink_pcs.ops = &macb_phylink_pcs_ops; + else + bp->phylink_pcs.ops = NULL; + + if (bp->phylink_pcs.ops) + phylink_set_pcs(bp->phylink, &bp->phylink_pcs); + + return 0; +} + static const struct phylink_mac_ops macb_phylink_ops = { .validate = macb_validate, - .mac_pcs_get_state = macb_mac_pcs_get_state, - .mac_an_restart = macb_mac_an_restart, + .mac_prepare = macb_mac_prepare, .mac_config = macb_mac_config, .mac_link_down = macb_mac_link_down, .mac_link_up = macb_mac_link_up, @@ -3524,6 +3648,11 @@ static void macb_configure_caps(struct macb *bp, dcfg = gem_readl(bp, DCFG1); if (GEM_BFEXT(IRQCOR, dcfg) == 0) bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; + if (GEM_BFEXT(NO_PCS, dcfg) == 0) + bp->caps |= MACB_CAPS_PCS; + dcfg = gem_readl(bp, DCFG12); + if (GEM_BFEXT(HIGH_SPEED, dcfg) == 1) + bp->caps |= MACB_CAPS_HIGH_SPEED; dcfg = gem_readl(bp, DCFG2); if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0) bp->caps |= MACB_CAPS_FIFO_MODE; @@ -3582,19 +3711,13 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, } if (IS_ERR_OR_NULL(*pclk)) { - err = PTR_ERR(*pclk); - if (!err) - err = -ENODEV; - + err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV; dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); return err; } if (IS_ERR_OR_NULL(*hclk)) { - err = PTR_ERR(*hclk); - if (!err) - err = -ENODEV; - + err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV; dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); return err; } diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 2a6d1cadac9e..30254e4cf70f 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -27,7 +27,6 @@ #include "cn66xx_device.h" #include "cn68xx_device.h" #include "cn68xx_regs.h" -#include "cn68xx_device.h" static void lio_cn68xx_set_dpi_regs(struct octeon_device *oct) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 17410fe86626..7d49fd4edc9e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2671,7 +2671,7 @@ do { \ seq_printf(seq, "%-12s", s); \ for (i = 0; i < n; ++i) \ seq_printf(seq, " %16" fmt_spec, v); \ - seq_putc(seq, '\n'); \ + seq_putc(seq, '\n'); \ } while (0) #define S(s, v) S3("s", s, v) #define T3(fmt_spec, s, v) S3(fmt_spec, s, tx[i].v) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 2d3dfdd2a716..e7b78b68eaac 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -235,6 +235,7 @@ struct chtls_dev { struct list_head na_node; unsigned int send_page_order; int max_host_sndbuf; + u32 round_robin_cnt; struct key_map kmap; unsigned int cdev_state; }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 96d561653496..63aacc184f68 100644 --- 
a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1217,8 +1217,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = csk->snd_win; csk->ulp_mode = ULP_MODE_TLS; step = cdev->lldi->nrxq / cdev->lldi->nchan; - csk->rss_qid = cdev->lldi->rxq_ids[port_id * step]; rxq_idx = port_id * step; + rxq_idx += cdev->round_robin_cnt++ % step; + csk->rss_qid = cdev->lldi->rxq_ids[rxq_idx]; csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx : port_id * step; csk->sndbuf = newsk->sk_sndbuf; diff --git a/drivers/net/ethernet/davicom/Kconfig b/drivers/net/ethernet/davicom/Kconfig index 76247103e566..7af86b6d4150 100644 --- a/drivers/net/ethernet/davicom/Kconfig +++ b/drivers/net/ethernet/davicom/Kconfig @@ -5,7 +5,7 @@ config DM9000 tristate "DM9000 support" - depends on ARM || MIPS || COLDFIRE || NIOS2 + depends on ARM || MIPS || COLDFIRE || NIOS2 || COMPILE_TEST select CRC32 select MII help diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 5c6c8c5ec747..3fdc70dab5c1 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -232,32 +232,29 @@ static void dm9000_inblk_32bit(void __iomem *reg, void *data, int count) static void dm9000_dumpblk_8bit(void __iomem *reg, int count) { int i; - int tmp; for (i = 0; i < count; i++) - tmp = readb(reg); + readb(reg); } static void dm9000_dumpblk_16bit(void __iomem *reg, int count) { int i; - int tmp; count = (count + 1) >> 1; for (i = 0; i < count; i++) - tmp = readw(reg); + readw(reg); } static void dm9000_dumpblk_32bit(void __iomem *reg, int count) { int i; - int tmp; count = (count + 3) >> 2; for (i = 0; i < count; i++) - tmp = readl(reg); + readl(reg); } /* diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index d9f6c19940ef..c3cbe55205a7 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -2175,11 +2175,21 @@ out: static SIMPLE_DEV_PM_OPS(de_pm_ops, de_suspend, de_resume); +static void de_shutdown(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + rtnl_lock(); + dev_close(dev); + rtnl_unlock(); +} + static struct pci_driver de_driver = { .name = DRV_NAME, .id_table = de_pci_tbl, .probe = de_init_one, .remove = de_remove_one, + .shutdown = de_shutdown, .driver.pm = &de_pm_ops, }; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index e7b0d7de40fd..c1dcd6ca1457 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1293,7 +1293,9 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) static unsigned char last_phys_addr[ETH_ALEN] = { 0x00, 'L', 'i', 'n', 'u', 'x' }; +#if defined(__i386__) || defined(__x86_64__) /* Patch up x86 BIOS bug. */ static int last_irq; +#endif int i, irq; unsigned short sum; unsigned char *ee_data; @@ -1617,7 +1619,9 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < 6; i++) last_phys_addr[i] = dev->dev_addr[i]; +#if defined(__i386__) || defined(__x86_64__) /* Patch up x86 BIOS bug. */ last_irq = irq; +#endif /* The lower four bits are the media type. 
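	 * (They select the default media for the unit via the per-board
	 * module options consumed in this block.)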
*/ if (board_idx >= 0 && board_idx < MAX_UNITS) { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 80fb1f537bb3..88bfe2107938 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1044,10 +1044,39 @@ static void ftgmac100_adjust_link(struct net_device *netdev) schedule_work(&priv->reset_task); } -static int ftgmac100_mii_probe(struct ftgmac100 *priv, phy_interface_t intf) +static int ftgmac100_mii_probe(struct net_device *netdev) { - struct net_device *netdev = priv->netdev; + struct ftgmac100 *priv = netdev_priv(netdev); + struct platform_device *pdev = to_platform_device(priv->dev); + struct device_node *np = pdev->dev.of_node; struct phy_device *phydev; + phy_interface_t phy_intf; + int err; + + /* Default to RGMII. It's a gigabit part after all */ + err = of_get_phy_mode(np, &phy_intf); + if (err) + phy_intf = PHY_INTERFACE_MODE_RGMII; + + /* Aspeed only supports these. I don't know about other IP + * block vendors so I'm going to just let them through for + * now. Note that this is only a warning if for some obscure + * reason the DT really means to lie about it or it's a newer + * part we don't know about. + * + * On the Aspeed SoC there are additionally straps and SCU + * control bits that could tell us what the interface is + * (or allow us to configure it while the IP block is held + * in reset). For now I chose to keep this driver away from + * those SoC specific bits and assume the device-tree is + * right and the SCU has been configured properly by pinmux + * or the firmware. + */ + if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) { + netdev_warn(netdev, + "Unsupported PHY mode %s !\n", + phy_modes(phy_intf)); + } phydev = phy_find_first(priv->mii_bus); if (!phydev) { @@ -1056,7 +1085,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv, phy_interface_t intf) } phydev = phy_connect(netdev, phydev_name(phydev), - &ftgmac100_adjust_link, intf); + &ftgmac100_adjust_link, phy_intf); if (IS_ERR(phydev)) { netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); @@ -1601,8 +1630,8 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); - phy_interface_t phy_intf = PHY_INTERFACE_MODE_RGMII; struct device_node *np = pdev->dev.of_node; + struct device_node *mdio_np; int i, err = 0; u32 reg; @@ -1623,39 +1652,6 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); } - /* Get PHY mode from device-tree */ - if (np) { - /* Default to RGMII. It's a gigabit part after all */ - err = of_get_phy_mode(np, &phy_intf); - if (err) - phy_intf = PHY_INTERFACE_MODE_RGMII; - - /* Aspeed only supports these. I don't know about other IP - * block vendors so I'm going to just let them through for - * now. Note that this is only a warning if for some obscure - * reason the DT really means to lie about it or it's a newer - * part we don't know about. - * - * On the Aspeed SoC there are additionally straps and SCU - * control bits that could tell us what the interface is - * (or allow us to configure it while the IP block is held - * in reset). For now I chose to keep this driver away from - * those SoC specific bits and assume the device-tree is - * right and the SCU has been configured properly by pinmux - * or the firmware. 
- */ - if (priv->is_aspeed && - phy_intf != PHY_INTERFACE_MODE_RMII && - phy_intf != PHY_INTERFACE_MODE_RGMII && - phy_intf != PHY_INTERFACE_MODE_RGMII_ID && - phy_intf != PHY_INTERFACE_MODE_RGMII_RXID && - phy_intf != PHY_INTERFACE_MODE_RGMII_TXID) { - netdev_warn(netdev, - "Unsupported PHY mode %s !\n", - phy_modes(phy_intf)); - } - } - priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); @@ -1667,35 +1663,38 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) for (i = 0; i < PHY_MAX_ADDR; i++) priv->mii_bus->irq[i] = PHY_POLL; - err = mdiobus_register(priv->mii_bus); + mdio_np = of_get_child_by_name(np, "mdio"); + + err = of_mdiobus_register(priv->mii_bus, mdio_np); if (err) { dev_err(priv->dev, "Cannot register MDIO bus!\n"); goto err_register_mdiobus; } - err = ftgmac100_mii_probe(priv, phy_intf); - if (err) { - dev_err(priv->dev, "MII Probe failed!\n"); - goto err_mii_probe; - } + of_node_put(mdio_np); return 0; -err_mii_probe: - mdiobus_unregister(priv->mii_bus); err_register_mdiobus: mdiobus_free(priv->mii_bus); return err; } +static void ftgmac100_phy_disconnect(struct net_device *netdev) +{ + if (!netdev->phydev) + return; + + phy_disconnect(netdev->phydev); +} + static void ftgmac100_destroy_mdio(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); - if (!netdev->phydev) + if (!priv->mii_bus) return; - phy_disconnect(netdev->phydev); mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); } @@ -1830,22 +1829,33 @@ static int ftgmac100_probe(struct platform_device *pdev) if (np && of_get_property(np, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { dev_err(&pdev->dev, "NCSI stack not enabled\n"); - goto err_ncsi_dev; + goto err_phy_connect; } dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) - goto err_ncsi_dev; + goto err_phy_connect; } else if (np && of_get_property(np, "phy-handle", NULL)) { struct phy_device *phy; + /* Support "mdio"/"phy" child nodes for ast2400/2500 with + * an embedded MDIO controller. Automatically scan the DTS for + * available PHYs and register them. 
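+		 * of_mdiobus_register() creates a PHY device for each child
+		 * of the "mdio" node, so the of_phy_get_and_connect() call
+		 * below can resolve the phy-handle reference to one of them.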
+ */ + if (of_device_is_compatible(np, "aspeed,ast2400-mac") || + of_device_is_compatible(np, "aspeed,ast2500-mac")) { + err = ftgmac100_setup_mdio(netdev); + if (err) + goto err_setup_mdio; + } + phy = of_phy_get_and_connect(priv->netdev, np, &ftgmac100_adjust_link); if (!phy) { dev_err(&pdev->dev, "Failed to connect to phy\n"); - goto err_setup_mdio; + goto err_phy_connect; } /* Indicate that we support PAUSE frames (see comment in @@ -1865,12 +1875,19 @@ static int ftgmac100_probe(struct platform_device *pdev) err = ftgmac100_setup_mdio(netdev); if (err) goto err_setup_mdio; + + err = ftgmac100_mii_probe(netdev); + if (err) { + dev_err(priv->dev, "MII probe failed!\n"); + goto err_ncsi_dev; + } + } if (priv->is_aspeed) { err = ftgmac100_setup_clk(priv); if (err) - goto err_ncsi_dev; + goto err_phy_connect; } /* Default ring sizes */ @@ -1906,6 +1923,8 @@ static int ftgmac100_probe(struct platform_device *pdev) err_register_netdev: clk_disable_unprepare(priv->rclk); clk_disable_unprepare(priv->clk); +err_phy_connect: + ftgmac100_phy_disconnect(netdev); err_ncsi_dev: if (priv->ndev) ncsi_unregister_dev(priv->ndev); @@ -1940,6 +1959,7 @@ static int ftgmac100_remove(struct platform_device *pdev) */ cancel_work_sync(&priv->reset_task); + ftgmac100_phy_disconnect(netdev); ftgmac100_destroy_mdio(netdev); iounmap(priv->base); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index d9c285948fc2..8867693e0fb3 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2305,9 +2305,9 @@ static void dpaa_tx_conf(struct net_device *net_dev, } static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv, - struct qman_portal *portal) + struct qman_portal *portal, bool sched_napi) { - if (unlikely(in_irq() || !in_serving_softirq())) { + if (sched_napi) { /* Disable QMan IRQ and invoke NAPI */ qman_p_irqsource_remove(portal, QM_PIRQ_DQRI); @@ -2321,7 +2321,8 @@ static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv, static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dq) + const struct qm_dqrr_entry *dq, + bool sched_napi) { struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base); struct dpaa_percpu_priv *percpu_priv; @@ -2337,7 +2338,7 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); - if (dpaa_eth_napi_schedule(percpu_priv, portal)) + if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi)) return qman_cb_dqrr_stop; dpaa_eth_refill_bpools(priv); @@ -2348,7 +2349,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal, static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dq) + const struct qm_dqrr_entry *dq, + bool sched_napi) { struct skb_shared_hwtstamps *shhwtstamps; struct rtnl_link_stats64 *percpu_stats; @@ -2380,7 +2382,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); percpu_stats = &percpu_priv->stats; - if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal))) + if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))) return qman_cb_dqrr_stop; /* Make sure we didn't run out of buffers */ @@ -2465,7 +2467,8 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, static enum 
qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dq) + const struct qm_dqrr_entry *dq, + bool sched_napi) { struct dpaa_percpu_priv *percpu_priv; struct net_device *net_dev; @@ -2476,7 +2479,7 @@ static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); - if (dpaa_eth_napi_schedule(percpu_priv, portal)) + if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi)) return qman_cb_dqrr_stop; dpaa_tx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); @@ -2486,7 +2489,8 @@ static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal, static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dq) + const struct qm_dqrr_entry *dq, + bool sched_napi) { struct dpaa_percpu_priv *percpu_priv; struct net_device *net_dev; @@ -2500,7 +2504,7 @@ static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); - if (dpaa_eth_napi_schedule(percpu_priv, portal)) + if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi)) return qman_cb_dqrr_stop; dpaa_tx_conf(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); @@ -2546,7 +2550,7 @@ static void dpaa_eth_napi_enable(struct dpaa_priv *priv) for_each_online_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); - percpu_priv->np.down = 0; + percpu_priv->np.down = false; napi_enable(&percpu_priv->np.napi); } } @@ -2559,7 +2563,7 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv) for_each_online_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); - percpu_priv->np.down = 1; + percpu_priv->np.down = true; napi_disable(&percpu_priv->np.napi); } } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index fc2075ea57fe..c78d12229730 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -50,40 +50,6 @@ drop_packet_err: return NETDEV_TX_OK; } -static bool enetc_tx_csum(struct sk_buff *skb, union enetc_tx_bd *txbd) -{ - int l3_start, l3_hsize; - u16 l3_flags, l4_flags; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - return false; - - switch (skb->csum_offset) { - case offsetof(struct tcphdr, check): - l4_flags = ENETC_TXBD_L4_TCP; - break; - case offsetof(struct udphdr, check): - l4_flags = ENETC_TXBD_L4_UDP; - break; - default: - skb_checksum_help(skb); - return false; - } - - l3_start = skb_network_offset(skb); - l3_hsize = skb_network_header_len(skb); - - l3_flags = 0; - if (skb->protocol == htons(ETH_P_IPV6)) - l3_flags = ENETC_TXBD_L3_IPV6; - - /* write BD fields */ - txbd->l3_csoff = enetc_txbd_l3_csoff(l3_start, l3_hsize, l3_flags); - txbd->l4_csoff = l4_flags; - - return true; -} - static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring, struct enetc_tx_swbd *tx_swbd) { @@ -149,22 +115,16 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, if (do_vlan || do_tstamp) flags |= ENETC_TXBD_FLAGS_EX; - if (enetc_tx_csum(skb, &temp_bd)) - flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS; - else if (tx_ring->tsd_enable) + if (tx_ring->tsd_enable) flags |= ENETC_TXBD_FLAGS_TSE | ENETC_TXBD_FLAGS_TXSTART; /* first BD needs frm_len and offload flags set */ temp_bd.frm_len = cpu_to_le16(skb->len); temp_bd.flags = flags; - if (flags & ENETC_TXBD_FLAGS_TSE) { - u32 temp; - - temp = (skb->skb_mstamp_ns >> 5 & ENETC_TXBD_TXSTART_MASK) - | (flags << 
ENETC_TXBD_FLAGS_OFFSET); - temp_bd.txstart = cpu_to_le32(temp); - } + if (flags & ENETC_TXBD_FLAGS_TSE) + temp_bd.txstart = enetc_txbd_set_tx_start(skb->skb_mstamp_ns, + flags); if (flags & ENETC_TXBD_FLAGS_EX) { u8 e_flags = 0; @@ -1925,8 +1885,7 @@ static void enetc_kfree_si(struct enetc_si *si) static void enetc_detect_errata(struct enetc_si *si) { if (si->pdev->revision == ENETC_REV1) - si->errata = ENETC_ERR_TXCSUM | ENETC_ERR_VLAN_ISOL | - ENETC_ERR_UCMCSWP; + si->errata = ENETC_ERR_VLAN_ISOL | ENETC_ERR_UCMCSWP; } int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index dd0fb0c066d7..8532d23b54f5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -147,9 +147,8 @@ struct enetc_msg_swbd { #define ENETC_REV1 0x1 enum enetc_errata { - ENETC_ERR_TXCSUM = BIT(0), - ENETC_ERR_VLAN_ISOL = BIT(1), - ENETC_ERR_UCMCSWP = BIT(2), + ENETC_ERR_VLAN_ISOL = BIT(0), + ENETC_ERR_UCMCSWP = BIT(1), }; #define ENETC_SI_F_QBV BIT(0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index eb6bbf1113c7..861bd04638e0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -477,8 +477,7 @@ union enetc_tx_bd { __le16 frm_len; union { struct { - __le16 l3_csoff; - u8 l4_csoff; + u8 reserved[3]; u8 flags; }; /* default layout */ __le32 txstart; @@ -501,41 +500,37 @@ union enetc_tx_bd { } wb; /* writeback descriptor */ }; -#define ENETC_TXBD_FLAGS_L4CS BIT(0) -#define ENETC_TXBD_FLAGS_TSE BIT(1) -#define ENETC_TXBD_FLAGS_W BIT(2) -#define ENETC_TXBD_FLAGS_CSUM BIT(3) -#define ENETC_TXBD_FLAGS_TXSTART BIT(4) -#define ENETC_TXBD_FLAGS_EX BIT(6) -#define ENETC_TXBD_FLAGS_F BIT(7) +enum enetc_txbd_flags { + ENETC_TXBD_FLAGS_RES0 = BIT(0), /* reserved */ + ENETC_TXBD_FLAGS_TSE = BIT(1), + ENETC_TXBD_FLAGS_W = BIT(2), + ENETC_TXBD_FLAGS_RES3 = BIT(3), /* reserved */ + ENETC_TXBD_FLAGS_TXSTART = BIT(4), + ENETC_TXBD_FLAGS_EX = BIT(6), + ENETC_TXBD_FLAGS_F = BIT(7) +}; #define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) #define ENETC_TXBD_FLAGS_OFFSET 24 + +static inline __le32 enetc_txbd_set_tx_start(u64 tx_start, u8 flags) +{ + u32 temp; + + temp = (tx_start >> 5 & ENETC_TXBD_TXSTART_MASK) | + (flags << ENETC_TXBD_FLAGS_OFFSET); + + return cpu_to_le32(temp); +} + static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) { memset(txbd, 0, sizeof(*txbd)); } -/* L3 csum flags */ -#define ENETC_TXBD_L3_IPCS BIT(7) -#define ENETC_TXBD_L3_IPV6 BIT(15) - -#define ENETC_TXBD_L3_START_MASK GENMASK(6, 0) -#define ENETC_TXBD_L3_SET_HSIZE(val) ((((val) >> 2) & 0x7f) << 8) - /* Extension flags */ #define ENETC_TXBD_E_FLAGS_VLAN_INS BIT(0) #define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP BIT(2) -static inline __le16 enetc_txbd_l3_csoff(int start, int hdr_sz, u16 l3_flags) -{ - return cpu_to_le16(l3_flags | ENETC_TXBD_L3_SET_HSIZE(hdr_sz) | - (start & ENETC_TXBD_L3_START_MASK)); -} - -/* L4 csum flags */ -#define ENETC_TXBD_L4_UDP BIT(5) -#define ENETC_TXBD_L4_TCP BIT(6) - union enetc_rx_bd { struct { __le64 addr; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 419306342ac5..ecdc2af8c292 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -714,22 +714,16 @@ static void enetc_pf_netdev_setup(struct 
enetc_si *si, struct net_device *ndev, ndev->watchdog_timeo = 5 * HZ; ndev->max_mtu = ENETC_MAX_MTU; - ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK; - ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | - NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; - if (si->errata & ENETC_ERR_TXCSUM) { - ndev->hw_features &= ~NETIF_F_HW_CSUM; - ndev->features &= ~NETIF_F_HW_CSUM; - } - ndev->priv_flags |= IFF_UNICAST_FLT; if (si->hw_features & ENETC_SI_F_QBV) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 7b5c82c7e4e5..39c1a09e69a9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -120,22 +120,16 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->watchdog_timeo = 5 * HZ; ndev->max_mtu = ENETC_MAX_MTU; - ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | - NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (si->num_rss) ndev->hw_features |= NETIF_F_RXHASH; - if (si->errata & ENETC_ERR_TXCSUM) { - ndev->hw_features &= ~NETIF_F_HW_CSUM; - ndev->features &= ~NETIF_F_HW_CSUM; - } - /* pick up primary MAC address from SI */ enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 912c51e327d6..f9d4d234a2af 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -278,6 +278,7 @@ struct hnae3_dev_specs { u16 rss_ind_tbl_size; u16 rss_key_size; u16 int_ql_max; /* max value of interrupt coalesce based on INT_QL */ + u16 max_int_gl; /* max value of interrupt coalesce based on INT_GL */ u8 max_non_tso_bd_num; /* max BD number of one non-TSO packet */ }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index dc9a85745e62..a5ebca888ee0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -349,6 +349,7 @@ static void hns3_dbg_dev_specs(struct hnae3_handle *h) dev_info(priv->dev, "Desc num per RX queue: %u\n", kinfo->num_rx_desc); dev_info(priv->dev, "Total number of enabled TCs: %u\n", kinfo->num_tc); dev_info(priv->dev, "MAX INT QL: %u\n", dev_specs->int_ql_max); + dev_info(priv->dev, "MAX INT GL: %u\n", dev_specs->max_int_gl); } static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a362516a3185..999a2aaad847 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -211,8 +211,8 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing */ - if (rl_reg > 0 && 
!tqp_vector->tx_group.coal.gl_adapt_enable && - !tqp_vector->rx_group.coal.gl_adapt_enable) + if (rl_reg > 0 && !tqp_vector->tx_group.coal.adapt_enable && + !tqp_vector->rx_group.coal.adapt_enable) /* According to the hardware, the range of rl_reg is * 0-59 and the unit is 4. */ @@ -224,48 +224,99 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, void hns3_set_vector_coalesce_rx_gl(struct hns3_enet_tqp_vector *tqp_vector, u32 gl_value) { - u32 rx_gl_reg = hns3_gl_usec_to_reg(gl_value); + u32 new_val; - writel(rx_gl_reg, tqp_vector->mask_addr + HNS3_VECTOR_GL0_OFFSET); + if (tqp_vector->rx_group.coal.unit_1us) + new_val = gl_value | HNS3_INT_GL_1US; + else + new_val = hns3_gl_usec_to_reg(gl_value); + + writel(new_val, tqp_vector->mask_addr + HNS3_VECTOR_GL0_OFFSET); } void hns3_set_vector_coalesce_tx_gl(struct hns3_enet_tqp_vector *tqp_vector, u32 gl_value) { - u32 tx_gl_reg = hns3_gl_usec_to_reg(gl_value); + u32 new_val; + + if (tqp_vector->tx_group.coal.unit_1us) + new_val = gl_value | HNS3_INT_GL_1US; + else + new_val = hns3_gl_usec_to_reg(gl_value); + + writel(new_val, tqp_vector->mask_addr + HNS3_VECTOR_GL1_OFFSET); +} + +void hns3_set_vector_coalesce_tx_ql(struct hns3_enet_tqp_vector *tqp_vector, + u32 ql_value) +{ + writel(ql_value, tqp_vector->mask_addr + HNS3_VECTOR_TX_QL_OFFSET); +} - writel(tx_gl_reg, tqp_vector->mask_addr + HNS3_VECTOR_GL1_OFFSET); +void hns3_set_vector_coalesce_rx_ql(struct hns3_enet_tqp_vector *tqp_vector, + u32 ql_value) +{ + writel(ql_value, tqp_vector->mask_addr + HNS3_VECTOR_RX_QL_OFFSET); } -static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector, - struct hns3_nic_priv *priv) +static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector, + struct hns3_nic_priv *priv) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev); + struct hns3_enet_coalesce *tx_coal = &tqp_vector->tx_group.coal; + struct hns3_enet_coalesce *rx_coal = &tqp_vector->rx_group.coal; + /* initialize the configuration for interrupt coalescing. * 1. GL (Interrupt Gap Limiter) * 2. RL (Interrupt Rate Limiter) + * 3. QL (Interrupt Quantity Limiter) * * Default: enable interrupt coalescing self-adaptive and GL */ - tqp_vector->tx_group.coal.gl_adapt_enable = 1; - tqp_vector->rx_group.coal.gl_adapt_enable = 1; + tx_coal->adapt_enable = 1; + rx_coal->adapt_enable = 1; + + tx_coal->int_gl = HNS3_INT_GL_50K; + rx_coal->int_gl = HNS3_INT_GL_50K; - tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K; - tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K; + rx_coal->flow_level = HNS3_FLOW_LOW; + tx_coal->flow_level = HNS3_FLOW_LOW; + + /* device version above V3(include V3), GL can configure 1us + * unit, so uses 1us unit. 
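+	 * With unit_1us set, the GL register takes a plain microsecond count
+	 * tagged with HNS3_INT_GL_1US, e.g. (20 | HNS3_INT_GL_1US) for 20us,
+	 * instead of the hns3_gl_usec_to_reg() encoding used on older parts.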
+ */ + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) { + tx_coal->unit_1us = 1; + rx_coal->unit_1us = 1; + } - tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW; - tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW; + if (ae_dev->dev_specs.int_ql_max) { + tx_coal->ql_enable = 1; + rx_coal->ql_enable = 1; + tx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max; + rx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max; + tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; + rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG; + } } -static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector, - struct hns3_nic_priv *priv) +static void +hns3_vector_coalesce_init_hw(struct hns3_enet_tqp_vector *tqp_vector, + struct hns3_nic_priv *priv) { + struct hns3_enet_coalesce *tx_coal = &tqp_vector->tx_group.coal; + struct hns3_enet_coalesce *rx_coal = &tqp_vector->rx_group.coal; struct hnae3_handle *h = priv->ae_handle; - hns3_set_vector_coalesce_tx_gl(tqp_vector, - tqp_vector->tx_group.coal.int_gl); - hns3_set_vector_coalesce_rx_gl(tqp_vector, - tqp_vector->rx_group.coal.int_gl); + hns3_set_vector_coalesce_tx_gl(tqp_vector, tx_coal->int_gl); + hns3_set_vector_coalesce_rx_gl(tqp_vector, rx_coal->int_gl); hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting); + + if (tx_coal->ql_enable) + hns3_set_vector_coalesce_tx_ql(tqp_vector, tx_coal->int_ql); + + if (rx_coal->ql_enable) + hns3_set_vector_coalesce_rx_ql(tqp_vector, rx_coal->int_ql); } static int hns3_nic_set_real_num_queue(struct net_device *netdev) @@ -3333,14 +3384,14 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector) tqp_vector->last_jiffies + msecs_to_jiffies(1000))) return; - if (rx_group->coal.gl_adapt_enable) { + if (rx_group->coal.adapt_enable) { rx_update = hns3_get_new_int_gl(rx_group); if (rx_update) hns3_set_vector_coalesce_rx_gl(tqp_vector, rx_group->coal.int_gl); } - if (tx_group->coal.gl_adapt_enable) { + if (tx_group->coal.adapt_enable) { tx_update = hns3_get_new_int_gl(tx_group); if (tx_update) hns3_set_vector_coalesce_tx_gl(tqp_vector, @@ -3536,7 +3587,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) for (i = 0; i < priv->vector_num; i++) { tqp_vector = &priv->tqp_vector[i]; - hns3_vector_gl_rl_init_hw(tqp_vector, priv); + hns3_vector_coalesce_init_hw(tqp_vector, priv); tqp_vector->num_tqps = 0; } @@ -3632,7 +3683,7 @@ static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) tqp_vector->idx = i; tqp_vector->mask_addr = vector[i].io_addr; tqp_vector->vector_irq = vector[i].vector; - hns3_vector_gl_rl_init(tqp_vector, priv); + hns3_vector_coalesce_init(tqp_vector, priv); } out: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 1c81dea0da1e..8d3365231bfd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -181,6 +181,8 @@ enum hns3_nic_state { #define HNS3_VECTOR_GL2_OFFSET 0x300 #define HNS3_VECTOR_RL_OFFSET 0x900 #define HNS3_VECTOR_RL_EN_B 6 +#define HNS3_VECTOR_TX_QL_OFFSET 0xe00 +#define HNS3_VECTOR_RX_QL_OFFSET 0xf00 #define HNS3_RING_EN_B 0 @@ -418,18 +420,25 @@ enum hns3_flow_level_range { HNS3_FLOW_ULTRA = 3, }; -#define HNS3_INT_GL_MAX 0x1FE0 #define HNS3_INT_GL_50K 0x0014 #define HNS3_INT_GL_20K 0x0032 #define HNS3_INT_GL_18K 0x0036 #define HNS3_INT_GL_8K 0x007C +#define HNS3_INT_GL_1US BIT(31) + #define HNS3_INT_RL_MAX 0x00EC #define HNS3_INT_RL_ENABLE_MASK 0x40 +#define HNS3_INT_QL_DEFAULT_CFG 0x20 + struct 
hns3_enet_coalesce { u16 int_gl; - u8 gl_adapt_enable; + u16 int_ql; + u16 int_ql_max; + u8 adapt_enable:1; + u8 ql_enable:1; + u8 unit_1us:1; enum hns3_flow_level_range flow_level; }; @@ -595,6 +604,10 @@ void hns3_set_vector_coalesce_tx_gl(struct hns3_enet_tqp_vector *tqp_vector, u32 gl_value); void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, u32 rl_value); +void hns3_set_vector_coalesce_rx_ql(struct hns3_enet_tqp_vector *tqp_vector, + u32 ql_value); +void hns3_set_vector_coalesce_tx_ql(struct hns3_enet_tqp_vector *tqp_vector, + u32 ql_value); void hns3_enable_vlan_filter(struct net_device *netdev, bool enable); void hns3_request_update_promisc_mode(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 6b07b2771172..c30d5d3786c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1105,9 +1105,9 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue, rx_vector = priv->ring[queue_num + queue].tqp_vector; cmd->use_adaptive_tx_coalesce = - tx_vector->tx_group.coal.gl_adapt_enable; + tx_vector->tx_group.coal.adapt_enable; cmd->use_adaptive_rx_coalesce = - rx_vector->rx_group.coal.gl_adapt_enable; + rx_vector->rx_group.coal.adapt_enable; cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl; cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl; @@ -1115,6 +1115,9 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue, cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting; cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting; + cmd->tx_max_coalesced_frames = tx_vector->tx_group.coal.int_ql; + cmd->rx_max_coalesced_frames = rx_vector->rx_group.coal.int_ql; + return 0; } @@ -1127,22 +1130,30 @@ static int hns3_get_coalesce(struct net_device *netdev, static int hns3_check_gl_coalesce_para(struct net_device *netdev, struct ethtool_coalesce *cmd) { + struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); u32 rx_gl, tx_gl; - if (cmd->rx_coalesce_usecs > HNS3_INT_GL_MAX) { + if (cmd->rx_coalesce_usecs > ae_dev->dev_specs.max_int_gl) { netdev_err(netdev, - "Invalid rx-usecs value, rx-usecs range is 0-%d\n", - HNS3_INT_GL_MAX); + "invalid rx-usecs value, rx-usecs range is 0-%u\n", + ae_dev->dev_specs.max_int_gl); return -EINVAL; } - if (cmd->tx_coalesce_usecs > HNS3_INT_GL_MAX) { + if (cmd->tx_coalesce_usecs > ae_dev->dev_specs.max_int_gl) { netdev_err(netdev, - "Invalid tx-usecs value, tx-usecs range is 0-%d\n", - HNS3_INT_GL_MAX); + "invalid tx-usecs value, tx-usecs range is 0-%u\n", + ae_dev->dev_specs.max_int_gl); return -EINVAL; } + /* device version above V3(include V3), GL uses 1us unit, + * so the round down is not needed. 
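+	 * (hns3_gl_round_down() below only matters on pre-V3 hardware, where
+	 * GL is programmed in coarser register units and the requested usecs
+	 * value must first be rounded down to a representable one.)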
+ */ + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + return 0; + rx_gl = hns3_gl_round_down(cmd->rx_coalesce_usecs); if (rx_gl != cmd->rx_coalesce_usecs) { netdev_info(netdev, @@ -1188,6 +1199,29 @@ static int hns3_check_rl_coalesce_para(struct net_device *netdev, return 0; } +static int hns3_check_ql_coalesce_param(struct net_device *netdev, + struct ethtool_coalesce *cmd) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); + + if ((cmd->tx_max_coalesced_frames || cmd->rx_max_coalesced_frames) && + !ae_dev->dev_specs.int_ql_max) { + netdev_err(netdev, "coalesced frames is not supported\n"); + return -EOPNOTSUPP; + } + + if (cmd->tx_max_coalesced_frames > ae_dev->dev_specs.int_ql_max || + cmd->rx_max_coalesced_frames > ae_dev->dev_specs.int_ql_max) { + netdev_err(netdev, + "invalid coalesced_frames value, range is 0-%u\n", + ae_dev->dev_specs.int_ql_max); + return -ERANGE; + } + + return 0; +} + static int hns3_check_coalesce_para(struct net_device *netdev, struct ethtool_coalesce *cmd) { @@ -1207,6 +1241,10 @@ static int hns3_check_coalesce_para(struct net_device *netdev, return ret; } + ret = hns3_check_ql_coalesce_param(netdev, cmd); + if (ret) + return ret; + if (cmd->use_adaptive_tx_coalesce == 1 || cmd->use_adaptive_rx_coalesce == 1) { netdev_info(netdev, @@ -1230,14 +1268,17 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev, tx_vector = priv->ring[queue].tqp_vector; rx_vector = priv->ring[queue_num + queue].tqp_vector; - tx_vector->tx_group.coal.gl_adapt_enable = + tx_vector->tx_group.coal.adapt_enable = cmd->use_adaptive_tx_coalesce; - rx_vector->rx_group.coal.gl_adapt_enable = + rx_vector->rx_group.coal.adapt_enable = cmd->use_adaptive_rx_coalesce; tx_vector->tx_group.coal.int_gl = cmd->tx_coalesce_usecs; rx_vector->rx_group.coal.int_gl = cmd->rx_coalesce_usecs; + tx_vector->tx_group.coal.int_ql = cmd->tx_max_coalesced_frames; + rx_vector->rx_group.coal.int_ql = cmd->rx_max_coalesced_frames; + hns3_set_vector_coalesce_tx_gl(tx_vector, tx_vector->tx_group.coal.int_gl); hns3_set_vector_coalesce_rx_gl(rx_vector, @@ -1245,6 +1286,13 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev, hns3_set_vector_coalesce_rl(tx_vector, h->kinfo.int_rl_setting); hns3_set_vector_coalesce_rl(rx_vector, h->kinfo.int_rl_setting); + + if (tx_vector->tx_group.coal.ql_enable) + hns3_set_vector_coalesce_tx_ql(tx_vector, + tx_vector->tx_group.coal.int_ql); + if (rx_vector->rx_group.coal.ql_enable) + hns3_set_vector_coalesce_rx_ql(rx_vector, + rx_vector->rx_group.coal.int_ql); } static int hns3_set_coalesce(struct net_device *netdev, @@ -1471,7 +1519,8 @@ static int hns3_get_module_eeprom(struct net_device *netdev, #define HNS3_ETHTOOL_COALESCE (ETHTOOL_COALESCE_USECS | \ ETHTOOL_COALESCE_USE_ADAPTIVE | \ ETHTOOL_COALESCE_RX_USECS_HIGH | \ - ETHTOOL_COALESCE_TX_USECS_HIGH) + ETHTOOL_COALESCE_TX_USECS_HIGH | \ + ETHTOOL_COALESCE_MAX_FRAMES) static const struct ethtool_ops hns3vf_ethtool_ops = { .supported_coalesce_params = HNS3_ETHTOOL_COALESCE, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 096e26a2e16b..5b7967c309b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1103,6 +1103,14 @@ struct hclge_dev_specs_0_cmd { __le32 max_tm_rate; }; +#define HCLGE_DEF_MAX_INT_GL 0x1FE0U + +struct hclge_dev_specs_1_cmd { + __le32 rsv0; + __le16 
max_int_gl; + u8 rsv1[18]; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 3606240025a8..f990f6915226 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -4,7 +4,6 @@ #include "hclge_main.h" #include "hclge_dcb.h" #include "hclge_tm.h" -#include "hclge_dcb.h" #include "hnae3.h" #define BW_PERCENT 100 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1f026408ad38..710200119fe8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1366,6 +1366,7 @@ static void hclge_set_default_dev_specs(struct hclge_dev *hdev) ae_dev->dev_specs.rss_ind_tbl_size = HCLGE_RSS_IND_TBL_SIZE; ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE; ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE; + ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL; } static void hclge_parse_dev_specs(struct hclge_dev *hdev, @@ -1373,14 +1374,18 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev, { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_dev_specs_0_cmd *req0; + struct hclge_dev_specs_1_cmd *req1; req0 = (struct hclge_dev_specs_0_cmd *)desc[0].data; + req1 = (struct hclge_dev_specs_1_cmd *)desc[1].data; ae_dev->dev_specs.max_non_tso_bd_num = req0->max_non_tso_bd_num; ae_dev->dev_specs.rss_ind_tbl_size = le16_to_cpu(req0->rss_ind_tbl_size); + ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max); ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate); + ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); } static void hclge_check_dev_specs(struct hclge_dev *hdev) @@ -1395,6 +1400,8 @@ static void hclge_check_dev_specs(struct hclge_dev *hdev) dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE; if (!dev_specs->max_tm_rate) dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE; + if (!dev_specs->max_int_gl) + dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL; } static int hclge_query_dev_specs(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 9460c128c095..f94f5d443ebc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -285,6 +285,14 @@ struct hclgevf_dev_specs_0_cmd { u8 rsv1[5]; }; +#define HCLGEVF_DEF_MAX_INT_GL 0x1FE0U + +struct hclgevf_dev_specs_1_cmd { + __le32 rsv0; + __le16 max_int_gl; + u8 rsv1[18]; +}; + static inline void hclgevf_write_reg(void __iomem *base, u32 reg, u32 value) { writel(value, base + reg); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index c8e3fdd5999c..71007e74e9d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2991,6 +2991,7 @@ static void hclgevf_set_default_dev_specs(struct hclgevf_dev *hdev) HCLGEVF_MAX_NON_TSO_BD_NUM; ae_dev->dev_specs.rss_ind_tbl_size = HCLGEVF_RSS_IND_TBL_SIZE; ae_dev->dev_specs.rss_key_size = HCLGEVF_RSS_KEY_SIZE; + ae_dev->dev_specs.max_int_gl = 
HCLGEVF_DEF_MAX_INT_GL; } static void hclgevf_parse_dev_specs(struct hclgevf_dev *hdev, @@ -2998,13 +2999,17 @@ static void hclgevf_parse_dev_specs(struct hclgevf_dev *hdev, { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclgevf_dev_specs_0_cmd *req0; + struct hclgevf_dev_specs_1_cmd *req1; req0 = (struct hclgevf_dev_specs_0_cmd *)desc[0].data; + req1 = (struct hclgevf_dev_specs_1_cmd *)desc[1].data; ae_dev->dev_specs.max_non_tso_bd_num = req0->max_non_tso_bd_num; ae_dev->dev_specs.rss_ind_tbl_size = le16_to_cpu(req0->rss_ind_tbl_size); + ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max); ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); + ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); } static void hclgevf_check_dev_specs(struct hclgevf_dev *hdev) @@ -3017,6 +3022,8 @@ static void hclgevf_check_dev_specs(struct hclgevf_dev *hdev) dev_specs->rss_ind_tbl_size = HCLGEVF_RSS_IND_TBL_SIZE; if (!dev_specs->rss_key_size) dev_specs->rss_key_size = HCLGEVF_RSS_KEY_SIZE; + if (!dev_specs->max_int_gl) + dev_specs->max_int_gl = HCLGEVF_DEF_MAX_INT_GL; } static int hclgevf_query_dev_specs(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 4a9041ee1b39..5d559117f78c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1834,8 +1834,13 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, struct netdev_queue *nq, bool napi) { unsigned int bytes_compl = 0, pkts_compl = 0; + struct xdp_frame_bulk bq; int i; + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + for (i = 0; i < num; i++) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index]; struct mvneta_tx_desc *tx_desc = txq->descs + @@ -1857,9 +1862,12 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, if (napi && buf->type == MVNETA_TYPE_XDP_TX) xdp_return_frame_rx_napi(buf->xdpf); else - xdp_return_frame(buf->xdpf); + xdp_return_frame_bulk(buf->xdpf, &bq); } } + xdp_flush_frame_bulk(&bq); + + rcu_read_unlock(); netdev_tx_completed_queue(nq, pkts_compl, bytes_compl); } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f6616c8933ca..3069e192d773 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2440,8 +2440,13 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, struct mvpp2_txq_pcpu *txq_pcpu, int num) { + struct xdp_frame_bulk bq; int i; + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + for (i = 0; i < num; i++) { struct mvpp2_txq_pcpu_buf *tx_buf = txq_pcpu->buffs + txq_pcpu->txq_get_index; @@ -2454,10 +2459,13 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, dev_kfree_skb_any(tx_buf->skb); else if (tx_buf->type == MVPP2_TYPE_XDP_TX || tx_buf->type == MVPP2_TYPE_XDP_NDO) - xdp_return_frame(tx_buf->xdpf); + xdp_return_frame_bulk(tx_buf->xdpf, &bq); mvpp2_txq_inc_get(txq_pcpu); } + xdp_flush_frame_bulk(&bq); + + rcu_read_unlock(); } static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 2f7a861d0c7b..ffc681b67f1c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -9,4 +9,4 @@ 
obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o octeontx2_mbox-y := mbox.o rvu_trace.o octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ - rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o + rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 8f17e26dca53..7d0f96290943 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -145,6 +145,16 @@ int cgx_get_cgxid(void *cgxd) return cgx->cgx_id; } +u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_CFG); + + return (cfg & CMR_P2X_SEL_MASK) >> CMR_P2X_SEL_SHIFT; +} + /* Ensure the required lock for event queue(where asynchronous events are * posted) is acquired before calling this API. Else an asynchronous event(with * latest link status) can reach the destination before this function returns @@ -814,8 +824,7 @@ static int cgx_lmac_verify_fwi_version(struct cgx *cgx) minor_ver = FIELD_GET(RESP_MINOR_VER, resp); dev_dbg(dev, "Firmware command interface version = %d.%d\n", major_ver, minor_ver); - if (major_ver != CGX_FIRMWARE_MAJOR_VER || - minor_ver != CGX_FIRMWARE_MINOR_VER) + if (major_ver != CGX_FIRMWARE_MAJOR_VER) return -EIO; else return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 27ca3291682b..bcfc3e5f66bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -27,6 +27,10 @@ /* Registers */ #define CGXX_CMRX_CFG 0x00 +#define CMR_P2X_SEL_MASK GENMASK_ULL(61, 59) +#define CMR_P2X_SEL_SHIFT 59ULL +#define CMR_P2X_SEL_NIX0 1ULL +#define CMR_P2X_SEL_NIX1 2ULL #define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) @@ -142,5 +146,6 @@ int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause); void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable); +u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index f48eb66ed021..17f6f42f4453 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -162,6 +162,8 @@ enum nix_scheduler { #define NIX_RX_ACTIONOP_UCAST_IPSEC (0x2ull) #define NIX_RX_ACTIONOP_MCAST (0x3ull) #define NIX_RX_ACTIONOP_RSS (0x4ull) +/* Use the RX action set in the default unicast entry */ +#define NIX_RX_ACTION_DEFAULT (0xfull) /* NIX TX action operation*/ #define NIX_TX_ACTIONOP_DROP (0x0ull) @@ -174,8 +176,12 @@ enum nix_scheduler { #define NPC_MCAM_KEY_X2 1 #define NPC_MCAM_KEY_X4 2 -#define NIX_INTF_RX 0 -#define NIX_INTF_TX 1 +#define NIX_INTFX_RX(a) (0x0ull | (a) << 1) +#define NIX_INTFX_TX(a) (0x1ull | (a) << 1) + +/* Default interfaces are NIX0_RX and NIX0_TX */ +#define NIX_INTF_RX NIX_INTFX_RX(0) +#define NIX_INTF_TX NIX_INTFX_TX(0) #define NIX_INTF_TYPE_CGX 0 #define NIX_INTF_TYPE_LBK 1 @@ -206,6 +212,8 @@ enum ndc_idx_e { NIX0_RX = 0x0, NIX0_TX = 0x1, NPA0_U = 0x2, + NIX1_RX = 0x4, + NIX1_TX = 0x5, }; enum ndc_ctype_e { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 263a21129416..cb4e3d86b58b 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -86,7 +86,7 @@ struct mbox_msghdr {
#define OTX2_MBOX_REQ_SIG (0xdead)
#define OTX2_MBOX_RSP_SIG (0xbeef)
u16 sig; /* Signature, for validating corrupted msgs */
-#define OTX2_MBOX_VERSION (0x0001)
+#define OTX2_MBOX_VERSION (0x0007)
u16 ver; /* Version of msg's structure for this ID */
u16 next_msgoff; /* Offset of next msg within mailbox region */
int rc; /* Msg process'ed response code */
@@ -188,10 +188,19 @@ M(NPC_MCAM_ALLOC_AND_WRITE_ENTRY, 0x600b, npc_mcam_alloc_and_write_entry, \
npc_mcam_alloc_and_write_entry_rsp) \
M(NPC_GET_KEX_CFG, 0x600c, npc_get_kex_cfg, \
msg_req, npc_get_kex_cfg_rsp) \
+M(NPC_INSTALL_FLOW, 0x600d, npc_install_flow, \
+ npc_install_flow_req, npc_install_flow_rsp) \
+M(NPC_DELETE_FLOW, 0x600e, npc_delete_flow, \
+ npc_delete_flow_req, msg_rsp) \
+M(NPC_MCAM_READ_ENTRY, 0x600f, npc_mcam_read_entry, \
+ npc_mcam_read_entry_req, \
+ npc_mcam_read_entry_rsp) \
+M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule, \
+ msg_req, npc_mcam_read_base_rule_rsp) \
/* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \
M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \
nix_lf_alloc_req, nix_lf_alloc_rsp) \
-M(NIX_LF_FREE, 0x8001, nix_lf_free, msg_req, msg_rsp) \
+M(NIX_LF_FREE, 0x8001, nix_lf_free, nix_lf_free_req, msg_rsp) \
M(NIX_AQ_ENQ, 0x8002, nix_aq_enq, nix_aq_enq_req, nix_aq_enq_rsp) \
M(NIX_HWCTX_DISABLE, 0x8003, nix_hwctx_disable, \
hwctx_disable_req, msg_rsp) \
@@ -200,7 +209,8 @@ M(NIX_TXSCH_ALLOC, 0x8004, nix_txsch_alloc, \
M(NIX_TXSCH_FREE, 0x8005, nix_txsch_free, nix_txsch_free_req, msg_rsp) \
M(NIX_TXSCHQ_CFG, 0x8006, nix_txschq_cfg, nix_txschq_config, msg_rsp) \
M(NIX_STATS_RST, 0x8007, nix_stats_rst, msg_req, msg_rsp) \
-M(NIX_VTAG_CFG, 0x8008, nix_vtag_cfg, nix_vtag_config, msg_rsp) \
+M(NIX_VTAG_CFG, 0x8008, nix_vtag_cfg, nix_vtag_config, \
+ nix_vtag_config_rsp) \
M(NIX_RSS_FLOWKEY_CFG, 0x8009, nix_rss_flowkey_cfg, \
nix_rss_flowkey_cfg, \
nix_rss_flowkey_cfg_rsp) \
@@ -216,7 +226,6 @@ M(NIX_SET_RX_CFG, 0x8010, nix_set_rx_cfg, nix_rx_cfg, msg_rsp) \
M(NIX_LSO_FORMAT_CFG, 0x8011, nix_lso_format_cfg, \
nix_lso_format_cfg, \
nix_lso_format_cfg_rsp) \
-M(NIX_RXVLAN_ALLOC, 0x8012, nix_rxvlan_alloc, msg_req, msg_rsp) \
M(NIX_LF_PTP_TX_ENABLE, 0x8013, nix_lf_ptp_tx_enable, msg_req, msg_rsp) \
M(NIX_LF_PTP_TX_DISABLE, 0x8014, nix_lf_ptp_tx_disable, msg_req, msg_rsp) \
M(NIX_BP_ENABLE, 0x8016, nix_bp_enable, nix_bp_cfg_req, \
@@ -271,6 +280,17 @@ struct ready_msg_rsp {
* or to detach part of a certain resource type.
* Rest of the fields specify how many of what type to
* be attached.
+ * To request LFs from two blocks of the same type this mailbox
+ * can be sent twice as below:
+ * struct rsrc_attach *attach;
+ * .. Allocate memory for message ..
+ * attach->cptlfs = 3; <3 LFs from CPT0>
+ * .. Send message ..
+ * .. Allocate memory for message ..
+ * attach->modify = 1;
+ * attach->cpt_blkaddr = BLKADDR_CPT1;
+ * attach->cptlfs = 2; <2 LFs from CPT1>
+ * .. Send message ..
*/
struct rsrc_attach {
struct mbox_msghdr hdr;
u8 modify:1;
u16 npalf;
u16 nixlf;
u16 sso;
u16 ssow;
u16 timlfs;
u16 cptlfs;
+ int cpt_blkaddr; /* BLKADDR_CPT0/BLKADDR_CPT1 or 0 for BLKADDR_CPT0 */
};
/* Structure for relinquishing resources.
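The rsrc_attach comment above gives the two-message CPT attach sequence only in pseudocode; spelled out, a requester could drive it roughly as below. A minimal sketch only: alloc_attach_msg() and send_msg() are hypothetical stand-ins for the requester's own mailbox allocation and send plumbing, not RVU APIs.

	struct rsrc_attach *attach;

	attach = alloc_attach_msg();		/* hypothetical helper */
	attach->cptlfs = 3;			/* cpt_blkaddr left 0, so BLKADDR_CPT0 */
	send_msg(attach);			/* hypothetical helper */

	attach = alloc_attach_msg();
	attach->modify = 1;			/* keep the LFs already attached */
	attach->cpt_blkaddr = BLKADDR_CPT1;
	attach->cptlfs = 2;			/* 2 more LFs, this time from CPT1 */
	send_msg(attach);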
@@ -314,6 +335,8 @@ struct msix_offset_rsp {
u16 ssow_msixoff[MAX_RVU_BLKLF_CNT];
u16 timlf_msixoff[MAX_RVU_BLKLF_CNT];
u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT];
+ u8 cpt1_lfs;
+ u16 cpt1_lf_msixoff[MAX_RVU_BLKLF_CNT];
};
struct get_hw_cap_rsp {
@@ -459,6 +482,20 @@ enum nix_af_status {
NIX_AF_ERR_LSO_CFG_FAIL = -418,
NIX_AF_INVAL_NPA_PF_FUNC = -419,
NIX_AF_INVAL_SSO_PF_FUNC = -420,
+ NIX_AF_ERR_TX_VTAG_NOSPC = -421,
+ NIX_AF_ERR_RX_VTAG_INUSE = -422,
+};
+
+/* For NIX RX vtag action */
+enum nix_rx_vtag0_type {
+ NIX_AF_LFX_RX_VTAG_TYPE0, /* reserved for rx vlan offload */
+ NIX_AF_LFX_RX_VTAG_TYPE1,
+ NIX_AF_LFX_RX_VTAG_TYPE2,
+ NIX_AF_LFX_RX_VTAG_TYPE3,
+ NIX_AF_LFX_RX_VTAG_TYPE4,
+ NIX_AF_LFX_RX_VTAG_TYPE5,
+ NIX_AF_LFX_RX_VTAG_TYPE6,
+ NIX_AF_LFX_RX_VTAG_TYPE7,
};
/* For NIX LF context alloc and init */
@@ -491,6 +528,16 @@ struct nix_lf_alloc_rsp {
u8 lf_tx_stats; /* NIX_AF_CONST1::LF_TX_STATS */
u16 cints; /* NIX_AF_CONST2::CINTS */
u16 qints; /* NIX_AF_CONST2::QINTS */
+ u8 cgx_links; /* No. of CGX links present in HW */
+ u8 lbk_links; /* No. of LBK links present in HW */
+ u8 sdp_links; /* No. of SDP links present in HW */
+};
+
+struct nix_lf_free_req {
+ struct mbox_msghdr hdr;
+#define NIX_LF_DISABLE_FLOWS BIT_ULL(0)
+#define NIX_LF_DONT_FREE_TX_VTAG BIT_ULL(1)
+ u64 flags;
};
/* NIX AQ enqueue msg */
@@ -583,14 +630,40 @@ struct nix_vtag_config {
union {
/* valid when cfg_type is '0' */
struct {
- /* tx vlan0 tag(C-VLAN) */
- u64 vlan0;
- /* tx vlan1 tag(S-VLAN) */
- u64 vlan1;
- /* insert tx vlan tag */
- u8 insert_vlan :1;
- /* insert tx double vlan tag */
- u8 double_vlan :1;
+ u64 vtag0;
+ u64 vtag1;
+
+ /* cfg_vtag0 & cfg_vtag1 fields are valid
+ * when free_vtag0 & free_vtag1 are '0's.
+ */
+ /* cfg_vtag0 = 1 to configure vtag0 */
+ u8 cfg_vtag0 :1;
+ /* cfg_vtag1 = 1 to configure vtag1 */
+ u8 cfg_vtag1 :1;
+
+ /* vtag0_idx & vtag1_idx are only valid when
+ * both cfg_vtag0 & cfg_vtag1 are '0's,
+ * these fields are used along with free_vtag0
+ * & free_vtag1 to free the nix lf's tx_vlan
+ * configuration.
+ *
+ * Denotes the indices of tx_vtag def registers
+ * that need to be cleared and freed.
+ */
+ int vtag0_idx;
+ int vtag1_idx;
+
+ /* free_vtag0 & free_vtag1 fields are valid
+ * when cfg_vtag0 & cfg_vtag1 are '0's.
+ */
+ /* free_vtag0 = 1 clears vtag0 configuration
+ * vtag0_idx denotes the index to be cleared.
+ */
+ u8 free_vtag0 :1;
+ /* free_vtag1 = 1 clears vtag1 configuration
+ * vtag1_idx denotes the index to be cleared.
+ */
+ u8 free_vtag1 :1;
} tx;
/* valid when cfg_type is '1' */
@@ -605,6 +678,17 @@
};
};
+struct nix_vtag_config_rsp {
+ struct mbox_msghdr hdr;
+ int vtag0_idx;
+ int vtag1_idx;
+ /* Indices of tx_vtag def registers used to configure
+ * tx vtag0 & vtag1 headers, these indices are valid
+ * when the nix_vtag_config mbox requested vtag0 and/or
+ * vtag1 configuration.
+ */
+};
+
struct nix_rss_flowkey_cfg {
struct mbox_msghdr hdr;
int mcam_index; /* MCAM entry index to modify */
@@ -865,6 +949,87 @@ struct npc_get_kex_cfg_rsp {
u8 mkex_pfl_name[MKEX_NAME_LEN];
};
+struct flow_msg {
+ unsigned char dmac[6];
+ unsigned char smac[6];
+ __be16 etype;
+ __be16 vlan_etype;
+ __be16 vlan_tci;
+ union {
+ __be32 ip4src;
+ __be32 ip6src[4];
+ };
+ union {
+ __be32 ip4dst;
+ __be32 ip6dst[4];
+ };
+ u8 tos;
+ u8 ip_ver;
+ u8 ip_proto;
+ u8 tc;
+ __be16 sport;
+ __be16 dport;
+};
+
+struct npc_install_flow_req {
+ struct mbox_msghdr hdr;
+ struct flow_msg packet;
+ struct flow_msg mask;
+ u64 features;
+ u16 entry;
+ u16 channel;
+ u8 intf;
+ u8 set_cntr; /* If counter is available set counter for this entry? */
+ u8 default_rule;
+ u8 append; /* overwrite(0) or append(1) flow to default rule? */
+ u16 vf;
+ /* action */
+ u32 index;
+ u16 match_id;
+ u8 flow_key_alg;
+ u8 op;
+ /* vtag rx action */
+ u8 vtag0_type;
+ u8 vtag0_valid;
+ u8 vtag1_type;
+ u8 vtag1_valid;
+ /* vtag tx action */
+ u16 vtag0_def;
+ u8 vtag0_op;
+ u16 vtag1_def;
+ u8 vtag1_op;
+};
+
+struct npc_install_flow_rsp {
+ struct mbox_msghdr hdr;
+ int counter; /* negative if no counter else counter number */
+};
+
+struct npc_delete_flow_req {
+ struct mbox_msghdr hdr;
+ u16 entry;
+ u16 start; /* Disable range of entries */
+ u16 end;
+ u8 all; /* PF + VFs */
+};
+
+struct npc_mcam_read_entry_req {
+ struct mbox_msghdr hdr;
+ u16 entry; /* MCAM entry to read */
+};
+
+struct npc_mcam_read_entry_rsp {
+ struct mbox_msghdr hdr;
+ struct mcam_entry entry_data;
+ u8 intf;
+ u8 enable;
+};
+
+struct npc_mcam_read_base_rule_rsp {
+ struct mbox_msghdr hdr;
+ struct mcam_entry entry;
+};
+
enum ptp_op {
PTP_OP_ADJFINE = 0,
PTP_OP_GET_CLOCK = 1,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 91a9d00e4fb5..a1f79445db71 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -140,6 +140,63 @@ enum npc_kpu_lh_ltype {
NPC_LT_LH_CUSTOM1 = 0xF,
};
+/* NPC port kind defines how the incoming or outgoing packets
+ * are processed. NPC accepts packets from up to 64 pkinds.
+ * Software assigns a pkind for each incoming port such as CGX
+ * Ethernet interfaces, LBK interfaces, etc.
+ */
+enum npc_pkind_type {
+ NPC_TX_DEF_PKIND = 63ULL, /* NIX-TX PKIND */
+};
+
+/* list of known and supported fields in packet header and
+ * fields present in key structure.
+ */
+enum key_fields {
+ NPC_DMAC,
+ NPC_SMAC,
+ NPC_ETYPE,
+ NPC_OUTER_VID,
+ NPC_TOS,
+ NPC_SIP_IPV4,
+ NPC_DIP_IPV4,
+ NPC_SIP_IPV6,
+ NPC_DIP_IPV6,
+ NPC_SPORT_TCP,
+ NPC_DPORT_TCP,
+ NPC_SPORT_UDP,
+ NPC_DPORT_UDP,
+ NPC_SPORT_SCTP,
+ NPC_DPORT_SCTP,
+ NPC_HEADER_FIELDS_MAX,
+ NPC_CHAN = NPC_HEADER_FIELDS_MAX, /* Valid when Rx */
+ NPC_PF_FUNC, /* Valid when Tx */
+ NPC_ERRLEV,
+ NPC_ERRCODE,
+ NPC_LXMB,
+ NPC_LA,
+ NPC_LB,
+ NPC_LC,
+ NPC_LD,
+ NPC_LE,
+ NPC_LF,
+ NPC_LG,
+ NPC_LH,
+ /* Ethertype for untagged frame */
+ NPC_ETYPE_ETHER,
+ /* Ethertype for single tagged frame */
+ NPC_ETYPE_TAG1,
+ /* Ethertype for double tagged frame */
+ NPC_ETYPE_TAG2,
+ /* outer vlan tci for single tagged frame */
+ NPC_VLAN_TAG1,
+ /* outer vlan tci for double tagged frame */
+ NPC_VLAN_TAG2,
+ /* other header fields programmed to extract but not of our interest */
+ NPC_UNKNOWN,
+ NPC_KEY_FIELDS_MAX,
+};
+
struct npc_kpu_profile_cam {
u8 state;
u8 state_mask;
@@ -300,11 +357,63 @@ struct nix_rx_action {
/* NPC_AF_INTFX_KEX_CFG field masks */
#define NPC_PARSE_NIBBLE GENMASK_ULL(30, 0)
+/* NPC_PARSE_KEX_S nibble definitions for each field */
+#define NPC_PARSE_NIBBLE_CHAN GENMASK_ULL(2, 0)
+#define NPC_PARSE_NIBBLE_ERRLEV BIT_ULL(3)
+#define NPC_PARSE_NIBBLE_ERRCODE GENMASK_ULL(5, 4)
+#define NPC_PARSE_NIBBLE_L2L3_BCAST BIT_ULL(6)
+#define NPC_PARSE_NIBBLE_LA_FLAGS GENMASK_ULL(8, 7)
+#define NPC_PARSE_NIBBLE_LA_LTYPE BIT_ULL(9)
+#define NPC_PARSE_NIBBLE_LB_FLAGS GENMASK_ULL(11, 10)
+#define NPC_PARSE_NIBBLE_LB_LTYPE BIT_ULL(12)
+#define NPC_PARSE_NIBBLE_LC_FLAGS GENMASK_ULL(14, 13)
+#define NPC_PARSE_NIBBLE_LC_LTYPE BIT_ULL(15)
+#define NPC_PARSE_NIBBLE_LD_FLAGS GENMASK_ULL(17, 16)
+#define NPC_PARSE_NIBBLE_LD_LTYPE BIT_ULL(18)
+#define NPC_PARSE_NIBBLE_LE_FLAGS GENMASK_ULL(20, 19)
+#define NPC_PARSE_NIBBLE_LE_LTYPE BIT_ULL(21)
+#define NPC_PARSE_NIBBLE_LF_FLAGS GENMASK_ULL(23, 22)
+#define NPC_PARSE_NIBBLE_LF_LTYPE BIT_ULL(24)
+#define NPC_PARSE_NIBBLE_LG_FLAGS GENMASK_ULL(26, 25)
+#define NPC_PARSE_NIBBLE_LG_LTYPE BIT_ULL(27)
+#define NPC_PARSE_NIBBLE_LH_FLAGS GENMASK_ULL(29, 28)
+#define NPC_PARSE_NIBBLE_LH_LTYPE BIT_ULL(30)
+
+struct nix_tx_action {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 rsvd_63_48 :16;
+ u64 match_id :16;
+ u64 index :20;
+ u64 rsvd_11_8 :8;
+ u64 op :4;
+#else
+ u64 op :4;
+ u64 rsvd_11_8 :8;
+ u64 index :20;
+ u64 match_id :16;
+ u64 rsvd_63_48 :16;
+#endif
+};
+
/* NIX Receive Vtag Action Structure */
-#define VTAG0_VALID_BIT BIT_ULL(15)
-#define VTAG0_TYPE_MASK GENMASK_ULL(14, 12)
-#define VTAG0_LID_MASK GENMASK_ULL(10, 8)
-#define VTAG0_RELPTR_MASK GENMASK_ULL(7, 0)
+#define RX_VTAG0_VALID_BIT BIT_ULL(15)
+#define RX_VTAG0_TYPE_MASK GENMASK_ULL(14, 12)
+#define RX_VTAG0_LID_MASK GENMASK_ULL(10, 8)
+#define RX_VTAG0_RELPTR_MASK GENMASK_ULL(7, 0)
+#define RX_VTAG1_VALID_BIT BIT_ULL(47)
+#define RX_VTAG1_TYPE_MASK GENMASK_ULL(46, 44)
+#define RX_VTAG1_LID_MASK GENMASK_ULL(42, 40)
+#define RX_VTAG1_RELPTR_MASK GENMASK_ULL(39, 32)
+
+/* NIX Transmit Vtag Action Structure */
+#define TX_VTAG0_DEF_MASK GENMASK_ULL(25, 16)
+#define TX_VTAG0_OP_MASK GENMASK_ULL(13, 12)
+#define TX_VTAG0_LID_MASK GENMASK_ULL(10, 8)
+#define TX_VTAG0_RELPTR_MASK GENMASK_ULL(7, 0)
+#define TX_VTAG1_DEF_MASK GENMASK_ULL(57, 48)
+#define TX_VTAG1_OP_MASK GENMASK_ULL(45, 44)
+#define TX_VTAG1_LID_MASK GENMASK_ULL(42, 40)
+#define TX_VTAG1_RELPTR_MASK GENMASK_ULL(39, 32)
struct npc_mcam_kex {
/* MKEX Profile Header */
@@ -357,4 +466,24 @@ struct npc_lt_def_cfg {
struct npc_lt_def pck_iip4;
};
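Since the RX/TX vtag action masks above are ordinary GENMASK_ULL() field definitions, a 64-bit vtag action word can be assembled with the standard kernel bitfield helpers. A minimal sketch, not code from this series (def, op, lid and relptr are caller-chosen values):

	#include <linux/bitfield.h>

	/* Pack one TX vtag0 definition into the low half of a vtag action word. */
	static u64 make_tx_vtag0_action(u16 def, u8 op, u8 lid, u8 relptr)
	{
		return FIELD_PREP(TX_VTAG0_DEF_MASK, def) |
		       FIELD_PREP(TX_VTAG0_OP_MASK, op) |
		       FIELD_PREP(TX_VTAG0_LID_MASK, lid) |
		       FIELD_PREP(TX_VTAG0_RELPTR_MASK, relptr);
	}

The TX_VTAG1_* masks mirror the same layout in bits 32-63 for the second tag.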
+struct rvu_npc_mcam_rule { + struct flow_msg packet; + struct flow_msg mask; + u8 intf; + union { + struct nix_tx_action tx_action; + struct nix_rx_action rx_action; + }; + u64 vtag_action; + struct list_head list; + u64 features; + u16 owner; + u16 entry; + u16 cntr; + bool has_cntr; + u8 default_rule; + bool enable; + bool vfvlan_cfg; +}; + #endif /* NPC_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index 77bb4ed32600..b192692b4fc4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -148,6 +148,20 @@ (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ ((flags_ena) << 6) | ((key_ofs) & 0x3F)) +/* Rx parse key extract nibble enable */ +#define NPC_PARSE_NIBBLE_INTF_RX (NPC_PARSE_NIBBLE_CHAN | \ + NPC_PARSE_NIBBLE_LA_LTYPE | \ + NPC_PARSE_NIBBLE_LB_LTYPE | \ + NPC_PARSE_NIBBLE_LC_LTYPE | \ + NPC_PARSE_NIBBLE_LD_LTYPE | \ + NPC_PARSE_NIBBLE_LE_LTYPE) +/* Tx parse key extract nibble enable */ +#define NPC_PARSE_NIBBLE_INTF_TX (NPC_PARSE_NIBBLE_LA_LTYPE | \ + NPC_PARSE_NIBBLE_LB_LTYPE | \ + NPC_PARSE_NIBBLE_LC_LTYPE | \ + NPC_PARSE_NIBBLE_LD_LTYPE | \ + NPC_PARSE_NIBBLE_LE_LTYPE) + enum npc_kpu_parser_state { NPC_S_NA = 0, NPC_S_KPU1_ETHER, @@ -13380,14 +13394,15 @@ static const struct npc_lt_def_cfg npc_lt_defaults = { }, }; -static const struct npc_mcam_kex npc_mkex_default = { +static struct npc_mcam_kex npc_mkex_default = { .mkex_sign = MKEX_SIGN, .name = "default", .kpu_version = NPC_KPU_PROFILE_VER, .keyx_cfg = { - /* nibble: LA..LE (ltype only) + Channel */ - [NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_X2 << 32) | 0x49247, - [NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | ((1ULL << 19) - 1), + /* nibble: LA..LE (ltype only) + channel */ + [NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_X2 << 32) | NPC_PARSE_NIBBLE_INTF_RX, + /* nibble: LA..LE (ltype only) */ + [NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | NPC_PARSE_NIBBLE_INTF_TX, }, .intf_lid_lt_ld = { /* Default RX MCAM KEX profile */ @@ -13405,12 +13420,14 @@ static const struct npc_mcam_kex npc_mkex_default = { /* Layer B: Single VLAN (CTAG) */ /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ [NPC_LT_LB_CTAG] = { - KEX_LD_CFG(0x03, 0x0, 0x1, 0x0, 0x4), + KEX_LD_CFG(0x03, 0x2, 0x1, 0x0, 0x4), }, /* Layer B: Stacked VLAN (STAG|QinQ) */ [NPC_LT_LB_STAG_QINQ] = { - /* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */ - KEX_LD_CFG(0x03, 0x4, 0x1, 0x0, 0x4), + /* Outer VLAN: 2 bytes, KW0[63:48] */ + KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x6), + /* Ethertype: 2 bytes, KW0[47:32] */ + KEX_LD_CFG(0x01, 0x8, 0x1, 0x0, 0x4), }, [NPC_LT_LB_FDSA] = { /* SWITCH PORT: 1 byte, KW0[63:48] */ @@ -13436,17 +13453,71 @@ static const struct npc_mcam_kex npc_mkex_default = { [NPC_LID_LD] = { /* Layer D:UDP */ [NPC_LT_LD_UDP] = { - /* SPORT: 2 bytes, KW3[15:0] */ - KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18), - /* DPORT: 2 bytes, KW3[31:16] */ - KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a), + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_LD_CFG(0x3, 0x0, 0x1, 0x0, 0x18), + }, + /* Layer D:TCP */ + [NPC_LT_LD_TCP] = { + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_LD_CFG(0x3, 0x0, 0x1, 0x0, 0x18), + }, + }, + }, + + /* Default TX MCAM KEX profile */ + [NIX_INTF_TX] = { + [NPC_LID_LA] = { + /* Layer A: NIX_INST_HDR_S + Ethernet */ + /* NIX appends 8 bytes of NIX_INST_HDR_S at the + * start of each TX packet supplied to NPC. 
+ */ + [NPC_LT_LA_IH_NIX_ETHER] = { + /* PF_FUNC: 2B , KW0 [47:32] */ + KEX_LD_CFG(0x01, 0x0, 0x1, 0x0, 0x4), + /* DMAC: 6 bytes, KW1[63:16] */ + KEX_LD_CFG(0x05, 0x8, 0x1, 0x0, 0xa), + }, + }, + [NPC_LID_LB] = { + /* Layer B: Single VLAN (CTAG) */ + [NPC_LT_LB_CTAG] = { + /* CTAG VLAN[2..3] KW0[63:48] */ + KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x6), + /* CTAG VLAN[2..3] KW1[15:0] */ + KEX_LD_CFG(0x01, 0x4, 0x1, 0x0, 0x8), + }, + /* Layer B: Stacked VLAN (STAG|QinQ) */ + [NPC_LT_LB_STAG_QINQ] = { + /* Outer VLAN: 2 bytes, KW0[63:48] */ + KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x6), + /* Outer VLAN: 2 Bytes, KW1[15:0] */ + KEX_LD_CFG(0x01, 0x8, 0x1, 0x0, 0x8), + }, + }, + [NPC_LID_LC] = { + /* Layer C: IPv4 */ + [NPC_LT_LC_IP] = { + /* SIP+DIP: 8 bytes, KW2[63:0] */ + KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10), + /* TOS: 1 byte, KW1[63:56] */ + KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf), + }, + /* Layer C: IPv6 */ + [NPC_LT_LC_IP6] = { + /* Everything up to SADDR: 8 bytes, KW2[63:0] */ + KEX_LD_CFG(0x07, 0x0, 0x1, 0x0, 0x10), + }, + }, + [NPC_LID_LD] = { + /* Layer D:UDP */ + [NPC_LT_LD_UDP] = { + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_LD_CFG(0x3, 0x0, 0x1, 0x0, 0x18), }, /* Layer D:TCP */ [NPC_LT_LD_TCP] = { - /* SPORT: 2 bytes, KW3[15:0] */ - KEX_LD_CFG(0x1, 0x0, 0x1, 0x0, 0x18), - /* DPORT: 2 bytes, KW3[31:16] */ - KEX_LD_CFG(0x1, 0x2, 0x1, 0x0, 0x1a), + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_LD_CFG(0x3, 0x0, 0x1, 0x0, 0x18), }, }, }, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index e1f918960730..9f901c0edcbb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -66,6 +66,7 @@ static void rvu_setup_hw_capabilities(struct rvu *rvu) hw->cap.nix_shaping = true; hw->cap.nix_tx_link_bp = true; hw->cap.nix_rx_multicast = true; + hw->rvu = rvu; if (is_rvu_96xx_B0(rvu)) { hw->cap.nix_fixed_txschq_mapping = true; @@ -210,6 +211,9 @@ int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot) * multiple blocks of same type. * * @pcifunc has to be zero when no LF is yet attached. + * + * For a pcifunc if LFs are attached from multiple blocks of same type, then + * return blkaddr of first encountered block. */ int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc) { @@ -258,20 +262,39 @@ int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc) devnum = rvu_get_pf(pcifunc); } - /* Check if the 'pcifunc' has a NIX LF from 'BLKADDR_NIX0' */ + /* Check if the 'pcifunc' has a NIX LF from 'BLKADDR_NIX0' or + * 'BLKADDR_NIX1'. + */ if (blktype == BLKTYPE_NIX) { - reg = is_pf ? RVU_PRIV_PFX_NIX0_CFG : RVU_PRIV_HWVFX_NIX0_CFG; + reg = is_pf ? RVU_PRIV_PFX_NIXX_CFG(0) : + RVU_PRIV_HWVFX_NIXX_CFG(0); cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16)); - if (cfg) + if (cfg) { blkaddr = BLKADDR_NIX0; + goto exit; + } + + reg = is_pf ? RVU_PRIV_PFX_NIXX_CFG(1) : + RVU_PRIV_HWVFX_NIXX_CFG(1); + cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16)); + if (cfg) + blkaddr = BLKADDR_NIX1; } - /* Check if the 'pcifunc' has a CPT LF from 'BLKADDR_CPT0' */ if (blktype == BLKTYPE_CPT) { - reg = is_pf ? RVU_PRIV_PFX_CPT0_CFG : RVU_PRIV_HWVFX_CPT0_CFG; + reg = is_pf ? RVU_PRIV_PFX_CPTX_CFG(0) : + RVU_PRIV_HWVFX_CPTX_CFG(0); cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16)); - if (cfg) + if (cfg) { blkaddr = BLKADDR_CPT0; + goto exit; + } + + reg = is_pf ? 
RVU_PRIV_PFX_CPTX_CFG(1) : + RVU_PRIV_HWVFX_CPTX_CFG(1); + cfg = rvu_read64(rvu, BLKADDR_RVUM, reg | (devnum << 16)); + if (cfg) + blkaddr = BLKADDR_CPT1; } exit: @@ -306,31 +329,36 @@ static void rvu_update_rsrc_map(struct rvu *rvu, struct rvu_pfvf *pfvf, block->fn_map[lf] = attach ? pcifunc : 0; - switch (block->type) { - case BLKTYPE_NPA: + switch (block->addr) { + case BLKADDR_NPA: pfvf->npalf = attach ? true : false; num_lfs = pfvf->npalf; break; - case BLKTYPE_NIX: + case BLKADDR_NIX0: + case BLKADDR_NIX1: pfvf->nixlf = attach ? true : false; num_lfs = pfvf->nixlf; break; - case BLKTYPE_SSO: + case BLKADDR_SSO: attach ? pfvf->sso++ : pfvf->sso--; num_lfs = pfvf->sso; break; - case BLKTYPE_SSOW: + case BLKADDR_SSOW: attach ? pfvf->ssow++ : pfvf->ssow--; num_lfs = pfvf->ssow; break; - case BLKTYPE_TIM: + case BLKADDR_TIM: attach ? pfvf->timlfs++ : pfvf->timlfs--; num_lfs = pfvf->timlfs; break; - case BLKTYPE_CPT: + case BLKADDR_CPT0: attach ? pfvf->cptlfs++ : pfvf->cptlfs--; num_lfs = pfvf->cptlfs; break; + case BLKADDR_CPT1: + attach ? pfvf->cpt1_lfs++ : pfvf->cpt1_lfs--; + num_lfs = pfvf->cpt1_lfs; + break; } reg = is_pf ? block->pf_lfcnt_reg : block->vf_lfcnt_reg; @@ -466,12 +494,16 @@ static void rvu_reset_all_blocks(struct rvu *rvu) /* Do a HW reset of all RVU blocks */ rvu_block_reset(rvu, BLKADDR_NPA, NPA_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_NIX0, NIX_AF_BLK_RST); + rvu_block_reset(rvu, BLKADDR_NIX1, NIX_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_NPC, NPC_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_SSO, SSO_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_TIM, TIM_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_CPT0, CPT_AF_BLK_RST); + rvu_block_reset(rvu, BLKADDR_CPT1, CPT_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_NDC_NIX0_RX, NDC_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_NDC_NIX0_TX, NDC_AF_BLK_RST); + rvu_block_reset(rvu, BLKADDR_NDC_NIX1_RX, NDC_AF_BLK_RST); + rvu_block_reset(rvu, BLKADDR_NDC_NIX1_TX, NDC_AF_BLK_RST); rvu_block_reset(rvu, BLKADDR_NDC_NPA0, NDC_AF_BLK_RST); } @@ -695,6 +727,10 @@ static void rvu_setup_pfvf_macaddress(struct rvu *rvu) u64 *mac; for (pf = 0; pf < hw->total_pfs; pf++) { + /* For PF0(AF), Assign MAC address to only VFs (LBKVFs) */ + if (!pf) + goto lbkvf; + if (!is_pf_cgxmapped(rvu, pf)) continue; /* Assign MAC address to PF */ @@ -708,8 +744,10 @@ static void rvu_setup_pfvf_macaddress(struct rvu *rvu) } else { eth_random_addr(pfvf->mac_addr); } + ether_addr_copy(pfvf->default_mac, pfvf->mac_addr); - /* Assign MAC address to VFs */ +lbkvf: + /* Assign MAC address to VFs*/ rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); for (vf = 0; vf < numvfs; vf++, hwvf++) { pfvf = &rvu->hwvf[hwvf]; @@ -722,6 +760,7 @@ static void rvu_setup_pfvf_macaddress(struct rvu *rvu) } else { eth_random_addr(pfvf->mac_addr); } + ether_addr_copy(pfvf->default_mac, pfvf->mac_addr); } } } @@ -757,6 +796,62 @@ static void rvu_fwdata_exit(struct rvu *rvu) iounmap(rvu->fwdata); } +static int rvu_setup_nix_hw_resource(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkid; + u64 cfg; + + /* Init NIX LF's bitmap */ + block = &hw->block[blkaddr]; + if (!block->implemented) + return 0; + blkid = (blkaddr == BLKADDR_NIX0) ? 
0 : 1; + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + block->lf.max = cfg & 0xFFF; + block->addr = blkaddr; + block->type = BLKTYPE_NIX; + block->lfshift = 8; + block->lookup_reg = NIX_AF_RVU_LF_CFG_DEBUG; + block->pf_lfcnt_reg = RVU_PRIV_PFX_NIXX_CFG(blkid); + block->vf_lfcnt_reg = RVU_PRIV_HWVFX_NIXX_CFG(blkid); + block->lfcfg_reg = NIX_PRIV_LFX_CFG; + block->msixcfg_reg = NIX_PRIV_LFX_INT_CFG; + block->lfreset_reg = NIX_AF_LF_RST; + sprintf(block->name, "NIX%d", blkid); + rvu->nix_blkaddr[blkid] = blkaddr; + return rvu_alloc_bitmap(&block->lf); +} + +static int rvu_setup_cpt_hw_resource(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkid; + u64 cfg; + + /* Init CPT LF's bitmap */ + block = &hw->block[blkaddr]; + if (!block->implemented) + return 0; + blkid = (blkaddr == BLKADDR_CPT0) ? 0 : 1; + cfg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS0); + block->lf.max = cfg & 0xFF; + block->addr = blkaddr; + block->type = BLKTYPE_CPT; + block->multislot = true; + block->lfshift = 3; + block->lookup_reg = CPT_AF_RVU_LF_CFG_DEBUG; + block->pf_lfcnt_reg = RVU_PRIV_PFX_CPTX_CFG(blkid); + block->vf_lfcnt_reg = RVU_PRIV_HWVFX_CPTX_CFG(blkid); + block->lfcfg_reg = CPT_PRIV_LFX_CFG; + block->msixcfg_reg = CPT_PRIV_LFX_INT_CFG; + block->lfreset_reg = CPT_AF_LF_RST; + sprintf(block->name, "CPT%d", blkid); + return rvu_alloc_bitmap(&block->lf); +} + static int rvu_setup_hw_resources(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; @@ -791,27 +886,13 @@ static int rvu_setup_hw_resources(struct rvu *rvu) return err; nix: - /* Init NIX LF's bitmap */ - block = &hw->block[BLKADDR_NIX0]; - if (!block->implemented) - goto sso; - cfg = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_CONST2); - block->lf.max = cfg & 0xFFF; - block->addr = BLKADDR_NIX0; - block->type = BLKTYPE_NIX; - block->lfshift = 8; - block->lookup_reg = NIX_AF_RVU_LF_CFG_DEBUG; - block->pf_lfcnt_reg = RVU_PRIV_PFX_NIX0_CFG; - block->vf_lfcnt_reg = RVU_PRIV_HWVFX_NIX0_CFG; - block->lfcfg_reg = NIX_PRIV_LFX_CFG; - block->msixcfg_reg = NIX_PRIV_LFX_INT_CFG; - block->lfreset_reg = NIX_AF_LF_RST; - sprintf(block->name, "NIX"); - err = rvu_alloc_bitmap(&block->lf); + err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX0); + if (err) + return err; + err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX1); if (err) return err; -sso: /* Init SSO group's bitmap */ block = &hw->block[BLKADDR_SSO]; if (!block->implemented) @@ -877,28 +958,13 @@ tim: return err; cpt: - /* Init CPT LF's bitmap */ - block = &hw->block[BLKADDR_CPT0]; - if (!block->implemented) - goto init; - cfg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS0); - block->lf.max = cfg & 0xFF; - block->addr = BLKADDR_CPT0; - block->type = BLKTYPE_CPT; - block->multislot = true; - block->lfshift = 3; - block->lookup_reg = CPT_AF_RVU_LF_CFG_DEBUG; - block->pf_lfcnt_reg = RVU_PRIV_PFX_CPT0_CFG; - block->vf_lfcnt_reg = RVU_PRIV_HWVFX_CPT0_CFG; - block->lfcfg_reg = CPT_PRIV_LFX_CFG; - block->msixcfg_reg = CPT_PRIV_LFX_INT_CFG; - block->lfreset_reg = CPT_AF_LF_RST; - sprintf(block->name, "CPT"); - err = rvu_alloc_bitmap(&block->lf); + err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT0); + if (err) + return err; + err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT1); if (err) return err; -init: /* Allocate memory for PFVF data */ rvu->pf = devm_kcalloc(rvu->dev, hw->total_pfs, sizeof(struct rvu_pfvf), GFP_KERNEL); @@ -1025,7 +1091,30 @@ int rvu_mbox_handler_ready(struct rvu *rvu, struct msg_req *req, /* Get current count of a RVU block's LF/slots * provisioned 
to a given RVU func. */
-static u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blktype)
+u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blkaddr)
+{
+ switch (blkaddr) {
+ case BLKADDR_NPA:
+ return pfvf->npalf ? 1 : 0;
+ case BLKADDR_NIX0:
+ case BLKADDR_NIX1:
+ return pfvf->nixlf ? 1 : 0;
+ case BLKADDR_SSO:
+ return pfvf->sso;
+ case BLKADDR_SSOW:
+ return pfvf->ssow;
+ case BLKADDR_TIM:
+ return pfvf->timlfs;
+ case BLKADDR_CPT0:
+ return pfvf->cptlfs;
+ case BLKADDR_CPT1:
+ return pfvf->cpt1_lfs;
+ }
+ return 0;
+}
+
+/* Return true if LFs of block type are attached to pcifunc */
+static bool is_blktype_attached(struct rvu_pfvf *pfvf, int blktype)
{
switch (blktype) {
case BLKTYPE_NPA:
@@ -1033,15 +1122,16 @@ static u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blktype)
case BLKTYPE_NIX:
return pfvf->nixlf ? 1 : 0;
case BLKTYPE_SSO:
- return pfvf->sso;
+ return !!pfvf->sso;
case BLKTYPE_SSOW:
- return pfvf->ssow;
+ return !!pfvf->ssow;
case BLKTYPE_TIM:
- return pfvf->timlfs;
+ return !!pfvf->timlfs;
case BLKTYPE_CPT:
- return pfvf->cptlfs;
+ return pfvf->cptlfs || pfvf->cpt1_lfs;
}
- return 0;
+
+ return false;
}
bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype)
@@ -1054,7 +1144,7 @@ bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype)
pfvf = rvu_get_pfvf(rvu, pcifunc);
/* Check if this PFFUNC has a LF of type blktype attached */
- if (!rvu_get_rsrc_mapcount(pfvf, blktype))
+ if (!is_blktype_attached(pfvf, blktype))
return false;
return true;
@@ -1093,9 +1183,12 @@ static void rvu_detach_block(struct rvu *rvu, int pcifunc, int blktype)
if (blkaddr < 0)
return;
+ if (blktype == BLKTYPE_NIX)
+ rvu_nix_reset_mac(pfvf, pcifunc);
+
block = &hw->block[blkaddr];
- num_lfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+ num_lfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
if (!num_lfs)
return;
@@ -1146,6 +1239,8 @@ static int rvu_detach_rsrcs(struct rvu *rvu, struct rsrc_detach *detach,
continue;
else if ((blkid == BLKADDR_NIX0) && !detach->nixlf)
continue;
+ else if ((blkid == BLKADDR_NIX1) && !detach->nixlf)
+ continue;
else if ((blkid == BLKADDR_SSO) && !detach->sso)
continue;
else if ((blkid == BLKADDR_SSOW) && !detach->ssow)
@@ -1154,6 +1249,8 @@ static int rvu_detach_rsrcs(struct rvu *rvu, struct rsrc_detach *detach,
continue;
else if ((blkid == BLKADDR_CPT0) && !detach->cptlfs)
continue;
+ else if ((blkid == BLKADDR_CPT1) && !detach->cptlfs)
+ continue;
}
rvu_detach_block(rvu, pcifunc, block->type);
}
@@ -1169,8 +1266,73 @@ int rvu_mbox_handler_detach_resources(struct rvu *rvu,
return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc);
}
-static void rvu_attach_block(struct rvu *rvu, int pcifunc,
- int blktype, int num_lfs)
+static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ int blkaddr = BLKADDR_NIX0, vf;
+ struct rvu_pfvf *pf;
+
+ /* All CGX mapped PFs are set with assigned NIX block during init */
+ if (is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
+ pf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+ blkaddr = pf->nix_blkaddr;
+ } else if (is_afvf(pcifunc)) {
+ vf = pcifunc - 1;
+ /* Assign NIX based on VF number. All even numbered VFs get
+ * NIX0 and odd numbered ones get NIX1
+ */
+ blkaddr = (vf & 1) ? BLKADDR_NIX1 : BLKADDR_NIX0;
+ /* NIX1 is not present on all silicons */
+ if (!is_block_implemented(rvu->hw, BLKADDR_NIX1))
+ blkaddr = BLKADDR_NIX0;
+ }
+
+ switch (blkaddr) {
+ case BLKADDR_NIX1:
+ pfvf->nix_blkaddr = BLKADDR_NIX1;
+ pfvf->nix_rx_intf = NIX_INTFX_RX(1);
+ pfvf->nix_tx_intf = NIX_INTFX_TX(1);
+ break;
+ case BLKADDR_NIX0:
+ default:
+ pfvf->nix_blkaddr = BLKADDR_NIX0;
+ pfvf->nix_rx_intf = NIX_INTFX_RX(0);
+ pfvf->nix_tx_intf = NIX_INTFX_TX(0);
+ break;
+ }
+
+ return pfvf->nix_blkaddr;
+}
+
+static int rvu_get_attach_blkaddr(struct rvu *rvu, int blktype,
+ u16 pcifunc, struct rsrc_attach *attach)
+{
+ int blkaddr;
+
+ switch (blktype) {
+ case BLKTYPE_NIX:
+ blkaddr = rvu_get_nix_blkaddr(rvu, pcifunc);
+ break;
+ case BLKTYPE_CPT:
+ if (attach->hdr.ver < RVU_MULTI_BLK_VER)
+ return rvu_get_blkaddr(rvu, blktype, 0);
+ blkaddr = attach->cpt_blkaddr ? attach->cpt_blkaddr :
+ BLKADDR_CPT0;
+ if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1)
+ return -ENODEV;
+ break;
+ default:
+ return rvu_get_blkaddr(rvu, blktype, 0);
+ }
+
+ if (is_block_implemented(rvu->hw, blkaddr))
+ return blkaddr;
+
+ return -ENODEV;
+}
+
+static void rvu_attach_block(struct rvu *rvu, int pcifunc, int blktype,
+ int num_lfs, struct rsrc_attach *attach)
{
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
struct rvu_hwinfo *hw = rvu->hw;
@@ -1182,7 +1344,7 @@
if (!num_lfs)
return;
- blkaddr = rvu_get_blkaddr(rvu, blktype, 0);
+ blkaddr = rvu_get_attach_blkaddr(rvu, blktype, pcifunc, attach);
if (blkaddr < 0)
return;
@@ -1211,12 +1373,12 @@
struct rsrc_attach *req, u16 pcifunc)
{
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+ int free_lfs, mappedlfs, blkaddr;
struct rvu_hwinfo *hw = rvu->hw;
struct rvu_block *block;
- int free_lfs, mappedlfs;
/* Only one NPA LF can be attached */
- if (req->npalf && !rvu_get_rsrc_mapcount(pfvf, BLKTYPE_NPA)) {
+ if (req->npalf && !is_blktype_attached(pfvf, BLKTYPE_NPA)) {
block = &hw->block[BLKADDR_NPA];
free_lfs = rvu_rsrc_free_count(&block->lf);
if (!free_lfs)
@@ -1229,8 +1391,12 @@
}
/* Only one NIX LF can be attached */
- if (req->nixlf && !rvu_get_rsrc_mapcount(pfvf, BLKTYPE_NIX)) {
- block = &hw->block[BLKADDR_NIX0];
+ if (req->nixlf && !is_blktype_attached(pfvf, BLKTYPE_NIX)) {
+ blkaddr = rvu_get_attach_blkaddr(rvu, BLKTYPE_NIX,
+ pcifunc, req);
+ if (blkaddr < 0)
+ return blkaddr;
+ block = &hw->block[blkaddr];
free_lfs = rvu_rsrc_free_count(&block->lf);
if (!free_lfs)
goto fail;
@@ -1250,7 +1416,7 @@
pcifunc, req->sso, block->lf.max);
return -EINVAL;
}
- mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+ mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
free_lfs = rvu_rsrc_free_count(&block->lf);
/* Check if additional resources are available */
if (req->sso > mappedlfs &&
@@ -1266,7 +1432,7 @@
pcifunc, req->sso, block->lf.max);
return -EINVAL;
}
- mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+ mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
free_lfs = rvu_rsrc_free_count(&block->lf);
if (req->ssow > mappedlfs &&
((req->ssow - mappedlfs) > free_lfs))
@@ -1281,7 +1447,7 @@
pcifunc, req->timlfs, block->lf.max);
return -EINVAL;
}
- mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type);
+
mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr); free_lfs = rvu_rsrc_free_count(&block->lf); if (req->timlfs > mappedlfs && ((req->timlfs - mappedlfs) > free_lfs)) @@ -1289,14 +1455,18 @@ static int rvu_check_rsrc_availability(struct rvu *rvu, } if (req->cptlfs) { - block = &hw->block[BLKADDR_CPT0]; + blkaddr = rvu_get_attach_blkaddr(rvu, BLKTYPE_CPT, + pcifunc, req); + if (blkaddr < 0) + return blkaddr; + block = &hw->block[blkaddr]; if (req->cptlfs > block->lf.max) { dev_err(&rvu->pdev->dev, "Func 0x%x: Invalid CPTLF req, %d > max %d\n", pcifunc, req->cptlfs, block->lf.max); return -EINVAL; } - mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type); + mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr); free_lfs = rvu_rsrc_free_count(&block->lf); if (req->cptlfs > mappedlfs && ((req->cptlfs - mappedlfs) > free_lfs)) @@ -1310,6 +1480,22 @@ fail: return -ENOSPC; } +static bool rvu_attach_from_same_block(struct rvu *rvu, int blktype, + struct rsrc_attach *attach) +{ + int blkaddr, num_lfs; + + blkaddr = rvu_get_attach_blkaddr(rvu, blktype, + attach->hdr.pcifunc, attach); + if (blkaddr < 0) + return false; + + num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, attach->hdr.pcifunc), + blkaddr); + /* Requester already has LFs from given block ? */ + return !!num_lfs; +} + int rvu_mbox_handler_attach_resources(struct rvu *rvu, struct rsrc_attach *attach, struct msg_rsp *rsp) @@ -1330,10 +1516,10 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, /* Now attach the requested resources */ if (attach->npalf) - rvu_attach_block(rvu, pcifunc, BLKTYPE_NPA, 1); + rvu_attach_block(rvu, pcifunc, BLKTYPE_NPA, 1, attach); if (attach->nixlf) - rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1); + rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1, attach); if (attach->sso) { /* RVU func doesn't know which exact LF or slot is attached @@ -1343,25 +1529,30 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, */ if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSO); - rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, attach->sso); + rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, + attach->sso, attach); } if (attach->ssow) { if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSOW); - rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, attach->ssow); + rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, + attach->ssow, attach); } if (attach->timlfs) { if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_TIM); - rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, attach->timlfs); + rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, + attach->timlfs, attach); } if (attach->cptlfs) { - if (attach->modify) + if (attach->modify && + rvu_attach_from_same_block(rvu, BLKTYPE_CPT, attach)) rvu_detach_block(rvu, pcifunc, BLKTYPE_CPT); - rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, attach->cptlfs); + rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, + attach->cptlfs, attach); } exit: @@ -1439,7 +1630,7 @@ int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req, struct rvu_hwinfo *hw = rvu->hw; u16 pcifunc = req->hdr.pcifunc; struct rvu_pfvf *pfvf; - int lf, slot; + int lf, slot, blkaddr; pfvf = rvu_get_pfvf(rvu, pcifunc); if (!pfvf->msix.bmap) @@ -1449,8 +1640,14 @@ int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req, lf = rvu_get_lf(rvu, &hw->block[BLKADDR_NPA], pcifunc, 0); rsp->npa_msixoff = rvu_get_msix_offset(rvu, pfvf, BLKADDR_NPA, lf); - lf = rvu_get_lf(rvu, &hw->block[BLKADDR_NIX0], pcifunc, 0); - rsp->nix_msixoff = rvu_get_msix_offset(rvu, pfvf, BLKADDR_NIX0, lf); + /* Get BLKADDR from which LFs 
are attached to pcifunc */
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (blkaddr < 0) {
+ rsp->nix_msixoff = MSIX_VECTOR_INVALID;
+ } else {
+ lf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+ rsp->nix_msixoff = rvu_get_msix_offset(rvu, pfvf, blkaddr, lf);
+ }
rsp->sso = pfvf->sso;
for (slot = 0; slot < rsp->sso; slot++) {
@@ -1479,6 +1676,14 @@ int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req,
rsp->cptlf_msixoff[slot] =
rvu_get_msix_offset(rvu, pfvf, BLKADDR_CPT0, lf);
}
+
+ rsp->cpt1_lfs = pfvf->cpt1_lfs;
+ for (slot = 0; slot < rsp->cpt1_lfs; slot++) {
+ lf = rvu_get_lf(rvu, &hw->block[BLKADDR_CPT1], pcifunc, slot);
+ rsp->cpt1_lf_msixoff[slot] =
+ rvu_get_msix_offset(rvu, pfvf, BLKADDR_CPT1, lf);
+ }
+
return 0;
}
@@ -1932,7 +2137,7 @@ static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr)
block = &rvu->hw->block[blkaddr];
num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc),
- block->type);
+ block->addr);
if (!num_lfs)
return;
for (slot = 0; slot < num_lfs; slot++) {
@@ -1941,7 +2146,7 @@ static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr)
continue;
/* Cleanup LF and reset it */
- if (block->addr == BLKADDR_NIX0)
+ if (block->addr == BLKADDR_NIX0 || block->addr == BLKADDR_NIX1)
rvu_nix_lf_teardown(rvu, pcifunc, block->addr, lf);
else if (block->addr == BLKADDR_NPA)
rvu_npa_lf_teardown(rvu, pcifunc, lf);
@@ -1963,7 +2168,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc)
* 3. Cleanup pools (NPA)
*/
rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NIX0);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_NIX1);
rvu_blklf_teardown(rvu, pcifunc, BLKADDR_CPT0);
+ rvu_blklf_teardown(rvu, pcifunc, BLKADDR_CPT1);
rvu_blklf_teardown(rvu, pcifunc, BLKADDR_TIM);
rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSOW);
rvu_blklf_teardown(rvu, pcifunc, BLKADDR_SSO);
@@ -2445,7 +2652,7 @@ static void rvu_enable_afvf_intr(struct rvu *rvu)
#define PCI_DEVID_OCTEONTX2_LBK 0xA061
-static int lbk_get_num_chans(void)
+int rvu_get_num_lbk_chans(void)
{
struct pci_dev *pdev;
void __iomem *base;
@@ -2480,7 +2687,7 @@ static int rvu_enable_sriov(struct rvu *rvu)
return 0;
}
- chans = lbk_get_num_chans();
+ chans = rvu_get_num_lbk_chans();
if (chans < 0)
return chans;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 90eed3160915..37774bac32b0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -15,6 +15,7 @@
#include "rvu_struct.h"
#include "common.h"
#include "mbox.h"
+#include "npc.h"
/* PCI device IDs */
#define PCI_DEVID_OCTEONTX2_RVU_AF 0xA065
@@ -28,6 +29,7 @@
#define PCI_MBOX_BAR_NUM 4
#define NAME_SIZE 32
+#define MAX_NIX_BLKS 2
/* PF_FUNC */
#define RVU_PFVF_PF_SHIFT 10
@@ -104,6 +106,36 @@ struct nix_mce_list {
int max;
};
+/* layer metadata to uniquely identify a packet header field */
+struct npc_layer_mdata {
+ u8 lid;
+ u8 ltype;
+ u8 hdr;
+ u8 key;
+ u8 len;
+};
+
+/* Structure to represent a field present in the
+ * generated key. A key field may be present anywhere and can
+ * be of any size in the generated key. Once this structure
+ * is populated for fields of interest, then the field's presence
+ * and location (if present) can be known.
+ */
+struct npc_key_field {
+ /* Masks where all set bits indicate position
+ * of a field in the key
+ */
+ u64 kw_mask[NPC_MAX_KWS_IN_KEY];
+ /* Number of words in the key a field spans.
If a field is + * of 16 bytes and key offset is 4 then the field will use + * 4 bytes in KW0, 8 bytes in KW1 and 4 bytes in KW2 and + * nr_kws will be 3(KW0, KW1 and KW2). + */ + int nr_kws; + /* used by packet header fields */ + struct npc_layer_mdata layer_mdata; +}; + struct npc_mcam { struct rsrc_bmap counters; struct mutex lock; /* MCAM entries and counters update lock */ @@ -115,6 +147,7 @@ struct npc_mcam { u16 *entry2cntr_map; u16 *cntr2pfvf_map; u16 *cntr_refcnt; + u16 *entry2target_pffunc; u8 keysize; /* MCAM keysize 112/224/448 bits */ u8 banks; /* Number of MCAM banks */ u8 banks_per_entry;/* Number of keywords in key */ @@ -127,6 +160,12 @@ struct npc_mcam { u16 hprio_count; u16 hprio_end; u16 rx_miss_act_cntr; /* Counter for RX MISS action */ + /* fields present in the generated key */ + struct npc_key_field tx_key_fields[NPC_KEY_FIELDS_MAX]; + struct npc_key_field rx_key_fields[NPC_KEY_FIELDS_MAX]; + u64 tx_features; + u64 rx_features; + struct list_head mcam_rules; }; /* Structure for per RVU func info ie PF/VF */ @@ -137,6 +176,7 @@ struct rvu_pfvf { u16 ssow; u16 cptlfs; u16 timlfs; + u16 cpt1_lfs; u8 cgx_lmac; /* Block LF's MSIX vector info */ @@ -169,19 +209,22 @@ struct rvu_pfvf { u16 maxlen; u16 minlen; + u8 pf_set_vf_cfg; u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */ + u8 default_mac[ETH_ALEN]; /* MAC address from FWdata */ /* Broadcast pkt replication info */ u16 bcast_mce_idx; struct nix_mce_list bcast_mce_list; - /* VLAN offload */ - struct mcam_entry entry; - int rxvlan_index; - bool rxvlan; + struct rvu_npc_mcam_rule *def_ucast_rule; bool cgx_in_use; /* this PF/VF using CGX? */ int cgx_users; /* number of cgx users - used only by PFs */ + + u8 nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */ + u8 nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */ + u8 nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */ }; struct nix_txsch { @@ -218,12 +261,22 @@ struct nix_lso { u8 in_use; }; +struct nix_txvlan { +#define NIX_TX_VTAG_DEF_MAX 0x400 + struct rsrc_bmap rsrc; + u16 *entry2pfvf_map; + struct mutex rsrc_lock; /* Serialize resource alloc/free */ +}; + struct nix_hw { + int blkaddr; + struct rvu *rvu; struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */ struct nix_mcast mcast; struct nix_flowkey flowkey; struct nix_mark_format mark_format; struct nix_lso lso; + struct nix_txvlan txvlan; }; /* RVU block's capabilities or functionality, @@ -251,10 +304,16 @@ struct rvu_hwinfo { u8 lbk_links; u8 sdp_links; u8 npc_kpus; /* No of parser units */ + u8 npc_pkinds; /* No of port kinds */ + u8 npc_intfs; /* No of interfaces */ + u8 npc_kpu_entries; /* No of KPU entries */ + u16 npc_counters; /* No of match stats counters */ + bool npc_ext_set; /* Extended register set */ struct hw_cap cap; struct rvu_block block[BLK_COUNT]; /* Block info */ - struct nix_hw *nix0; + struct nix_hw *nix; + struct rvu *rvu; struct npc_pkind pkind; struct npc_mcam mcam; }; @@ -300,7 +359,7 @@ struct npc_kpu_profile_adapter { const struct npc_lt_def_cfg *lt_def; const struct npc_kpu_profile_action *ikpu; /* array[pkinds] */ const struct npc_kpu_profile *kpu; /* array[kpus] */ - const struct npc_mcam_kex *mkex; + struct npc_mcam_kex *mkex; size_t pkinds; size_t kpus; }; @@ -315,6 +374,7 @@ struct rvu { struct rvu_pfvf *hwvf; struct mutex rsrc_lock; /* Serialize resource alloc/free */ int vfs; /* Number of VFs attached to RVU */ + int nix_blkaddr[MAX_NIX_BLKS]; /* Mbox */ struct mbox_wq_info afpf_wq_info; @@ -420,6 +480,7 @@ void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id); 
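The nr_kws comment on struct npc_key_field above boils down to a span computation over 8-byte key words; a worked sketch of that arithmetic (illustration only, not a helper from this series):

	/* A field of "len" bytes at byte offset "key_off" of the key starts in
	 * word key_off / 8 and ends in word (key_off + len - 1) / 8. For the
	 * example in the comment, key_off = 4 and len = 16 span words 0..2,
	 * so nr_kws = 3 (KW0, KW1 and KW2).
	 */
	static int nr_key_words(int key_off, int len)
	{
		return (key_off + len - 1) / 8 - key_off / 8 + 1;
	}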
int rvu_rsrc_free_count(struct rsrc_bmap *rsrc); int rvu_alloc_rsrc_contig(struct rsrc_bmap *rsrc, int nrsrc); bool rvu_rsrc_check_contig(struct rsrc_bmap *rsrc, int nrsrc); +u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blkaddr); int rvu_get_pf(u16 pcifunc); struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); @@ -429,6 +490,7 @@ int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); +int rvu_get_num_lbk_chans(void); /* RVU HW reg validation */ enum regmap_block { @@ -485,6 +547,9 @@ int rvu_get_nixlf_count(struct rvu *rvu); void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf); int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr); int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); +struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr); +int rvu_get_next_nix_blkaddr(struct rvu *rvu, int blkaddr); +void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -501,8 +566,8 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable); -int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf); +void rvu_npc_free_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, @@ -513,6 +578,24 @@ void rvu_npc_get_mcam_entry_alloc_info(struct rvu *rvu, u16 pcifunc, void rvu_npc_get_mcam_counter_alloc_info(struct rvu *rvu, u16 pcifunc, int blkaddr, int *alloc_cnt, int *enable_cnt); +bool is_npc_intf_tx(u8 intf); +bool is_npc_intf_rx(u8 intf); +bool is_npc_interface_valid(struct rvu *rvu, u8 intf); +int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena); +int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel); +int npc_flow_steering_init(struct rvu *rvu, int blkaddr); +const char *npc_get_field_name(u8 hdr); +bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, + u16 pcifunc, u8 intf, struct mcam_entry *entry, + int *entry_index); +int npc_get_bank(struct npc_mcam *mcam, int index); +void npc_mcam_enable_flows(struct rvu *rvu, u16 target); +void npc_mcam_disable_flows(struct rvu *rvu, u16 target); +void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index, bool enable); +void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 src, struct mcam_entry *entry, + u8 *intf, u8 *ena); #ifdef CONFIG_DEBUG_FS void rvu_dbg_init(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index fa9152ff5e2a..d298b9357177 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -74,6 +74,20 @@ void *rvu_cgx_pdata(u8 
cgx_id, struct rvu *rvu) return rvu->cgx_idmap[cgx_id]; } +/* Based on P2X connectivity find mapped NIX block for a PF */ +static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, + int cgx_id, int lmac_id) +{ + struct rvu_pfvf *pfvf = &rvu->pf[pf]; + u8 p2x; + + p2x = cgx_lmac_get_p2x(cgx_id, lmac_id); + /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */ + pfvf->nix_blkaddr = BLKADDR_NIX0; + if (p2x == CMR_P2X_SEL_NIX1) + pfvf->nix_blkaddr = BLKADDR_NIX1; +} + static int rvu_map_cgx_lmac_pf(struct rvu *rvu) { struct npc_pkind *pkind = &rvu->hw->pkind; @@ -117,6 +131,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) rvu->cgxlmac2pf_map[CGX_OFFSET(cgx) + lmac] = 1 << pf; free_pkind = rvu_alloc_rsrc(&pkind->rsrc); pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; + rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); rvu->cgx_mapped_pfs++; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 77adad4adb1b..39e1a614aaf8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -224,18 +224,53 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); -static bool rvu_dbg_is_valid_lf(struct rvu *rvu, int blktype, int lf, +static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) +{ + struct rvu *rvu = filp->private; + struct pci_dev *pdev = NULL; + char cgx[10], lmac[10]; + struct rvu_pfvf *pfvf; + int pf, domain, blkid; + u8 cgx_id, lmac_id; + u16 pcifunc; + + domain = 2; + seq_puts(filp, "PCI dev\t\tRVU PF Func\tNIX block\tCGX\tLMAC\n"); + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pdev = pci_get_domain_bus_and_slot(domain, pf + 1, 0); + if (!pdev) + continue; + + cgx[0] = 0; + lmac[0] = 0; + pcifunc = pf << 10; + pfvf = rvu_get_pfvf(rvu, pcifunc); + + if (pfvf->nix_blkaddr == BLKADDR_NIX0) + blkid = 0; + else + blkid = 1; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, + &lmac_id); + sprintf(cgx, "CGX%d", cgx_id); + sprintf(lmac, "LMAC%d", lmac_id); + seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", + dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + } + return 0; +} + +RVU_DEBUG_SEQ_FOPS(rvu_pf_cgx_map, rvu_pf_cgx_map_display, NULL); + +static bool rvu_dbg_is_valid_lf(struct rvu *rvu, int blkaddr, int lf, u16 *pcifunc) { struct rvu_block *block; struct rvu_hwinfo *hw; - int blkaddr; - - blkaddr = rvu_get_blkaddr(rvu, blktype, 0); - if (blkaddr < 0) { - dev_warn(rvu->dev, "Invalid blktype\n"); - return false; - } hw = rvu->hw; block = &hw->block[blkaddr]; @@ -291,10 +326,12 @@ static int rvu_dbg_qsize_display(struct seq_file *filp, void *unsused, { void (*print_qsize)(struct seq_file *filp, struct rvu_pfvf *pfvf) = NULL; + struct dentry *current_dir; struct rvu_pfvf *pfvf; struct rvu *rvu; int qsize_id; u16 pcifunc; + int blkaddr; rvu = filp->private; switch (blktype) { @@ -312,7 +349,15 @@ static int rvu_dbg_qsize_display(struct seq_file *filp, void *unsused, return -EINVAL; } - if (!rvu_dbg_is_valid_lf(rvu, blktype, qsize_id, &pcifunc)) + if (blktype == BLKTYPE_NPA) { + blkaddr = BLKADDR_NPA; + } else { + current_dir = filp->file->f_path.dentry->d_parent; + blkaddr = (!strcmp(current_dir->d_name.name, "nix1") ? 
+ BLKADDR_NIX1 : BLKADDR_NIX0); + } + + if (!rvu_dbg_is_valid_lf(rvu, blkaddr, qsize_id, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -329,6 +374,8 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, struct seq_file *seqfile = filp->private_data; char *cmd_buf, *cmd_buf_tmp, *subtoken; struct rvu *rvu = seqfile->private; + struct dentry *current_dir; + int blkaddr; u16 pcifunc; int ret, lf; @@ -355,7 +402,15 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, goto qsize_write_done; } - if (!rvu_dbg_is_valid_lf(rvu, blktype, lf, &pcifunc)) { + if (blktype == BLKTYPE_NPA) { + blkaddr = BLKADDR_NPA; + } else { + current_dir = filp->f_path.dentry->d_parent; + blkaddr = (!strcmp(current_dir->d_name.name, "nix1") ? + BLKADDR_NIX1 : BLKADDR_NIX0); + } + + if (!rvu_dbg_is_valid_lf(rvu, blkaddr, lf, &pcifunc)) { ret = -EINVAL; goto qsize_write_done; } @@ -498,7 +553,7 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype) return -EINVAL; } - if (!rvu_dbg_is_valid_lf(rvu, BLKTYPE_NPA, npalf, &pcifunc)) + if (!rvu_dbg_is_valid_lf(rvu, BLKADDR_NPA, npalf, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -556,7 +611,7 @@ static int write_npa_ctx(struct rvu *rvu, bool all, int max_id = 0; u16 pcifunc; - if (!rvu_dbg_is_valid_lf(rvu, BLKTYPE_NPA, npalf, &pcifunc)) + if (!rvu_dbg_is_valid_lf(rvu, BLKADDR_NPA, npalf, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -704,9 +759,17 @@ static void ndc_cache_stats(struct seq_file *s, int blk_addr, int ctype, int transaction) { u64 req, out_req, lat, cant_alloc; - struct rvu *rvu = s->private; + struct nix_hw *nix_hw; + struct rvu *rvu; int port; + if (blk_addr == BLKADDR_NDC_NPA0) { + rvu = s->private; + } else { + nix_hw = s->private; + rvu = nix_hw->rvu; + } + for (port = 0; port < NDC_MAX_PORT; port++) { req = rvu_read64(rvu, blk_addr, NDC_AF_PORTX_RTX_RWX_REQ_PC (port, ctype, transaction)); @@ -749,9 +812,17 @@ RVU_DEBUG_SEQ_FOPS(npa_ndc_cache, npa_ndc_cache_display, NULL); static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) { - struct rvu *rvu = s->private; + struct nix_hw *nix_hw; + struct rvu *rvu; int bank, max_bank; + if (blk_addr == BLKADDR_NDC_NPA0) { + rvu = s->private; + } else { + nix_hw = s->private; + rvu = nix_hw->rvu; + } + max_bank = NDC_MAX_BANK(rvu, blk_addr); for (bank = 0; bank < max_bank; bank++) { seq_printf(s, "BANK:%d\n", bank); @@ -767,16 +838,30 @@ static int ndc_blk_hits_miss_stats(struct seq_file *s, int idx, int blk_addr) static int rvu_dbg_nix_ndc_rx_cache_display(struct seq_file *filp, void *unused) { - return ndc_blk_cache_stats(filp, NIX0_RX, - BLKADDR_NDC_NIX0_RX); + struct nix_hw *nix_hw = filp->private; + int blkaddr = 0; + int ndc_idx = 0; + + blkaddr = (nix_hw->blkaddr == BLKADDR_NIX1 ? + BLKADDR_NDC_NIX1_RX : BLKADDR_NDC_NIX0_RX); + ndc_idx = (nix_hw->blkaddr == BLKADDR_NIX1 ? NIX1_RX : NIX0_RX); + + return ndc_blk_cache_stats(filp, ndc_idx, blkaddr); } RVU_DEBUG_SEQ_FOPS(nix_ndc_rx_cache, nix_ndc_rx_cache_display, NULL); static int rvu_dbg_nix_ndc_tx_cache_display(struct seq_file *filp, void *unused) { - return ndc_blk_cache_stats(filp, NIX0_TX, - BLKADDR_NDC_NIX0_TX); + struct nix_hw *nix_hw = filp->private; + int blkaddr = 0; + int ndc_idx = 0; + + blkaddr = (nix_hw->blkaddr == BLKADDR_NIX1 ? + BLKADDR_NDC_NIX1_TX : BLKADDR_NDC_NIX0_TX); + ndc_idx = (nix_hw->blkaddr == BLKADDR_NIX1 ? 
NIX1_TX : NIX0_TX); + + return ndc_blk_cache_stats(filp, ndc_idx, blkaddr); } RVU_DEBUG_SEQ_FOPS(nix_ndc_tx_cache, nix_ndc_tx_cache_display, NULL); @@ -792,8 +877,14 @@ RVU_DEBUG_SEQ_FOPS(npa_ndc_hits_miss, npa_ndc_hits_miss_display, NULL); static int rvu_dbg_nix_ndc_rx_hits_miss_display(struct seq_file *filp, void *unused) { - return ndc_blk_hits_miss_stats(filp, - NPA0_U, BLKADDR_NDC_NIX0_RX); + struct nix_hw *nix_hw = filp->private; + int ndc_idx = NPA0_U; + int blkaddr = 0; + + blkaddr = (nix_hw->blkaddr == BLKADDR_NIX1 ? + BLKADDR_NDC_NIX1_RX : BLKADDR_NDC_NIX0_RX); + + return ndc_blk_hits_miss_stats(filp, ndc_idx, blkaddr); } RVU_DEBUG_SEQ_FOPS(nix_ndc_rx_hits_miss, nix_ndc_rx_hits_miss_display, NULL); @@ -801,8 +892,14 @@ RVU_DEBUG_SEQ_FOPS(nix_ndc_rx_hits_miss, nix_ndc_rx_hits_miss_display, NULL); static int rvu_dbg_nix_ndc_tx_hits_miss_display(struct seq_file *filp, void *unused) { - return ndc_blk_hits_miss_stats(filp, - NPA0_U, BLKADDR_NDC_NIX0_TX); + struct nix_hw *nix_hw = filp->private; + int ndc_idx = NPA0_U; + int blkaddr = 0; + + blkaddr = (nix_hw->blkaddr == BLKADDR_NIX1 ? + BLKADDR_NDC_NIX1_TX : BLKADDR_NDC_NIX0_TX); + + return ndc_blk_hits_miss_stats(filp, ndc_idx, blkaddr); } RVU_DEBUG_SEQ_FOPS(nix_ndc_tx_hits_miss, nix_ndc_tx_hits_miss_display, NULL); @@ -969,7 +1066,8 @@ static int rvu_dbg_nix_queue_ctx_display(struct seq_file *filp, { void (*print_nix_ctx)(struct seq_file *filp, struct nix_aq_enq_rsp *rsp) = NULL; - struct rvu *rvu = filp->private; + struct nix_hw *nix_hw = filp->private; + struct rvu *rvu = nix_hw->rvu; struct nix_aq_enq_req aq_req; struct nix_aq_enq_rsp rsp; char *ctype_string = NULL; @@ -1001,7 +1099,7 @@ static int rvu_dbg_nix_queue_ctx_display(struct seq_file *filp, return -EINVAL; } - if (!rvu_dbg_is_valid_lf(rvu, BLKTYPE_NIX, nixlf, &pcifunc)) + if (!rvu_dbg_is_valid_lf(rvu, nix_hw->blkaddr, nixlf, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -1053,13 +1151,15 @@ static int rvu_dbg_nix_queue_ctx_display(struct seq_file *filp, } static int write_nix_queue_ctx(struct rvu *rvu, bool all, int nixlf, - int id, int ctype, char *ctype_string) + int id, int ctype, char *ctype_string, + struct seq_file *m) { + struct nix_hw *nix_hw = m->private; struct rvu_pfvf *pfvf; int max_id = 0; u16 pcifunc; - if (!rvu_dbg_is_valid_lf(rvu, BLKTYPE_NIX, nixlf, &pcifunc)) + if (!rvu_dbg_is_valid_lf(rvu, nix_hw->blkaddr, nixlf, &pcifunc)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -1119,7 +1219,8 @@ static ssize_t rvu_dbg_nix_queue_ctx_write(struct file *filp, int ctype) { struct seq_file *m = filp->private_data; - struct rvu *rvu = m->private; + struct nix_hw *nix_hw = m->private; + struct rvu *rvu = nix_hw->rvu; char *cmd_buf, *ctype_string; int nixlf, id = 0, ret; bool all = false; @@ -1155,7 +1256,7 @@ static ssize_t rvu_dbg_nix_queue_ctx_write(struct file *filp, goto done; } else { ret = write_nix_queue_ctx(rvu, all, nixlf, id, ctype, - ctype_string); + ctype_string, m); } done: kfree(cmd_buf); @@ -1259,49 +1360,67 @@ static int rvu_dbg_nix_qsize_display(struct seq_file *filp, void *unused) RVU_DEBUG_SEQ_FOPS(nix_qsize, nix_qsize_display, nix_qsize_write); -static void rvu_dbg_nix_init(struct rvu *rvu) +static void rvu_dbg_nix_init(struct rvu *rvu, int blkaddr) { const struct device *dev = &rvu->pdev->dev; + struct nix_hw *nix_hw; struct dentry *pfile; - rvu->rvu_dbg.nix = debugfs_create_dir("nix", rvu->rvu_dbg.root); - if (!rvu->rvu_dbg.nix) { - dev_err(rvu->dev, "create debugfs dir failed for nix\n"); + if 
(!is_block_implemented(rvu->hw, blkaddr)) return; + + if (blkaddr == BLKADDR_NIX0) { + rvu->rvu_dbg.nix = debugfs_create_dir("nix", rvu->rvu_dbg.root); + if (!rvu->rvu_dbg.nix) { + dev_err(rvu->dev, "create debugfs dir failed for nix\n"); + return; + } + nix_hw = &rvu->hw->nix[0]; + } else { + rvu->rvu_dbg.nix = debugfs_create_dir("nix1", + rvu->rvu_dbg.root); + if (!rvu->rvu_dbg.nix) { + dev_err(rvu->dev, + "create debugfs dir failed for nix1\n"); + return; + } + nix_hw = &rvu->hw->nix[1]; } - pfile = debugfs_create_file("sq_ctx", 0600, rvu->rvu_dbg.nix, rvu, + pfile = debugfs_create_file("sq_ctx", 0600, rvu->rvu_dbg.nix, nix_hw, &rvu_dbg_nix_sq_ctx_fops); if (!pfile) goto create_failed; - pfile = debugfs_create_file("rq_ctx", 0600, rvu->rvu_dbg.nix, rvu, + pfile = debugfs_create_file("rq_ctx", 0600, rvu->rvu_dbg.nix, nix_hw, &rvu_dbg_nix_rq_ctx_fops); if (!pfile) goto create_failed; - pfile = debugfs_create_file("cq_ctx", 0600, rvu->rvu_dbg.nix, rvu, + pfile = debugfs_create_file("cq_ctx", 0600, rvu->rvu_dbg.nix, nix_hw, &rvu_dbg_nix_cq_ctx_fops); if (!pfile) goto create_failed; - pfile = debugfs_create_file("ndc_tx_cache", 0600, rvu->rvu_dbg.nix, rvu, - &rvu_dbg_nix_ndc_tx_cache_fops); + pfile = debugfs_create_file("ndc_tx_cache", 0600, rvu->rvu_dbg.nix, + nix_hw, &rvu_dbg_nix_ndc_tx_cache_fops); if (!pfile) goto create_failed; - pfile = debugfs_create_file("ndc_rx_cache", 0600, rvu->rvu_dbg.nix, rvu, - &rvu_dbg_nix_ndc_rx_cache_fops); + pfile = debugfs_create_file("ndc_rx_cache", 0600, rvu->rvu_dbg.nix, + nix_hw, &rvu_dbg_nix_ndc_rx_cache_fops); if (!pfile) goto create_failed; pfile = debugfs_create_file("ndc_tx_hits_miss", 0600, rvu->rvu_dbg.nix, - rvu, &rvu_dbg_nix_ndc_tx_hits_miss_fops); + nix_hw, + &rvu_dbg_nix_ndc_tx_hits_miss_fops); if (!pfile) goto create_failed; pfile = debugfs_create_file("ndc_rx_hits_miss", 0600, rvu->rvu_dbg.nix, - rvu, &rvu_dbg_nix_ndc_rx_hits_miss_fops); + nix_hw, + &rvu_dbg_nix_ndc_rx_hits_miss_fops); if (!pfile) goto create_failed; @@ -1312,7 +1431,8 @@ static void rvu_dbg_nix_init(struct rvu *rvu) return; create_failed: - dev_err(dev, "Failed to create debugfs dir/file for NIX\n"); + dev_err(dev, + "Failed to create debugfs dir/file for NIX blk\n"); debugfs_remove_recursive(rvu->rvu_dbg.nix); } @@ -1565,7 +1685,7 @@ static int rvu_dbg_npc_mcam_info_display(struct seq_file *filp, void *unsued) struct rvu *rvu = filp->private; int pf, vf, numvfs, blkaddr; struct npc_mcam *mcam; - u16 pcifunc; + u16 pcifunc, counters; u64 cfg; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -1573,6 +1693,7 @@ static int rvu_dbg_npc_mcam_info_display(struct seq_file *filp, void *unsued) return -ENODEV; mcam = &rvu->hw->mcam; + counters = rvu->hw->npc_counters; seq_puts(filp, "\nNPC MCAM info:\n"); /* MCAM keywidth on receive and transmit sides */ @@ -1595,10 +1716,9 @@ static int rvu_dbg_npc_mcam_info_display(struct seq_file *filp, void *unsued) seq_printf(filp, "\t\t Available \t: %d\n", mcam->bmap_fcnt); /* MCAM counters */ - cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST); - cfg = (cfg >> 48) & 0xFFFF; - seq_printf(filp, "\n\t\t MCAM counters \t: %lld\n", cfg); - seq_printf(filp, "\t\t Reserved \t: %lld\n", cfg - mcam->counters.max); + seq_printf(filp, "\n\t\t MCAM counters \t: %d\n", counters); + seq_printf(filp, "\t\t Reserved \t: %d\n", + counters - mcam->counters.max); seq_printf(filp, "\t\t Available \t: %d\n", rvu_rsrc_free_count(&mcam->counters)); @@ -1650,6 +1770,198 @@ static int rvu_dbg_npc_rx_miss_stats_display(struct seq_file *filp, 
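
The MCAM info hunk above swaps an inline NPC_AF_CONST read ((cfg >> 48) & 0xFFFF) for the cached hw->npc_counters, and the rvu_npc.c hunks further down drop the matching reads for the KPU count, bank count and bank size. A standalone sketch of that decode, with bit positions taken only from the lines this diff removes; the struct and helper names are illustrative, not part of the driver:

	#include <stdint.h>
	#include <stdio.h>

	/* HW limits parked in NPC_AF_CONST, per the removed reads:
	 * kpus at bits [12:8], banksize at bits [43:28],
	 * banks at bits [47:44], counters at bits [63:48].
	 */
	struct example_npc_limits {
		uint8_t  kpus;
		uint16_t banksize;
		uint8_t  banks;
		uint16_t counters;
	};

	static void example_decode_npc_const(uint64_t cfg,
					     struct example_npc_limits *l)
	{
		l->kpus     = (cfg >> 8)  & 0x1F;
		l->banksize = (cfg >> 28) & 0xFFFF;
		l->banks    = (cfg >> 44) & 0xF;
		l->counters = (cfg >> 48) & 0xFFFF;
	}

	int main(void)
	{
		struct example_npc_limits l;

		/* register value below is made up for the demo */
		example_decode_npc_const(0x00400080F0001000ULL, &l);
		printf("kpus=%u banks=%u banksize=%u counters=%u\n",
		       l.kpus, l.banks, l.banksize, l.counters);
		return 0;
	}
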
RVU_DEBUG_SEQ_FOPS(npc_rx_miss_act, npc_rx_miss_stats_display, NULL); +static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, + struct rvu_npc_mcam_rule *rule) +{ + u8 bit; + + for_each_set_bit(bit, (unsigned long *)&rule->features, 64) { + seq_printf(s, "\t%s ", npc_get_field_name(bit)); + switch (bit) { + case NPC_DMAC: + seq_printf(s, "%pM ", rule->packet.dmac); + seq_printf(s, "mask %pM\n", rule->mask.dmac); + break; + case NPC_SMAC: + seq_printf(s, "%pM ", rule->packet.smac); + seq_printf(s, "mask %pM\n", rule->mask.smac); + break; + case NPC_ETYPE: + seq_printf(s, "0x%x ", ntohs(rule->packet.etype)); + seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype)); + break; + case NPC_OUTER_VID: + seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci)); + seq_printf(s, "mask 0x%x\n", + ntohs(rule->mask.vlan_tci)); + break; + case NPC_TOS: + seq_printf(s, "%d ", rule->packet.tos); + seq_printf(s, "mask 0x%x\n", rule->mask.tos); + break; + case NPC_SIP_IPV4: + seq_printf(s, "%pI4 ", &rule->packet.ip4src); + seq_printf(s, "mask %pI4\n", &rule->mask.ip4src); + break; + case NPC_DIP_IPV4: + seq_printf(s, "%pI4 ", &rule->packet.ip4dst); + seq_printf(s, "mask %pI4\n", &rule->mask.ip4dst); + break; + case NPC_SIP_IPV6: + seq_printf(s, "%pI6 ", rule->packet.ip6src); + seq_printf(s, "mask %pI6\n", rule->mask.ip6src); + break; + case NPC_DIP_IPV6: + seq_printf(s, "%pI6 ", rule->packet.ip6dst); + seq_printf(s, "mask %pI6\n", rule->mask.ip6dst); + break; + case NPC_SPORT_TCP: + case NPC_SPORT_UDP: + case NPC_SPORT_SCTP: + seq_printf(s, "%d ", ntohs(rule->packet.sport)); + seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.sport)); + break; + case NPC_DPORT_TCP: + case NPC_DPORT_UDP: + case NPC_DPORT_SCTP: + seq_printf(s, "%d ", ntohs(rule->packet.dport)); + seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.dport)); + break; + default: + break; + } + } +} + +static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, + struct rvu_npc_mcam_rule *rule) +{ + if (rule->intf == NIX_INTF_TX) { + switch (rule->tx_action.op) { + case NIX_TX_ACTIONOP_DROP: + seq_puts(s, "\taction: Drop\n"); + break; + case NIX_TX_ACTIONOP_UCAST_DEFAULT: + seq_puts(s, "\taction: Unicast to default channel\n"); + break; + case NIX_TX_ACTIONOP_UCAST_CHAN: + seq_printf(s, "\taction: Unicast to channel %d\n", + rule->tx_action.index); + break; + case NIX_TX_ACTIONOP_MCAST: + seq_puts(s, "\taction: Multicast\n"); + break; + case NIX_TX_ACTIONOP_DROP_VIOL: + seq_puts(s, "\taction: Lockdown Violation Drop\n"); + break; + default: + break; + }; + } else { + switch (rule->rx_action.op) { + case NIX_RX_ACTIONOP_DROP: + seq_puts(s, "\taction: Drop\n"); + break; + case NIX_RX_ACTIONOP_UCAST: + seq_printf(s, "\taction: Direct to queue %d\n", + rule->rx_action.index); + break; + case NIX_RX_ACTIONOP_RSS: + seq_puts(s, "\taction: RSS\n"); + break; + case NIX_RX_ACTIONOP_UCAST_IPSEC: + seq_puts(s, "\taction: Unicast ipsec\n"); + break; + case NIX_RX_ACTIONOP_MCAST: + seq_puts(s, "\taction: Multicast\n"); + break; + default: + break; + }; + } +} + +static const char *rvu_dbg_get_intf_name(int intf) +{ + switch (intf) { + case NIX_INTFX_RX(0): + return "NIX0_RX"; + case NIX_INTFX_RX(1): + return "NIX1_RX"; + case NIX_INTFX_TX(0): + return "NIX0_TX"; + case NIX_INTFX_TX(1): + return "NIX1_TX"; + default: + break; + } + + return "unknown"; +} + +static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused) +{ + struct rvu_npc_mcam_rule *iter; + struct rvu *rvu = s->private; + struct npc_mcam *mcam; + int pf, vf = -1; + int blkaddr; + u16 target; + 
u64 hits; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return 0; + + mcam = &rvu->hw->mcam; + + mutex_lock(&mcam->lock); + list_for_each_entry(iter, &mcam->mcam_rules, list) { + pf = (iter->owner >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; + seq_printf(s, "\n\tInstalled by: PF%d ", pf); + + if (iter->owner & RVU_PFVF_FUNC_MASK) { + vf = (iter->owner & RVU_PFVF_FUNC_MASK) - 1; + seq_printf(s, "VF%d", vf); + } + seq_puts(s, "\n"); + + seq_printf(s, "\tdirection: %s\n", is_npc_intf_rx(iter->intf) ? + "RX" : "TX"); + seq_printf(s, "\tinterface: %s\n", + rvu_dbg_get_intf_name(iter->intf)); + seq_printf(s, "\tmcam entry: %d\n", iter->entry); + + rvu_dbg_npc_mcam_show_flows(s, iter); + if (iter->intf == NIX_INTF_RX) { + target = iter->rx_action.pf_func; + pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; + seq_printf(s, "\tForward to: PF%d ", pf); + + if (target & RVU_PFVF_FUNC_MASK) { + vf = (target & RVU_PFVF_FUNC_MASK) - 1; + seq_printf(s, "VF%d", vf); + } + seq_puts(s, "\n"); + } + + rvu_dbg_npc_mcam_show_action(s, iter); + seq_printf(s, "\tenabled: %s\n", iter->enable ? "yes" : "no"); + + if (!iter->has_cntr) + continue; + seq_printf(s, "\tcounter: %d\n", iter->cntr); + + hits = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(iter->cntr)); + seq_printf(s, "\thits: %lld\n", hits); + } + mutex_unlock(&mcam->lock); + + return 0; +} + +RVU_DEBUG_SEQ_FOPS(npc_mcam_rules, npc_mcam_show_rules, NULL); + static void rvu_dbg_npc_init(struct rvu *rvu) { const struct device *dev = &rvu->pdev->dev; @@ -1664,6 +1976,11 @@ static void rvu_dbg_npc_init(struct rvu *rvu) if (!pfile) goto create_failed; + pfile = debugfs_create_file("mcam_rules", 0444, rvu->rvu_dbg.npc, + rvu, &rvu_dbg_npc_mcam_rules_fops); + if (!pfile) + goto create_failed; + pfile = debugfs_create_file("rx_miss_act_stats", 0444, rvu->rvu_dbg.npc, rvu, &rvu_dbg_npc_rx_miss_act_fops); if (!pfile) @@ -1691,8 +2008,15 @@ void rvu_dbg_init(struct rvu *rvu) if (!pfile) goto create_failed; + pfile = debugfs_create_file("rvu_pf_cgx_map", 0444, rvu->rvu_dbg.root, + rvu, &rvu_dbg_rvu_pf_cgx_map_fops); + if (!pfile) + goto create_failed; + rvu_dbg_npa_init(rvu); - rvu_dbg_nix_init(rvu); + rvu_dbg_nix_init(rvu, BLKADDR_NIX0); + + rvu_dbg_nix_init(rvu, BLKADDR_NIX1); rvu_dbg_cgx_init(rvu); rvu_dbg_npc_init(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 21a89dd76d3c..e8d039503097 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -17,6 +17,7 @@ #include "npc.h" #include "cgx.h" +static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id); @@ -68,6 +69,23 @@ struct mce { u16 pcifunc; }; +int rvu_get_next_nix_blkaddr(struct rvu *rvu, int blkaddr) +{ + int i = 0; + + /*If blkaddr is 0, return the first nix block address*/ + if (blkaddr == 0) + return rvu->nix_blkaddr[blkaddr]; + + while (i + 1 < MAX_NIX_BLKS) { + if (rvu->nix_blkaddr[i] == blkaddr) + return rvu->nix_blkaddr[i + 1]; + i++; + } + + return 0; +} + bool is_nixlf_attached(struct rvu *rvu, u16 pcifunc) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -81,14 +99,16 @@ bool is_nixlf_attached(struct rvu *rvu, u16 pcifunc) int rvu_get_nixlf_count(struct rvu *rvu) { + int blkaddr = 0, max = 0; struct rvu_block *block; - int blkaddr; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); - if (blkaddr < 0) - return 
0; - block = &rvu->hw->block[blkaddr]; - return block->lf.max; + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + while (blkaddr) { + block = &rvu->hw->block[blkaddr]; + max += block->lf.max; + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + } + return max; } int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr) @@ -130,11 +150,18 @@ static u16 nix_alloc_mce_list(struct nix_mcast *mcast, int count) return idx; } -static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) +struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) { - if (blkaddr == BLKADDR_NIX0 && hw->nix0) - return hw->nix0; + int nix_blkaddr = 0, i = 0; + struct rvu *rvu = hw->rvu; + nix_blkaddr = rvu_get_next_nix_blkaddr(rvu, nix_blkaddr); + while (nix_blkaddr) { + if (blkaddr == nix_blkaddr && hw->nix) + return &hw->nix[i]; + nix_blkaddr = rvu_get_next_nix_blkaddr(rvu, nix_blkaddr); + i++; + } return NULL; } @@ -187,8 +214,8 @@ static bool is_valid_txschq(struct rvu *rvu, int blkaddr, static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + int pkind, pf, vf, lbkid; u8 cgx_id, lmac_id; - int pkind, pf, vf; int err; pf = rvu_get_pf(pcifunc); @@ -221,13 +248,24 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) case NIX_INTF_TYPE_LBK: vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1; + /* If NIX1 block is present on the silicon then NIXes are + * assigned alternatively for lbk interfaces. NIX0 should + * send packets on lbk link 1 channels and NIX1 should send + * on lbk link 0 channels for the communication between + * NIX0 and NIX1. + */ + lbkid = 0; + if (rvu->hw->lbk_links > 1) + lbkid = vf & 0x1 ? 0 : 1; + /* Note that AF's VFs work in pairs and talk over consecutive * loopback channels.Therefore if odd number of AF VFs are * enabled then the last VF remains with no pair. */ - pfvf->rx_chan_base = NIX_CHAN_LBK_CHX(0, vf); - pfvf->tx_chan_base = vf & 0x1 ? NIX_CHAN_LBK_CHX(0, vf - 1) : - NIX_CHAN_LBK_CHX(0, vf + 1); + pfvf->rx_chan_base = NIX_CHAN_LBK_CHX(lbkid, vf); + pfvf->tx_chan_base = vf & 0x1 ? 
+ NIX_CHAN_LBK_CHX(lbkid, vf - 1) : + NIX_CHAN_LBK_CHX(lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, @@ -265,7 +303,6 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf) pfvf->maxlen = 0; pfvf->minlen = 0; - pfvf->rxvlan = false; /* Remove this PF_FUNC from bcast pkt replication list */ err = nix_update_bcast_mce_list(rvu, pcifunc, false); @@ -612,8 +649,9 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, return 0; } -static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, - struct nix_aq_enq_rsp *rsp) +static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) { struct rvu_hwinfo *hw = rvu->hw; u16 pcifunc = req->hdr.pcifunc; @@ -626,10 +664,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, bool ena; u64 cfg; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); - if (blkaddr < 0) - return NIX_AF_ERR_AF_LF_INVALID; - + blkaddr = nix_hw->blkaddr; block = &hw->block[blkaddr]; aq = block->aq; if (!aq) { @@ -669,8 +704,9 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, break; case NIX_AQ_CTYPE_MCE: cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_MCAST_CFG); + /* Check if index exceeds MCE list length */ - if (!hw->nix0->mcast.mce_ctx || + if (!nix_hw->mcast.mce_ctx || (req->qidx >= (256UL << (cfg & 0xF)))) rc = NIX_AF_ERR_AQ_ENQUEUE; @@ -832,6 +868,23 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, return 0; } +static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + struct nix_hw *nix_hw; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc); + if (blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return -EINVAL; + + return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp); +} + static const char *nix_get_ctx_name(int ctype) { switch (ctype) { @@ -1129,6 +1182,10 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, /* Config Rx pkt length, csum checks and apad enable / disable */ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg); + /* Configure pkind for TX parse config */ + cfg = NPC_TX_DEF_PKIND; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg); + intf = is_afvf(pcifunc) ? 
NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX; err = nix_interface_init(rvu, pcifunc, intf, nixlf); if (err) @@ -1137,6 +1194,11 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu, /* Disable NPC entries as NIXLF's contexts are not initialized yet */ rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); + /* Configure RX VTAG Type 7 (strip) for vf vlan */ + rvu_write64(rvu, blkaddr, + NIX_AF_LFX_RX_VTAG_TYPEX(nixlf, NIX_AF_LFX_RX_VTAG_TYPE7), + VTAGSIZE_T4 | VTAG_STRIP); + goto exit; free_mem: @@ -1164,10 +1226,14 @@ exit: cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); rsp->qints = ((cfg >> 12) & 0xFFF); rsp->cints = ((cfg >> 24) & 0xFFF); + rsp->cgx_links = hw->cgx_links; + rsp->lbk_links = hw->lbk_links; + rsp->sdp_links = hw->sdp_links; + return rc; } -int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct msg_req *req, +int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct nix_lf_free_req *req, struct msg_rsp *rsp) { struct rvu_hwinfo *hw = rvu->hw; @@ -1186,6 +1252,15 @@ int rvu_mbox_handler_nix_lf_free(struct rvu *rvu, struct msg_req *req, if (nixlf < 0) return NIX_AF_ERR_AF_LF_INVALID; + if (req->flags & NIX_LF_DISABLE_FLOWS) + rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf); + else + rvu_npc_free_mcam_entries(rvu, pcifunc, nixlf); + + /* Free any tx vtag def entries used by this NIX LF */ + if (!(req->flags & NIX_LF_DONT_FREE_TX_VTAG)) + nix_free_tx_vtag_entries(rvu, pcifunc); + nix_interface_deinit(rvu, pcifunc, nixlf); /* Reset this NIX LF */ @@ -1914,9 +1989,14 @@ static int nix_rx_vtag_cfg(struct rvu *rvu, int nixlf, int blkaddr, { u64 regval = req->vtag_size; - if (req->rx.vtag_type > 7 || req->vtag_size > VTAGSIZE_T8) + if (req->rx.vtag_type > NIX_AF_LFX_RX_VTAG_TYPE7 || + req->vtag_size > VTAGSIZE_T8) return -EINVAL; + /* RX VTAG Type 7 reserved for vf vlan */ + if (req->rx.vtag_type == NIX_AF_LFX_RX_VTAG_TYPE7) + return NIX_AF_ERR_RX_VTAG_INUSE; + if (req->rx.capture_vtag) regval |= BIT_ULL(5); if (req->rx.strip_vtag) @@ -1927,9 +2007,149 @@ static int nix_rx_vtag_cfg(struct rvu *rvu, int nixlf, int blkaddr, return 0; } +static int nix_tx_vtag_free(struct rvu *rvu, int blkaddr, + u16 pcifunc, int index) +{ + struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); + struct nix_txvlan *vlan = &nix_hw->txvlan; + + if (vlan->entry2pfvf_map[index] != pcifunc) + return NIX_AF_ERR_PARAM; + + rvu_write64(rvu, blkaddr, + NIX_AF_TX_VTAG_DEFX_DATA(index), 0x0ull); + rvu_write64(rvu, blkaddr, + NIX_AF_TX_VTAG_DEFX_CTL(index), 0x0ull); + + vlan->entry2pfvf_map[index] = 0; + rvu_free_rsrc(&vlan->rsrc, index); + + return 0; +} + +static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc) +{ + struct nix_txvlan *vlan; + struct nix_hw *nix_hw; + int index, blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (blkaddr < 0) + return; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + vlan = &nix_hw->txvlan; + + mutex_lock(&vlan->rsrc_lock); + /* Scan all the entries and free the ones mapped to 'pcifunc' */ + for (index = 0; index < vlan->rsrc.max; index++) { + if (vlan->entry2pfvf_map[index] == pcifunc) + nix_tx_vtag_free(rvu, blkaddr, pcifunc, index); + } + mutex_unlock(&vlan->rsrc_lock); +} + +static int nix_tx_vtag_alloc(struct rvu *rvu, int blkaddr, + u64 vtag, u8 size) +{ + struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); + struct nix_txvlan *vlan = &nix_hw->txvlan; + u64 regval; + int index; + + mutex_lock(&vlan->rsrc_lock); + + index = rvu_alloc_rsrc(&vlan->rsrc); + if (index < 0) { + mutex_unlock(&vlan->rsrc_lock); + return index; + } + + 
mutex_unlock(&vlan->rsrc_lock); + + regval = size ? vtag : vtag << 32; + + rvu_write64(rvu, blkaddr, + NIX_AF_TX_VTAG_DEFX_DATA(index), regval); + rvu_write64(rvu, blkaddr, + NIX_AF_TX_VTAG_DEFX_CTL(index), size); + + return index; +} + +static int nix_tx_vtag_decfg(struct rvu *rvu, int blkaddr, + struct nix_vtag_config *req) +{ + struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); + struct nix_txvlan *vlan = &nix_hw->txvlan; + u16 pcifunc = req->hdr.pcifunc; + int idx0 = req->tx.vtag0_idx; + int idx1 = req->tx.vtag1_idx; + int err; + + if (req->tx.free_vtag0 && req->tx.free_vtag1) + if (vlan->entry2pfvf_map[idx0] != pcifunc || + vlan->entry2pfvf_map[idx1] != pcifunc) + return NIX_AF_ERR_PARAM; + + mutex_lock(&vlan->rsrc_lock); + + if (req->tx.free_vtag0) { + err = nix_tx_vtag_free(rvu, blkaddr, pcifunc, idx0); + if (err) + goto exit; + } + + if (req->tx.free_vtag1) + err = nix_tx_vtag_free(rvu, blkaddr, pcifunc, idx1); + +exit: + mutex_unlock(&vlan->rsrc_lock); + return err; +} + +static int nix_tx_vtag_cfg(struct rvu *rvu, int blkaddr, + struct nix_vtag_config *req, + struct nix_vtag_config_rsp *rsp) +{ + struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr); + struct nix_txvlan *vlan = &nix_hw->txvlan; + u16 pcifunc = req->hdr.pcifunc; + + if (req->tx.cfg_vtag0) { + rsp->vtag0_idx = + nix_tx_vtag_alloc(rvu, blkaddr, + req->tx.vtag0, req->vtag_size); + + if (rsp->vtag0_idx < 0) + return NIX_AF_ERR_TX_VTAG_NOSPC; + + vlan->entry2pfvf_map[rsp->vtag0_idx] = pcifunc; + } + + if (req->tx.cfg_vtag1) { + rsp->vtag1_idx = + nix_tx_vtag_alloc(rvu, blkaddr, + req->tx.vtag1, req->vtag_size); + + if (rsp->vtag1_idx < 0) + goto err_free; + + vlan->entry2pfvf_map[rsp->vtag1_idx] = pcifunc; + } + + return 0; + +err_free: + if (req->tx.cfg_vtag0) + nix_tx_vtag_free(rvu, blkaddr, pcifunc, rsp->vtag0_idx); + + return NIX_AF_ERR_TX_VTAG_NOSPC; +} + int rvu_mbox_handler_nix_vtag_cfg(struct rvu *rvu, struct nix_vtag_config *req, - struct msg_rsp *rsp) + struct nix_vtag_config_rsp *rsp) { u16 pcifunc = req->hdr.pcifunc; int blkaddr, nixlf, err; @@ -1939,19 +2159,28 @@ int rvu_mbox_handler_nix_vtag_cfg(struct rvu *rvu, return err; if (req->cfg_type) { + /* rx vtag configuration */ err = nix_rx_vtag_cfg(rvu, nixlf, blkaddr, req); if (err) return NIX_AF_ERR_PARAM; } else { - /* TODO: handle tx vtag configuration */ - return 0; + /* tx vtag configuration */ + if ((req->tx.cfg_vtag0 || req->tx.cfg_vtag1) && + (req->tx.free_vtag0 || req->tx.free_vtag1)) + return NIX_AF_ERR_PARAM; + + if (req->tx.cfg_vtag0 || req->tx.cfg_vtag1) + return nix_tx_vtag_cfg(rvu, blkaddr, req, rsp); + + if (req->tx.free_vtag0 || req->tx.free_vtag1) + return nix_tx_vtag_decfg(rvu, blkaddr, req); } return 0; } -static int nix_setup_mce(struct rvu *rvu, int mce, u8 op, - u16 pcifunc, int next, bool eol) +static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw, + int mce, u8 op, u16 pcifunc, int next, bool eol) { struct nix_aq_enq_req aq_req; int err; @@ -1971,7 +2200,7 @@ static int nix_setup_mce(struct rvu *rvu, int mce, u8 op, /* All fields valid */ *(u64 *)(&aq_req.mce_mask) = ~0ULL; - err = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL); + err = rvu_nix_blk_aq_enq_inst(rvu, nix_hw, &aq_req, NULL); if (err) { dev_err(rvu->dev, "Failed to setup Bcast MCE for PF%d:VF%d\n", rvu_get_pf(pcifunc), pcifunc & RVU_PFVF_FUNC_MASK); @@ -2077,9 +2306,9 @@ int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) next_idx = idx + 1; /* EOL should be set in last MCE */ - err = nix_setup_mce(rvu, idx, NIX_AQ_INSTOP_WRITE, - 
mce->pcifunc, next_idx, - (next_idx > last_idx) ? true : false); + err = nix_blk_setup_mce(rvu, nix_hw, idx, NIX_AQ_INSTOP_WRITE, + mce->pcifunc, next_idx, + (next_idx > last_idx) ? true : false); if (err) goto end; idx++; @@ -2108,6 +2337,11 @@ static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw) numvfs = (cfg >> 12) & 0xFF; pfvf = &rvu->pf[pf]; + + /* This NIX0/1 block mapped to PF ? */ + if (pfvf->nix_blkaddr != nix_hw->blkaddr) + continue; + /* Save the start MCE */ pfvf->bcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1); @@ -2122,9 +2356,10 @@ static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw) * Will be updated when a NIXLF is attached/detached to * these PF/VFs. */ - err = nix_setup_mce(rvu, pfvf->bcast_mce_idx + idx, - NIX_AQ_INSTOP_INIT, - pcifunc, 0, true); + err = nix_blk_setup_mce(rvu, nix_hw, + pfvf->bcast_mce_idx + idx, + NIX_AQ_INSTOP_INIT, + pcifunc, 0, true); if (err) return err; } @@ -2176,6 +2411,31 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) return nix_setup_bcast_tables(rvu, nix_hw); } +static int nix_setup_txvlan(struct rvu *rvu, struct nix_hw *nix_hw) +{ + struct nix_txvlan *vlan = &nix_hw->txvlan; + int err; + + /* Allocate resource bimap for tx vtag def registers*/ + vlan->rsrc.max = NIX_TX_VTAG_DEF_MAX; + err = rvu_alloc_bitmap(&vlan->rsrc); + if (err) + return -ENOMEM; + + /* Alloc memory for saving entry to RVU PFFUNC allocation mapping */ + vlan->entry2pfvf_map = devm_kcalloc(rvu->dev, vlan->rsrc.max, + sizeof(u16), GFP_KERNEL); + if (!vlan->entry2pfvf_map) + goto free_mem; + + mutex_init(&vlan->rsrc_lock); + return 0; + +free_mem: + kfree(vlan->rsrc.bmap); + return -ENOMEM; +} + static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) { struct nix_txsch *txsch; @@ -2680,6 +2940,7 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, struct nix_set_mac_addr *req, struct msg_rsp *rsp) { + bool from_vf = req->hdr.pcifunc & RVU_PFVF_FUNC_MASK; u16 pcifunc = req->hdr.pcifunc; int blkaddr, nixlf, err; struct rvu_pfvf *pfvf; @@ -2690,13 +2951,15 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, pfvf = rvu_get_pfvf(rvu, pcifunc); + /* VF can't overwrite admin(PF) changes */ + if (from_vf && pfvf->pf_set_vf_cfg) + return -EPERM; + ether_addr_copy(pfvf->mac_addr, req->mac_addr); rvu_npc_install_ucast_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, req->mac_addr); - rvu_npc_update_rxvlan(rvu, pcifunc, nixlf); - return 0; } @@ -2743,9 +3006,6 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, else rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, allmulti); - - rvu_npc_update_rxvlan(rvu, pcifunc, nixlf); - return 0; } @@ -2882,65 +3142,6 @@ linkcfg: return 0; } -int rvu_mbox_handler_nix_rxvlan_alloc(struct rvu *rvu, struct msg_req *req, - struct msg_rsp *rsp) -{ - struct npc_mcam_alloc_entry_req alloc_req = { }; - struct npc_mcam_alloc_entry_rsp alloc_rsp = { }; - struct npc_mcam_free_entry_req free_req = { }; - u16 pcifunc = req->hdr.pcifunc; - int blkaddr, nixlf, err; - struct rvu_pfvf *pfvf; - - /* LBK VFs do not have separate MCAM UCAST entry hence - * skip allocating rxvlan for them - */ - if (is_afvf(pcifunc)) - return 0; - - pfvf = rvu_get_pfvf(rvu, pcifunc); - if (pfvf->rxvlan) - return 0; - - /* alloc new mcam entry */ - alloc_req.hdr.pcifunc = pcifunc; - alloc_req.count = 1; - - err = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, - &alloc_rsp); - if (err) - return err; - - /* update 
entry to enable rxvlan offload */ - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); - if (blkaddr < 0) { - err = NIX_AF_ERR_AF_LF_INVALID; - goto free_entry; - } - - nixlf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], pcifunc, 0); - if (nixlf < 0) { - err = NIX_AF_ERR_AF_LF_INVALID; - goto free_entry; - } - - pfvf->rxvlan_index = alloc_rsp.entry_list[0]; - /* all it means is that rxvlan_index is valid */ - pfvf->rxvlan = true; - - err = rvu_npc_update_rxvlan(rvu, pcifunc, nixlf); - if (err) - goto free_entry; - - return 0; -free_entry: - free_req.hdr.pcifunc = pcifunc; - free_req.entry = alloc_rsp.entry_list[0]; - rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, rsp); - pfvf->rxvlan = false; - return err; -} - int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, struct msg_rsp *rsp) { @@ -3109,17 +3310,15 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) return 0; } -int rvu_nix_init(struct rvu *rvu) +static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) { const struct npc_lt_def_cfg *ltdefs; struct rvu_hwinfo *hw = rvu->hw; + int blkaddr = nix_hw->blkaddr; struct rvu_block *block; - int blkaddr, err; + int err; u64 cfg; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); - if (blkaddr < 0) - return 0; block = &hw->block[blkaddr]; if (is_rvu_96xx_B0(rvu)) { @@ -3153,7 +3352,7 @@ int rvu_nix_init(struct rvu *rvu) hw->cgx = (cfg >> 12) & 0xF; hw->lmac_per_cgx = (cfg >> 8) & 0xF; hw->cgx_links = hw->cgx * hw->lmac_per_cgx; - hw->lbk_links = 1; + hw->lbk_links = (cfg >> 24) & 0xF; hw->sdp_links = 1; /* Initialize admin queue */ @@ -3164,26 +3363,25 @@ int rvu_nix_init(struct rvu *rvu) /* Restore CINT timer delay to HW reset values */ rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL); - if (blkaddr == BLKADDR_NIX0) { - hw->nix0 = devm_kzalloc(rvu->dev, - sizeof(struct nix_hw), GFP_KERNEL); - if (!hw->nix0) - return -ENOMEM; + if (is_block_implemented(hw, blkaddr)) { + err = nix_setup_txschq(rvu, nix_hw, blkaddr); + if (err) + return err; - err = nix_setup_txschq(rvu, hw->nix0, blkaddr); + err = nix_af_mark_format_setup(rvu, nix_hw, blkaddr); if (err) return err; - err = nix_af_mark_format_setup(rvu, hw->nix0, blkaddr); + err = nix_setup_mcast(rvu, nix_hw, blkaddr); if (err) return err; - err = nix_setup_mcast(rvu, hw->nix0, blkaddr); + err = nix_setup_txvlan(rvu, nix_hw); if (err) return err; /* Configure segmentation offload formats */ - nix_setup_lso(rvu, hw->nix0, blkaddr); + nix_setup_lso(rvu, nix_hw, blkaddr); /* Config Outer/Inner L2, IP, TCP, UDP and SCTP NPC layer info. 
* This helps HW protocol checker to identify headers @@ -3236,23 +3434,45 @@ int rvu_nix_init(struct rvu *rvu) return 0; } -void rvu_nix_freemem(struct rvu *rvu) +int rvu_nix_init(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; - struct rvu_block *block; + struct nix_hw *nix_hw; + int blkaddr = 0, err; + int i = 0; + + hw->nix = devm_kcalloc(rvu->dev, MAX_NIX_BLKS, sizeof(struct nix_hw), + GFP_KERNEL); + if (!hw->nix) + return -ENOMEM; + + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + while (blkaddr) { + nix_hw = &hw->nix[i]; + nix_hw->rvu = rvu; + nix_hw->blkaddr = blkaddr; + err = rvu_nix_block_init(rvu, nix_hw); + if (err) + return err; + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + i++; + } + + return 0; +} + +static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr, + struct rvu_block *block) +{ struct nix_txsch *txsch; struct nix_mcast *mcast; + struct nix_txvlan *vlan; struct nix_hw *nix_hw; - int blkaddr, lvl; + int lvl; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); - if (blkaddr < 0) - return; - - block = &hw->block[blkaddr]; rvu_aq_free(rvu, block->aq); - if (blkaddr == BLKADDR_NIX0) { + if (is_block_implemented(rvu->hw, blkaddr)) { nix_hw = get_nix_hw(rvu->hw, blkaddr); if (!nix_hw) return; @@ -3262,6 +3482,11 @@ void rvu_nix_freemem(struct rvu *rvu) kfree(txsch->schq.bmap); } + vlan = &nix_hw->txvlan; + kfree(vlan->rsrc.bmap); + mutex_destroy(&vlan->rsrc_lock); + devm_kfree(rvu->dev, vlan->entry2pfvf_map); + mcast = &nix_hw->mcast; qmem_free(rvu->dev, mcast->mce_ctx); qmem_free(rvu->dev, mcast->mcast_buf); @@ -3269,6 +3494,20 @@ void rvu_nix_freemem(struct rvu *rvu) } } +void rvu_nix_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr = 0; + + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + while (blkaddr) { + block = &hw->block[blkaddr]; + rvu_nix_block_freemem(rvu, blkaddr, block); + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + } +} + int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, struct msg_rsp *rsp) { @@ -3281,6 +3520,8 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, rvu_npc_enable_default_entries(rvu, pcifunc, nixlf); + npc_mcam_enable_flows(rvu, pcifunc); + return rvu_cgx_start_stop_io(rvu, pcifunc, true); } @@ -3296,6 +3537,8 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); + npc_mcam_disable_flows(rvu, pcifunc); + return rvu_cgx_start_stop_io(rvu, pcifunc, false); } @@ -3308,6 +3551,8 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) ctx_req.hdr.pcifunc = pcifunc; /* Cleanup NPC MCAM entries, free Tx scheduler queues being used */ + rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf); + rvu_npc_free_mcam_entries(rvu, pcifunc, nixlf); nix_interface_deinit(rvu, pcifunc, nixlf); nix_rx_sync(rvu, blkaddr); nix_txschq_free(rvu, pcifunc); @@ -3431,3 +3676,12 @@ int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu, return 0; } + +void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc) +{ + bool from_vf = !!(pcifunc & RVU_PFVF_FUNC_MASK); + + /* overwrite vf mac address with default_mac */ + if (from_vf) + ether_addr_copy(pfvf->mac_addr, pfvf->default_mac); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 511b01dd03ed..5cf9b7a907ae 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -28,6 +28,8 @@ #define NPC_PARSE_RESULT_DMAC_OFFSET 8 #define NPC_HW_TSTAMP_OFFSET 8 +#define NPC_KEX_CHAN_MASK 0xFFFULL +#define NPC_KEX_PF_FUNC_MASK 0xFFFFULL static const char def_pfl_name[] = "default"; @@ -36,6 +38,81 @@ static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, static void npc_mcam_free_all_counters(struct rvu *rvu, struct npc_mcam *mcam, u16 pcifunc); +bool is_npc_intf_tx(u8 intf) +{ + return !!(intf & 0x1); +} + +bool is_npc_intf_rx(u8 intf) +{ + return !(intf & 0x1); +} + +bool is_npc_interface_valid(struct rvu *rvu, u8 intf) +{ + struct rvu_hwinfo *hw = rvu->hw; + + return intf < hw->npc_intfs; +} + +int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena) +{ + /* Due to a HW issue in these silicon versions, parse nibble enable + * configuration has to be identical for both Rx and Tx interfaces. + */ + if (is_rvu_96xx_B0(rvu)) + return nibble_ena; + return 0; +} + +static int npc_mcam_verify_pf_func(struct rvu *rvu, + struct mcam_entry *entry_data, u8 intf, + u16 pcifunc) +{ + u16 pf_func, pf_func_mask; + + if (is_npc_intf_rx(intf)) + return 0; + + pf_func_mask = (entry_data->kw_mask[0] >> 32) & + NPC_KEX_PF_FUNC_MASK; + pf_func = (entry_data->kw[0] >> 32) & NPC_KEX_PF_FUNC_MASK; + + pf_func = be16_to_cpu((__force __be16)pf_func); + if (pf_func_mask != NPC_KEX_PF_FUNC_MASK || + ((pf_func & ~RVU_PFVF_FUNC_MASK) != + (pcifunc & ~RVU_PFVF_FUNC_MASK))) + return -EINVAL; + + return 0; +} + +int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + int base = 0, end; + + if (is_npc_intf_tx(intf)) + return 0; + + if (is_afvf(pcifunc)) { + end = rvu_get_num_lbk_chans(); + if (end < 0) + return -EINVAL; + } else { + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + base = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0x0); + /* CGX mapped functions has maximum of 16 channels */ + end = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0xF); + } + + if (channel < base || channel > end) + return -EINVAL; + + return 0; +} + void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf) { int blkaddr; @@ -94,6 +171,31 @@ int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool enable) return 0; } +static int npc_get_ucast_mcam_index(struct npc_mcam *mcam, u16 pcifunc, + int nixlf) +{ + struct rvu_hwinfo *hw = container_of(mcam, struct rvu_hwinfo, mcam); + struct rvu *rvu = hw->rvu; + int blkaddr = 0, max = 0; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* Given a PF/VF and NIX LF number calculate the unicast mcam + * entry index based on the NIX block assigned to the PF/VF. 
+ */ + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + while (blkaddr) { + if (pfvf->nix_blkaddr == blkaddr) + break; + block = &rvu->hw->block[blkaddr]; + max += block->lf.max; + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + } + + return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF; +} + static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type) { @@ -114,10 +216,10 @@ static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, return index + 1; } - return (mcam->nixlf_offset + (nixlf * RSVD_MCAM_ENTRIES_PER_NIXLF)); + return npc_get_ucast_mcam_index(mcam, pcifunc, nixlf); } -static int npc_get_bank(struct npc_mcam *mcam, int index) +int npc_get_bank(struct npc_mcam *mcam, int index) { int bank = index / mcam->banksize; @@ -139,8 +241,8 @@ static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, return (cfg & 1); } -static void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, int index, bool enable) +void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index, bool enable) { int bank = npc_get_bank(mcam, index); int actbank = bank; @@ -257,6 +359,93 @@ static void npc_get_keyword(struct mcam_entry *entry, int idx, *cam0 = ~*cam1 & kw_mask; } +static void npc_fill_entryword(struct mcam_entry *entry, int idx, + u64 cam0, u64 cam1) +{ + /* Similar to npc_get_keyword, but fills mcam_entry structure from + * CAM registers. + */ + switch (idx) { + case 0: + entry->kw[0] = cam1; + entry->kw_mask[0] = cam1 ^ cam0; + break; + case 1: + entry->kw[1] = cam1; + entry->kw_mask[1] = cam1 ^ cam0; + break; + case 2: + entry->kw[1] |= (cam1 & CAM_MASK(16)) << 48; + entry->kw[2] = (cam1 >> 16) & CAM_MASK(48); + entry->kw_mask[1] |= ((cam1 ^ cam0) & CAM_MASK(16)) << 48; + entry->kw_mask[2] = ((cam1 ^ cam0) >> 16) & CAM_MASK(48); + break; + case 3: + entry->kw[2] |= (cam1 & CAM_MASK(16)) << 48; + entry->kw[3] = (cam1 >> 16) & CAM_MASK(32); + entry->kw_mask[2] |= ((cam1 ^ cam0) & CAM_MASK(16)) << 48; + entry->kw_mask[3] = ((cam1 ^ cam0) >> 16) & CAM_MASK(32); + break; + case 4: + entry->kw[3] |= (cam1 & CAM_MASK(32)) << 32; + entry->kw[4] = (cam1 >> 32) & CAM_MASK(32); + entry->kw_mask[3] |= ((cam1 ^ cam0) & CAM_MASK(32)) << 32; + entry->kw_mask[4] = ((cam1 ^ cam0) >> 32) & CAM_MASK(32); + break; + case 5: + entry->kw[4] |= (cam1 & CAM_MASK(32)) << 32; + entry->kw[5] = (cam1 >> 32) & CAM_MASK(16); + entry->kw_mask[4] |= ((cam1 ^ cam0) & CAM_MASK(32)) << 32; + entry->kw_mask[5] = ((cam1 ^ cam0) >> 32) & CAM_MASK(16); + break; + case 6: + entry->kw[5] |= (cam1 & CAM_MASK(48)) << 16; + entry->kw[6] = (cam1 >> 48) & CAM_MASK(16); + entry->kw_mask[5] |= ((cam1 ^ cam0) & CAM_MASK(48)) << 16; + entry->kw_mask[6] = ((cam1 ^ cam0) >> 48) & CAM_MASK(16); + break; + case 7: + entry->kw[6] |= (cam1 & CAM_MASK(48)) << 16; + entry->kw_mask[6] |= ((cam1 ^ cam0) & CAM_MASK(48)) << 16; + break; + } +} + +static void npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index, + struct mcam_entry *entry) +{ + u16 owner, target_func; + struct rvu_pfvf *pfvf; + int bank, nixlf; + u64 rx_action; + + owner = mcam->entry2pfvf_map[index]; + target_func = (entry->action >> 4) & 0xffff; + /* return incase target is PF or LBK or rule owner is not PF */ + if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || + !(target_func & RVU_PFVF_FUNC_MASK)) + return; + + pfvf = rvu_get_pfvf(rvu, target_func); + mcam->entry2target_pffunc[index] = target_func; + /* return 
if nixlf is not attached or initialized */ + if (!is_nixlf_attached(rvu, target_func) || !pfvf->def_ucast_rule) + return; + + /* get VF ucast entry rule */ + nix_get_nixlf(rvu, target_func, &nixlf, NULL); + index = npc_get_nixlf_mcam_index(mcam, target_func, + nixlf, NIXLF_UCAST_ENTRY); + bank = npc_get_bank(mcam, index); + index &= (mcam->banksize - 1); + + rx_action = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(index, bank)); + if (rx_action) + entry->action = rx_action; +} + static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u8 intf, struct mcam_entry *entry, bool enable) @@ -304,6 +493,11 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CAMX_W1(index, bank, 0), cam0); } + /* copy VF default entry action to the VF mcam entry */ + if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries) + npc_get_default_entry_action(rvu, mcam, blkaddr, actindex, + entry); + /* Set 'action' */ rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_ACTION(index, actbank), entry->action); @@ -317,6 +511,42 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, npc_enable_mcam_entry(rvu, mcam, blkaddr, actindex, true); } +void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 src, + struct mcam_entry *entry, u8 *intf, u8 *ena) +{ + int sbank = npc_get_bank(mcam, src); + int bank, kw = 0; + u64 cam0, cam1; + + src &= (mcam->banksize - 1); + bank = sbank; + + for (; bank < (sbank + mcam->banks_per_entry); bank++, kw = kw + 2) { + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CAMX_W0(src, bank, 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CAMX_W0(src, bank, 0)); + npc_fill_entryword(entry, kw, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CAMX_W1(src, bank, 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CAMX_W1(src, bank, 0)); + npc_fill_entryword(entry, kw + 1, cam0, cam1); + } + + entry->action = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(src, sbank)); + entry->vtag_action = + rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_TAG_ACT(src, sbank)); + *intf = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CAMX_INTF(src, sbank, 1)) & 3; + *ena = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CFG(src, sbank)) & 1; +} + static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 src, u16 dest) { @@ -371,11 +601,11 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct mcam_entry entry = { {0} }; struct nix_rx_action action; - int blkaddr, index, kwi; - u64 mac = 0; + int blkaddr, index; /* AF's VFs work in promiscuous mode */ if (is_afvf(pcifunc)) @@ -385,20 +615,9 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, if (blkaddr < 0) return; - for (index = ETH_ALEN - 1; index >= 0; index--) - mac |= ((u64)*mac_addr++) << (8 * index); - index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); - /* Match ingress channel and DMAC */ - entry.kw[0] = chan; - entry.kw_mask[0] = 0xFFFULL; - - kwi = NPC_PARSE_RESULT_DMAC_OFFSET / sizeof(u64); - entry.kw[kwi] = mac; - entry.kw_mask[kwi] = BIT_ULL(48) - 1; - /* Don't change the action if entry is already enabled * Otherwise RSS action may get overwritten. 
*/ @@ -411,25 +630,26 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, action.pf_func = pcifunc; } - entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - NIX_INTF_RX, &entry, true); - - /* add VLAN matching, setup action and save entry back for later */ - entry.kw[0] |= (NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG) << 20; - entry.kw_mask[0] |= (NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG) << 20; + req.default_rule = 1; + ether_addr_copy(req.packet.dmac, mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.features = BIT_ULL(NPC_DMAC); + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.op = action.op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = action.pf_func; + req.index = action.index; + req.match_id = action.match_id; + req.flow_key_alg = action.flow_key_alg; - entry.vtag_action = VTAG0_VALID_BIT | - FIELD_PREP(VTAG0_TYPE_MASK, 0) | - FIELD_PREP(VTAG0_LID_MASK, NPC_LID_LA) | - FIELD_PREP(VTAG0_RELPTR_MASK, 12); - - memcpy(&pfvf->entry, &entry, sizeof(entry)); + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, bool allmulti) { + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; int blkaddr, ucast_idx, index, kwi; struct mcam_entry entry = { {0} }; @@ -473,7 +693,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, entry.action = *(u64 *)&action; npc_config_mcam_entry(rvu, mcam, blkaddr, index, - NIX_INTF_RX, &entry, true); + pfvf->nix_rx_intf, &entry, true); } static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc, @@ -531,6 +751,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, /* Get 'pcifunc' of PF device */ pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; + pfvf = rvu_get_pfvf(rvu, pcifunc); index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); @@ -553,14 +774,13 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, action.op = NIX_RX_ACTIONOP_UCAST; action.pf_func = pcifunc; } else { - pfvf = rvu_get_pfvf(rvu, pcifunc); action.index = pfvf->bcast_mce_idx; action.op = NIX_RX_ACTIONOP_MCAST; } entry.action = *(u64 *)&action; npc_config_mcam_entry(rvu, mcam, blkaddr, index, - NIX_INTF_RX, &entry, true); + pfvf->nix_rx_intf, &entry, true); } void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) @@ -579,12 +799,47 @@ void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } +static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 pcifunc, u64 rx_action) +{ + int actindex, index, bank; + bool enable; + + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) + return; + + mutex_lock(&mcam->lock); + for (index = 0; index < mcam->bmap_entries; index++) { + if (mcam->entry2target_pffunc[index] == pcifunc) { + bank = npc_get_bank(mcam, index); + actindex = index; + index &= (mcam->banksize - 1); + + /* read vf flow entry enable status */ + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, + actindex); + /* disable before mcam entry update */ + npc_enable_mcam_entry(rvu, mcam, blkaddr, actindex, + false); + /* update 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(index, bank), + rx_action); + if (enable) + npc_enable_mcam_entry(rvu, mcam, blkaddr, + actindex, true); + } + } + mutex_unlock(&mcam->lock); +} + void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int 
nixlf, int group, int alg_idx, int mcam_index) { struct npc_mcam *mcam = &rvu->hw->mcam; struct nix_rx_action action; int blkaddr, index, bank; + struct rvu_pfvf *pfvf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@ -621,6 +876,16 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action); + /* update the VF flow rule action with the VF default entry action */ + if (mcam_index < 0) + npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc, + *(u64 *)&action); + + /* update the action change in default rule */ + pfvf = rvu_get_pfvf(rvu, pcifunc); + if (pfvf->def_ucast_rule) + pfvf->def_ucast_rule->rx_action = action; + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); @@ -635,8 +900,6 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action); } - - rvu_npc_update_rxvlan(rvu, pcifunc, nixlf); } static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, @@ -688,8 +951,6 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf); else rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf); - - rvu_npc_update_rxvlan(rvu, pcifunc, nixlf); } void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf) @@ -704,7 +965,41 @@ void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf) void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) { + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + + mutex_lock(&mcam->lock); + + /* Disable MCAM entries directing traffic to this 'pcifunc' */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (is_npc_intf_rx(rule->intf) && + rule->rx_action.pf_func == pcifunc) { + npc_enable_mcam_entry(rvu, mcam, blkaddr, + rule->entry, false); + rule->enable = false; + /* Indicate that default rule is disabled */ + if (rule->default_rule) + pfvf->def_ucast_rule = NULL; + } + } + + mutex_unlock(&mcam->lock); + + npc_mcam_disable_flows(rvu, pcifunc); + + rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); +} + +void rvu_npc_free_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) +{ struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule, *tmp; int blkaddr; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -713,12 +1008,20 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) mutex_lock(&mcam->lock); - /* Disable and free all MCAM entries mapped to this 'pcifunc' */ + /* Free all MCAM entries owned by this 'pcifunc' */ npc_mcam_free_all_entries(rvu, mcam, blkaddr, pcifunc); - /* Free all MCAM counters mapped to this 'pcifunc' */ + /* Free all MCAM counters owned by this 'pcifunc' */ npc_mcam_free_all_counters(rvu, mcam, pcifunc); + /* Delete MCAM entries owned by this 'pcifunc' */ + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + if (rule->owner == pcifunc && !rule->default_rule) { + list_del(&rule->list); + kfree(rule); + } + } + mutex_unlock(&mcam->lock); rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); @@ -732,44 +1035,78 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) rvu_write64(rvu, blkaddr, \ NPC_AF_INTFX_LDATAX_FLAGSX_CFG(intf, ld, flags), cfg) 
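
npc_fill_entryword() and npc_read_mcam_entry() above invert the CAM encoding that npc_get_keyword() writes: a bit that must match 1 is set in CAM1, a bit that must match 0 is set in CAM0, and a don't-care bit is set in neither, so CAM1 alone recovers the match value and CAM1 ^ CAM0 recovers the mask. A small self-contained round-trip check of that identity (the key values are made up):

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t kw      = 0xAB;	/* value to match */
		uint64_t kw_mask = 0xFF;	/* only the low byte is significant */

		/* what npc_get_keyword() programs into the CAM registers */
		uint64_t cam1 = kw & kw_mask;	/* bits that must be 1 */
		uint64_t cam0 = ~kw & kw_mask;	/* bits that must be 0 */

		/* what npc_fill_entryword() reconstructs from them */
		printf("kw      = 0x%" PRIx64 "\n", cam1);		/* prints 0xab */
		printf("kw_mask = 0x%" PRIx64 "\n", cam1 ^ cam0);	/* prints 0xff */
		return 0;
	}
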
-static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, - const struct npc_mcam_kex *mkex) +static void npc_program_mkex_rx(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex *mkex, u8 intf) { int lid, lt, ld, fl; - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX), - mkex->keyx_cfg[NIX_INTF_RX]); - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX), - mkex->keyx_cfg[NIX_INTF_TX]); + if (is_npc_intf_tx(intf)) + return; - for (ld = 0; ld < NPC_MAX_LD; ld++) - rvu_write64(rvu, blkaddr, NPC_AF_KEX_LDATAX_FLAGS_CFG(ld), - mkex->kex_ld_flags[ld]); + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + mkex->keyx_cfg[NIX_INTF_RX]); + /* Program LDATA */ for (lid = 0; lid < NPC_MAX_LID; lid++) { for (lt = 0; lt < NPC_MAX_LT; lt++) { - for (ld = 0; ld < NPC_MAX_LD; ld++) { - SET_KEX_LD(NIX_INTF_RX, lid, lt, ld, + for (ld = 0; ld < NPC_MAX_LD; ld++) + SET_KEX_LD(intf, lid, lt, ld, mkex->intf_lid_lt_ld[NIX_INTF_RX] [lid][lt][ld]); - - SET_KEX_LD(NIX_INTF_TX, lid, lt, ld, - mkex->intf_lid_lt_ld[NIX_INTF_TX] - [lid][lt][ld]); - } } } - + /* Program LFLAGS */ for (ld = 0; ld < NPC_MAX_LD; ld++) { - for (fl = 0; fl < NPC_MAX_LFL; fl++) { - SET_KEX_LDFLAGS(NIX_INTF_RX, ld, fl, + for (fl = 0; fl < NPC_MAX_LFL; fl++) + SET_KEX_LDFLAGS(intf, ld, fl, mkex->intf_ld_flags[NIX_INTF_RX] [ld][fl]); + } +} - SET_KEX_LDFLAGS(NIX_INTF_TX, ld, fl, +static void npc_program_mkex_tx(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex *mkex, u8 intf) +{ + int lid, lt, ld, fl; + + if (is_npc_intf_rx(intf)) + return; + + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + mkex->keyx_cfg[NIX_INTF_TX]); + + /* Program LDATA */ + for (lid = 0; lid < NPC_MAX_LID; lid++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + for (ld = 0; ld < NPC_MAX_LD; ld++) + SET_KEX_LD(intf, lid, lt, ld, + mkex->intf_lid_lt_ld[NIX_INTF_TX] + [lid][lt][ld]); + } + } + /* Program LFLAGS */ + for (ld = 0; ld < NPC_MAX_LD; ld++) { + for (fl = 0; fl < NPC_MAX_LFL; fl++) + SET_KEX_LDFLAGS(intf, ld, fl, mkex->intf_ld_flags[NIX_INTF_TX] [ld][fl]); - } + } +} + +static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex *mkex) +{ + struct rvu_hwinfo *hw = rvu->hw; + u8 intf; + int ld; + + for (ld = 0; ld < NPC_MAX_LD; ld++) + rvu_write64(rvu, blkaddr, NPC_AF_KEX_LDATAX_FLAGS_CFG(ld), + mkex->kex_ld_flags[ld]); + + for (intf = 0; intf < hw->npc_intfs; intf++) { + npc_program_mkex_rx(rvu, blkaddr, mkex, intf); + npc_program_mkex_tx(rvu, blkaddr, mkex, intf); } } @@ -909,7 +1246,7 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, kpu, profile->cam_entries, profile->action_entries); } - max_entries = rvu_read64(rvu, blkaddr, NPC_AF_CONST1) & 0xFFF; + max_entries = rvu->hw->npc_kpu_entries; /* Program CAM match entries for previous KPU extracted data */ num_entries = min_t(int, profile->cam_entries, max_entries); @@ -964,9 +1301,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) int num_pkinds, num_kpus, idx; struct npc_pkind *pkind; - /* Get HW limits */ - hw->npc_kpus = (rvu_read64(rvu, blkaddr, NPC_AF_CONST) >> 8) & 0x1F; - /* Disable all KPUs and their entries */ for (idx = 0; idx < hw->npc_kpus; idx++) { rvu_write64(rvu, blkaddr, @@ -1005,12 +1339,6 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) int rsvd, err; u64 cfg; - /* Get HW limits */ - cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST); - mcam->banks = (cfg >> 44) & 0xF; - mcam->banksize = (cfg >> 28) & 0xFFFF; - mcam->counters.max = (cfg >> 48) & 0xFFFF; - /* Actual number of 
MCAM entries vary by entry size */ cfg = (rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(0)) >> 32) & 0x07; @@ -1077,12 +1405,6 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) mcam->hprio_count = mcam->lprio_count; mcam->hprio_end = mcam->hprio_count; - /* Reserve last counter for MCAM RX miss action which is set to - * drop pkt. This way we will know how many pkts didn't match - * any MCAM entry. - */ - mcam->counters.max--; - mcam->rx_miss_act_cntr = mcam->counters.max; /* Allocate bitmap for managing MCAM counters and memory * for saving counter to RVU PFFUNC allocation mapping. @@ -1109,6 +1431,12 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) if (!mcam->cntr_refcnt) goto free_mem; + /* Alloc memory for saving target device of mcam rule */ + mcam->entry2target_pffunc = devm_kcalloc(rvu->dev, mcam->total_entries, + sizeof(u16), GFP_KERNEL); + if (!mcam->entry2target_pffunc) + goto free_mem; + mutex_init(&mcam->lock); return 0; @@ -1118,12 +1446,110 @@ free_mem: return -ENOMEM; } +static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) +{ + struct npc_pkind *pkind = &rvu->hw->pkind; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_hwinfo *hw = rvu->hw; + u64 npc_const, npc_const1; + u64 npc_const2 = 0; + + npc_const = rvu_read64(rvu, blkaddr, NPC_AF_CONST); + npc_const1 = rvu_read64(rvu, blkaddr, NPC_AF_CONST1); + if (npc_const1 & BIT_ULL(63)) + npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); + + pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL; + hw->npc_kpu_entries = npc_const1 & 0xFFFULL; + hw->npc_kpus = (npc_const >> 8) & 0x1FULL; + hw->npc_intfs = npc_const & 0xFULL; + hw->npc_counters = (npc_const >> 48) & 0xFFFFULL; + + mcam->banks = (npc_const >> 44) & 0xFULL; + mcam->banksize = (npc_const >> 28) & 0xFFFFULL; + /* Extended set */ + if (npc_const2) { + hw->npc_ext_set = true; + hw->npc_counters = (npc_const2 >> 16) & 0xFFFFULL; + mcam->banksize = npc_const2 & 0xFFFFULL; + } + + mcam->counters.max = hw->npc_counters; +} + +static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_hwinfo *hw = rvu->hw; + u64 nibble_ena, rx_kex, tx_kex; + u8 intf; + + /* Reserve last counter for MCAM RX miss action which is set to + * drop packet. This way we will know how many pkts didn't match + * any MCAM entry. + */ + mcam->counters.max--; + mcam->rx_miss_act_cntr = mcam->counters.max; + + rx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_RX]; + tx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_TX]; + nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex); + + nibble_ena = rvu_npc_get_tx_nibble_cfg(rvu, nibble_ena); + if (nibble_ena) { + tx_kex &= ~NPC_PARSE_NIBBLE; + tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena); + npc_mkex_default.keyx_cfg[NIX_INTF_TX] = tx_kex; + } + + /* Configure RX interfaces */ + for (intf = 0; intf < hw->npc_intfs; intf++) { + if (is_npc_intf_tx(intf)) + continue; + + /* Set RX MCAM search key size. LA..LE (ltype only) + Channel */ + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + rx_kex); + + /* If MCAM lookup doesn't result in a match, drop the received + * packet. And map this action to a counter to count dropped + * packets. 
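+ *
+ * For example (illustrative value): with the encoding used below, a miss
+ * counter index of 1023 (0x3FF) is programmed as
+ * ((1023 >> 9) << 12) | BIT_ULL(9) | 1023 = 0x13FF.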
+ */ + rvu_write64(rvu, blkaddr, + NPC_AF_INTFX_MISS_ACT(intf), NIX_RX_ACTIONOP_DROP); + + /* NPC_AF_INTFX_MISS_STAT_ACT[14:12] - counter[11:9] + * NPC_AF_INTFX_MISS_STAT_ACT[8:0] - counter[8:0] + */ + rvu_write64(rvu, blkaddr, + NPC_AF_INTFX_MISS_STAT_ACT(intf), + ((mcam->rx_miss_act_cntr >> 9) << 12) | + BIT_ULL(9) | mcam->rx_miss_act_cntr); + } + + /* Configure TX interfaces */ + for (intf = 0; intf < hw->npc_intfs; intf++) { + if (is_npc_intf_rx(intf)) + continue; + + /* Extract Ltypes LID_LA to LID_LE */ + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + tx_kex); + + /* Set TX miss action to UCAST_DEFAULT i.e + * transmit the packet on NIX LF SQ's default channel. + */ + rvu_write64(rvu, blkaddr, + NPC_AF_INTFX_MISS_ACT(intf), + NIX_TX_ACTIONOP_UCAST_DEFAULT); + } +} + int rvu_npc_init(struct rvu *rvu) { struct npc_kpu_profile_adapter *kpu = &rvu->kpu; struct npc_pkind *pkind = &rvu->hw->pkind; struct npc_mcam *mcam = &rvu->hw->mcam; - u64 cfg, nibble_ena, rx_kex, tx_kex; int blkaddr, entry, bank, err; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -1132,17 +1558,15 @@ int rvu_npc_init(struct rvu *rvu) return -ENODEV; } + rvu_npc_hw_init(rvu, blkaddr); + /* First disable all MCAM entries, to stop traffic towards NIXLFs */ - cfg = rvu_read64(rvu, blkaddr, NPC_AF_CONST); - for (bank = 0; bank < ((cfg >> 44) & 0xF); bank++) { - for (entry = 0; entry < ((cfg >> 28) & 0xFFFF); entry++) + for (bank = 0; bank < mcam->banks; bank++) { + for (entry = 0; entry < mcam->banksize; entry++) rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(entry, bank), 0); } - /* Allocate resource bimap for pkind*/ - pkind->rsrc.max = (rvu_read64(rvu, blkaddr, - NPC_AF_CONST1) >> 12) & 0xFF; err = rvu_alloc_bitmap(&pkind->rsrc); if (err) return err; @@ -1180,42 +1604,21 @@ int rvu_npc_init(struct rvu *rvu) BIT_ULL(32) | BIT_ULL(24) | BIT_ULL(6) | BIT_ULL(2) | BIT_ULL(1)); - /* Set RX and TX side MCAM search key size. - * LA..LD (ltype only) + Channel - */ - rx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_RX]; - tx_kex = npc_mkex_default.keyx_cfg[NIX_INTF_TX]; - nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex); - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX), rx_kex); - /* Due to an errata (35786) in A0 pass silicon, parse nibble enable - * configuration has to be identical for both Rx and Tx interfaces. - */ - if (is_rvu_96xx_B0(rvu)) { - tx_kex &= ~NPC_PARSE_NIBBLE; - tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena); - } - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX), tx_kex); - - err = npc_mcam_rsrcs_init(rvu, blkaddr); - if (err) - return err; + rvu_npc_setup_interfaces(rvu, blkaddr); /* Configure MKEX profile */ npc_load_mkex_profile(rvu, blkaddr, rvu->mkex_pfl_name); - /* Set TX miss action to UCAST_DEFAULT i.e - * transmit the packet on NIX LF SQ's default channel. - */ - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_ACT(NIX_INTF_TX), - NIX_TX_ACTIONOP_UCAST_DEFAULT); + err = npc_mcam_rsrcs_init(rvu, blkaddr); + if (err) + return err; - /* If MCAM lookup doesn't result in a match, drop the received packet. - * And map this action to a counter to count dropped pkts. 
- */ - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_ACT(NIX_INTF_RX), - NIX_RX_ACTIONOP_DROP); - rvu_write64(rvu, blkaddr, NPC_AF_INTFX_MISS_STAT_ACT(NIX_INTF_RX), - BIT_ULL(9) | mcam->rx_miss_act_cntr); + err = npc_flow_steering_init(rvu, blkaddr); + if (err) { + dev_err(rvu->dev, + "Incorrect mkex profile loaded using default mkex\n"); + npc_load_mkex_profile(rvu, blkaddr, def_pfl_name); + } return 0; } @@ -1307,10 +1710,13 @@ static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam, /* Set mapping and increment counter's refcnt */ mcam->entry2cntr_map[entry] = cntr; mcam->cntr_refcnt[cntr]++; - /* Enable stats */ + /* Enable stats + * NPC_AF_MCAMEX_BANKX_STAT_ACT[14:12] - counter[11:9] + * NPC_AF_MCAMEX_BANKX_STAT_ACT[8:0] - counter[8:0] + */ rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank), - BIT_ULL(9) | cntr); + ((cntr >> 9) << 12) | BIT_ULL(9) | cntr); } static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu, @@ -1380,6 +1786,7 @@ static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, cntr); + mcam->entry2target_pffunc[index] = 0x0; } } } @@ -1766,6 +2173,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, goto exit; mcam->entry2pfvf_map[req->entry] = 0; + mcam->entry2target_pffunc[req->entry] = 0x0; npc_mcam_clear_bit(mcam, req->entry); npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); @@ -1785,18 +2193,49 @@ exit: return rc; } +int rvu_mbox_handler_npc_mcam_read_entry(struct rvu *rvu, + struct npc_mcam_read_entry_req *req, + struct npc_mcam_read_entry_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, rc; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); + if (!rc) { + npc_read_mcam_entry(rvu, mcam, blkaddr, req->entry, + &rsp->entry_data, + &rsp->intf, &rsp->enable); + } + + mutex_unlock(&mcam->lock); + return rc; +} + int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, struct npc_mcam_write_entry_req *req, struct msg_rsp *rsp) { + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; u16 pcifunc = req->hdr.pcifunc; + u16 channel, chan_mask; int blkaddr, rc; + u8 nix_intf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; + chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK; + channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK; + channel &= chan_mask; + mutex_lock(&mcam->lock); rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); if (rc) @@ -1808,12 +2247,28 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, goto exit; } - if (req->intf != NIX_INTF_RX && req->intf != NIX_INTF_TX) { + if (!is_npc_interface_valid(rvu, req->intf)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } - npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, req->intf, + if (is_npc_intf_tx(req->intf)) + nix_intf = pfvf->nix_tx_intf; + else + nix_intf = pfvf->nix_rx_intf; + + if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { + rc = NPC_MCAM_INVALID_REQ; + goto exit; + } + + if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, + pcifunc)) { + rc = NPC_MCAM_INVALID_REQ; + goto exit; + } + + npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf, &req->entry_data, req->enable_entry); if (req->set_cntr) @@ -2141,6 +2596,7 @@ int 
rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, struct npc_mcam_alloc_and_write_entry_req *req, struct npc_mcam_alloc_and_write_entry_rsp *rsp) { + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); struct npc_mcam_alloc_counter_req cntr_req; struct npc_mcam_alloc_counter_rsp cntr_rsp; struct npc_mcam_alloc_entry_req entry_req; @@ -2148,13 +2604,26 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, struct npc_mcam *mcam = &rvu->hw->mcam; u16 entry = NPC_MCAM_ENTRY_INVALID; u16 cntr = NPC_MCAM_ENTRY_INVALID; + u16 channel, chan_mask; int blkaddr, rc; + u8 nix_intf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; - if (req->intf != NIX_INTF_RX && req->intf != NIX_INTF_TX) + if (!is_npc_interface_valid(rvu, req->intf)) + return NPC_MCAM_INVALID_REQ; + + chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK; + channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK; + channel &= chan_mask; + + if (npc_mcam_verify_channel(rvu, req->hdr.pcifunc, req->intf, channel)) + return NPC_MCAM_INVALID_REQ; + + if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, + req->hdr.pcifunc)) return NPC_MCAM_INVALID_REQ; /* Try to allocate a MCAM entry */ @@ -2196,7 +2665,13 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, write_entry: mutex_lock(&mcam->lock); - npc_config_mcam_entry(rvu, mcam, blkaddr, entry, req->intf, + + if (is_npc_intf_tx(req->intf)) + nix_intf = pfvf->nix_tx_intf; + else + nix_intf = pfvf->nix_rx_intf; + + npc_config_mcam_entry(rvu, mcam, blkaddr, entry, nix_intf, &req->entry_data, req->enable_entry); if (req->alloc_cntr) @@ -2255,26 +2730,72 @@ int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req, return 0; } -int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf) +bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, + u16 pcifunc, u8 intf, struct mcam_entry *entry, + int *index) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, index; bool enable; + u8 nix_intf; + + if (is_npc_intf_tx(intf)) + nix_intf = pfvf->nix_tx_intf; + else + nix_intf = pfvf->nix_rx_intf; + + *index = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); + /* don't force enable unicast entry */ + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index); + npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf, + entry, enable); + + return enable; +} + +int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, + struct msg_req *req, + struct npc_mcam_read_base_rule_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int index, blkaddr, nixlf, rc = 0; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_pfvf *pfvf; + u8 intf, enable; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) - return NIX_AF_ERR_AF_LF_INVALID; + return NPC_MCAM_INVALID_REQ; - if (!pfvf->rxvlan) - return 0; + /* Return the channel number in case of PF */ + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) { + pfvf = rvu_get_pfvf(rvu, pcifunc); + rsp->entry.kw[0] = pfvf->rx_chan_base; + rsp->entry.kw_mask[0] = 0xFFFULL; + goto out; + } + /* Find the pkt steering rule installed by PF to this VF */ + mutex_lock(&mcam->lock); + for (index = 0; index < mcam->bmap_entries; index++) { + if (mcam->entry2target_pffunc[index] == pcifunc) + goto read_entry; + } + + rc = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL); + if (rc < 0) { + mutex_unlock(&mcam->lock); + goto out; + }
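+ /* For example (illustrative value): a PF whose rx_chan_base is 0x800
+ * is answered above with kw[0] = 0x800 and kw_mask[0] = 0xFFF, i.e. a
+ * base rule matching only its 12-bit receive channel.
+ */
+ /* Read the default 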
ucast entry if there is no pkt steering rule */ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); - pfvf->entry.action = npc_get_mcam_action(rvu, mcam, blkaddr, index); - enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, index); - npc_config_mcam_entry(rvu, mcam, blkaddr, pfvf->rxvlan_index, - NIX_INTF_RX, &pfvf->entry, enable); - - return 0; +read_entry: + /* Read the mcam entry */ + npc_read_mcam_entry(rvu, mcam, blkaddr, index, &rsp->entry, &intf, + &enable); + mutex_unlock(&mcam->lock); +out: + return rc; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c new file mode 100644 index 000000000000..4ddfdff33a61 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -0,0 +1,1334 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2020 Marvell. + */ + +#include <linux/bitfield.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" +#include "npc.h" + +#define NPC_BYTESM GENMASK_ULL(19, 16) +#define NPC_HDR_OFFSET GENMASK_ULL(15, 8) +#define NPC_KEY_OFFSET GENMASK_ULL(5, 0) +#define NPC_LDATA_EN BIT_ULL(7) + +static const char * const npc_flow_names[] = { + [NPC_DMAC] = "dmac", + [NPC_SMAC] = "smac", + [NPC_ETYPE] = "ether type", + [NPC_OUTER_VID] = "outer vlan id", + [NPC_TOS] = "tos", + [NPC_SIP_IPV4] = "ipv4 source ip", + [NPC_DIP_IPV4] = "ipv4 destination ip", + [NPC_SIP_IPV6] = "ipv6 source ip", + [NPC_DIP_IPV6] = "ipv6 destination ip", + [NPC_SPORT_TCP] = "tcp source port", + [NPC_DPORT_TCP] = "tcp destination port", + [NPC_SPORT_UDP] = "udp source port", + [NPC_DPORT_UDP] = "udp destination port", + [NPC_SPORT_SCTP] = "sctp source port", + [NPC_DPORT_SCTP] = "sctp destination port", + [NPC_UNKNOWN] = "unknown", +}; + +const char *npc_get_field_name(u8 hdr) +{ + if (hdr >= ARRAY_SIZE(npc_flow_names)) + return npc_flow_names[NPC_UNKNOWN]; + + return npc_flow_names[hdr]; +} + +/* Compute keyword masks and figure out the number of keywords a field + * spans in the key. + */ +static void npc_set_kw_masks(struct npc_mcam *mcam, u8 type, + u8 nr_bits, int start_kwi, int offset, u8 intf) +{ + struct npc_key_field *field = &mcam->rx_key_fields[type]; + u8 bits_in_kw; + int max_kwi; + + if (mcam->banks_per_entry == 1) + max_kwi = 1; /* NPC_MCAM_KEY_X1 */ + else if (mcam->banks_per_entry == 2) + max_kwi = 3; /* NPC_MCAM_KEY_X2 */ + else + max_kwi = 6; /* NPC_MCAM_KEY_X4 */ + + if (is_npc_intf_tx(intf)) + field = &mcam->tx_key_fields[type]; + + if (offset + nr_bits <= 64) { + /* one KW only */ + if (start_kwi > max_kwi) + return; + field->kw_mask[start_kwi] |= GENMASK_ULL(nr_bits - 1, 0) + << offset; + field->nr_kws = 1; + } else if (offset + nr_bits > 64 && + offset + nr_bits <= 128) { + /* two KWs */ + if (start_kwi + 1 > max_kwi) + return; + /* first KW mask */ + bits_in_kw = 64 - offset; + field->kw_mask[start_kwi] |= GENMASK_ULL(bits_in_kw - 1, 0) + << offset; + /* second KW mask i.e. mask for rest of bits */ + bits_in_kw = nr_bits + offset - 64; + field->kw_mask[start_kwi + 1] |= GENMASK_ULL(bits_in_kw - 1, 0); + field->nr_kws = 2; + } else { + /* three KWs */ + if (start_kwi + 2 > max_kwi) + return; + /* first KW mask */ + bits_in_kw = 64 - offset; + field->kw_mask[start_kwi] |= GENMASK_ULL(bits_in_kw - 1, 0) + << offset; + /* second KW mask */ + field->kw_mask[start_kwi + 1] = ~0ULL; + /* third KW mask i.e. 
mask for rest of bits */ + bits_in_kw = nr_bits + offset - 128; + field->kw_mask[start_kwi + 2] |= GENMASK_ULL(bits_in_kw - 1, 0); + field->nr_kws = 3; + } +} + +/* Helper function to figure out whether field exists in the key */ +static bool npc_is_field_present(struct rvu *rvu, enum key_fields type, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct npc_key_field *input; + + input = &mcam->rx_key_fields[type]; + if (is_npc_intf_tx(intf)) + input = &mcam->tx_key_fields[type]; + + return input->nr_kws > 0; +} + +static bool npc_is_same(struct npc_key_field *input, + struct npc_key_field *field) +{ + int ret; + + ret = memcmp(&input->layer_mdata, &field->layer_mdata, + sizeof(struct npc_layer_mdata)); + return ret == 0; +} + +static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type, + u64 cfg, u8 lid, u8 lt, u8 intf) +{ + struct npc_key_field *input = &mcam->rx_key_fields[type]; + + if (is_npc_intf_tx(intf)) + input = &mcam->tx_key_fields[type]; + + input->layer_mdata.hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); + input->layer_mdata.key = FIELD_GET(NPC_KEY_OFFSET, cfg); + input->layer_mdata.len = FIELD_GET(NPC_BYTESM, cfg) + 1; + input->layer_mdata.ltype = lt; + input->layer_mdata.lid = lid; +} + +static bool npc_check_overlap_fields(struct npc_key_field *input1, + struct npc_key_field *input2) +{ + int kwi; + + /* Fields with same layer id and different ltypes are mutually + * exclusive, hence they are allowed to overlap + */ + if (input1->layer_mdata.lid == input2->layer_mdata.lid && + input1->layer_mdata.ltype != input2->layer_mdata.ltype) + return false; + + for (kwi = 0; kwi < NPC_MAX_KWS_IN_KEY; kwi++) { + if (input1->kw_mask[kwi] & input2->kw_mask[kwi]) + return true; + } + + return false; +} + +/* Helper function to check whether given field overlaps with any other fields + * in the key. Due to limitations on key size and the key extraction profile in + * use, higher layers can overwrite lower layer's header fields. Hence overlap + * needs to be checked. + */ +static bool npc_check_overlap(struct rvu *rvu, int blkaddr, + enum key_fields type, u8 start_lid, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct npc_key_field *dummy, *input; + int start_kwi, offset; + u8 nr_bits, lid, lt, ld; + u64 cfg; + + dummy = &mcam->rx_key_fields[NPC_UNKNOWN]; + input = &mcam->rx_key_fields[type]; + + if (is_npc_intf_tx(intf)) { + dummy = &mcam->tx_key_fields[NPC_UNKNOWN]; + input = &mcam->tx_key_fields[type]; + } + + for (lid = start_lid; lid < NPC_MAX_LID; lid++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + for (ld = 0; ld < NPC_MAX_LD; ld++) { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_INTFX_LIDX_LTX_LDX_CFG + (intf, lid, lt, ld)); + if (!FIELD_GET(NPC_LDATA_EN, cfg)) + continue; + memset(dummy, 0, sizeof(struct npc_key_field)); + npc_set_layer_mdata(mcam, NPC_UNKNOWN, cfg, + lid, lt, intf); + /* exclude input */ + if (npc_is_same(input, dummy)) + continue; + start_kwi = dummy->layer_mdata.key / 8; + offset = (dummy->layer_mdata.key * 8) % 64; + nr_bits = dummy->layer_mdata.len * 8; + /* form KW masks */ + npc_set_kw_masks(mcam, NPC_UNKNOWN, nr_bits, + start_kwi, offset, intf); + /* check whether any input field bits fall into + * any other field's bits.
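+ *
+ * For example (illustrative): if this field and some later layer's
+ * field both land on key byte 18, their kw_masks intersect and the
+ * check below reports an overlap.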
+ */ + if (npc_check_overlap_fields(dummy, input)) + return true; + } + } + } + + return false; +} + +static int npc_check_field(struct rvu *rvu, int blkaddr, enum key_fields type, + u8 intf) +{ + if (!npc_is_field_present(rvu, type, intf) || + npc_check_overlap(rvu, blkaddr, type, 0, intf)) + return -EOPNOTSUPP; + return 0; +} + +static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, + u8 key_nibble, u8 intf) +{ + u8 offset = (key_nibble * 4) % 64; /* offset within key word */ + u8 kwi = (key_nibble * 4) / 64; /* which word in key */ + u8 nr_bits = 4; /* bits in a nibble */ + u8 type; + + switch (bit_number) { + case 0 ... 2: + type = NPC_CHAN; + break; + case 3: + type = NPC_ERRLEV; + break; + case 4 ... 5: + type = NPC_ERRCODE; + break; + case 6: + type = NPC_LXMB; + break; + /* check for LTYPE only as of now */ + case 9: + type = NPC_LA; + break; + case 12: + type = NPC_LB; + break; + case 15: + type = NPC_LC; + break; + case 18: + type = NPC_LD; + break; + case 21: + type = NPC_LE; + break; + case 24: + type = NPC_LF; + break; + case 27: + type = NPC_LG; + break; + case 30: + type = NPC_LH; + break; + default: + return; + } + npc_set_kw_masks(mcam, type, nr_bits, kwi, offset, intf); +} + +static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct npc_key_field *key_fields; + /* Ether type can come from three layers + * (ethernet, single tagged, double tagged) + */ + struct npc_key_field *etype_ether; + struct npc_key_field *etype_tag1; + struct npc_key_field *etype_tag2; + /* Outer VLAN TCI can come from two layers + * (single tagged, double tagged) + */ + struct npc_key_field *vlan_tag1; + struct npc_key_field *vlan_tag2; + u64 *features; + u8 start_lid; + int i; + + key_fields = mcam->rx_key_fields; + features = &mcam->rx_features; + + if (is_npc_intf_tx(intf)) { + key_fields = mcam->tx_key_fields; + features = &mcam->tx_features; + } + + /* Handle header fields which can come from multiple layers like + * etype, outer vlan tci. These fields should have the same position in + * the key; otherwise installing one rule needs more than one MCAM + * entry, which complicates MCAM space management.
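+ *
+ * For example (illustrative): if the profile extracts the ether type
+ * of both NPC_LT_LB_CTAG and NPC_LT_LB_STAG_QINQ packets to the same
+ * key offset, the kw_mask comparisons below succeed and one NPC_ETYPE
+ * rule covers both tag layouts.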
+ */ + etype_ether = &key_fields[NPC_ETYPE_ETHER]; + etype_tag1 = &key_fields[NPC_ETYPE_TAG1]; + etype_tag2 = &key_fields[NPC_ETYPE_TAG2]; + vlan_tag1 = &key_fields[NPC_VLAN_TAG1]; + vlan_tag2 = &key_fields[NPC_VLAN_TAG2]; + + /* if key profile programmed does not extract Ethertype at all */ + if (!etype_ether->nr_kws && !etype_tag1->nr_kws && !etype_tag2->nr_kws) + goto vlan_tci; + + /* if key profile programmed extracts Ethertype from one layer */ + if (etype_ether->nr_kws && !etype_tag1->nr_kws && !etype_tag2->nr_kws) + key_fields[NPC_ETYPE] = *etype_ether; + if (!etype_ether->nr_kws && etype_tag1->nr_kws && !etype_tag2->nr_kws) + key_fields[NPC_ETYPE] = *etype_tag1; + if (!etype_ether->nr_kws && !etype_tag1->nr_kws && etype_tag2->nr_kws) + key_fields[NPC_ETYPE] = *etype_tag2; + + /* if key profile programmed extracts Ethertype from multiple layers */ + if (etype_ether->nr_kws && etype_tag1->nr_kws) { + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (etype_ether->kw_mask[i] != etype_tag1->kw_mask[i]) + goto vlan_tci; + } + key_fields[NPC_ETYPE] = *etype_tag1; + } + if (etype_ether->nr_kws && etype_tag2->nr_kws) { + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (etype_ether->kw_mask[i] != etype_tag2->kw_mask[i]) + goto vlan_tci; + } + key_fields[NPC_ETYPE] = *etype_tag2; + } + if (etype_tag1->nr_kws && etype_tag2->nr_kws) { + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (etype_tag1->kw_mask[i] != etype_tag2->kw_mask[i]) + goto vlan_tci; + } + key_fields[NPC_ETYPE] = *etype_tag2; + } + + /* check none of higher layers overwrite Ethertype */ + start_lid = key_fields[NPC_ETYPE].layer_mdata.lid + 1; + if (npc_check_overlap(rvu, blkaddr, NPC_ETYPE, start_lid, intf)) + goto vlan_tci; + *features |= BIT_ULL(NPC_ETYPE); +vlan_tci: + /* if key profile does not extract outer vlan tci at all */ + if (!vlan_tag1->nr_kws && !vlan_tag2->nr_kws) + goto done; + + /* if key profile extracts outer vlan tci from one layer */ + if (vlan_tag1->nr_kws && !vlan_tag2->nr_kws) + key_fields[NPC_OUTER_VID] = *vlan_tag1; + if (!vlan_tag1->nr_kws && vlan_tag2->nr_kws) + key_fields[NPC_OUTER_VID] = *vlan_tag2; + + /* if key profile extracts outer vlan tci from multiple layers */ + if (vlan_tag1->nr_kws && vlan_tag2->nr_kws) { + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (vlan_tag1->kw_mask[i] != vlan_tag2->kw_mask[i]) + goto done; + } + key_fields[NPC_OUTER_VID] = *vlan_tag2; + } + /* check none of higher layers overwrite outer vlan tci */ + start_lid = key_fields[NPC_OUTER_VID].layer_mdata.lid + 1; + if (npc_check_overlap(rvu, blkaddr, NPC_OUTER_VID, start_lid, intf)) + goto done; + *features |= BIT_ULL(NPC_OUTER_VID); +done: + return; +} + +static void npc_scan_ldata(struct rvu *rvu, int blkaddr, u8 lid, + u8 lt, u64 cfg, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u8 hdr, key, nr_bytes, bit_offset; + u8 la_ltype, la_start; + /* starting KW index and starting bit position */ + int start_kwi, offset; + + nr_bytes = FIELD_GET(NPC_BYTESM, cfg) + 1; + hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); + key = FIELD_GET(NPC_KEY_OFFSET, cfg); + start_kwi = key / 8; + offset = (key * 8) % 64; + + /* For Tx, Layer A has NIX_INST_HDR_S(64 bytes) preceding + * ethernet header. 
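+ * Hence the Tx scan below uses la_ltype = NPC_LT_LA_IH_NIX_ETHER with
+ * a non-zero la_start, while Rx scans plain NPC_LT_LA_ETHER from 0.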
+ */ + if (is_npc_intf_tx(intf)) { + la_ltype = NPC_LT_LA_IH_NIX_ETHER; + la_start = 8; + } else { + la_ltype = NPC_LT_LA_ETHER; + la_start = 0; + } + +#define NPC_SCAN_HDR(name, hlid, hlt, hstart, hlen) \ +do { \ + if (lid == (hlid) && lt == (hlt)) { \ + if ((hstart) >= hdr && \ + ((hstart) + (hlen)) <= (hdr + nr_bytes)) { \ + bit_offset = (hdr + nr_bytes - (hstart) - (hlen)) * 8; \ + npc_set_layer_mdata(mcam, (name), cfg, lid, lt, intf); \ + npc_set_kw_masks(mcam, (name), (hlen) * 8, \ + start_kwi, offset + bit_offset, intf);\ + } \ + } \ +} while (0) + + /* List LID, LTYPE, start offset from layer and length(in bytes) of + * packet header fields below. + * Example: Source IP is 4 bytes and starts at 12th byte of IP header + */ + NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4); + NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4); + NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); + NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 16); + NPC_SCAN_HDR(NPC_SPORT_UDP, NPC_LID_LD, NPC_LT_LD_UDP, 0, 2); + NPC_SCAN_HDR(NPC_DPORT_UDP, NPC_LID_LD, NPC_LT_LD_UDP, 2, 2); + NPC_SCAN_HDR(NPC_SPORT_TCP, NPC_LID_LD, NPC_LT_LD_TCP, 0, 2); + NPC_SCAN_HDR(NPC_DPORT_TCP, NPC_LID_LD, NPC_LT_LD_TCP, 2, 2); + NPC_SCAN_HDR(NPC_SPORT_SCTP, NPC_LID_LD, NPC_LT_LD_SCTP, 0, 2); + NPC_SCAN_HDR(NPC_DPORT_SCTP, NPC_LID_LD, NPC_LT_LD_SCTP, 2, 2); + NPC_SCAN_HDR(NPC_ETYPE_ETHER, NPC_LID_LA, NPC_LT_LA_ETHER, 12, 2); + NPC_SCAN_HDR(NPC_ETYPE_TAG1, NPC_LID_LB, NPC_LT_LB_CTAG, 4, 2); + NPC_SCAN_HDR(NPC_ETYPE_TAG2, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 8, 2); + NPC_SCAN_HDR(NPC_VLAN_TAG1, NPC_LID_LB, NPC_LT_LB_CTAG, 2, 2); + NPC_SCAN_HDR(NPC_VLAN_TAG2, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 2, 2); + NPC_SCAN_HDR(NPC_DMAC, NPC_LID_LA, la_ltype, la_start, 6); + NPC_SCAN_HDR(NPC_SMAC, NPC_LID_LA, la_ltype, la_start, 6); + /* PF_FUNC is 2 bytes at 0th byte of NPC_LT_LA_IH_NIX_ETHER */ + NPC_SCAN_HDR(NPC_PF_FUNC, NPC_LID_LA, NPC_LT_LA_IH_NIX_ETHER, 0, 2); +} + +static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 *features = &mcam->rx_features; + u64 tcp_udp_sctp; + int err, hdr; + + if (is_npc_intf_tx(intf)) + features = &mcam->tx_features; + + for (hdr = NPC_DMAC; hdr < NPC_HEADER_FIELDS_MAX; hdr++) { + err = npc_check_field(rvu, blkaddr, hdr, intf); + if (!err) + *features |= BIT_ULL(hdr); + } + + tcp_udp_sctp = BIT_ULL(NPC_SPORT_TCP) | BIT_ULL(NPC_SPORT_UDP) | + BIT_ULL(NPC_DPORT_TCP) | BIT_ULL(NPC_DPORT_UDP) | + BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP); + + /* for tcp/udp/sctp corresponding layer type should be in the key */ + if (*features & tcp_udp_sctp) + if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) + *features &= ~tcp_udp_sctp; + + /* for vlan corresponding layer type should be in the key */ + if (*features & BIT_ULL(NPC_OUTER_VID)) + if (npc_check_field(rvu, blkaddr, NPC_LB, intf)) + *features &= ~BIT_ULL(NPC_OUTER_VID); +} + +/* Scan key extraction profile and record how fields of our interest + * fill the key structure. Also verify Channel and DMAC exists in + * key and not overwritten by other header fields. + */ +static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u8 lid, lt, ld, bitnr; + u8 key_nibble = 0; + u64 cfg; + + /* Scan and note how parse result is going to be in key. + * A bit set in PARSE_NIBBLE_ENA corresponds to a nibble from + * parse result in the key. The enabled nibbles from parse result + * will be concatenated in key. 
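+ *
+ * For example (illustrative): if only the three channel nibbles are
+ * enabled, the 12-bit channel forms the start of the key; key_nibble
+ * below advances by one for every enabled bit scanned.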
+ */ + cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf)); + cfg &= NPC_PARSE_NIBBLE; + for_each_set_bit(bitnr, (unsigned long *)&cfg, 31) { + npc_scan_parse_result(mcam, bitnr, key_nibble, intf); + key_nibble++; + } + + /* Scan and note how layer data is going to be in key */ + for (lid = 0; lid < NPC_MAX_LID; lid++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + for (ld = 0; ld < NPC_MAX_LD; ld++) { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_INTFX_LIDX_LTX_LDX_CFG + (intf, lid, lt, ld)); + if (!FIELD_GET(NPC_LDATA_EN, cfg)) + continue; + npc_scan_ldata(rvu, blkaddr, lid, lt, cfg, + intf); + } + } + } + + return 0; +} + +static int npc_scan_verify_kex(struct rvu *rvu, int blkaddr) +{ + int err; + + err = npc_scan_kex(rvu, blkaddr, NIX_INTF_RX); + if (err) + return err; + + err = npc_scan_kex(rvu, blkaddr, NIX_INTF_TX); + if (err) + return err; + + /* Channel is mandatory */ + if (!npc_is_field_present(rvu, NPC_CHAN, NIX_INTF_RX)) { + dev_err(rvu->dev, "Channel not present in Key\n"); + return -EINVAL; + } + /* check that none of the fields overwrite channel */ + if (npc_check_overlap(rvu, blkaddr, NPC_CHAN, 0, NIX_INTF_RX)) { + dev_err(rvu->dev, "Channel cannot be overwritten\n"); + return -EINVAL; + } + /* DMAC should be present in key for unicast filter to work */ + if (!npc_is_field_present(rvu, NPC_DMAC, NIX_INTF_RX)) { + dev_err(rvu->dev, "DMAC not present in Key\n"); + return -EINVAL; + } + /* check that none of the fields overwrite DMAC */ + if (npc_check_overlap(rvu, blkaddr, NPC_DMAC, 0, NIX_INTF_RX)) { + dev_err(rvu->dev, "DMAC cannot be overwritten\n"); + return -EINVAL; + } + + npc_set_features(rvu, blkaddr, NIX_INTF_TX); + npc_set_features(rvu, blkaddr, NIX_INTF_RX); + npc_handle_multi_layer_fields(rvu, blkaddr, NIX_INTF_TX); + npc_handle_multi_layer_fields(rvu, blkaddr, NIX_INTF_RX); + + return 0; +} + +int npc_flow_steering_init(struct rvu *rvu, int blkaddr) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + + INIT_LIST_HEAD(&mcam->mcam_rules); + + return npc_scan_verify_kex(rvu, blkaddr); +} + +static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 *mcam_features = &mcam->rx_features; + u64 unsupported; + u8 bit; + + if (is_npc_intf_tx(intf)) + mcam_features = &mcam->tx_features; + + unsupported = (*mcam_features ^ features) & ~(*mcam_features); + if (unsupported) { + dev_info(rvu->dev, "Unsupported flow(s):\n"); + for_each_set_bit(bit, (unsigned long *)&unsupported, 64) + dev_info(rvu->dev, "%s ", npc_get_field_name(bit)); + return -EOPNOTSUPP; + } + + return 0; +} + +/* npc_update_entry - Based on the masks generated during + * the key scanning, updates the given entry with value and + * masks for the field of interest. Maximum 16 bytes of a packet + * header can be extracted by HW, hence lo and hi are sufficient. + * When field bytes are less than or equal to 8 then hi should be + * 0 for value and mask. + * + * If exact match of value is required then mask should be all 1's. + * If any bits in mask are 0 then corresponding bits in value are + * don't care.
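+ *
+ * Example with illustrative values: to match ether type 0x0800 exactly,
+ * pass val_lo = 0x0800, mask_lo = 0xFFFF and val_hi = mask_hi = 0,
+ * since the field is only two bytes wide.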
+ */ +static void npc_update_entry(struct rvu *rvu, enum key_fields type, + struct mcam_entry *entry, u64 val_lo, + u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct mcam_entry dummy = { {0} }; + struct npc_key_field *field; + u64 kw1, kw2, kw3; + u8 shift; + int i; + + field = &mcam->rx_key_fields[type]; + if (is_npc_intf_tx(intf)) + field = &mcam->tx_key_fields[type]; + + if (!field->nr_kws) + return; + + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (!field->kw_mask[i]) + continue; + /* place key value in kw[x] */ + shift = __ffs64(field->kw_mask[i]); + /* update entry value */ + kw1 = (val_lo << shift) & field->kw_mask[i]; + dummy.kw[i] = kw1; + /* update entry mask */ + kw1 = (mask_lo << shift) & field->kw_mask[i]; + dummy.kw_mask[i] = kw1; + + if (field->nr_kws == 1) + break; + /* place remaining bits of key value in kw[x + 1] */ + if (field->nr_kws == 2) { + /* update entry value */ + kw2 = shift ? val_lo >> (64 - shift) : 0; + kw2 |= (val_hi << shift); + kw2 &= field->kw_mask[i + 1]; + dummy.kw[i + 1] = kw2; + /* update entry mask */ + kw2 = shift ? mask_lo >> (64 - shift) : 0; + kw2 |= (mask_hi << shift); + kw2 &= field->kw_mask[i + 1]; + dummy.kw_mask[i + 1] = kw2; + break; + } + /* place remaining bits of key value in kw[x + 1], kw[x + 2] */ + if (field->nr_kws == 3) { + /* update entry value */ + kw2 = shift ? val_lo >> (64 - shift) : 0; + kw2 |= (val_hi << shift); + kw2 &= field->kw_mask[i + 1]; + kw3 = shift ? val_hi >> (64 - shift) : 0; + kw3 &= field->kw_mask[i + 2]; + dummy.kw[i + 1] = kw2; + dummy.kw[i + 2] = kw3; + /* update entry mask */ + kw2 = shift ? mask_lo >> (64 - shift) : 0; + kw2 |= (mask_hi << shift); + kw2 &= field->kw_mask[i + 1]; + kw3 = shift ? mask_hi >> (64 - shift) : 0; + kw3 &= field->kw_mask[i + 2]; + dummy.kw_mask[i + 1] = kw2; + dummy.kw_mask[i + 2] = kw3; + break; + } + } + /* dummy is now ready with values and masks for the given key + * field; clear and update the input entry with those + */ + for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (!field->kw_mask[i]) + continue; + entry->kw[i] &= ~field->kw_mask[i]; + entry->kw_mask[i] &= ~field->kw_mask[i]; + + entry->kw[i] |= dummy.kw[i]; + entry->kw_mask[i] |= dummy.kw_mask[i]; + } +} + +#define IPV6_WORDS 4 + +static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry, + u64 features, struct flow_msg *pkt, + struct flow_msg *mask, + struct rvu_npc_mcam_rule *output, u8 intf) +{ + u32 src_ip[IPV6_WORDS], src_ip_mask[IPV6_WORDS]; + u32 dst_ip[IPV6_WORDS], dst_ip_mask[IPV6_WORDS]; + struct flow_msg *opkt = &output->packet; + struct flow_msg *omask = &output->mask; + u64 mask_lo, mask_hi; + u64 val_lo, val_hi; + + /* For an ipv6 address fe80::2c68:63ff:fe5e:2d0a the packet + * values to be programmed in MCAM should be as below: + * val_high: 0xfe80000000000000 + * val_low: 0x2c6863fffe5e2d0a + */ + if (features & BIT_ULL(NPC_SIP_IPV6)) { + be32_to_cpu_array(src_ip_mask, mask->ip6src, IPV6_WORDS); + be32_to_cpu_array(src_ip, pkt->ip6src, IPV6_WORDS); + + mask_hi = (u64)src_ip_mask[0] << 32 | src_ip_mask[1]; + mask_lo = (u64)src_ip_mask[2] << 32 | src_ip_mask[3]; + val_hi = (u64)src_ip[0] << 32 | src_ip[1]; + val_lo = (u64)src_ip[2] << 32 | src_ip[3]; + + npc_update_entry(rvu, NPC_SIP_IPV6, entry, val_lo, val_hi, + mask_lo, mask_hi, intf); + memcpy(opkt->ip6src, pkt->ip6src, sizeof(opkt->ip6src)); + memcpy(omask->ip6src, mask->ip6src, sizeof(omask->ip6src)); + }
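+ /* For example (illustrative): a /64 source prefix yields
+ * mask_hi = ~0ULL and mask_lo = 0, i.e. only the upper 64 bits
+ * of the address are compared.
+ */
+ if (features & BIT_ULL(NPC_DIP_IPV6)) {
+ be32_to_cpu_array(dst_ip_mask, mask->ip6dst, 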
IPV6_WORDS); + be32_to_cpu_array(dst_ip, pkt->ip6dst, IPV6_WORDS); + + mask_hi = (u64)dst_ip_mask[0] << 32 | dst_ip_mask[1]; + mask_lo = (u64)dst_ip_mask[2] << 32 | dst_ip_mask[3]; + val_hi = (u64)dst_ip[0] << 32 | dst_ip[1]; + val_lo = (u64)dst_ip[2] << 32 | dst_ip[3]; + + npc_update_entry(rvu, NPC_DIP_IPV6, entry, val_lo, val_hi, + mask_lo, mask_hi, intf); + memcpy(opkt->ip6dst, pkt->ip6dst, sizeof(opkt->ip6dst)); + memcpy(omask->ip6dst, mask->ip6dst, sizeof(omask->ip6dst)); + } +} + +static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, + u64 features, struct flow_msg *pkt, + struct flow_msg *mask, + struct rvu_npc_mcam_rule *output, u8 intf) +{ + u64 dmac_mask = ether_addr_to_u64(mask->dmac); + u64 smac_mask = ether_addr_to_u64(mask->smac); + u64 dmac_val = ether_addr_to_u64(pkt->dmac); + u64 smac_val = ether_addr_to_u64(pkt->smac); + struct flow_msg *opkt = &output->packet; + struct flow_msg *omask = &output->mask; + + if (!features) + return; + + /* For tcp/udp/sctp LTYPE should be present in entry */ + if (features & (BIT_ULL(NPC_SPORT_TCP) | BIT_ULL(NPC_DPORT_TCP))) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_TCP, + 0, ~0ULL, 0, intf); + if (features & (BIT_ULL(NPC_SPORT_UDP) | BIT_ULL(NPC_DPORT_UDP))) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_UDP, + 0, ~0ULL, 0, intf); + if (features & (BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP))) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP, + 0, ~0ULL, 0, intf); + + if (features & BIT_ULL(NPC_OUTER_VID)) + npc_update_entry(rvu, NPC_LB, entry, + NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0, + NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf); + +#define NPC_WRITE_FLOW(field, member, val_lo, val_hi, mask_lo, mask_hi) \ +do { \ + if (features & BIT_ULL((field))) { \ + npc_update_entry(rvu, (field), entry, (val_lo), (val_hi), \ + (mask_lo), (mask_hi), intf); \ + memcpy(&opkt->member, &pkt->member, sizeof(pkt->member)); \ + memcpy(&omask->member, &mask->member, sizeof(mask->member)); \ + } \ +} while (0) + + NPC_WRITE_FLOW(NPC_DMAC, dmac, dmac_val, 0, dmac_mask, 0); + NPC_WRITE_FLOW(NPC_SMAC, smac, smac_val, 0, smac_mask, 0); + NPC_WRITE_FLOW(NPC_ETYPE, etype, ntohs(pkt->etype), 0, + ntohs(mask->etype), 0); + NPC_WRITE_FLOW(NPC_SIP_IPV4, ip4src, ntohl(pkt->ip4src), 0, + ntohl(mask->ip4src), 0); + NPC_WRITE_FLOW(NPC_DIP_IPV4, ip4dst, ntohl(pkt->ip4dst), 0, + ntohl(mask->ip4dst), 0); + NPC_WRITE_FLOW(NPC_SPORT_TCP, sport, ntohs(pkt->sport), 0, + ntohs(mask->sport), 0); + NPC_WRITE_FLOW(NPC_SPORT_UDP, sport, ntohs(pkt->sport), 0, + ntohs(mask->sport), 0); + NPC_WRITE_FLOW(NPC_DPORT_TCP, dport, ntohs(pkt->dport), 0, + ntohs(mask->dport), 0); + NPC_WRITE_FLOW(NPC_DPORT_UDP, dport, ntohs(pkt->dport), 0, + ntohs(mask->dport), 0); + NPC_WRITE_FLOW(NPC_SPORT_SCTP, sport, ntohs(pkt->sport), 0, + ntohs(mask->sport), 0); + NPC_WRITE_FLOW(NPC_DPORT_SCTP, dport, ntohs(pkt->dport), 0, + ntohs(mask->dport), 0); + + NPC_WRITE_FLOW(NPC_OUTER_VID, vlan_tci, ntohs(pkt->vlan_tci), 0, + ntohs(mask->vlan_tci), 0); + + npc_update_ipv6_flow(rvu, entry, features, pkt, mask, output, intf); +} + +static struct rvu_npc_mcam_rule *rvu_mcam_find_rule(struct npc_mcam *mcam, + u16 entry) +{ + struct rvu_npc_mcam_rule *iter; + + mutex_lock(&mcam->lock); + list_for_each_entry(iter, &mcam->mcam_rules, list) { + if (iter->entry == entry) { + mutex_unlock(&mcam->lock); + return iter; + } + } + mutex_unlock(&mcam->lock); + + return NULL; +} + +static void rvu_mcam_add_rule(struct npc_mcam *mcam, + struct rvu_npc_mcam_rule *rule) +{ + struct list_head 
*head = &mcam->mcam_rules; + struct rvu_npc_mcam_rule *iter; + + mutex_lock(&mcam->lock); + list_for_each_entry(iter, &mcam->mcam_rules, list) { + if (iter->entry > rule->entry) + break; + head = &iter->list; + } + + list_add(&rule->list, head); + mutex_unlock(&mcam->lock); +} + +static void rvu_mcam_remove_counter_from_rule(struct rvu *rvu, u16 pcifunc, + struct rvu_npc_mcam_rule *rule) +{ + struct npc_mcam_oper_counter_req free_req = { 0 }; + struct msg_rsp free_rsp; + + if (!rule->has_cntr) + return; + + free_req.hdr.pcifunc = pcifunc; + free_req.cntr = rule->cntr; + + rvu_mbox_handler_npc_mcam_free_counter(rvu, &free_req, &free_rsp); + rule->has_cntr = false; +} + +static void rvu_mcam_add_counter_to_rule(struct rvu *rvu, u16 pcifunc, + struct rvu_npc_mcam_rule *rule, + struct npc_install_flow_rsp *rsp) +{ + struct npc_mcam_alloc_counter_req cntr_req = { 0 }; + struct npc_mcam_alloc_counter_rsp cntr_rsp = { 0 }; + int err; + + cntr_req.hdr.pcifunc = pcifunc; + cntr_req.contig = true; + cntr_req.count = 1; + + /* We try to allocate a counter to track the stats of this + * rule. If a counter could not be allocated then proceed + * without one, because counters are scarcer than entries. + */ + err = rvu_mbox_handler_npc_mcam_alloc_counter(rvu, &cntr_req, + &cntr_rsp); + if (!err && cntr_rsp.count) { + rule->cntr = cntr_rsp.cntr; + rule->has_cntr = true; + rsp->counter = rule->cntr; + } else { + rsp->counter = err; + } +} + +static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, + struct mcam_entry *entry, + struct npc_install_flow_req *req, u16 target) +{ + struct nix_rx_action action; + + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, + ~0ULL, 0, NIX_INTF_RX); + + *(u64 *)&action = 0x00; + action.pf_func = target; + action.op = req->op; + action.index = req->index; + action.match_id = req->match_id; + action.flow_key_alg = req->flow_key_alg; + + if (req->op == NIX_RX_ACTION_DEFAULT && pfvf->def_ucast_rule) + action = pfvf->def_ucast_rule->rx_action; + + entry->action = *(u64 *)&action; +
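+ /* For example (illustrative): when a VF requests NIX_RX_ACTION_DEFAULT
+ * and the PF's default unicast rule does RSS, the RSS action (op,
+ * index, flow_key_alg) is inherited above.
+ */
+ /* VTAG0 starts at 0th byte of LID_B.
+ * VTAG1 starts at 4th byte of LID_B. 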
+ */ + entry->vtag_action = FIELD_PREP(TX_VTAG0_DEF_MASK, req->vtag0_def) | + FIELD_PREP(TX_VTAG0_OP_MASK, req->vtag0_op) | + FIELD_PREP(TX_VTAG0_LID_MASK, NPC_LID_LA) | + FIELD_PREP(TX_VTAG0_RELPTR_MASK, 20) | + FIELD_PREP(TX_VTAG1_DEF_MASK, req->vtag1_def) | + FIELD_PREP(TX_VTAG1_OP_MASK, req->vtag1_op) | + FIELD_PREP(TX_VTAG1_LID_MASK, NPC_LID_LA) | + FIELD_PREP(TX_VTAG1_RELPTR_MASK, 24); +} + +static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, + int nixlf, struct rvu_pfvf *pfvf, + struct npc_install_flow_req *req, + struct npc_install_flow_rsp *rsp, bool enable, + bool pf_set_vfs_mac) +{ + struct rvu_npc_mcam_rule *def_ucast_rule = pfvf->def_ucast_rule; + u64 features, installed_features, missing_features = 0; + struct npc_mcam_write_entry_req write_req = { 0 }; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule dummy = { 0 }; + struct rvu_npc_mcam_rule *rule; + bool new = false, msg_from_vf; + u16 owner = req->hdr.pcifunc; + struct msg_rsp write_rsp; + struct mcam_entry *entry; + int entry_index, err; + + msg_from_vf = !!(owner & RVU_PFVF_FUNC_MASK); + + installed_features = req->features; + features = req->features; + entry = &write_req.entry_data; + entry_index = req->entry; + + npc_update_flow(rvu, entry, features, &req->packet, &req->mask, &dummy, + req->intf); + + if (is_npc_intf_rx(req->intf)) + npc_update_rx_entry(rvu, pfvf, entry, req, target); + else + npc_update_tx_entry(rvu, pfvf, entry, req, target); + + /* Default unicast rules do not exist for TX */ + if (is_npc_intf_tx(req->intf)) + goto find_rule; + + if (def_ucast_rule) + missing_features = (def_ucast_rule->features ^ features) & + def_ucast_rule->features; + + if (req->default_rule && req->append) { + /* add to default rule */ + if (missing_features) + npc_update_flow(rvu, entry, missing_features, + &def_ucast_rule->packet, + &def_ucast_rule->mask, + &dummy, req->intf); + enable = rvu_npc_write_default_rule(rvu, blkaddr, + nixlf, target, + pfvf->nix_rx_intf, entry, + &entry_index); + installed_features = req->features | missing_features; + } else if (req->default_rule && !req->append) { + /* overwrite default rule */ + enable = rvu_npc_write_default_rule(rvu, blkaddr, + nixlf, target, + pfvf->nix_rx_intf, entry, + &entry_index); + } else if (msg_from_vf) { + /* normal rule - include default rule also to it for VF */ + npc_update_flow(rvu, entry, missing_features, + &def_ucast_rule->packet, &def_ucast_rule->mask, + &dummy, req->intf); + installed_features = req->features | missing_features; + } + +find_rule: + rule = rvu_mcam_find_rule(mcam, entry_index); + if (!rule) { + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + new = true; + } + /* no counter for default rule */ + if (req->default_rule) + goto update_rule; + + /* allocate new counter if rule has no counter */ + if (req->set_cntr && !rule->has_cntr) + rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp); + + /* if user wants to delete an existing counter for a rule then + * free the counter + */ + if (!req->set_cntr && rule->has_cntr) + rvu_mcam_remove_counter_from_rule(rvu, owner, rule); + + write_req.hdr.pcifunc = owner; + write_req.entry = req->entry; + write_req.intf = req->intf; + write_req.enable_entry = (u8)enable; + /* if counter is available then clear and use it */ + if (req->set_cntr && rule->has_cntr) { + rvu_write64(rvu, blkaddr, NPC_AF_MATCH_STATX(rule->cntr), 0x00); + write_req.set_cntr = 1; + write_req.cntr = rule->cntr; + } + + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, 
&write_req, + &write_rsp); + if (err) { + rvu_mcam_remove_counter_from_rule(rvu, owner, rule); + if (new) + kfree(rule); + return err; + } +update_rule: + memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet)); + memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask)); + rule->entry = entry_index; + memcpy(&rule->rx_action, &entry->action, sizeof(struct nix_rx_action)); + if (is_npc_intf_tx(req->intf)) + memcpy(&rule->tx_action, &entry->action, + sizeof(struct nix_tx_action)); + rule->vtag_action = entry->vtag_action; + rule->features = installed_features; + rule->default_rule = req->default_rule; + rule->owner = owner; + rule->enable = enable; + if (is_npc_intf_tx(req->intf)) + rule->intf = pfvf->nix_tx_intf; + else + rule->intf = pfvf->nix_rx_intf; + + if (new) + rvu_mcam_add_rule(mcam, rule); + if (req->default_rule) + pfvf->def_ucast_rule = rule; + + /* VF's MAC address is being changed via PF */ + if (pf_set_vfs_mac) { + ether_addr_copy(pfvf->default_mac, req->packet.dmac); + ether_addr_copy(pfvf->mac_addr, req->packet.dmac); + } + + if (pfvf->pf_set_vf_cfg && req->vtag0_type == NIX_AF_LFX_RX_VTAG_TYPE7) + rule->vfvlan_cfg = true; + + return 0; +} + +int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, + struct npc_install_flow_req *req, + struct npc_install_flow_rsp *rsp) +{ + bool from_vf = !!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK); + int blkaddr, nixlf, err; + struct rvu_pfvf *pfvf; + bool pf_set_vfs_mac = false; + bool enable = true; + u16 target; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) { + dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); + return -ENODEV; + } + + if (!is_npc_interface_valid(rvu, req->intf)) + return -EINVAL; + + if (from_vf && req->default_rule) + return NPC_MCAM_PERM_DENIED; + + /* Each PF/VF info is maintained in struct rvu_pfvf. + * rvu_pfvf for the target PF/VF needs to be retrieved + * hence modify pcifunc accordingly. + */ + + /* AF installing for a PF/VF */ + if (!req->hdr.pcifunc) + target = req->vf; + /* PF installing for its VF */ + else if (!from_vf && req->vf) { + target = (req->hdr.pcifunc & ~RVU_PFVF_FUNC_MASK) | req->vf; + pf_set_vfs_mac = req->default_rule && + (req->features & BIT_ULL(NPC_DMAC)); + } + /* msg received from PF/VF */ + else + target = req->hdr.pcifunc; + + if (npc_check_unsupported_flows(rvu, req->features, req->intf)) + return -EOPNOTSUPP; + + if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) + return -EINVAL; + + pfvf = rvu_get_pfvf(rvu, target); + + /* PF installing for its VF */ + if (req->hdr.pcifunc && !from_vf && req->vf) + pfvf->pf_set_vf_cfg = 1; + + /* update req destination mac addr */ + if ((req->features & BIT_ULL(NPC_DMAC)) && is_npc_intf_rx(req->intf) && + is_zero_ether_addr(req->packet.dmac)) { + ether_addr_copy(req->packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req->mask.dmac); + } + + err = nix_get_nixlf(rvu, target, &nixlf, NULL); + + /* If interface is uninitialized then do not enable entry */ + if (err || (!req->default_rule && !pfvf->def_ucast_rule)) + enable = false; + + /* Packets reaching NPC in Tx path implies that a + * NIXLF is properly setup and transmitting. + * Hence rules can be enabled for Tx. + */ + if (is_npc_intf_tx(req->intf)) + enable = true; + + /* Do not allow requests from uninitialized VFs */ + if (from_vf && !enable) + return -EINVAL; + + /* If message is from VF then its flow should not overlap with + * reserved unicast flow. 
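+ *
+ * For example (illustrative): a VF rule that also matches on DMAC
+ * would shadow the PF-installed unicast entry, so such a request is
+ * rejected with -EINVAL below.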
+ */ + if (from_vf && pfvf->def_ucast_rule && is_npc_intf_rx(req->intf) && + pfvf->def_ucast_rule->features & req->features) + return -EINVAL; + + return npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, req, rsp, + enable, pf_set_vfs_mac); +} + +static int npc_delete_flow(struct rvu *rvu, struct rvu_npc_mcam_rule *rule, + u16 pcifunc) +{ + struct npc_mcam_ena_dis_entry_req dis_req = { 0 }; + struct msg_rsp dis_rsp; + + if (rule->default_rule) + return 0; + + if (rule->has_cntr) + rvu_mcam_remove_counter_from_rule(rvu, pcifunc, rule); + + dis_req.hdr.pcifunc = pcifunc; + dis_req.entry = rule->entry; + + list_del(&rule->list); + kfree(rule); + + return rvu_mbox_handler_npc_mcam_dis_entry(rvu, &dis_req, &dis_rsp); +} + +int rvu_mbox_handler_npc_delete_flow(struct rvu *rvu, + struct npc_delete_flow_req *req, + struct msg_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *iter, *tmp; + u16 pcifunc = req->hdr.pcifunc; + struct list_head del_list; + + INIT_LIST_HEAD(&del_list); + + mutex_lock(&mcam->lock); + list_for_each_entry_safe(iter, tmp, &mcam->mcam_rules, list) { + if (iter->owner == pcifunc) { + /* All rules */ + if (req->all) { + list_move_tail(&iter->list, &del_list); + /* Range of rules */ + } else if (req->end && iter->entry >= req->start && + iter->entry <= req->end) { + list_move_tail(&iter->list, &del_list); + /* single rule */ + } else if (req->entry == iter->entry) { + list_move_tail(&iter->list, &del_list); + break; + } + } + } + mutex_unlock(&mcam->lock); + + list_for_each_entry_safe(iter, tmp, &del_list, list) { + /* clear the mcam entry target pcifunc */ + mcam->entry2target_pffunc[iter->entry] = 0x0; + if (npc_delete_flow(rvu, iter, pcifunc)) + dev_err(rvu->dev, "rule deletion failed for entry:%d", + iter->entry); + } + + return 0; +} + +static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, + struct rvu_npc_mcam_rule *rule, + struct rvu_pfvf *pfvf) +{ + struct npc_mcam_write_entry_req write_req = { 0 }; + struct mcam_entry *entry = &write_req.entry_data; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct msg_rsp rsp; + u8 intf, enable; + int err; + + ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); + + npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, + entry, &intf, &enable); + + npc_update_entry(rvu, NPC_DMAC, entry, + ether_addr_to_u64(pfvf->mac_addr), 0, + 0xffffffffffffull, 0, intf); + + write_req.hdr.pcifunc = rule->owner; + write_req.entry = rule->entry; + + mutex_unlock(&mcam->lock); + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, &rsp); + mutex_lock(&mcam->lock); + + return err; +} + +void npc_mcam_enable_flows(struct rvu *rvu, u16 target) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, target); + struct rvu_npc_mcam_rule *def_ucast_rule; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule; + int blkaddr, bank, index; + u64 def_action; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + + def_ucast_rule = pfvf->def_ucast_rule; + + mutex_lock(&mcam->lock); + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (is_npc_intf_rx(rule->intf) && + rule->rx_action.pf_func == target && !rule->enable) { + if (rule->default_rule) { + npc_enable_mcam_entry(rvu, mcam, blkaddr, + rule->entry, true); + rule->enable = true; + continue; + } + + if (rule->vfvlan_cfg) + npc_update_dmac_value(rvu, blkaddr, rule, pfvf); + + if (rule->rx_action.op == NIX_RX_ACTION_DEFAULT) { + if (!def_ucast_rule) + continue; + /* Use default unicast entry action */ + 
rule->rx_action = def_ucast_rule->rx_action; + def_action = *(u64 *)&def_ucast_rule->rx_action; + bank = npc_get_bank(mcam, rule->entry); + rvu_write64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION + (rule->entry, bank), def_action); + } + + npc_enable_mcam_entry(rvu, mcam, blkaddr, + rule->entry, true); + rule->enable = true; + } + } + + /* Enable MCAM entries installed by PF with target as VF pcifunc */ + for (index = 0; index < mcam->bmap_entries; index++) { + if (mcam->entry2target_pffunc[index] == target) + npc_enable_mcam_entry(rvu, mcam, blkaddr, + index, true); + } + mutex_unlock(&mcam->lock); +} + +void npc_mcam_disable_flows(struct rvu *rvu, u16 target) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int blkaddr, index; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + + mutex_lock(&mcam->lock); + /* Disable MCAM entries installed by PF with target as VF pcifunc */ + for (index = 0; index < mcam->bmap_entries; index++) { + if (mcam->entry2target_pffunc[index] == target) + npc_enable_mcam_entry(rvu, mcam, blkaddr, + index, false); + } + mutex_unlock(&mcam->lock); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c index 9d7c135c7965..e266f0c49559 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c @@ -35,7 +35,7 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = { {0x1200, 0x12E0} } }, {NIX_TXSCH_LVL_TL3, 3, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608}, {0x1610, 0x1618} } }, - {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x1768} } }, + {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } }, {NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } }, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 7ca599b973c0..1f3379f12b81 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -54,20 +54,20 @@ #define RVU_PRIV_PFX_MSIX_CFG(a) (0x8000110 | (a) << 16) #define RVU_PRIV_PFX_ID_CFG(a) (0x8000120 | (a) << 16) #define RVU_PRIV_PFX_INT_CFG(a) (0x8000200 | (a) << 16) -#define RVU_PRIV_PFX_NIX0_CFG (0x8000300) +#define RVU_PRIV_PFX_NIXX_CFG(a) (0x8000300 | (a) << 3) #define RVU_PRIV_PFX_NPA_CFG (0x8000310) #define RVU_PRIV_PFX_SSO_CFG (0x8000320) #define RVU_PRIV_PFX_SSOW_CFG (0x8000330) #define RVU_PRIV_PFX_TIM_CFG (0x8000340) -#define RVU_PRIV_PFX_CPT0_CFG (0x8000350) +#define RVU_PRIV_PFX_CPTX_CFG(a) (0x8000350 | (a) << 3) #define RVU_PRIV_BLOCK_TYPEX_REV(a) (0x8000400 | (a) << 3) #define RVU_PRIV_HWVFX_INT_CFG(a) (0x8001280 | (a) << 16) -#define RVU_PRIV_HWVFX_NIX0_CFG (0x8001300) +#define RVU_PRIV_HWVFX_NIXX_CFG(a) (0x8001300 | (a) << 3) #define RVU_PRIV_HWVFX_NPA_CFG (0x8001310) #define RVU_PRIV_HWVFX_SSO_CFG (0x8001320) #define RVU_PRIV_HWVFX_SSOW_CFG (0x8001330) #define RVU_PRIV_HWVFX_TIM_CFG (0x8001340) -#define RVU_PRIV_HWVFX_CPT0_CFG (0x8001350) +#define RVU_PRIV_HWVFX_CPTX_CFG(a) (0x8001350 | (a) << 3) /* RVU PF registers */ #define RVU_PF_VFX_PFVF_MBOX0 (0x00000) @@ -446,6 +446,8 @@ #define NPC_AF_BLK_RST (0x00040) #define NPC_AF_MCAM_SCRUB_CTL (0x000a0) #define NPC_AF_KCAM_SCRUB_CTL (0x000b0) +#define NPC_AF_CONST2 (0x00100) +#define NPC_AF_CONST3 (0x00110) #define NPC_AF_KPUX_CFG(a) (0x00500 | (a) << 3) #define NPC_AF_PCK_CFG (0x00600) #define NPC_AF_PCK_DEF_OL2 (0x00610) @@ -469,20 +471,7 @@ (0x900000 | (a) << 16 | (b) << 12 
| (c) << 5 | (d) << 3) #define NPC_AF_INTFX_LDATAX_FLAGSX_CFG(a, b, c) \ (0x980000 | (a) << 16 | (b) << 12 | (c) << 3) -#define NPC_AF_MCAMEX_BANKX_CAMX_INTF(a, b, c) \ - (0x1000000ull | (a) << 10 | (b) << 6 | (c) << 3) -#define NPC_AF_MCAMEX_BANKX_CAMX_W0(a, b, c) \ - (0x1000010ull | (a) << 10 | (b) << 6 | (c) << 3) -#define NPC_AF_MCAMEX_BANKX_CAMX_W1(a, b, c) \ - (0x1000020ull | (a) << 10 | (b) << 6 | (c) << 3) -#define NPC_AF_MCAMEX_BANKX_CFG(a, b) (0x1800000ull | (a) << 8 | (b) << 4) -#define NPC_AF_MCAMEX_BANKX_STAT_ACT(a, b) \ - (0x1880000 | (a) << 8 | (b) << 4) -#define NPC_AF_MATCH_STATX(a) (0x1880008 | (a) << 8) #define NPC_AF_INTFX_MISS_STAT_ACT(a) (0x1880040 + (a) * 0x8) -#define NPC_AF_MCAMEX_BANKX_ACTION(a, b) (0x1900000ull | (a) << 8 | (b) << 4) -#define NPC_AF_MCAMEX_BANKX_TAG_ACT(a, b) \ - (0x1900008 | (a) << 8 | (b) << 4) #define NPC_AF_INTFX_MISS_ACT(a) (0x1a00000 | (a) << 4) #define NPC_AF_INTFX_MISS_TAG_ACT(a) (0x1b00008 | (a) << 4) #define NPC_AF_MCAM_BANKX_HITX(a, b) (0x1c80000 | (a) << 8 | (b) << 4) @@ -499,6 +488,70 @@ #define NPC_AF_DBG_DATAX(a) (0x3001400 | (a) << 4) #define NPC_AF_DBG_RESULTX(a) (0x3001800 | (a) << 4) +#define NPC_AF_MCAMEX_BANKX_CAMX_INTF(a, b, c) ({ \ + u64 offset; \ + \ + offset = (0x1000000ull | (a) << 10 | (b) << 6 | (c) << 3); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000000ull | (a) << 8 | (b) << 22 | (c) << 3); \ + offset; }) + +#define NPC_AF_MCAMEX_BANKX_CAMX_W0(a, b, c) ({ \ + u64 offset; \ + \ + offset = (0x1000010ull | (a) << 10 | (b) << 6 | (c) << 3); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000010ull | (a) << 8 | (b) << 22 | (c) << 3); \ + offset; }) + +#define NPC_AF_MCAMEX_BANKX_CAMX_W1(a, b, c) ({ \ + u64 offset; \ + \ + offset = (0x1000020ull | (a) << 10 | (b) << 6 | (c) << 3); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000020ull | (a) << 8 | (b) << 22 | (c) << 3); \ + offset; }) + +#define NPC_AF_MCAMEX_BANKX_CFG(a, b) ({ \ + u64 offset; \ + \ + offset = (0x1800000ull | (a) << 8 | (b) << 4); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000038ull | (a) << 8 | (b) << 22); \ + offset; }) + +#define NPC_AF_MCAMEX_BANKX_ACTION(a, b) ({ \ + u64 offset; \ + \ + offset = (0x1900000ull | (a) << 8 | (b) << 4); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000040ull | (a) << 8 | (b) << 22); \ + offset; }) \ + +#define NPC_AF_MCAMEX_BANKX_TAG_ACT(a, b) ({ \ + u64 offset; \ + \ + offset = (0x1900008ull | (a) << 8 | (b) << 4); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000048ull | (a) << 8 | (b) << 22); \ + offset; }) \ + +#define NPC_AF_MCAMEX_BANKX_STAT_ACT(a, b) ({ \ + u64 offset; \ + \ + offset = (0x1880000ull | (a) << 8 | (b) << 4); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000050ull | (a) << 8 | (b) << 22); \ + offset; }) \ + +#define NPC_AF_MATCH_STATX(a) ({ \ + u64 offset; \ + \ + offset = (0x1880008ull | (a) << 8); \ + if (rvu->hw->npc_ext_set) \ + offset = (0x8000078ull | (a) << 8); \ + offset; }) \ + /* NDC */ #define NDC_AF_CONST (0x00000) #define NDC_AF_CLK_EN (0x00020) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index a3ecb5de9000..723643868589 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -14,6 +14,8 @@ /* RVU Block revision IDs */ #define RVU_BLK_RVUM_REVID 0x01 +#define RVU_MULTI_BLK_VER 0x7ULL + /* RVU Block Address Enumeration */ enum rvu_block_addr_e { BLKADDR_RVUM = 0x0ULL, @@ -31,7 +33,9 @@ enum rvu_block_addr_e { 
BLKADDR_NDC_NIX0_RX = 0xcULL, BLKADDR_NDC_NIX0_TX = 0xdULL, BLKADDR_NDC_NPA0 = 0xeULL, - BLK_COUNT = 0xfULL, + BLKADDR_NDC_NIX1_RX = 0x10ULL, + BLKADDR_NDC_NIX1_TX = 0x11ULL, + BLK_COUNT = 0x12ULL, }; /* RVU Block Type Enumeration */ @@ -917,4 +921,15 @@ enum nix_vtag_size { VTAGSIZE_T4 = 0x0, VTAGSIZE_T8 = 0x1, }; + +enum nix_tx_vtag_op { + NOP = 0x0, + VTAG_INSERT = 0x1, + VTAG_REPLACE = 0x2, +}; + +/* NIX RX VTAG actions */ +#define VTAG_STRIP BIT_ULL(4) +#define VTAG_CAPTURE BIT_ULL(5) + #endif /* RVU_STRUCT_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index b2c6385707c9..4193ae3bde6b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += octeontx2_nicvf.o octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o + otx2_ptp.o otx2_flows.o octeontx2_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index d2581090f9a4..68fb4e4757aa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -191,10 +191,14 @@ int otx2_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (!otx2_hw_set_mac_addr(pfvf, addr->sa_data)) + if (!otx2_hw_set_mac_addr(pfvf, addr->sa_data)) { memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - else + /* update dmac field in vlan offload rule */ + if (pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT) + otx2_install_rxvlan_offload_flow(pfvf); + } else { return -EPERM; + } return 0; } @@ -531,8 +535,10 @@ static int otx2_get_link(struct otx2_nic *pfvf) link = 4 * ((map >> 8) & 0xF) + ((map >> 4) & 0xF); } /* LBK channel */ - if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE) - link = 12; + if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE) { + map = pfvf->hw.tx_chan_base & 0x7FF; + link = pfvf->hw.cgx_links | ((map >> 8) & 0xF); + } return link; } @@ -1237,7 +1243,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) sq = &qset->sq[qidx]; sq->sqb_count = 0; - sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(u64 *), GFP_KERNEL); + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); if (!sq->sqb_ptrs) return -ENOMEM; @@ -1503,6 +1509,8 @@ void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf, pfvf->hw.tx_chan_base = rsp->tx_chan_base; pfvf->hw.lso_tsov4_idx = rsp->lso_tsov4_idx; pfvf->hw.lso_tsov6_idx = rsp->lso_tsov6_idx; + pfvf->hw.cgx_links = rsp->cgx_links; + pfvf->hw.lbk_links = rsp->lbk_links; } EXPORT_SYMBOL(mbox_handler_nix_lf_alloc); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index d6253f2a414d..b18b45d02165 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -18,6 +18,7 @@ #include <linux/timecounter.h> #include <mbox.h> +#include <npc.h> #include "otx2_reg.h" #include "otx2_txrx.h" #include <rvu_trace.h> @@ -197,12 +198,17 @@ struct otx2_hw { struct otx2_drv_stats drv_stats; u64 cgx_rx_stats[CGX_RX_STATS_COUNT]; u64 cgx_tx_stats[CGX_TX_STATS_COUNT]; + u8 cgx_links; /* No. of CGX links present in HW */ + u8 lbk_links; /* No. 
of LBK links present in HW */ }; struct otx2_vf_config { struct otx2_nic *pf; struct delayed_work link_event_work; bool intf_down; /* interface was either configured or not */ + u8 mac[ETH_ALEN]; + u16 vlan; + int tx_vtag_idx; }; struct flr_work { @@ -226,6 +232,32 @@ struct otx2_ptp { #define OTX2_HW_TIMESTAMP_LEN 8 +struct otx2_mac_table { + u8 addr[ETH_ALEN]; + u16 mcam_entry; + bool inuse; +}; + +struct otx2_flow_config { + u16 entry[NPC_MAX_NONCONTIG_ENTRIES]; + u32 nr_flows; +#define OTX2_MAX_NTUPLE_FLOWS 32 +#define OTX2_MAX_UNICAST_FLOWS 8 +#define OTX2_MAX_VLAN_FLOWS 1 +#define OTX2_MCAM_COUNT (OTX2_MAX_NTUPLE_FLOWS + \ + OTX2_MAX_UNICAST_FLOWS + \ + OTX2_MAX_VLAN_FLOWS) + u32 ntuple_offset; + u32 unicast_offset; + u32 rx_vlan_offset; + u32 vf_vlan_offset; +#define OTX2_PER_VF_VLAN_FLOWS 2 /* rx+tx per VF */ +#define OTX2_VF_VLAN_RX_INDEX 0 +#define OTX2_VF_VLAN_TX_INDEX 1 + u32 ntuple_max_flows; + struct list_head flow_list; +}; + struct otx2_nic { void __iomem *reg_base; struct net_device *netdev; @@ -236,6 +268,12 @@ struct otx2_nic { #define OTX2_FLAG_RX_TSTAMP_ENABLED BIT_ULL(0) #define OTX2_FLAG_TX_TSTAMP_ENABLED BIT_ULL(1) #define OTX2_FLAG_INTF_DOWN BIT_ULL(2) +#define OTX2_FLAG_MCAM_ENTRIES_ALLOC BIT_ULL(3) +#define OTX2_FLAG_NTUPLE_SUPPORT BIT_ULL(4) +#define OTX2_FLAG_UCAST_FLTR_SUPPORT BIT_ULL(5) +#define OTX2_FLAG_RX_VLAN_SUPPORT BIT_ULL(6) +#define OTX2_FLAG_VF_VLAN_SUPPORT BIT_ULL(7) +#define OTX2_FLAG_PF_SHUTDOWN BIT_ULL(8) #define OTX2_FLAG_RX_PAUSE_ENABLED BIT_ULL(9) #define OTX2_FLAG_TX_PAUSE_ENABLED BIT_ULL(10) u64 flags; @@ -264,6 +302,7 @@ struct otx2_nic { struct refill_work *refill_wrk; struct workqueue_struct *otx2_wq; struct work_struct rx_mode_work; + struct otx2_mac_table *mac_table; /* Ethtool stuff */ u32 msg_enable; @@ -273,6 +312,8 @@ struct otx2_nic { struct otx2_ptp *ptp; struct hwtstamp_config tstamp; + + struct otx2_flow_config *flow_cfg; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -642,4 +683,24 @@ int otx2_open(struct net_device *netdev); int otx2_stop(struct net_device *netdev); int otx2_set_real_num_queues(struct net_device *netdev, int tx_queues, int rx_queues); +/* MCAM filter related APIs */ +int otx2_mcam_flow_init(struct otx2_nic *pf); +int otx2_alloc_mcam_entries(struct otx2_nic *pfvf); +void otx2_mcam_flow_del(struct otx2_nic *pf); +int otx2_destroy_ntuple_flows(struct otx2_nic *pf); +int otx2_destroy_mcam_flows(struct otx2_nic *pfvf); +int otx2_get_flow(struct otx2_nic *pfvf, + struct ethtool_rxnfc *nfc, u32 location); +int otx2_get_all_flows(struct otx2_nic *pfvf, + struct ethtool_rxnfc *nfc, u32 *rule_locs); +int otx2_add_flow(struct otx2_nic *pfvf, + struct ethtool_rx_flow_spec *fsp); +int otx2_remove_flow(struct otx2_nic *pfvf, u32 location); +int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req); +int otx2_del_macfilter(struct net_device *netdev, const u8 *mac); +int otx2_add_macfilter(struct net_device *netdev, const u8 *mac); +int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); +int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); + #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 662fb80dbb9d..67171b66a56c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -551,6 +551,16 @@ static int otx2_get_rxnfc(struct net_device *dev, nfc->data = 
pfvf->hw.rx_queues; ret = 0; break; + case ETHTOOL_GRXCLSRLCNT: + nfc->rule_cnt = pfvf->flow_cfg->nr_flows; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = otx2_get_flow(pfvf, nfc, nfc->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + ret = otx2_get_all_flows(pfvf, nfc, rules); + break; + case ETHTOOL_GRXFH: return otx2_get_rss_hash_opts(pfvf, nfc); default: @@ -561,6 +571,50 @@ static int otx2_get_rxnfc(struct net_device *dev, static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) { + bool ntuple = !!(dev->features & NETIF_F_NTUPLE); + struct otx2_nic *pfvf = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_SRXFH: + ret = otx2_set_rss_hash_opts(pfvf, nfc); + break; + case ETHTOOL_SRXCLSRLINS: + if (netif_running(dev) && ntuple) + ret = otx2_add_flow(pfvf, &nfc->fs); + break; + case ETHTOOL_SRXCLSRLDEL: + if (netif_running(dev) && ntuple) + ret = otx2_remove_flow(pfvf, nfc->fs.location); + break; + default: + break; + } + + return ret; +} + +static int otx2vf_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rules) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_GRXRINGS: + nfc->data = pfvf->hw.rx_queues; + ret = 0; + break; + case ETHTOOL_GRXFH: + return otx2_get_rss_hash_opts(pfvf, nfc); + default: + break; + } + return ret; +} + +static int otx2vf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) +{ struct otx2_nic *pfvf = netdev_priv(dev); int ret = -EOPNOTSUPP; @@ -806,8 +860,8 @@ static const struct ethtool_ops otx2vf_ethtool_ops = { .get_sset_count = otx2vf_get_sset_count, .set_channels = otx2_set_channels, .get_channels = otx2_get_channels, - .get_rxnfc = otx2_get_rxnfc, - .set_rxnfc = otx2_set_rxnfc, + .get_rxnfc = otx2vf_get_rxnfc, + .set_rxnfc = otx2vf_set_rxnfc, .get_rxfh_key_size = otx2_get_rxfh_key_size, .get_rxfh_indir_size = otx2_get_rxfh_indir_size, .get_rxfh = otx2_get_rxfh, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c new file mode 100644 index 000000000000..be8ccfce1848 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -0,0 +1,820 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physical Function ethernet driver + * + * Copyright (C) 2020 Marvell.
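+ *
+ * This file backs the ethtool n-tuple classification hooks plus the
+ * unicast MAC and VLAN filters with NPC MCAM entries requested from
+ * the admin function (AF) over the mailbox.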
+ */ + +#include <net/ipv6.h> + +#include "otx2_common.h" + +#define OTX2_DEFAULT_ACTION 0x1 + +struct otx2_flow { + struct ethtool_rx_flow_spec flow_spec; + struct list_head list; + u32 location; + u16 entry; + bool is_vf; + int vf; +}; + +int otx2_alloc_mcam_entries(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_mcam_alloc_entry_req *req; + struct npc_mcam_alloc_entry_rsp *rsp; + int vf_vlan_max_flows; + int i; + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_mcam_alloc_entry(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + vf_vlan_max_flows = pfvf->total_vfs * OTX2_PER_VF_VLAN_FLOWS; + req->contig = false; + req->count = OTX2_MCAM_COUNT + vf_vlan_max_flows; + + /* Send message to AF */ + if (otx2_sync_mbox_msg(&pfvf->mbox)) { + mutex_unlock(&pfvf->mbox.lock); + return -EINVAL; + } + + rsp = (struct npc_mcam_alloc_entry_rsp *)otx2_mbox_get_rsp + (&pfvf->mbox.mbox, 0, &req->hdr); + + if (rsp->count != req->count) { + netdev_info(pfvf->netdev, + "Unable to allocate %d MCAM entries, got %d\n", + req->count, rsp->count); + /* support only ntuples here */ + flow_cfg->ntuple_max_flows = rsp->count; + flow_cfg->ntuple_offset = 0; + pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + } else { + flow_cfg->vf_vlan_offset = 0; + flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset + + vf_vlan_max_flows; + flow_cfg->unicast_offset = flow_cfg->ntuple_offset + + OTX2_MAX_NTUPLE_FLOWS; + flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset + + OTX2_MAX_UNICAST_FLOWS; + pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT; + pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; + pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT; + } + + for (i = 0; i < rsp->count; i++) + flow_cfg->entry[i] = rsp->entry_list[i]; + + pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC; + + mutex_unlock(&pfvf->mbox.lock); + + return 0; +} + +int otx2_mcam_flow_init(struct otx2_nic *pf) +{ + int err; + + pf->flow_cfg = devm_kzalloc(pf->dev, sizeof(struct otx2_flow_config), + GFP_KERNEL); + if (!pf->flow_cfg) + return -ENOMEM; + + INIT_LIST_HEAD(&pf->flow_cfg->flow_list); + + pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS; + + err = otx2_alloc_mcam_entries(pf); + if (err) + return err; + + pf->mac_table = devm_kzalloc(pf->dev, sizeof(struct otx2_mac_table) + * OTX2_MAX_UNICAST_FLOWS, GFP_KERNEL); + if (!pf->mac_table) + return -ENOMEM; + + return 0; +} + +void otx2_mcam_flow_del(struct otx2_nic *pf) +{ + otx2_destroy_mcam_flows(pf); +} + +/* On success adds an mcam entry + * On failure enables promiscuous mode + */ +static int otx2_do_add_macfilter(struct otx2_nic *pf, const u8 *mac) +{ + struct otx2_flow_config *flow_cfg = pf->flow_cfg; + struct npc_install_flow_req *req; + int err, i; + + if (!(pf->flags & OTX2_FLAG_UCAST_FLTR_SUPPORT)) + return -ENOMEM; + + /* don't have free mcam entries or uc list is greater than allotted */ + if (netdev_uc_count(pf->netdev) > OTX2_MAX_UNICAST_FLOWS) + return -ENOMEM; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_install_flow(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + /* unicast offset starts at 32; entries 0..31 are for ntuple */ + for (i = 0; i < OTX2_MAX_UNICAST_FLOWS; i++) { + if (pf->mac_table[i].inuse) + continue; + ether_addr_copy(pf->mac_table[i].addr, mac); + pf->mac_table[i].inuse = true; + pf->mac_table[i].mcam_entry = + flow_cfg->entry[i + flow_cfg->unicast_offset]; + req->entry = pf->mac_table[i].mcam_entry; + break; + } +
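+	/* Fill the install request for the slot reserved above: the
+	 * all-ones DMAC mask below makes this an exact match on the
+	 * unicast address, and NIX_RX_ACTION_DEFAULT applies the PF's
+	 * default steering action to the matched traffic.
+	 */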
ether_addr_copy(req->packet.dmac, mac); + eth_broadcast_addr((u8 *)&req->mask.dmac); + req->features = BIT_ULL(NPC_DMAC); + req->channel = pf->hw.rx_chan_base; + req->intf = NIX_INTF_RX; + req->op = NIX_RX_ACTION_DEFAULT; + req->set_cntr = 1; + + err = otx2_sync_mbox_msg(&pf->mbox); + mutex_unlock(&pf->mbox.lock); + + return err; +} + +int otx2_add_macfilter(struct net_device *netdev, const u8 *mac) +{ + struct otx2_nic *pf = netdev_priv(netdev); + + return otx2_do_add_macfilter(pf, mac); +} + +static bool otx2_get_mcamentry_for_mac(struct otx2_nic *pf, const u8 *mac, + int *mcam_entry) +{ + int i; + + for (i = 0; i < OTX2_MAX_UNICAST_FLOWS; i++) { + if (!pf->mac_table[i].inuse) + continue; + + if (ether_addr_equal(pf->mac_table[i].addr, mac)) { + *mcam_entry = pf->mac_table[i].mcam_entry; + pf->mac_table[i].inuse = false; + return true; + } + } + return false; +} + +int otx2_del_macfilter(struct net_device *netdev, const u8 *mac) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct npc_delete_flow_req *req; + int err, mcam_entry; + + /* check whether an mcam entry exists for the given mac */ + if (!otx2_get_mcamentry_for_mac(pf, mac, &mcam_entry)) + return 0; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_delete_flow(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + req->entry = mcam_entry; + /* Send message to AF */ + err = otx2_sync_mbox_msg(&pf->mbox); + mutex_unlock(&pf->mbox.lock); + + return err; +} + +static struct otx2_flow *otx2_find_flow(struct otx2_nic *pfvf, u32 location) +{ + struct otx2_flow *iter; + + list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { + if (iter->location == location) + return iter; + } + + return NULL; +} + +static void otx2_add_flow_to_list(struct otx2_nic *pfvf, struct otx2_flow *flow) +{ + struct list_head *head = &pfvf->flow_cfg->flow_list; + struct otx2_flow *iter; + + list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { + if (iter->location > flow->location) + break; + head = &iter->list; + } + + list_add(&flow->list, head); +} + +int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, + u32 location) +{ + struct otx2_flow *iter; + + if (location >= pfvf->flow_cfg->ntuple_max_flows) + return -EINVAL; + + list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { + if (iter->location == location) { + nfc->fs = iter->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, + u32 *rule_locs) +{ + u32 location = 0; + int idx = 0; + int err = 0; + + nfc->data = pfvf->flow_cfg->ntuple_max_flows; + while ((!err || err == -ENOENT) && idx < nfc->rule_cnt) { + err = otx2_get_flow(pfvf, nfc, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + + return err; +} + +static void otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req, + u32 flow_type) +{ + struct ethtool_usrip4_spec *ipv4_usr_mask = &fsp->m_u.usr_ip4_spec; + struct ethtool_usrip4_spec *ipv4_usr_hdr = &fsp->h_u.usr_ip4_spec; + struct ethtool_tcpip4_spec *ipv4_l4_mask = &fsp->m_u.tcp_ip4_spec; + struct ethtool_tcpip4_spec *ipv4_l4_hdr = &fsp->h_u.tcp_ip4_spec; + struct flow_msg *pmask = &req->mask; + struct flow_msg *pkt = &req->packet; + + switch (flow_type) { + case IP_USER_FLOW: + if (ipv4_usr_mask->ip4src) { + memcpy(&pkt->ip4src, &ipv4_usr_hdr->ip4src, + sizeof(pkt->ip4src)); + memcpy(&pmask->ip4src, &ipv4_usr_mask->ip4src, + sizeof(pmask->ip4src)); + req->features |= BIT_ULL(NPC_SIP_IPV4);
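+			/* Only fields with a non-zero ethtool mask are added
+			 * to req->features and hence to the MCAM match key.
+			 * Illustrative usage (example command, not from this
+			 * patch): "ethtool -U eth0 flow-type ip4 src-ip
+			 * 10.1.1.1 action 2" populates ip4src here and
+			 * steers matching packets to RQ 2.
+			 */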
+ } + if (ipv4_usr_mask->ip4dst) { + memcpy(&pkt->ip4dst, &ipv4_usr_hdr->ip4dst, + sizeof(pkt->ip4dst)); + memcpy(&pmask->ip4dst, &ipv4_usr_mask->ip4dst, + sizeof(pmask->ip4dst)); + req->features |= BIT_ULL(NPC_DIP_IPV4); + } + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + if (ipv4_l4_mask->ip4src) { + memcpy(&pkt->ip4src, &ipv4_l4_hdr->ip4src, + sizeof(pkt->ip4src)); + memcpy(&pmask->ip4src, &ipv4_l4_mask->ip4src, + sizeof(pmask->ip4src)); + req->features |= BIT_ULL(NPC_SIP_IPV4); + } + if (ipv4_l4_mask->ip4dst) { + memcpy(&pkt->ip4dst, &ipv4_l4_hdr->ip4dst, + sizeof(pkt->ip4dst)); + memcpy(&pmask->ip4dst, &ipv4_l4_mask->ip4dst, + sizeof(pmask->ip4dst)); + req->features |= BIT_ULL(NPC_DIP_IPV4); + } + if (ipv4_l4_mask->psrc) { + memcpy(&pkt->sport, &ipv4_l4_hdr->psrc, + sizeof(pkt->sport)); + memcpy(&pmask->sport, &ipv4_l4_mask->psrc, + sizeof(pmask->sport)); + if (flow_type == UDP_V4_FLOW) + req->features |= BIT_ULL(NPC_SPORT_UDP); + else if (flow_type == TCP_V4_FLOW) + req->features |= BIT_ULL(NPC_SPORT_TCP); + else + req->features |= BIT_ULL(NPC_SPORT_SCTP); + } + if (ipv4_l4_mask->pdst) { + memcpy(&pkt->dport, &ipv4_l4_hdr->pdst, + sizeof(pkt->dport)); + memcpy(&pmask->dport, &ipv4_l4_mask->pdst, + sizeof(pmask->dport)); + if (flow_type == UDP_V4_FLOW) + req->features |= BIT_ULL(NPC_DPORT_UDP); + else if (flow_type == TCP_V4_FLOW) + req->features |= BIT_ULL(NPC_DPORT_TCP); + else + req->features |= BIT_ULL(NPC_DPORT_SCTP); + } + break; + default: + break; + } +} + +static void otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req, + u32 flow_type) +{ + struct ethtool_usrip6_spec *ipv6_usr_mask = &fsp->m_u.usr_ip6_spec; + struct ethtool_usrip6_spec *ipv6_usr_hdr = &fsp->h_u.usr_ip6_spec; + struct ethtool_tcpip6_spec *ipv6_l4_mask = &fsp->m_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *ipv6_l4_hdr = &fsp->h_u.tcp_ip6_spec; + struct flow_msg *pmask = &req->mask; + struct flow_msg *pkt = &req->packet; + + switch (flow_type) { + case IPV6_USER_FLOW: + if (!ipv6_addr_any((struct in6_addr *)ipv6_usr_mask->ip6src)) { + memcpy(&pkt->ip6src, &ipv6_usr_hdr->ip6src, + sizeof(pkt->ip6src)); + memcpy(&pmask->ip6src, &ipv6_usr_mask->ip6src, + sizeof(pmask->ip6src)); + req->features |= BIT_ULL(NPC_SIP_IPV6); + } + if (!ipv6_addr_any((struct in6_addr *)ipv6_usr_mask->ip6dst)) { + memcpy(&pkt->ip6dst, &ipv6_usr_hdr->ip6dst, + sizeof(pkt->ip6dst)); + memcpy(&pmask->ip6dst, &ipv6_usr_mask->ip6dst, + sizeof(pmask->ip6dst)); + req->features |= BIT_ULL(NPC_DIP_IPV6); + } + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + if (!ipv6_addr_any((struct in6_addr *)ipv6_l4_mask->ip6src)) { + memcpy(&pkt->ip6src, &ipv6_l4_hdr->ip6src, + sizeof(pkt->ip6src)); + memcpy(&pmask->ip6src, &ipv6_l4_mask->ip6src, + sizeof(pmask->ip6src)); + req->features |= BIT_ULL(NPC_SIP_IPV6); + } + if (!ipv6_addr_any((struct in6_addr *)ipv6_l4_mask->ip6dst)) { + memcpy(&pkt->ip6dst, &ipv6_l4_hdr->ip6dst, + sizeof(pkt->ip6dst)); + memcpy(&pmask->ip6dst, &ipv6_l4_mask->ip6dst, + sizeof(pmask->ip6dst)); + req->features |= BIT_ULL(NPC_DIP_IPV6); + } + if (ipv6_l4_mask->psrc) { + memcpy(&pkt->sport, &ipv6_l4_hdr->psrc, + sizeof(pkt->sport)); + memcpy(&pmask->sport, &ipv6_l4_mask->psrc, + sizeof(pmask->sport)); + if (flow_type == UDP_V6_FLOW) + req->features |= BIT_ULL(NPC_SPORT_UDP); + else if (flow_type == TCP_V6_FLOW) + req->features |= BIT_ULL(NPC_SPORT_TCP); + else + req->features |= BIT_ULL(NPC_SPORT_SCTP); + } + if (ipv6_l4_mask->pdst) { + 
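+			/* Destination port match; the NPC key field is
+			 * protocol specific, hence the UDP/TCP/SCTP split
+			 * below.
+			 */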
memcpy(&pkt->dport, &ipv6_l4_hdr->pdst, + sizeof(pkt->dport)); + memcpy(&pmask->dport, &ipv6_l4_mask->pdst, + sizeof(pmask->dport)); + if (flow_type == UDP_V6_FLOW) + req->features |= BIT_ULL(NPC_DPORT_UDP); + else if (flow_type == TCP_V6_FLOW) + req->features |= BIT_ULL(NPC_DPORT_TCP); + else + req->features |= BIT_ULL(NPC_DPORT_SCTP); + } + break; + default: + break; + } +} + +int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req) +{ + struct ethhdr *eth_mask = &fsp->m_u.ether_spec; + struct ethhdr *eth_hdr = &fsp->h_u.ether_spec; + struct flow_msg *pmask = &req->mask; + struct flow_msg *pkt = &req->packet; + u32 flow_type; + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + switch (flow_type) { + /* bits not set in mask are don't care */ + case ETHER_FLOW: + if (!is_zero_ether_addr(eth_mask->h_source)) { + ether_addr_copy(pkt->smac, eth_hdr->h_source); + ether_addr_copy(pmask->smac, eth_mask->h_source); + req->features |= BIT_ULL(NPC_SMAC); + } + if (!is_zero_ether_addr(eth_mask->h_dest)) { + ether_addr_copy(pkt->dmac, eth_hdr->h_dest); + ether_addr_copy(pmask->dmac, eth_mask->h_dest); + req->features |= BIT_ULL(NPC_DMAC); + } + if (eth_mask->h_proto) { + memcpy(&pkt->etype, &eth_hdr->h_proto, + sizeof(pkt->etype)); + memcpy(&pmask->etype, &eth_mask->h_proto, + sizeof(pmask->etype)); + req->features |= BIT_ULL(NPC_ETYPE); + } + break; + case IP_USER_FLOW: + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + otx2_prepare_ipv4_flow(fsp, req, flow_type); + break; + case IPV6_USER_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + otx2_prepare_ipv6_flow(fsp, req, flow_type); + break; + default: + return -EOPNOTSUPP; + } + if (fsp->flow_type & FLOW_EXT) { + if (fsp->m_ext.vlan_etype) + return -EINVAL; + if (fsp->m_ext.vlan_tci) { + if (fsp->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)) + return -EINVAL; + if (be16_to_cpu(fsp->h_ext.vlan_tci) >= VLAN_N_VID) + return -EINVAL; + + memcpy(&pkt->vlan_tci, &fsp->h_ext.vlan_tci, + sizeof(pkt->vlan_tci)); + memcpy(&pmask->vlan_tci, &fsp->m_ext.vlan_tci, + sizeof(pmask->vlan_tci)); + req->features |= BIT_ULL(NPC_OUTER_VID); + } + + /* Not Drop/Direct to queue but use action in default entry */ + if (fsp->m_ext.data[1] && + fsp->h_ext.data[1] == cpu_to_be32(OTX2_DEFAULT_ACTION)) + req->op = NIX_RX_ACTION_DEFAULT; + } + + if (fsp->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fsp->m_ext.h_dest)) { + ether_addr_copy(pkt->dmac, fsp->h_ext.h_dest); + ether_addr_copy(pmask->dmac, fsp->m_ext.h_dest); + req->features |= BIT_ULL(NPC_DMAC); + } + + if (!req->features) + return -EOPNOTSUPP; + + return 0; +} + +static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) +{ + u64 ring_cookie = flow->flow_spec.ring_cookie; + struct npc_install_flow_req *req; + int err, vf = 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_install_flow(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + err = otx2_prepare_flow_request(&flow->flow_spec, req); + if (err) { + /* free the allocated msg above */ + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + mutex_unlock(&pfvf->mbox.lock); + return err; + } + + req->entry = flow->entry; + req->intf = NIX_INTF_RX; + req->set_cntr = 1; + req->channel = pfvf->hw.rx_chan_base; + if (ring_cookie == RX_CLS_FLOW_DISC) { + req->op = NIX_RX_ACTIONOP_DROP; + } else { + /* change to unicast only if action of default entry is not + * requested by user + */ + if (req->op != NIX_RX_ACTION_DEFAULT) +
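+			/* plain unicast steering to the RQ encoded in the
+			 * ring_cookie's "action" field
+			 */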
req->op = NIX_RX_ACTIONOP_UCAST; + req->index = ethtool_get_flow_spec_ring(ring_cookie); + vf = ethtool_get_flow_spec_ring_vf(ring_cookie); + if (vf > pci_num_vf(pfvf->pdev)) { + mutex_unlock(&pfvf->mbox.lock); + return -EINVAL; + } + } + + /* ethtool ring_cookie has (VF + 1) for VF */ + if (vf) { + req->vf = vf; + flow->is_vf = true; + flow->vf = vf; + } + + /* Send message to AF */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rx_flow_spec *fsp) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie); + struct otx2_flow *flow; + bool new = false; + int err; + + if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) + return -ENOMEM; + + if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + if (fsp->location >= flow_cfg->ntuple_max_flows) + return -EINVAL; + + flow = otx2_find_flow(pfvf, fsp->location); + if (!flow) { + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); + if (!flow) + return -ENOMEM; + flow->location = fsp->location; + flow->entry = flow_cfg->entry[flow_cfg->ntuple_offset + + flow->location]; + new = true; + } + /* struct copy */ + flow->flow_spec = *fsp; + + err = otx2_add_flow_msg(pfvf, flow); + if (err) { + if (new) + kfree(flow); + return err; + } + + /* add the new flow installed to list */ + if (new) { + otx2_add_flow_to_list(pfvf, flow); + flow_cfg->nr_flows++; + } + + return 0; +} + +static int otx2_remove_flow_msg(struct otx2_nic *pfvf, u16 entry, bool all) +{ + struct npc_delete_flow_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_delete_flow(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->entry = entry; + if (all) + req->all = 1; + + /* Send message to AF */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct otx2_flow *flow; + int err; + + if (location >= flow_cfg->ntuple_max_flows) + return -EINVAL; + + flow = otx2_find_flow(pfvf, location); + if (!flow) + return -ENOENT; + + err = otx2_remove_flow_msg(pfvf, flow->entry, false); + if (err) + return err; + + list_del(&flow->list); + kfree(flow); + flow_cfg->nr_flows--; + + return 0; +} + +int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_delete_flow_req *req; + struct otx2_flow *iter, *tmp; + int err; + + if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) + return 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_delete_flow(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->start = flow_cfg->entry[flow_cfg->ntuple_offset]; + req->end = flow_cfg->entry[flow_cfg->ntuple_offset + + flow_cfg->ntuple_max_flows - 1]; + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + + list_for_each_entry_safe(iter, tmp, &flow_cfg->flow_list, list) { + list_del(&iter->list); + kfree(iter); + flow_cfg->nr_flows--; + } + return err; +} + +int otx2_destroy_mcam_flows(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_mcam_free_entry_req *req; + struct otx2_flow *iter, *tmp; + int err; + + if (!(pfvf->flags & OTX2_FLAG_MCAM_ENTRIES_ALLOC)) + return 0; + + /* remove all flows */ + err = 
otx2_remove_flow_msg(pfvf, 0, true); + if (err) + return err; + + list_for_each_entry_safe(iter, tmp, &flow_cfg->flow_list, list) { + list_del(&iter->list); + kfree(iter); + flow_cfg->nr_flows--; + } + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->all = 1; + /* Send message to AF to free MCAM entries */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + return err; + } + + pfvf->flags &= ~OTX2_FLAG_MCAM_ENTRIES_ALLOC; + mutex_unlock(&pfvf->mbox.lock); + + return 0; +} + +int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_install_flow_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_install_flow(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->entry = flow_cfg->entry[flow_cfg->rx_vlan_offset]; + req->intf = NIX_INTF_RX; + ether_addr_copy(req->packet.dmac, pfvf->netdev->dev_addr); + eth_broadcast_addr((u8 *)&req->mask.dmac); + req->channel = pfvf->hw.rx_chan_base; + req->op = NIX_RX_ACTION_DEFAULT; + req->features = BIT_ULL(NPC_OUTER_VID) | BIT_ULL(NPC_DMAC); + req->vtag0_valid = true; + req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE0; + + /* Send message to AF */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +static int otx2_delete_rxvlan_offload_flow(struct otx2_nic *pfvf) +{ + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_delete_flow_req *req; + int err; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_delete_flow(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->entry = flow_cfg->entry[flow_cfg->rx_vlan_offset]; + /* Send message to AF */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable) +{ + struct nix_vtag_config *req; + struct mbox_msghdr *rsp_hdr; + int err; + + /* Don't have enough mcam entries */ + if (!(pf->flags & OTX2_FLAG_RX_VLAN_SUPPORT)) + return -ENOMEM; + + if (enable) { + err = otx2_install_rxvlan_offload_flow(pf); + if (err) + return err; + } else { + err = otx2_delete_rxvlan_offload_flow(pf); + if (err) + return err; + } + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_nix_vtag_cfg(&pf->mbox); + if (!req) { + mutex_unlock(&pf->mbox.lock); + return -ENOMEM; + } + + /* config strip, capture and size */ + req->vtag_size = VTAGSIZE_T4; + req->cfg_type = 1; /* rx vlan cfg */ + req->rx.vtag_type = NIX_AF_LFX_RX_VTAG_TYPE0; + req->rx.strip_vtag = enable; + req->rx.capture_vtag = enable; + + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) { + mutex_unlock(&pf->mbox.lock); + return err; + } + + rsp_hdr = otx2_mbox_get_rsp(&pf->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp_hdr)) { + mutex_unlock(&pf->mbox.lock); + return PTR_ERR(rsp_hdr); + } + + mutex_unlock(&pf->mbox.lock); + return rsp_hdr->rc; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 66f1a212f1f4..4c82f60f3cf3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1278,6 +1278,7 @@ static void otx2_free_sq_res(struct otx2_nic *pf) static int otx2_init_hw_resources(struct otx2_nic *pf) { + struct nix_lf_free_req
*free_req; struct mbox *mbox = &pf->mbox; struct otx2_hw *hw = &pf->hw; struct msg_req *req; @@ -1359,8 +1360,9 @@ err_free_rq_ptrs: otx2_aura_pool_free(pf); err_free_nix_lf: mutex_lock(&mbox->lock); - req = otx2_mbox_alloc_msg_nix_lf_free(mbox); - if (req) { + free_req = otx2_mbox_alloc_msg_nix_lf_free(mbox); + if (free_req) { + free_req->flags = NIX_LF_DISABLE_FLOWS; if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "%s failed to free nixlf\n", __func__); } @@ -1379,6 +1381,7 @@ exit: static void otx2_free_hw_resources(struct otx2_nic *pf) { struct otx2_qset *qset = &pf->qset; + struct nix_lf_free_req *free_req; struct mbox *mbox = &pf->mbox; struct otx2_cq_queue *cq; struct msg_req *req; @@ -1419,8 +1422,11 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) mutex_lock(&mbox->lock); /* Reset NIX LF */ - req = otx2_mbox_alloc_msg_nix_lf_free(mbox); - if (req) { + free_req = otx2_mbox_alloc_msg_nix_lf_free(mbox); + if (free_req) { + free_req->flags = NIX_LF_DISABLE_FLOWS; + if (!(pf->flags & OTX2_FLAG_PF_SHUTDOWN)) + free_req->flags |= NIX_LF_DONT_FREE_TX_VTAG; if (otx2_sync_mbox_msg(mbox)) dev_err(pf->dev, "%s failed to free nixlf\n", __func__); } @@ -1562,6 +1568,9 @@ int otx2_open(struct net_device *netdev) otx2_set_cints_affinity(pf); + if (pf->flags & OTX2_FLAG_RX_VLAN_SUPPORT) + otx2_enable_rxvlan(pf, true); + /* When reinitializing enable time stamping if it is enabled before */ if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED) { pf->flags &= ~OTX2_FLAG_TX_TSTAMP_ENABLED; @@ -1716,10 +1725,20 @@ static void otx2_do_set_rx_mode(struct work_struct *work) struct otx2_nic *pf = container_of(work, struct otx2_nic, rx_mode_work); struct net_device *netdev = pf->netdev; struct nix_rx_mode *req; + bool promisc = false; if (!(netdev->flags & IFF_UP)) return; + if ((netdev->flags & IFF_PROMISC) || + (netdev_uc_count(netdev) > OTX2_MAX_UNICAST_FLOWS)) { + promisc = true; + } + + /* Write unicast address to mcam entries or del from mcam */ + if (!promisc && netdev->priv_flags & IFF_UNICAST_FLT) + __dev_uc_sync(netdev, otx2_add_macfilter, otx2_del_macfilter); + mutex_lock(&pf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox); if (!req) { @@ -1729,8 +1748,7 @@ static void otx2_do_set_rx_mode(struct work_struct *work) req->mode = NIX_RX_MODE_UCAST; - /* We don't support MAC address filtering yet */ - if (netdev->flags & IFF_PROMISC) + if (promisc) req->mode |= NIX_RX_MODE_PROMISC; else if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST)) req->mode |= NIX_RX_MODE_ALLMULTI; @@ -1743,11 +1761,20 @@ static int otx2_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t changed = features ^ netdev->features; + bool ntuple = !!(features & NETIF_F_NTUPLE); struct otx2_nic *pf = netdev_priv(netdev); if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) return otx2_cgx_config_loopback(pf, features & NETIF_F_LOOPBACK); + + if ((changed & NETIF_F_HW_VLAN_CTAG_RX) && netif_running(netdev)) + return otx2_enable_rxvlan(pf, + features & NETIF_F_HW_VLAN_CTAG_RX); + + if ((changed & NETIF_F_NTUPLE) && !ntuple) + otx2_destroy_ntuple_flows(pf); + return 0; } @@ -1903,6 +1930,245 @@ static int otx2_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) } } +static int otx2_do_set_vf_mac(struct otx2_nic *pf, int vf, const u8 *mac) +{ + struct npc_install_flow_req *req; + int err; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_npc_install_flow(&pf->mbox); + if (!req) { + err = -ENOMEM; + goto out; + } + + ether_addr_copy(req->packet.dmac, mac); + 
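+	/* Exact-match DMAC rule for the VF: default_rule/append target
+	 * the VF's default MCAM rule so this match is appended to it,
+	 * and req->vf is 1-based, mirroring the (VF + 1) ring_cookie
+	 * convention used elsewhere in this driver.
+	 */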
eth_broadcast_addr((u8 *)&req->mask.dmac); + req->features = BIT_ULL(NPC_DMAC); + req->channel = pf->hw.rx_chan_base; + req->intf = NIX_INTF_RX; + req->default_rule = 1; + req->append = 1; + req->vf = vf + 1; + req->op = NIX_RX_ACTION_DEFAULT; + + err = otx2_sync_mbox_msg(&pf->mbox); +out: + mutex_unlock(&pf->mbox.lock); + return err; +} + +static int otx2_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct pci_dev *pdev = pf->pdev; + struct otx2_vf_config *config; + int ret; + + if (!netif_running(netdev)) + return -EAGAIN; + + if (vf >= pci_num_vf(pdev)) + return -EINVAL; + + if (!is_valid_ether_addr(mac)) + return -EINVAL; + + config = &pf->vf_configs[vf]; + ether_addr_copy(config->mac, mac); + + ret = otx2_do_set_vf_mac(pf, vf, mac); + if (ret == 0) + dev_info(&pdev->dev, "Reload VF driver to apply the changes\n"); + + return ret; +} + +static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos, + __be16 proto) +{ + struct otx2_flow_config *flow_cfg = pf->flow_cfg; + struct nix_vtag_config_rsp *vtag_rsp; + struct npc_delete_flow_req *del_req; + struct nix_vtag_config *vtag_req; + struct npc_install_flow_req *req; + struct otx2_vf_config *config; + int err = 0; + u32 idx; + + config = &pf->vf_configs[vf]; + + if (!vlan && !config->vlan) + goto out; + + mutex_lock(&pf->mbox.lock); + + /* free old tx vtag entry */ + if (config->vlan) { + vtag_req = otx2_mbox_alloc_msg_nix_vtag_cfg(&pf->mbox); + if (!vtag_req) { + err = -ENOMEM; + goto out; + } + vtag_req->cfg_type = 0; + vtag_req->tx.free_vtag0 = 1; + vtag_req->tx.vtag0_idx = config->tx_vtag_idx; + + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) + goto out; + } + + if (!vlan && config->vlan) { + /* rx */ + del_req = otx2_mbox_alloc_msg_npc_delete_flow(&pf->mbox); + if (!del_req) { + err = -ENOMEM; + goto out; + } + idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_RX_INDEX); + del_req->entry = + flow_cfg->entry[flow_cfg->vf_vlan_offset + idx]; + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) + goto out; + + /* tx */ + del_req = otx2_mbox_alloc_msg_npc_delete_flow(&pf->mbox); + if (!del_req) { + err = -ENOMEM; + goto out; + } + idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_TX_INDEX); + del_req->entry = + flow_cfg->entry[flow_cfg->vf_vlan_offset + idx]; + err = otx2_sync_mbox_msg(&pf->mbox); + + goto out; + } + + /* rx */ + req = otx2_mbox_alloc_msg_npc_install_flow(&pf->mbox); + if (!req) { + err = -ENOMEM; + goto out; + } + + idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_RX_INDEX); + req->entry = flow_cfg->entry[flow_cfg->vf_vlan_offset + idx]; + req->packet.vlan_tci = htons(vlan); + req->mask.vlan_tci = htons(VLAN_VID_MASK); + /* af fills the destination mac addr */ + eth_broadcast_addr((u8 *)&req->mask.dmac); + req->features = BIT_ULL(NPC_OUTER_VID) | BIT_ULL(NPC_DMAC); + req->channel = pf->hw.rx_chan_base; + req->intf = NIX_INTF_RX; + req->vf = vf + 1; + req->op = NIX_RX_ACTION_DEFAULT; + req->vtag0_valid = true; + req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE7; + req->set_cntr = 1; + + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) + goto out; + + /* tx */ + vtag_req = otx2_mbox_alloc_msg_nix_vtag_cfg(&pf->mbox); + if (!vtag_req) { + err = -ENOMEM; + goto out; + } + + /* configure tx vtag params */ + vtag_req->vtag_size = VTAGSIZE_T4; + vtag_req->cfg_type = 0; /* tx vlan cfg */ + vtag_req->tx.cfg_vtag0 = 1; + vtag_req->tx.vtag0 = (ntohs(proto) << 16) | vlan; + + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) + goto out; + + vtag_rsp = (struct 
nix_vtag_config_rsp *)otx2_mbox_get_rsp + (&pf->mbox.mbox, 0, &vtag_req->hdr); + if (IS_ERR(vtag_rsp)) { + err = PTR_ERR(vtag_rsp); + goto out; + } + config->tx_vtag_idx = vtag_rsp->vtag0_idx; + + req = otx2_mbox_alloc_msg_npc_install_flow(&pf->mbox); + if (!req) { + err = -ENOMEM; + goto out; + } + + eth_zero_addr((u8 *)&req->mask.dmac); + idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_TX_INDEX); + req->entry = flow_cfg->entry[flow_cfg->vf_vlan_offset + idx]; + req->features = BIT_ULL(NPC_DMAC); + req->channel = pf->hw.tx_chan_base; + req->intf = NIX_INTF_TX; + req->vf = vf + 1; + req->op = NIX_TX_ACTIONOP_UCAST_DEFAULT; + req->vtag0_def = vtag_rsp->vtag0_idx; + req->vtag0_op = VTAG_INSERT; + req->set_cntr = 1; + + err = otx2_sync_mbox_msg(&pf->mbox); +out: + config->vlan = vlan; + mutex_unlock(&pf->mbox.lock); + return err; +} + +static int otx2_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 proto) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct pci_dev *pdev = pf->pdev; + + if (!netif_running(netdev)) + return -EAGAIN; + + if (vf >= pci_num_vf(pdev)) + return -EINVAL; + + /* qos is currently unsupported */ + if (vlan >= VLAN_N_VID || qos) + return -EINVAL; + + if (proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + if (!(pf->flags & OTX2_FLAG_VF_VLAN_SUPPORT)) + return -EOPNOTSUPP; + + return otx2_do_set_vf_vlan(pf, vf, vlan, qos, proto); +} + +static int otx2_get_vf_config(struct net_device *netdev, int vf, + struct ifla_vf_info *ivi) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct pci_dev *pdev = pf->pdev; + struct otx2_vf_config *config; + + if (!netif_running(netdev)) + return -EAGAIN; + + if (vf >= pci_num_vf(pdev)) + return -EINVAL; + + config = &pf->vf_configs[vf]; + ivi->vf = vf; + ether_addr_copy(ivi->mac, config->mac); + ivi->vlan = config->vlan; + + return 0; +} + static const struct net_device_ops otx2_netdev_ops = { .ndo_open = otx2_open, .ndo_stop = otx2_stop, @@ -1914,6 +2180,9 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_tx_timeout = otx2_tx_timeout, .ndo_get_stats64 = otx2_get_stats64, .ndo_do_ioctl = otx2_ioctl, + .ndo_set_vf_mac = otx2_set_vf_mac, + .ndo_set_vf_vlan = otx2_set_vf_vlan, + .ndo_get_vf_config = otx2_get_vf_config, }; static int otx2_wq_init(struct otx2_nic *pf) @@ -2110,6 +2379,25 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL; + err = otx2_mcam_flow_init(pf); + if (err) + goto err_ptp_destroy; + + if (pf->flags & OTX2_FLAG_NTUPLE_SUPPORT) + netdev->hw_features |= NETIF_F_NTUPLE; + + if (pf->flags & OTX2_FLAG_UCAST_FLTR_SUPPORT) + netdev->priv_flags |= IFF_UNICAST_FLT; + + /* Support TSO on tag interface */ + netdev->vlan_features |= netdev->features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + if (pf->flags & OTX2_FLAG_RX_VLAN_SUPPORT) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX; + netdev->features |= netdev->hw_features; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT; @@ -2122,7 +2410,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_ptp_destroy; + goto err_del_mcam_entries; } err = otx2_wq_init(pf); @@ -2142,6 +2430,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_unreg_netdev: unregister_netdev(netdev); 
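+	/* unwind in reverse order of setup: the MCAM entries allocated
+	 * by otx2_mcam_flow_init() are freed before PTP teardown below
+	 */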
+err_del_mcam_entries: + otx2_mcam_flow_del(pf); err_ptp_destroy: otx2_ptp_destroy(pf); err_detach_rsrc: @@ -2285,6 +2575,8 @@ static void otx2_remove(struct pci_dev *pdev) pf = netdev_priv(netdev); + pf->flags |= OTX2_FLAG_PF_SHUTDOWN; + if (pf->flags & OTX2_FLAG_TX_TSTAMP_ENABLED) otx2_config_hw_tx_tstamp(pf, false); if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) @@ -2300,6 +2592,7 @@ static void otx2_remove(struct pci_dev *pdev) destroy_workqueue(pf->otx2_wq); otx2_ptp_destroy(pf); + otx2_mcam_flow_del(pf); otx2_detach_resources(&pf->mbox); otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index d5d7a2f37493..d0e25414f1a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -556,6 +556,19 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, ext->tstmp = 1; } +#define OTX2_VLAN_PTR_OFFSET (ETH_HLEN - ETH_TLEN) + if (skb_vlan_tag_present(skb)) { + if (skb->vlan_proto == htons(ETH_P_8021Q)) { + ext->vlan1_ins_ena = 1; + ext->vlan1_ins_ptr = OTX2_VLAN_PTR_OFFSET; + ext->vlan1_ins_tci = skb_vlan_tag_get(skb); + } else if (skb->vlan_proto == htons(ETH_P_8021AD)) { + ext->vlan0_ins_ena = 1; + ext->vlan0_ins_ptr = OTX2_VLAN_PTR_OFFSET; + ext->vlan0_ins_tci = skb_vlan_tag_get(skb); + } + } + *offset += sizeof(*ext); } @@ -871,6 +884,9 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, } if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) { + /* Insert vlan tag before giving pkt to tso */ + if (skb_vlan_tag_present(skb)) + skb = __vlan_hwaccel_push_inside(skb); otx2_sq_append_tso(pfvf, sq, skb, qidx); return true; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 67fabf265fe6..d3e4cfd244e2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -558,6 +558,11 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4; netdev->features = netdev->hw_features; + /* Support TSO on tag interface */ + netdev->vlan_features |= netdev->features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + netdev->features |= netdev->hw_features; netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 25981a7a43b5..ebe1406c6e64 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4900,7 +4900,7 @@ static const char *sky2_name(u8 chipid, char *buf, int sz) }; if (chipid >= CHIP_ID_YUKON_XL && chipid <= CHIP_ID_YUKON_OP_2) - strncpy(buf, name[chipid - CHIP_ID_YUKON_XL], sz); + snprintf(buf, sz, "%s", name[chipid - CHIP_ID_YUKON_XL]); else snprintf(buf, sz, "(chip %#x)", chipid); return buf; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 502d1b97855c..b0f79a5151cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -684,7 +684,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp_prog = rcu_dereference(ring->xdp_prog); xdp.rxq = &ring->xdp_rxq; xdp.frame_sz = 
priv->frag_info[0].frag_stride; - doorbell_pending = 0; + doorbell_pending = false; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index 582997577a04..954b86faac29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -135,7 +135,7 @@ int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, * @dev: mlx4_dev. * @port: Physical port number. * @vport: Vport id. - * @out_param: Array of mlx4_vport_qos_param which holds the requested values. + * @in_param: Array of mlx4_vport_qos_param which holds the requested values. * * Returns 0 on success or a negative mlx4_core errno code. **/ diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1187ef1375e2..394f43add85c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -300,7 +300,7 @@ static const char *resource_str(enum mlx4_resource rt) case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; - }; + } } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 2d477f9a8cb7..83a67ca43a41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -81,7 +81,7 @@ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/t mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ steering/dr_matcher.o steering/dr_rule.o \ - steering/dr_icm_pool.o \ + steering/dr_icm_pool.o steering/dr_buddy.o \ steering/dr_ste.o steering/dr_send.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 38e4f19d69f8..43271a3856ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -2,6 +2,8 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "en/params.h" +#include "en/txrx.h" +#include "en_accel/tls_rxtx.h" static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -152,3 +154,35 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0; } + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + u16 stop_room; + + stop_room = mlx5e_tls_get_stop_room(mdev, params); + stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (is_mpwqe) + /* A MPWQE can take up to the maximum-sized WQE + all the normal + * stop room can be taken if a new packet breaks the active + * MPWQE session and allocates its WQEs right away. 
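+	 * The budget therefore must cover both the in-flight MPWQE and
+	 * the new packet's WQEs, hence the second maximum-sized WQE
+	 * reservation below.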
+ */ + stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + + return stop_room; +} + +int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params) +{ + size_t sq_size = 1 << params->log_sq_size; + u16 stop_room; + + stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); + if (stop_room >= sq_size) { + netdev_err(priv->netdev, "Stop room %hu is bigger than the SQ size %zu\n", + stop_room, sq_size); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index a87273e801b2..187007ad3349 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -30,6 +30,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; bool is_mpw; + u16 stop_room; }; struct mlx5e_channel_param { @@ -124,4 +125,7 @@ void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_sq_param *param); +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params); + #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index ae90d533a350..2e3e78b0f333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -366,7 +366,8 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, u32 *xsk_frames, - bool recycle) + bool recycle, + struct xdp_frame_bulk *bq) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; u16 i; @@ -379,7 +380,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, /* XDP_TX from the XSK RQ and XDP_REDIRECT */ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, xdpi.frame.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.frame.xdpf); + xdp_return_frame_bulk(xdpi.frame.xdpf, bq); break; case MLX5E_XDP_XMIT_MODE_PAGE: /* XDP_TX from the regular RQ */ @@ -397,12 +398,15 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { + struct xdp_frame_bulk bq; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; u32 xsk_frames = 0; u16 sqcc; int i; + xdp_frame_bulk_init(&bq); + sq = container_of(cq, struct mlx5e_xdpsq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) @@ -434,7 +438,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq); } while (!last_wqe); if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { @@ -447,6 +451,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) } } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + xdp_flush_frame_bulk(&bq); + if (xsk_frames) xsk_tx_completed(sq->xsk_pool, xsk_frames); @@ -463,8 +469,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + struct xdp_frame_bulk bq; u32 xsk_frames = 0; + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -474,9 +485,12 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, 
&xsk_frames, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq); } + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); + if (xsk_frames) xsk_tx_completed(sq->xsk_pool, xsk_frames); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index b140e13fdcc8..d16def68ecff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -13,20 +13,20 @@ struct mlx5e_dump_wqe { (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) static u8 -mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags, +mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags, unsigned int sync_len) { /* Given the MTU and sync_len, calculates an upper bound for the * number of DUMP WQEs needed for the TX resync of a record. */ - return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu); + return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu)); } -u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq) +u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params) { u16 num_dumps, stop_room = 0; - num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 7521c9be735b..ee04e916fa21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -14,7 +14,7 @@ struct mlx5e_accel_tx_tls_state { u32 tls_tisn; }; -u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq); +u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params); bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, struct sk_buff *skb, int datalen, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 6982b193ee8a..f51c04284e4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -385,15 +385,13 @@ void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; } -u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = sq->channel->mdev; - if (!mlx5_accel_is_tls_device(mdev)) return 0; if (mlx5_accel_is_ktls_device(mdev)) - return mlx5e_ktls_get_stop_room(sq); + return mlx5e_ktls_get_stop_room(params); /* FPGA */ /* Resync SKB. 
*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 5f162ad2ee8f..9923132c9440 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -43,7 +43,7 @@ #include "en.h" #include "en/txrx.h" -u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq); +u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); @@ -71,7 +71,7 @@ mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; static inline void mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {} -static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d25a56ec6876..42e61dc28ead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/params.h" #include "en/xsk/pool.h" #include "lib/clock.h" @@ -369,6 +370,10 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size; + err = mlx5e_validate_params(priv, &new_channels.params); + if (err) + goto unlock; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ebce97921e03..527c5f12c5af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1121,28 +1121,6 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) -{ - int sq_size = 1 << log_sq_size; - - sq->stop_room = mlx5e_tls_get_stop_room(sq); - sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) - /* A MPWQE can take up to the maximum-sized WQE + all the normal - * stop room can be taken if a new packet breaks the active - * MPWQE session and allocates its WQEs right away. 
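 * Illustrative consequence of the doubled budget (sizes assumed): with
 * MPWQE enabled and TLS off, stop_room amounts to roughly two
 * maximum-sized WQEs' worth of WQEBBs, so a tiny ring (small
 * log_sq_size) can fail the stop_room < sq_size check. That check is
 * exactly what moves into mlx5e_validate_params() above, so ethtool -G
 * and MTU changes are now rejected up front instead of tripping this
 * WARN_ON at SQ creation time.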
- */ - sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - - if (WARN_ON(sq->stop_room >= sq_size)) { - netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", - sq->stop_room, sq_size); - return -ENOSPC; - } - - return 0; -} - static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, @@ -1176,9 +1154,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_TLS, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); - err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); - if (err) - return err; + sq->stop_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -2225,6 +2201,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); } @@ -3999,6 +3976,9 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv, &new_channels.params); + if (err) + goto out; if (params->xdp_prog && !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8ebfe782f95e..4ea5d6ddf56a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -189,19 +189,21 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) return count_eqe; } -static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags) +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) __acquires(&eq->lock) { - if (in_irq()) + if (!recovery) spin_lock(&eq->lock); else spin_lock_irqsave(&eq->lock, *flags); } -static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags) +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) __releases(&eq->lock) { - if (in_irq()) + if (!recovery) spin_unlock(&eq->lock); else spin_unlock_irqrestore(&eq->lock, *flags); @@ -223,11 +225,13 @@ static int mlx5_eq_async_int(struct notifier_block *nb, struct mlx5_eqe *eqe; unsigned long flags; int num_eqes = 0; + bool recovery; dev = eq->dev; eqt = dev->priv.eq_table; - mlx5_eq_async_int_lock(eq_async, &flags); + recovery = action == ASYNC_EQ_RECOVER; + mlx5_eq_async_int_lock(eq_async, recovery, &flags); eqe = next_eqe_sw(eq); if (!eqe) @@ -249,9 +253,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb, out: eq_update_ci(eq, 1); - mlx5_eq_async_int_unlock(eq_async, &flags); + mlx5_eq_async_int_unlock(eq_async, recovery, &flags); - return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0; + return unlikely(recovery) ? 
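/* Locking contract for the async EQ, sketched:
 *
 *   recovery == false: invoked from the EQ interrupt handler, where
 *   interrupts are already disabled, so a plain spin_lock() suffices.
 *
 *   recovery == true (ASYNC_EQ_RECOVER): mlx5_cmd_eq_recover() polls
 *   the EQ outside the interrupt handler, so spin_lock_irqsave() is
 *   required to keep the EQ interrupt from deadlocking against it.
 *
 * Keying the choice off the explicit action, rather than in_irq(),
 * encodes the caller's intent directly instead of inferring context
 * via in_irq(), whose use in drivers is being phased out.
 */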
num_eqes : 0; } void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h index 656f96be6e20..89ef592656c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -47,11 +47,12 @@ /** * enum mlx5_fpga_access_type - Enumerated the different methods possible for * accessing the device memory address space + * + * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus + * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method */ enum mlx5_fpga_access_type { - /** Use the slow CX-FPGA I2C bus */ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, - /** Use the fastest available method */ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, }; @@ -113,6 +114,7 @@ struct mlx5_fpga_conn_attr { * subsequent receives. */ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + /** @cb_arg: A context to be passed to recv_cb callback */ void *cb_arg; }; @@ -145,7 +147,7 @@ void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); /** * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet - * @fdev: An FPGA SBU connection + * @conn: An FPGA SBU connection * @buf: The packet buffer * * Queues a packet for transmission over an FPGA SBU connection. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c new file mode 100644 index 000000000000..7df11a019df9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. + */ + +#include "dr_types.h" + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order) +{ + int i; + + buddy->max_order = max_order; + + INIT_LIST_HEAD(&buddy->list_node); + INIT_LIST_HEAD(&buddy->used_list); + INIT_LIST_HEAD(&buddy->hot_list); + + buddy->bitmap = kcalloc(buddy->max_order + 1, + sizeof(*buddy->bitmap), + GFP_KERNEL); + buddy->num_free = kcalloc(buddy->max_order + 1, + sizeof(*buddy->num_free), + GFP_KERNEL); + + if (!buddy->bitmap || !buddy->num_free) + goto err_free_all; + + /* Allocating max_order bitmaps, one for each order */ + + for (i = 0; i <= buddy->max_order; ++i) { + unsigned int size = 1 << (buddy->max_order - i); + + buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL); + if (!buddy->bitmap[i]) + goto err_out_free_each_bit_per_order; + } + + /* In the beginning, we have only one order that is available for + * use (the biggest one), so mark the first bit in both bitmaps. 
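 * Worked example (illustrative): with max_order = 2 the allocator
 * tracks three orders, sized by the loop above,
 *
 *   bitmap[0]: 1 << 2 = 4 bits  (order-0 segments)
 *   bitmap[1]: 1 << 1 = 2 bits
 *   bitmap[2]: 1 << 0 = 1 bit
 *
 * and starts with only bitmap[2] bit 0 set and num_free[2] = 1: a
 * single free block spanning the whole managed memory.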
+ */ + + bitmap_set(buddy->bitmap[buddy->max_order], 0, 1); + + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free_each_bit_per_order: + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + +err_free_all: + kfree(buddy->num_free); + kfree(buddy->bitmap); + return -ENOMEM; +} + +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy) +{ + int i; + + list_del(&buddy->list_node); + + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + kfree(buddy->bitmap); +} + +static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int start_order, + unsigned int *segment, + unsigned int *order) +{ + unsigned int seg, order_iter, m; + + for (order_iter = start_order; + order_iter <= buddy->max_order; ++order_iter) { + if (!buddy->num_free[order_iter]) + continue; + + m = 1 << (buddy->max_order - order_iter); + seg = find_first_bit(buddy->bitmap[order_iter], m); + + if (WARN(seg >= m, + "ICM Buddy: failed finding free mem for order %d\n", + order_iter)) + return -ENOMEM; + + break; + } + + if (order_iter > buddy->max_order) + return -ENOMEM; + + *segment = seg; + *order = order_iter; + return 0; +} + +/** + * mlx5dr_buddy_alloc_mem() - Update second level bitmap. + * @buddy: Buddy to update. + * @order: Order of the buddy to update. + * @segment: Segment number. + * + * This function finds the first area of the ICM memory managed by this buddy. + * It uses the data structures of the buddy system in order to find the first + * area of free place, starting from the current order till the maximum order + * in the system. + * + * Return: 0 when segment is set, non-zero error status otherwise. + * + * The function returns the location (segment) in the whole buddy ICM memory + * area - the index of the memory segment that is available for use. + */ +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment) +{ + unsigned int seg, order_iter; + int err; + + err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter); + if (err) + return err; + + bitmap_clear(buddy->bitmap[order_iter], seg, 1); + --buddy->num_free[order_iter]; + + /* If we found free memory in some order that is bigger than the + * required order, we need to split every order between the required + * order and the order that we found into two parts, and mark accordingly. + */ + while (order_iter > order) { + --order_iter; + seg <<= 1; + bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1); + ++buddy->num_free[order_iter]; + } + + seg <<= order; + *segment = seg; + + return 0; +} + +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order) +{ + seg >>= order; + + /* Whenever a segment is free, + * the mem is added to the buddy that gave it. 
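 * Worked example (illustrative): freeing order-0 segment 5 when its
 * buddy (5 ^ 1 == 4) is already free clears bit 4 at order 0, shifts
 * seg down to 2, and re-tests at order 1; the merge keeps walking up
 * until a buddy is found busy, and only then is the merged block
 * marked free at the order where the walk stopped.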
+ */ + while (test_bit(seg ^ 1, buddy->bitmap[order])) { + bitmap_clear(buddy->bitmap[order], seg ^ 1, 1); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + bitmap_set(buddy->bitmap[order], seg, 1); + + ++buddy->num_free[order]; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 6bd34b293007..ebc879052e42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -93,12 +93,12 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); - if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); } - if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) { + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) { caps->flex_parser_id_icmpv6_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); caps->flex_parser_id_icmpv6_dw1 = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index cc33515b9aba..66c24767e3b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,50 +4,16 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024) - -struct mlx5dr_icm_pool; - -struct mlx5dr_icm_bucket { - struct mlx5dr_icm_pool *pool; - - /* Chunks that aren't visible to HW not directly and not in cache */ - struct list_head free_list; - unsigned int free_list_count; - - /* Used chunks, HW may be accessing this memory */ - struct list_head used_list; - unsigned int used_list_count; - - /* HW may be accessing this memory but at some future, - * undetermined time, it might cease to do so. Before deciding to call - * sync_ste, this list is moved to sync_list - */ - struct list_head hot_list; - unsigned int hot_list_count; - - /* Pending sync list, entries from the hot list are moved to this list. 
- * sync_ste is executed and then sync_list is concatenated to the free list - */ - struct list_head sync_list; - unsigned int sync_list_count; - - u32 total_chunks; - u32 num_of_entries; - u32 entry_size; - /* protect the ICM bucket */ - struct mutex mutex; -}; +#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024) struct mlx5dr_icm_pool { - struct mlx5dr_icm_bucket *buckets; enum mlx5dr_icm_type icm_type; enum mlx5dr_icm_chunk_size max_log_chunk_sz; - enum mlx5dr_icm_chunk_size num_of_buckets; - struct list_head icm_mr_list; - /* protect the ICM MR list */ - struct mutex mr_mutex; struct mlx5dr_domain *dmn; + /* memory management */ + struct mutex mutex; /* protect the ICM pool and ICM buddy */ + struct list_head buddy_mem_list; + u64 hot_memory_size; }; struct mlx5dr_icm_dm { @@ -58,13 +24,11 @@ struct mlx5dr_icm_dm { }; struct mlx5dr_icm_mr { - struct mlx5dr_icm_pool *pool; struct mlx5_core_mkey mkey; struct mlx5dr_icm_dm dm; - size_t used_length; + struct mlx5dr_domain *dmn; size_t length; u64 icm_start_addr; - struct list_head mr_list; }; static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, @@ -107,8 +71,7 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) if (!icm_mr) return NULL; - icm_mr->pool = pool; - INIT_LIST_HEAD(&icm_mr->mr_list); + icm_mr->dmn = pool->dmn; icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, pool->icm_type); @@ -150,8 +113,6 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) goto free_mkey; } - list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list); - return icm_mr; free_mkey: @@ -166,10 +127,9 @@ free_icm_mr: static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) { - struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev; + struct mlx5_core_dev *mdev = icm_mr->dmn->mdev; struct mlx5dr_icm_dm *dm = &icm_mr->dm; - list_del(&icm_mr->mr_list); mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, dm->addr, dm->obj_id); @@ -178,19 +138,17 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; - - chunk->ste_arr = kvzalloc(bucket->num_of_entries * + chunk->ste_arr = kvzalloc(chunk->num_of_entries * sizeof(chunk->ste_arr[0]), GFP_KERNEL); if (!chunk->ste_arr) return -ENOMEM; - chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries * + chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries * DR_STE_SIZE_REDUCED, GFP_KERNEL); if (!chunk->hw_ste_arr) goto out_free_ste_arr; - chunk->miss_list = kvmalloc(bucket->num_of_entries * + chunk->miss_list = kvmalloc(chunk->num_of_entries * sizeof(chunk->miss_list[0]), GFP_KERNEL); if (!chunk->miss_list) goto out_free_hw_ste_arr; @@ -204,72 +162,6 @@ out_free_ste_arr: return -ENOMEM; } -static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket) -{ - size_t mr_free_size, mr_req_size, mr_row_size; - struct mlx5dr_icm_pool *pool = bucket->pool; - struct mlx5dr_icm_mr *icm_mr = NULL; - struct mlx5dr_icm_chunk *chunk; - int i, err = 0; - - mr_req_size = bucket->num_of_entries * bucket->entry_size; - mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type); - mutex_lock(&pool->mr_mutex); - if (!list_empty(&pool->icm_mr_list)) { - icm_mr = list_last_entry(&pool->icm_mr_list, - struct mlx5dr_icm_mr, mr_list); - - if (icm_mr) - mr_free_size = icm_mr->dm.length - icm_mr->used_length; - } - - if (!icm_mr || mr_free_size < mr_row_size) { - icm_mr = dr_icm_pool_mr_create(pool); - if 
(!icm_mr) { - err = -ENOMEM; - goto out_err; - } - } - - /* Create memory aligned chunks */ - for (i = 0; i < mr_row_size / mr_req_size; i++) { - chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); - if (!chunk) { - err = -ENOMEM; - goto out_err; - } - - chunk->bucket = bucket; - chunk->rkey = icm_mr->mkey.key; - /* mr start addr is zero based */ - chunk->mr_addr = icm_mr->used_length; - chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length; - icm_mr->used_length += mr_req_size; - chunk->num_of_entries = bucket->num_of_entries; - chunk->byte_size = chunk->num_of_entries * bucket->entry_size; - - if (pool->icm_type == DR_ICM_TYPE_STE) { - err = dr_icm_chunk_ste_init(chunk); - if (err) - goto out_free_chunk; - } - - INIT_LIST_HEAD(&chunk->chunk_list); - list_add(&chunk->chunk_list, &bucket->free_list); - bucket->free_list_count++; - bucket->total_chunks++; - } - mutex_unlock(&pool->mr_mutex); - return 0; - -out_free_chunk: - kvfree(chunk); -out_err: - mutex_unlock(&pool->mr_mutex); - return err; -} - static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { kvfree(chunk->miss_list); @@ -277,166 +169,199 @@ static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) kvfree(chunk->ste_arr); } -static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk) +static enum mlx5dr_icm_type +get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->pool->icm_type; +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, + struct mlx5dr_icm_buddy_mem *buddy) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; + enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk); + buddy->used_memory -= chunk->byte_size; list_del(&chunk->chunk_list); - bucket->total_chunks--; - if (bucket->pool->icm_type == DR_ICM_TYPE_STE) + if (icm_type == DR_ICM_TYPE_STE) dr_icm_chunk_ste_cleanup(chunk); kvfree(chunk); } -static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *bucket, - enum mlx5dr_icm_chunk_size chunk_size) +static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) { - if (pool->icm_type == DR_ICM_TYPE_STE) - bucket->entry_size = DR_STE_SIZE; - else - bucket->entry_size = DR_MODIFY_ACTION_SIZE; - - bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); - bucket->pool = pool; - mutex_init(&bucket->mutex); - INIT_LIST_HEAD(&bucket->free_list); - INIT_LIST_HEAD(&bucket->used_list); - INIT_LIST_HEAD(&bucket->hot_list); - INIT_LIST_HEAD(&bucket->sync_list); + struct mlx5dr_icm_buddy_mem *buddy; + struct mlx5dr_icm_mr *icm_mr; + + icm_mr = dr_icm_pool_mr_create(pool); + if (!icm_mr) + return -ENOMEM; + + buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL); + if (!buddy) + goto free_mr; + + if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz)) + goto err_free_buddy; + + buddy->icm_mr = icm_mr; + buddy->pool = pool; + + /* add it to the -start- of the list in order to search in it first */ + list_add(&buddy->list_node, &pool->buddy_mem_list); + + return 0; + +err_free_buddy: + kvfree(buddy); +free_mr: + dr_icm_pool_mr_destroy(icm_mr); + return -ENOMEM; } -static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket) +static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) { struct mlx5dr_icm_chunk *chunk, *next; - mutex_destroy(&bucket->mutex); - list_splice_tail_init(&bucket->sync_list, &bucket->free_list); - list_splice_tail_init(&bucket->hot_list, &bucket->free_list); + list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list) + dr_icm_chunk_destroy(chunk, 
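/* Teardown order in dr_icm_buddy_destroy(), for reference: first the
 * hot-list chunks (already released by their users and merely waiting
 * for a sync), then any still-used chunks, then the backing ICM MR,
 * and finally the buddy bookkeeping itself -- the reverse of the setup
 * in dr_icm_buddy_create() above.
 */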
buddy); + + list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list) + dr_icm_chunk_destroy(chunk, buddy); - list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list) - dr_icm_chunk_destroy(chunk); + dr_icm_pool_mr_destroy(buddy->icm_mr); - WARN_ON(bucket->total_chunks != 0); + mlx5dr_buddy_cleanup(buddy); - /* Cleanup of unreturned chunks */ - list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list) - dr_icm_chunk_destroy(chunk); + kvfree(buddy); } -static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool) +static struct mlx5dr_icm_chunk * +dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem *buddy_mem_pool, + unsigned int seg) { - u64 hot_size = 0; - int chunk_order; + struct mlx5dr_icm_chunk *chunk; + int offset; - for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++) - hot_size += pool->buckets[chunk_order].hot_list_count * - mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type); + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + return NULL; - return hot_size; -} + offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; + + chunk->rkey = buddy_mem_pool->icm_mr->mkey.key; + chunk->mr_addr = offset; + chunk->icm_addr = + (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset; + chunk->num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + chunk->byte_size = + mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); + chunk->seg = seg; + + if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) { + mlx5dr_err(pool->dmn, + "Failed to init ste arrays (order: %d)\n", + chunk_size); + goto out_free_chunk; + } -static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *bucket) -{ - u64 bytes_for_sync; + buddy_mem_pool->used_memory += chunk->byte_size; + chunk->buddy_mem = buddy_mem_pool; + INIT_LIST_HEAD(&chunk->chunk_list); - bytes_for_sync = dr_icm_hot_mem_size(pool); - if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count) - return false; + /* chunk now is part of the used_list */ + list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); - return true; -} + return chunk; -static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket) -{ - list_splice_tail_init(&bucket->hot_list, &bucket->sync_list); - bucket->sync_list_count += bucket->hot_list_count; - bucket->hot_list_count = 0; +out_free_chunk: + kvfree(chunk); + return NULL; } -static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket) +static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - list_splice_tail_init(&bucket->sync_list, &bucket->free_list); - bucket->free_list_count += bucket->sync_list_count; - bucket->sync_list_count = 0; -} + if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL) + return true; -static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket) -{ - list_splice_tail_init(&bucket->sync_list, &bucket->hot_list); - bucket->hot_list_count += bucket->sync_list_count; - bucket->sync_list_count = 0; + return false; } -static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) +static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) { - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - 
dr_icm_chill_bucket_start(bucket); - continue; - } + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + int err; - /* Freeing the mutex is done at the end of that process, after - * sync_ste was executed at dr_icm_chill_buckets_end func. - */ - if (mutex_trylock(&bucket->mutex)) { - dr_icm_chill_bucket_start(bucket); - buckets[i] = true; - } + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err); + return err; } -} -static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) -{ - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - dr_icm_chill_bucket_end(bucket); - continue; - } + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) { + struct mlx5dr_icm_chunk *chunk, *tmp_chunk; - if (!buckets[i]) - continue; + list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) { + mlx5dr_buddy_free_mem(buddy, chunk->seg, + ilog2(chunk->num_of_entries)); + pool->hot_memory_size -= chunk->byte_size; + dr_icm_chunk_destroy(chunk, buddy); + } - dr_icm_chill_bucket_end(bucket); - mutex_unlock(&bucket->mutex); + if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_destroy(buddy); } + + return 0; } -static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) +static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem **buddy, + unsigned int *seg) { - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - dr_icm_chill_bucket_abort(bucket); - continue; - } + struct mlx5dr_icm_buddy_mem *buddy_mem_pool; + bool new_mem = false; + int err; - if (!buckets[i]) - continue; +alloc_buddy_mem: + /* find the next free place from the buddy list */ + list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) { + err = mlx5dr_buddy_alloc_mem(buddy_mem_pool, + chunk_size, seg); + if (!err) + goto found; + + if (WARN_ON(new_mem)) { + /* We have new memory pool, first in the list */ + mlx5dr_err(pool->dmn, + "No memory for order: %d\n", + chunk_size); + goto out; + } + } - dr_icm_chill_bucket_abort(bucket); - mutex_unlock(&bucket->mutex); + /* no more available allocators in that pool, create new */ + err = dr_icm_buddy_create(pool); + if (err) { + mlx5dr_err(pool->dmn, + "Failed creating buddy for order %d\n", + chunk_size); + goto out; } + + /* mark we have new memory, first in list */ + new_mem = true; + goto alloc_buddy_mem; + +found: + *buddy = buddy_mem_pool; +out: + return err; } /* Allocate an ICM chunk, each chunk holds a piece of ICM memory and @@ -446,68 +371,48 @@ struct mlx5dr_icm_chunk * mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, enum mlx5dr_icm_chunk_size chunk_size) { - struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */ - bool buckets[DR_CHUNK_SIZE_MAX] = {}; - struct mlx5dr_icm_bucket *bucket; - int err; + struct mlx5dr_icm_chunk *chunk = NULL; + struct mlx5dr_icm_buddy_mem *buddy; + unsigned int seg; + int ret; if (chunk_size > pool->max_log_chunk_sz) return NULL; - bucket = &pool->buckets[chunk_size]; - - mutex_lock(&bucket->mutex); - - /* Take chunk from pool if available, otherwise allocate new chunks */ - if 
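/* Allocation flow of dr_icm_handle_buddies_get_mem() above, sketched:
 *
 *	alloc_buddy_mem:
 *		list_for_each_entry(buddy, &pool->buddy_mem_list, ...) {
 *			if (!mlx5dr_buddy_alloc_mem(buddy, size, &seg))
 *				goto found;
 *			if (WARN_ON(new_mem))
 *				goto out;	// fresh buddy failed: bug
 *		}
 *		dr_icm_buddy_create(pool);	// inserted at the list head
 *		new_mem = true;
 *		goto alloc_buddy_mem;
 *
 * The WARN_ON encodes the invariant that a freshly created buddy of
 * pool->max_log_chunk_sz must be able to satisfy any chunk_size the
 * pool accepts (mlx5dr_icm_alloc_chunk() rejects larger requests up
 * front).
 */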
(list_empty(&bucket->free_list)) { - if (dr_icm_reuse_hot_entries(pool, bucket)) { - dr_icm_chill_buckets_start(pool, bucket, buckets); - err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); - if (err) { - dr_icm_chill_buckets_abort(pool, bucket, buckets); - mlx5dr_err(pool->dmn, "Sync_steering failed\n"); - chunk = NULL; - goto out; - } - dr_icm_chill_buckets_end(pool, bucket, buckets); - } else { - dr_icm_chunks_create(bucket); - } - } + mutex_lock(&pool->mutex); + /* find mem, get back the relevant buddy pool and seg in that mem */ + ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg); + if (ret) + goto out; - if (!list_empty(&bucket->free_list)) { - chunk = list_last_entry(&bucket->free_list, - struct mlx5dr_icm_chunk, - chunk_list); - if (chunk) { - list_del_init(&chunk->chunk_list); - list_add_tail(&chunk->chunk_list, &bucket->used_list); - bucket->free_list_count--; - bucket->used_list_count++; - } - } + chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg); + if (!chunk) + goto out_err; + + goto out; + +out_err: + mlx5dr_buddy_free_mem(buddy, seg, chunk_size); out: - mutex_unlock(&bucket->mutex); + mutex_unlock(&pool->mutex); return chunk; } void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + struct mlx5dr_icm_pool *pool = buddy->pool; - if (bucket->pool->icm_type == DR_ICM_TYPE_STE) { - memset(chunk->ste_arr, 0, - bucket->num_of_entries * sizeof(chunk->ste_arr[0])); - memset(chunk->hw_ste_arr, 0, - bucket->num_of_entries * DR_STE_SIZE_REDUCED); - } + /* move the memory to the waiting list AKA "hot" */ + mutex_lock(&pool->mutex); + list_move_tail(&chunk->chunk_list, &buddy->hot_list); + pool->hot_memory_size += chunk->byte_size; + + /* Check if we have chunks that are waiting for sync-ste */ + if (dr_icm_pool_is_sync_required(pool)) + dr_icm_pool_sync_all_buddy_pools(pool); - mutex_lock(&bucket->mutex); - list_del_init(&chunk->chunk_list); - list_add_tail(&chunk->chunk_list, &bucket->hot_list); - bucket->hot_list_count++; - bucket->used_list_count--; - mutex_unlock(&bucket->mutex); + mutex_unlock(&pool->mutex); } struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, @@ -515,7 +420,6 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, { enum mlx5dr_icm_chunk_size max_log_chunk_sz; struct mlx5dr_icm_pool *pool; - int i; if (icm_type == DR_ICM_TYPE_STE) max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; @@ -526,43 +430,24 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, if (!pool) return NULL; - pool->buckets = kcalloc(max_log_chunk_sz + 1, - sizeof(pool->buckets[0]), - GFP_KERNEL); - if (!pool->buckets) - goto free_pool; - pool->dmn = dmn; pool->icm_type = icm_type; pool->max_log_chunk_sz = max_log_chunk_sz; - pool->num_of_buckets = max_log_chunk_sz + 1; - INIT_LIST_HEAD(&pool->icm_mr_list); - for (i = 0; i < pool->num_of_buckets; i++) - dr_icm_bucket_init(pool, &pool->buckets[i], i); + INIT_LIST_HEAD(&pool->buddy_mem_list); - mutex_init(&pool->mr_mutex); + mutex_init(&pool->mutex); return pool; - -free_pool: - kvfree(pool); - return NULL; } void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) { - struct mlx5dr_icm_mr *icm_mr, *next; - int i; - - mutex_destroy(&pool->mr_mutex); - - list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list) - dr_icm_pool_mr_destroy(icm_mr); + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; - for (i = 0; i < pool->num_of_buckets; i++) 
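/* Pool lifecycle after this rework, sketched with the names above:
 *
 *	pool  = mlx5dr_icm_pool_create(dmn, icm_type);
 *	chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size);
 *	...
 *	mlx5dr_icm_free_chunk(chunk);	// parks the chunk on hot_list;
 *					// sync-ste and the real destroy
 *					// happen once hot_memory_size
 *					// crosses DR_ICM_SYNC_THRESHOLD_POOL
 *	mlx5dr_icm_pool_destroy(pool);
 *
 * A single pool->mutex now covers both the buddy list and the chunks,
 * replacing the per-bucket mutexes and their trylock choreography.
 */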
- dr_icm_bucket_cleanup(&pool->buckets[i]); + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) + dr_icm_buddy_destroy(buddy); - kfree(pool->buckets); + mutex_destroy(&pool->mutex); kvfree(pool); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 7df883686d46..cb5202e17856 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -85,7 +85,7 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) (_misc2)._inner_outer##_first_mpls_s_bos || \ (_misc2)._inner_outer##_first_mpls_ttl) -static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) +static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) { return (misc->gre_key_h || misc->gre_key_l || misc->gre_protocol || misc->gre_c_present || @@ -98,12 +98,12 @@ static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ (_misc2).outer_first_mpls_over_##gre_udp##_ttl) -#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \ +#define DR_MASK_IS_TNL_MPLS_SET(_misc2) ( \ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) static bool -dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) +dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) { return (misc3->outer_vxlan_gpe_vni || misc3->outer_vxlan_gpe_next_protocol || @@ -111,21 +111,20 @@ dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) } static bool -dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_cmd_caps *caps) +dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & - MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; } static bool -dr_mask_is_flex_parser_tnl_vxlan_gpe_set(struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn) +dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) { - return dr_mask_is_misc3_vxlan_gpe_set(&mask->misc3) && - dr_matcher_supp_flex_parser_vxlan_gpe(&dmn->info.caps); + return dr_mask_is_vxlan_gpe_set(&mask->misc3) && + dr_matcher_supp_vxlan_gpe(&dmn->info.caps); } -static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc) +static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) { return misc->geneve_vni || misc->geneve_oam || @@ -134,26 +133,46 @@ static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc) } static bool -dr_matcher_supp_flex_parser_geneve(struct mlx5dr_cmd_caps *caps) +dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & - MLX5_FLEX_PARSER_GENEVE_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED; } static bool -dr_mask_is_flex_parser_tnl_geneve_set(struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn) +dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) { - return dr_mask_is_misc_geneve_set(&mask->misc) && - dr_matcher_supp_flex_parser_geneve(&dmn->info.caps); + return dr_mask_is_tnl_geneve_set(&mask->misc) && + dr_matcher_supp_tnl_geneve(&dmn->info.caps); } -static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; +} + +static int 
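/* Note on the renamed helpers in this file: mask predicates and STE
 * builders are now named for the protocol they match (tnl_gre,
 * tnl_mpls, tnl_vxlan_gpe, tnl_geneve, icmp) rather than for the flex
 * parser slot that implements them (flex_parser_0/1), keeping the
 * hardware-parser detail private to dr_ste.c.
 */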
dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; +} + +static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3) { return (misc3->icmpv6_type || misc3->icmpv6_code || misc3->icmpv6_header_data); } +static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + if (DR_MASK_IS_ICMPV4_SET(&mask->misc3)) + return dr_matcher_supp_icmp_v4(&dmn->info.caps); + else if (dr_mask_is_icmpv6_set(&mask->misc3)) + return dr_matcher_supp_icmp_v6(&dmn->info.caps); + + return false; +} + static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) { return misc2->metadata_reg_a; @@ -257,7 +276,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.outer) && dr_mask_is_dmac_set(&mask.outer)) { - mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + mlx5dr_ste_build_eth_l2_src_dst(&sb[idx++], &mask, inner, rx); } @@ -277,8 +296,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, inner, rx); if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) - mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, - inner, rx); + mlx5dr_ste_build_eth_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); } else { if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, @@ -289,14 +308,12 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, inner, rx); } - if (dr_mask_is_flex_parser_tnl_vxlan_gpe_set(&mask, dmn)) - mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(&sb[idx++], - &mask, - inner, rx); - else if (dr_mask_is_flex_parser_tnl_geneve_set(&mask, dmn)) - mlx5dr_ste_build_flex_parser_tnl_geneve(&sb[idx++], - &mask, - inner, rx); + if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) + mlx5dr_ste_build_tnl_vxlan_gpe(&sb[idx++], &mask, + inner, rx); + else if (dr_mask_is_tnl_geneve(&mask, dmn)) + mlx5dr_ste_build_tnl_geneve(&sb[idx++], &mask, + inner, rx); if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); @@ -304,22 +321,18 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) - mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, - inner, rx); + if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) + mlx5dr_ste_build_tnl_mpls(&sb[idx++], &mask, inner, rx); - if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) && - mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || - (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && - mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { - ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++], - &mask, &dmn->info.caps, - inner, rx); + if (dr_mask_is_icmp(&mask, dmn)) { + ret = mlx5dr_ste_build_icmp(&sb[idx++], + &mask, &dmn->info.caps, + inner, rx); if (ret) return ret; } - if (dr_mask_is_gre_set(&mask.misc)) - mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx); + if (dr_mask_is_tnl_gre_set(&mask.misc)) + mlx5dr_ste_build_tnl_gre(&sb[idx++], &mask, inner, rx); } /* Inner */ @@ -334,7 +347,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.inner) && dr_mask_is_dmac_set(&mask.inner)) { - mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + mlx5dr_ste_build_eth_l2_src_dst(&sb[idx++], &mask, inner, rx); } @@ -354,8 +367,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher 
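/* dr_mask_is_icmp() above folds the old open-coded test into a single
 * predicate: an ICMP STE builder is selected only when the mask carries
 * ICMPv4 (or ICMPv6) fields and the matching capability bit is set,
 * e.g. for v4:
 *
 *	DR_MASK_IS_ICMPV4_SET(&mask->misc3) &&
 *	(caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED)
 *
 * which is the same condition previously spelled out inline at the
 * call site in dr_matcher_set_ste_builders().
 */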
*matcher, inner, rx); if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) - mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, - inner, rx); + mlx5dr_ste_build_eth_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); } else { if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, @@ -372,8 +385,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) - mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); + if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) + mlx5dr_ste_build_tnl_mpls(&sb[idx++], &mask, inner, rx); } /* Empty matcher, takes all */ if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index b01aaec75622..d275823bff2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -1090,7 +1090,7 @@ static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx) { @@ -1594,9 +1594,9 @@ static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask); @@ -1693,8 +1693,8 @@ static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, bool inner, bool rx) +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) { dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask); @@ -1771,9 +1771,9 @@ static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask); @@ -1792,8 +1792,8 @@ static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, u8 *bit_mask) { + bool is_ipv4_mask = DR_MASK_IS_ICMPV4_SET(&mask->misc3); struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3; - bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask); u32 icmp_header_data_mask; u32 icmp_type_mask; u32 icmp_code_mask; @@ -1869,7 +1869,7 @@ static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, u32 icmp_code; bool is_ipv4; - is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3); + is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); if (is_ipv4) { icmp_header_data = misc_3->icmpv4_header_data; icmp_type = misc_3->icmpv4_type; @@ -1928,10 +1928,10 @@ static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, - struct 
mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx) +int mlx5dr_ste_build_icmp(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { int ret; @@ -2069,9 +2069,9 @@ dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(mask, inner, sb->bit_mask); @@ -2122,9 +2122,9 @@ dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_tnl_geneve_bit_mask(mask, sb->bit_mask); sb->rx = rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index f50f3b107aa3..3e423c8ed22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -114,7 +114,7 @@ enum mlx5dr_ipv { struct mlx5dr_icm_pool; struct mlx5dr_icm_chunk; -struct mlx5dr_icm_bucket; +struct mlx5dr_icm_buddy_mem; struct mlx5dr_ste_htbl; struct mlx5dr_match_param; struct mlx5dr_cmd_caps; @@ -288,7 +288,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, struct mlx5dr_match_param *value, u8 *ste_arr); -void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_build *builder, struct mlx5dr_match_param *mask, bool inner, bool rx); void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, @@ -312,31 +312,31 @@ void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_icmp(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, - struct 
mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); @@ -588,9 +588,9 @@ struct mlx5dr_match_param { struct mlx5dr_match_misc3 misc3; }; -#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ - (_misc3)->icmpv4_code || \ - (_misc3)->icmpv4_header_data) +#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) struct mlx5dr_esw_caps { u64 drop_icm_address_rx; @@ -731,7 +731,6 @@ struct mlx5dr_action { struct mlx5dr_domain *dmn; struct mlx5dr_icm_chunk *chunk; u8 *data; - u32 data_size; u16 num_of_actions; u32 index; u8 allow_rx:1; @@ -804,7 +803,7 @@ void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, struct mlx5dr_ste *ste); struct mlx5dr_icm_chunk { - struct mlx5dr_icm_bucket *bucket; + struct mlx5dr_icm_buddy_mem *buddy_mem; struct list_head chunk_list; u32 rkey; u32 num_of_entries; @@ -812,6 +811,11 @@ struct mlx5dr_icm_chunk { u64 icm_addr; u64 mr_addr; + /* indicates the index of this chunk in the whole memory, + * used for deleting the chunk from the buddy + */ + unsigned int seg; + /* Memory optimisation */ struct mlx5dr_ste *ste_arr; u8 *hw_ste_arr; @@ -840,23 +844,20 @@ static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) mlx5dr_domain_nic_unlock(&dmn->info.rx); } -static inline int -mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) -{ - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; -} - -static inline int -mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps) -{ - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; -} - int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, enum mlx5dr_ipv inner_ipv); +static inline int +mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) +{ + if (icm_type == DR_ICM_TYPE_STE) + return DR_STE_SIZE; + + return DR_MODIFY_ACTION_SIZE; +} + static inline u32 mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) { @@ -870,11 +871,7 @@ mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, int num_of_entries; int entry_size; - if (icm_type == DR_ICM_TYPE_STE) - entry_size = DR_STE_SIZE; - else - entry_size = DR_MODIFY_ACTION_SIZE; - + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type); num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); return entry_size * num_of_entries; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 7914fe3fc68d..4177786b8eaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -127,4 +127,36 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); } +/* buddy functions & structure */ + +struct mlx5dr_icm_mr; + +struct mlx5dr_icm_buddy_mem { 
+ unsigned long **bitmap; + unsigned int *num_free; + u32 max_order; + struct list_head list_node; + struct mlx5dr_icm_mr *icm_mr; + struct mlx5dr_icm_pool *pool; + + /* This is the list of used chunks. HW may be accessing this memory */ + struct list_head used_list; + u64 used_memory; + + /* Hardware may be accessing this memory but at some future, + * undetermined time, it might cease to do so. + * sync_ste command sets them free. + */ + struct list_head hot_list; +}; + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order); +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy); +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment); +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order); + #endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 39eff6a57ba2..fcf9095b3f55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -7279,10 +7279,11 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, enum mlxsw_reg_ralxx_protocol protocol, enum mlxsw_reg_ralue_op op, u16 virtual_router, u8 prefix_len, - u32 dip) + u32 *dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - mlxsw_reg_ralue_dip4_set(payload, dip); + if (dip) + mlxsw_reg_ralue_dip4_set(payload, *dip); } static inline void mlxsw_reg_ralue_pack6(char *payload, @@ -7292,7 +7293,8 @@ static inline void mlxsw_reg_ralue_pack6(char *payload, const void *dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); + if (dip) + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); } static inline void @@ -8245,6 +8247,86 @@ mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router, mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask); } +/* Note that XRALXX register position violates the rule of ordering register + * definition by the ID. However, XRALXX pack helpers are using RALXX pack + * helpers, RALXX registers have higher IDs. + */ + +/* XRALTA - XM Router Algorithmic LPM Tree Allocation Register + * ----------------------------------------------------------- + * The XRALTA is used to allocate the XLT LPM trees. + * + * This register embeds original RALTA register. + */ +#define MLXSW_REG_XRALTA_ID 0x7811 +#define MLXSW_REG_XRALTA_LEN 0x08 +#define MLXSW_REG_XRALTA_RALTA_OFFSET 0x04 + +MLXSW_REG_DEFINE(xralta, MLXSW_REG_XRALTA_ID, MLXSW_REG_XRALTA_LEN); + +static inline void mlxsw_reg_xralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + char *ralta_payload = payload + MLXSW_REG_XRALTA_RALTA_OFFSET; + + MLXSW_REG_ZERO(xralta, payload); + mlxsw_reg_ralta_pack(ralta_payload, alloc, protocol, tree_id); +} + +/* XRALST - XM Router Algorithmic LPM Structure Tree Register + * ---------------------------------------------------------- + * The XRALST is used to set and query the structure of an XLT LPM tree. + * + * This register embeds original RALST register. 
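 * Layout shared by all three XRALXX wrappers, for reference:
 *
 *	offset 0x00: XM-specific header (left zeroed by MLXSW_REG_ZERO)
 *	offset 0x04: the embedded legacy payload (RALTA/RALST/RALTB)
 *
 * so every xralxx pack helper simply invokes the legacy pack helper at
 * payload + MLXSW_REG_XRALXX_..._OFFSET, and legacy-only hardware can
 * be driven by writing just that embedded part (see the basic ll_ops
 * in spectrum_router.c).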
+ */ +#define MLXSW_REG_XRALST_ID 0x7812 +#define MLXSW_REG_XRALST_LEN 0x108 +#define MLXSW_REG_XRALST_RALST_OFFSET 0x04 + +MLXSW_REG_DEFINE(xralst, MLXSW_REG_XRALST_ID, MLXSW_REG_XRALST_LEN); + +static inline void mlxsw_reg_xralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; + + MLXSW_REG_ZERO(xralst, payload); + mlxsw_reg_ralst_pack(ralst_payload, root_bin, tree_id); +} + +static inline void mlxsw_reg_xralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; + + mlxsw_reg_ralst_bin_pack(ralst_payload, bin_number, left_child_bin, + right_child_bin); +} + +/* XRALTB - XM Router Algorithmic LPM Tree Binding Register + * -------------------------------------------------------- + * The XRALTB register is used to bind virtual router and protocol + * to an allocated LPM tree. + * + * This register embeds original RALTB register. + */ +#define MLXSW_REG_XRALTB_ID 0x7813 +#define MLXSW_REG_XRALTB_LEN 0x08 +#define MLXSW_REG_XRALTB_RALTB_OFFSET 0x04 + +MLXSW_REG_DEFINE(xraltb, MLXSW_REG_XRALTB_ID, MLXSW_REG_XRALTB_LEN); + +static inline void mlxsw_reg_xraltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + char *raltb_payload = payload + MLXSW_REG_XRALTB_RALTB_OFFSET; + + MLXSW_REG_ZERO(xraltb, payload); + mlxsw_reg_raltb_pack(raltb_payload, virtual_router, protocol, tree_id); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -11195,6 +11277,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rigr2), MLXSW_REG(recr2), MLXSW_REG(rmft2), + MLXSW_REG(xralta), + MLXSW_REG(xralst), + MLXSW_REG(xraltb), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index a8525992528f..089d99535f9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -181,23 +181,26 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp, - u32 dip, u8 prefix_len, u16 ul_vr_id, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index) +mlxsw_sp_ipip_fib_entry_op_gre4_do(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + u32 dip, u8 prefix_len, u16 ul_vr_id, + enum mlxsw_sp_fib_entry_op op, + u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; - - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op, - ul_vr_id, prefix_len, dip); - mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + ll_ops->fib_entry_pack(op_ctx, MLXSW_SP_L3_PROTO_IPV4, op, ul_vr_id, + prefix_len, (unsigned char *) &dip, priv); + ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, tunnel_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index) + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum 
mlxsw_sp_fib_entry_op op, u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv) { u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb); __be32 dip; @@ -210,9 +213,8 @@ static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, ipip_entry->ol_dev).addr4; - return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip), - 32, ul_vr_id, op, - tunnel_index); + return mlxsw_sp_ipip_fib_entry_op_gre4_do(mlxsw_sp, ll_ops, op_ctx, be32_to_cpu(dip), + 32, ul_vr_id, op, tunnel_index, priv); } static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, @@ -231,8 +233,7 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, } static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev, - enum mlxsw_sp_l3proto ol_proto) + const struct net_device *ol_dev) { struct ip_tunnel *tunnel = netdev_priv(ol_dev); __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index bb5c4d4a5872..d32702cb6ab4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -43,8 +43,7 @@ struct mlxsw_sp_ipip_ops { struct mlxsw_sp_ipip_entry *ipip_entry); bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev, - enum mlxsw_sp_l3proto ol_proto); + const struct net_device *ol_dev); /* Return a configuration for creating an overlay loopback RIF. */ struct mlxsw_sp_rif_ipip_lb_config @@ -52,9 +51,12 @@ struct mlxsw_sp_ipip_ops { const struct net_device *ol_dev); int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_ipip_entry *ipip_entry, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index); + enum mlxsw_sp_fib_entry_op op, + u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv); int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4381f8c6c3fb..147dd8fab2af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -352,6 +352,7 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; +struct mlxsw_sp_nexthop_group_info; struct mlxsw_sp_nexthop_group; struct mlxsw_sp_fib_entry; @@ -368,18 +369,71 @@ struct mlxsw_sp_fib_entry_decap { u32 tunnel_index; }; +static struct mlxsw_sp_fib_entry_priv * +mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops) +{ + struct mlxsw_sp_fib_entry_priv *priv; + + if (!ll_ops->fib_entry_priv_size) + /* No need to have priv */ + return NULL; + + priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + refcount_set(&priv->refcnt, 1); + return priv; +} + +static void +mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv) +{ + kfree(priv); +} + +static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv) +{ + refcount_inc(&priv->refcnt); +} + +static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv) +{ + if (!priv || !refcount_dec_and_test(&priv->refcnt)) + return; + mlxsw_sp_fib_entry_priv_destroy(priv); +} + +static void 
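/* Reference-counting contract for fib_entry_priv, sketched from the
 * helpers above and below:
 *
 *	create:	refcount = 1, owned by the fib entry; only allocated
 *		when ll_ops->fib_entry_priv_size != 0
 *	hold:	each operation queued on an op_ctx takes a reference
 *		and links the priv on op_ctx->fib_entry_priv_list
 *	put:	tolerates NULL and frees on the final decrement, so a
 *		fib entry may be deleted while a batched low-level
 *		operation still holds its priv
 */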
@@ -409,6 +463,7 @@ struct mlxsw_sp_fib {
 	struct mlxsw_sp_vr *vr;
 	struct mlxsw_sp_lpm_tree *lpm_tree;
 	enum mlxsw_sp_l3proto proto;
+	const struct mlxsw_sp_router_ll_ops *ll_ops;
 };
 
 struct mlxsw_sp_vr {
@@ -422,12 +477,31 @@ struct mlxsw_sp_vr {
 	refcount_t ul_rif_refcnt;
 };
 
+static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl)
+{
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta),
+			       xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET);
+}
+
+static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl)
+{
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst),
+			       xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET);
+}
+
+static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl)
+{
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
+			       xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET);
+}
+
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
 
 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
 						struct mlxsw_sp_vr *vr,
 						enum mlxsw_sp_l3proto proto)
 {
+	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
 	struct mlxsw_sp_lpm_tree *lpm_tree;
 	struct mlxsw_sp_fib *fib;
 	int err;
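From here on, register access funnels through a per-protocol `ll_ops` vtable, so the same router code can later drive either the built-in LPM engine or the XM; the "basic" flavor just forwards the embedded legacy payload. A condensed sketch of that dispatch shape (the struct and function names below are invented for illustration, not the driver's actual types):

	#include <stdio.h>

	struct router_ll_ops {
		int (*ralta_write)(char *pl);	/* tree alloc register */
		int (*raltb_write)(char *pl);	/* tree bind register */
	};

	/* "Basic" flavor: hand the payload straight to the legacy register. */
	static int basic_ralta_write(char *pl) { printf("RALTA <- %s\n", pl); return 0; }
	static int basic_raltb_write(char *pl) { printf("RALTB <- %s\n", pl); return 0; }

	static const struct router_ll_ops basic_ops = {
		.ralta_write = basic_ralta_write,
		.raltb_write = basic_raltb_write,
	};

	int main(void)
	{
		/* Callers never name the backend, only the ops table. */
		const struct router_ll_ops *ll_ops = &basic_ops;
		char pl[] = "tree=1";

		return ll_ops->ralta_write(pl);
	}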
@@ -443,6 +517,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
 	fib->proto = proto;
 	fib->vr = vr;
 	fib->lpm_tree = lpm_tree;
+	fib->ll_ops = ll_ops;
 	mlxsw_sp_lpm_tree_hold(lpm_tree);
 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
 	if (err)
@@ -481,33 +556,36 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
 }
 
 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_router_ll_ops *ll_ops,
 				   struct mlxsw_sp_lpm_tree *lpm_tree)
 {
-	char ralta_pl[MLXSW_REG_RALTA_LEN];
+	char xralta_pl[MLXSW_REG_XRALTA_LEN];
 
-	mlxsw_reg_ralta_pack(ralta_pl, true,
-			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
-			     lpm_tree->id);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+	mlxsw_reg_xralta_pack(xralta_pl, true,
+			      (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			      lpm_tree->id);
+	return ll_ops->ralta_write(mlxsw_sp, xralta_pl);
 }
 
 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_router_ll_ops *ll_ops,
 				   struct mlxsw_sp_lpm_tree *lpm_tree)
 {
-	char ralta_pl[MLXSW_REG_RALTA_LEN];
+	char xralta_pl[MLXSW_REG_XRALTA_LEN];
 
-	mlxsw_reg_ralta_pack(ralta_pl, false,
-			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
-			     lpm_tree->id);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+	mlxsw_reg_xralta_pack(xralta_pl, false,
+			      (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			      lpm_tree->id);
+	ll_ops->ralta_write(mlxsw_sp, xralta_pl);
 }
 
 static int
 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
+				  const struct mlxsw_sp_router_ll_ops *ll_ops,
 				  struct mlxsw_sp_prefix_usage *prefix_usage,
 				  struct mlxsw_sp_lpm_tree *lpm_tree)
 {
-	char ralst_pl[MLXSW_REG_RALST_LEN];
+	char xralst_pl[MLXSW_REG_XRALST_LEN];
 	u8 root_bin = 0;
 	u8 prefix;
 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
@@ -515,19 +593,20 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
 		root_bin = prefix;
 
-	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
+	mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id);
 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
 		if (prefix == 0)
 			continue;
-		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
-					 MLXSW_REG_RALST_BIN_NO_CHILD);
+		mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix,
+					  MLXSW_REG_RALST_BIN_NO_CHILD);
 		last_prefix = prefix;
 	}
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
+	return ll_ops->ralst_write(mlxsw_sp, xralst_pl);
 }
 
 static struct mlxsw_sp_lpm_tree *
 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
+			 const struct mlxsw_sp_router_ll_ops *ll_ops,
 			 struct mlxsw_sp_prefix_usage *prefix_usage,
 			 enum mlxsw_sp_l3proto proto)
 {
@@ -538,12 +617,11 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
 	if (!lpm_tree)
 		return ERR_PTR(-EBUSY);
 	lpm_tree->proto = proto;
-	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
+	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree);
 	if (err)
 		return ERR_PTR(err);
 
-	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
-						lpm_tree);
+	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree);
 	if (err)
 		goto err_left_struct_set;
 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
@@ -554,14 +632,15 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
 	return lpm_tree;
 
 err_left_struct_set:
-	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+	mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
 	return ERR_PTR(err);
 }
 
 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_sp_router_ll_ops *ll_ops,
 				      struct mlxsw_sp_lpm_tree *lpm_tree)
 {
-	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+	mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree);
 }
 
 static struct mlxsw_sp_lpm_tree *
@@ -569,6 +648,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
 		      struct mlxsw_sp_prefix_usage *prefix_usage,
 		      enum mlxsw_sp_l3proto proto)
 {
+	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
 	struct mlxsw_sp_lpm_tree *lpm_tree;
 	int i;
 
@@ -582,7 +662,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
 			return lpm_tree;
 		}
 	}
-	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
+	return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto);
 }
 
 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
@@ -593,8 +673,11 @@ static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_lpm_tree *lpm_tree)
 {
+	const struct mlxsw_sp_router_ll_ops *ll_ops =
+				mlxsw_sp->router->proto_ll_ops[lpm_tree->proto];
+
 	if (--lpm_tree->ref_count == 0)
-		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
+		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree);
 }
 
 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
@@ -684,23 +767,23 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
 {
-	char raltb_pl[MLXSW_REG_RALTB_LEN];
+	char xraltb_pl[MLXSW_REG_XRALTB_LEN];
 
-	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
-			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
-			     tree_id);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+	mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
+			      (enum mlxsw_reg_ralxx_protocol) fib->proto,
+			      tree_id);
+	return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
 }
 
 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 				       const struct mlxsw_sp_fib *fib)
 {
-	char raltb_pl[MLXSW_REG_RALTB_LEN];
+	char xraltb_pl[MLXSW_REG_XRALTB_LEN];
 
 	/* Bind to tree 0 which is default */
-	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
-			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+	mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id,
+			      (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
+	return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
 }
 
 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
@@ -1270,21 +1353,33 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
 
 /* Given decap parameters, find the corresponding IPIP entry. */
 static struct mlxsw_sp_ipip_entry *
-mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
-				  const struct net_device *ul_dev,
+mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
 				  enum mlxsw_sp_l3proto ul_proto,
 				  union mlxsw_sp_l3addr ul_dip)
 {
-	struct mlxsw_sp_ipip_entry *ipip_entry;
+	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
+	struct net_device *ul_dev;
+
+	rcu_read_lock();
+
+	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
+	if (!ul_dev)
+		goto out_unlock;
 
 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
 			    ipip_list_node)
 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
 						      ul_proto, ul_dip,
 						      ipip_entry))
-			return ipip_entry;
+			goto out_unlock;
+
+	rcu_read_unlock();
 
 	return NULL;
+
+out_unlock:
+	rcu_read_unlock();
+	return ipip_entry;
 }
 
 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
@@ -1370,11 +1465,7 @@ static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
 	const struct mlxsw_sp_ipip_ops *ops
 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
 
-	/* For deciding whether decap should be offloaded, we don't care about
-	 * overlay protocol, so ask whether either one is supported.
-	 */
-	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
-	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
+	return ops->can_offload(mlxsw_sp, ol_dev);
 }
 
 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
@@ -2749,10 +2840,11 @@ struct mlxsw_sp_nexthop {
 	struct list_head neigh_list_node; /* member of neigh entry list */
 	struct list_head rif_list_node;
 	struct list_head router_list_node;
-	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
-						* this belongs to
-						*/
+	struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
+						   * this nexthop belongs to
+						   */
 	struct rhash_head ht_node;
+	struct neigh_table *neigh_tbl;
 	struct mlxsw_sp_nexthop_key key;
 	unsigned char gw_addr[sizeof(struct in6_addr)];
 	int ifindex;
@@ -2778,21 +2870,36 @@ struct mlxsw_sp_nexthop {
 	bool counter_valid;
 };
 
-struct mlxsw_sp_nexthop_group {
-	void *priv;
-	struct rhash_head ht_node;
-	struct list_head fib_list; /* list of fib entries that use this group */
-	struct neigh_table *neigh_tbl;
-	u8 adj_index_valid:1,
-	   gateway:1; /* routes using the group use a gateway */
+enum mlxsw_sp_nexthop_group_type {
+	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
+	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
+};
+
+struct mlxsw_sp_nexthop_group_info {
+	struct mlxsw_sp_nexthop_group *nh_grp;
 	u32 adj_index;
 	u16 ecmp_size;
 	u16 count;
 	int sum_norm_weight;
+	u8 adj_index_valid:1,
+	   gateway:1; /* routes using the group use a gateway */
 	struct mlxsw_sp_nexthop nexthops[0];
 #define nh_rif	nexthops[0].rif
 };
 
+struct mlxsw_sp_nexthop_group {
+	struct rhash_head ht_node;
+	struct list_head fib_list; /* list of fib entries that use this group */
+	union {
+		struct {
+			struct fib_info *fi;
+		} ipv4;
+	};
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	enum mlxsw_sp_nexthop_group_type type;
+	bool can_destroy;
+};
+
 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop *nh)
 {
@@ -2858,18 +2965,18 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
 			     u32 *p_adj_size, u32 *p_adj_hash_index)
 {
-	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
 	u32 adj_hash_index = 0;
 	int i;
 
-	if (!nh->offloaded || !nh_grp->adj_index_valid)
+	if (!nh->offloaded || !nhgi->adj_index_valid)
 		return -EINVAL;
 
-	*p_adj_index = nh_grp->adj_index;
-	*p_adj_size = nh_grp->ecmp_size;
+	*p_adj_index = nhgi->adj_index;
+	*p_adj_size = nhgi->ecmp_size;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
 
 		if (nh_iter == nh)
 			break;
@@ -2888,11 +2995,11 @@ struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
 
 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
 {
-	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
 	int i;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
 
 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
 			return true;
@@ -2900,14 +3007,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
 	return false;
 }
 
-static struct fib_info *
-mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
-{
-	return nh_grp->priv;
-}
-
 struct mlxsw_sp_nexthop_group_cmp_arg {
-	enum mlxsw_sp_l3proto proto;
+	enum mlxsw_sp_nexthop_group_type type;
 	union {
 		struct fib_info *fi;
 		struct mlxsw_sp_fib6_entry *fib6_entry;
@@ -2921,10 +3022,10 @@ mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
 {
 	int i;
 
-	for (i = 0; i < nh_grp->count; i++) {
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
 		const struct mlxsw_sp_nexthop *nh;
 
-		nh = &nh_grp->nexthops[i];
+		nh = &nh_grp->nhgi->nexthops[i];
 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
 			return true;
@@ -2939,7 +3040,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
 {
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
 
-	if (nh_grp->count != fib6_entry->nrt6)
+	if (nh_grp->nhgi->count != fib6_entry->nrt6)
 		return false;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
@@ -2964,10 +3065,13 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
 
-	switch (cmp_arg->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
-	case MLXSW_SP_L3_PROTO_IPV6:
+	if (nh_grp->type != cmp_arg->type)
+		return 1;
+
+	switch (cmp_arg->type) {
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
+		return cmp_arg->fi != nh_grp->ipv4.fi;
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
 						    cmp_arg->fib6_entry);
 	default:
@@ -2976,12 +3080,6 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
 	}
 }
 
-static int
-mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
-{
-	return nh_grp->neigh_tbl->family;
-}
-
 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
 {
 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
@@ -2990,14 +3088,14 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
 	unsigned int val;
 	int i;
 
-	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
-	case AF_INET:
-		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
+	switch (nh_grp->type) {
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
+		fi = nh_grp->ipv4.fi;
 		return jhash(&fi, sizeof(fi), seed);
-	case AF_INET6:
-		val = nh_grp->count;
-		for (i = 0; i < nh_grp->count; i++) {
-			nh = &nh_grp->nexthops[i];
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
+		val = nh_grp->nhgi->count;
+		for (i = 0; i < nh_grp->nhgi->count; i++) {
+			nh = &nh_grp->nhgi->nexthops[i];
 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
 			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
 		}
@@ -3031,10 +3129,10 @@ mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
 {
 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
 
-	switch (cmp_arg->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
+	switch (cmp_arg->type) {
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
-	case MLXSW_SP_L3_PROTO_IPV6:
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
 	default:
 		WARN_ON(1);
@@ -3052,8 +3150,8 @@ static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_nexthop_group *nh_grp)
 {
-	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
-	    !nh_grp->gateway)
+	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
+	    !nh_grp->nhgi->gateway)
 		return 0;
 
 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
@@ -3064,8 +3162,8 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
 					  struct mlxsw_sp_nexthop_group *nh_grp)
 {
-	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
-	    !nh_grp->gateway)
+	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
+	    !nh_grp->nhgi->gateway)
 		return;
 
 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
@@ -3079,7 +3177,7 @@ mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
 
-	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
+	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
 	cmp_arg.fi = fi;
 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
 				      &cmp_arg,
@@ -3092,7 +3190,7 @@ mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
 
-	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
+	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
 	cmp_arg.fib6_entry = fib6_entry;
 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
 				      &cmp_arg,
@@ -3151,14 +3249,16 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+		struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
+
 		if (fib == fib_entry->fib_node->fib)
 			continue;
 		fib = fib_entry->fib_node->fib;
 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
 							old_adj_index,
 							old_ecmp_size,
-							nh_grp->adj_index,
-							nh_grp->ecmp_size);
+							nhgi->adj_index,
+							nhgi->ecmp_size);
 		if (err)
 			return err;
 	}
@@ -3229,15 +3329,15 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 
 static int
 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_nexthop_group *nh_grp,
+			      struct mlxsw_sp_nexthop_group_info *nhgi,
 			      bool reallocate)
 {
-	u32 adj_index = nh_grp->adj_index; /* base */
+	u32 adj_index = nhgi->adj_index; /* base */
 	struct mlxsw_sp_nexthop *nh;
 	int i;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		nh = &nhgi->nexthops[i];
 
 		if (!nh->should_offload) {
 			nh->offloaded = 0;
@@ -3337,13 +3437,13 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void
-mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
+mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
 {
 	int i, g = 0, sum_norm_weight = 0;
 	struct mlxsw_sp_nexthop *nh;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		nh = &nhgi->nexthops[i];
 
 		if (!nh->should_offload)
 			continue;
@@ -3353,8 +3453,8 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
 			g = nh->nh_weight;
 	}
 
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		nh = &nhgi->nexthops[i];
 
 		if (!nh->should_offload)
 			continue;
@@ -3362,18 +3462,18 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
 		sum_norm_weight += nh->norm_nh_weight;
 	}
 
-	nh_grp->sum_norm_weight = sum_norm_weight;
+	nhgi->sum_norm_weight = sum_norm_weight;
 }
 
 static void
-mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
+mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
 {
-	int total = nh_grp->sum_norm_weight;
-	u16 ecmp_size = nh_grp->ecmp_size;
 	int i, weight = 0, lower_bound = 0;
+	int total = nhgi->sum_norm_weight;
+	u16 ecmp_size = nhgi->ecmp_size;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
 		int upper_bound;
 
 		if (!nh->should_offload)
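The two helpers above first normalize the configured weights (dividing by their greatest common divisor, skipping nexthops that cannot be offloaded; the gcd computation itself sits in context lines not shown here) and then spread `ecmp_size` adjacency slots across the group in proportion to the normalized weights. A toy model of that proportional split (standalone C, illustrative only; the driver's exact rounding lives in the elided loop body):

	#include <stdio.h>

	int main(void)
	{
		int weight[] = { 1, 2, 3 };	/* normalized nexthop weights */
		int total = 6;			/* sum of normalized weights */
		int ecmp_size = 512;		/* allocated adjacency entries */
		int running = 0, lower = 0;

		for (int i = 0; i < 3; i++) {
			running += weight[i];
			/* each nexthop owns the slice [lower, upper) */
			int upper = running * ecmp_size / total;
			printf("nh%d: %d entries\n", i, upper - lower);
			lower = upper;
		}
		return 0;	/* prints 85, 171, 256: exactly ecmp_size in total */
	}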
@@ -3395,8 +3495,8 @@ mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 {
 	int i;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
 
 		if (nh->offloaded)
 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
@@ -3442,36 +3542,36 @@ static void
 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_nexthop_group *nh_grp)
 {
-	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
-	case AF_INET:
+	switch (nh_grp->type) {
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
 		break;
-	case AF_INET6:
+	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
 		break;
 	}
 }
 
-static void
+static int
 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_nexthop_group *nh_grp)
 {
+	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
 	u16 ecmp_size, old_ecmp_size;
 	struct mlxsw_sp_nexthop *nh;
 	bool offload_change = false;
 	u32 adj_index;
 	bool old_adj_index_valid;
+	int i, err2, err = 0;
 	u32 old_adj_index;
-	int i;
-	int err;
 
-	if (!nh_grp->gateway) {
+	if (!nhgi->gateway) {
 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
-		return;
+		return 0;
 	}
 
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nhgi->count; i++) {
+		nh = &nhgi->nexthops[i];
 
 		if (nh->should_offload != nh->offloaded) {
 			offload_change = true;
@@ -3483,21 +3583,21 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		/* Nothing was added or removed, so no need to reallocate. Just
 		 * update MAC on existing adjacency indexes.
 		 */
-		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
+		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
 		if (err) {
 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
 			goto set_trap;
 		}
-		return;
+		return 0;
 	}
-	mlxsw_sp_nexthop_group_normalize(nh_grp);
-	if (!nh_grp->sum_norm_weight)
+	mlxsw_sp_nexthop_group_normalize(nhgi);
+	if (!nhgi->sum_norm_weight)
 		/* No neigh of this group is connected so we just set
 		 * the trap and let everthing flow through kernel.
 		 */
 		goto set_trap;
-	ecmp_size = nh_grp->sum_norm_weight;
+	ecmp_size = nhgi->sum_norm_weight;
 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
 	if (err)
 		/* No valid allocation size available. */
@@ -3512,14 +3612,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
 		goto set_trap;
 	}
-	old_adj_index_valid = nh_grp->adj_index_valid;
-	old_adj_index = nh_grp->adj_index;
-	old_ecmp_size = nh_grp->ecmp_size;
-	nh_grp->adj_index_valid = 1;
-	nh_grp->adj_index = adj_index;
-	nh_grp->ecmp_size = ecmp_size;
-	mlxsw_sp_nexthop_group_rebalance(nh_grp);
-	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
+	old_adj_index_valid = nhgi->adj_index_valid;
+	old_adj_index = nhgi->adj_index;
+	old_ecmp_size = nhgi->ecmp_size;
+	nhgi->adj_index_valid = 1;
+	nhgi->adj_index = adj_index;
+	nhgi->ecmp_size = ecmp_size;
+	mlxsw_sp_nexthop_group_rebalance(nhgi);
+	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
 	if (err) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
 		goto set_trap;
@@ -3536,7 +3636,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
 		goto set_trap;
 	}
-	return;
+	return 0;
 }
 
 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
@@ -3548,22 +3648,23 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		goto set_trap;
 	}
 
-	return;
+	return 0;
 
 set_trap:
-	old_adj_index_valid = nh_grp->adj_index_valid;
-	nh_grp->adj_index_valid = 0;
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
+	old_adj_index_valid = nhgi->adj_index_valid;
+	nhgi->adj_index_valid = 0;
+	for (i = 0; i < nhgi->count; i++) {
+		nh = &nhgi->nexthops[i];
 		nh->offloaded = 0;
 	}
-	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
-	if (err)
+	err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+	if (err2)
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
 	if (old_adj_index_valid)
 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
-				   nh_grp->ecmp_size, nh_grp->adj_index);
+				   nhgi->ecmp_size, nhgi->adj_index);
+	return err;
 }
 
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
@@ -3589,10 +3690,9 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
 
 	nh = list_first_entry(&neigh_entry->nexthop_list,
 			      struct mlxsw_sp_nexthop, neigh_list_node);
-	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
 	if (!n) {
-		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
-				 nh->rif->dev);
+		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
 		neigh_event_send(n, NULL);
@@ -3615,7 +3715,7 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
 		neigh_release(old_n);
 		neigh_clone(n);
 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
-		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 	}
 
 	neigh_release(n);
@@ -3652,7 +3752,7 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
 
 	list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) {
 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
-		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 	}
 }
 
@@ -3683,7 +3783,7 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
 	u8 nud_state, dead;
 	int err;
 
-	if (!nh->nh_grp->gateway || nh->neigh_entry)
+	if (!nh->nhgi->gateway || nh->neigh_entry)
 		return 0;
 
 	/* Take a reference of neigh here ensuring that neigh would
@@ -3691,10 +3791,9 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
 	 * The reference is taken either in neigh_lookup() or
 	 * in neigh_create() in case n is not found.
 	 */
-	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
 	if (!n) {
-		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
-				 nh->rif->dev);
+		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
 		neigh_event_send(n, NULL);
@@ -3775,7 +3874,7 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
 {
 	bool removing;
 
-	if (!nh->nh_grp->gateway || nh->ipip_entry)
+	if (!nh->nhgi->gateway || nh->ipip_entry)
 		return;
 
 	nh->ipip_entry = ipip_entry;
@@ -3807,27 +3906,11 @@ static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
 }
 
-static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
-				       struct mlxsw_sp_nexthop *nh)
-{
-	switch (nh->type) {
-	case MLXSW_SP_NEXTHOP_TYPE_ETH:
-		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
-		mlxsw_sp_nexthop_rif_fini(nh);
-		break;
-	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
-		mlxsw_sp_nexthop_rif_fini(nh);
-		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
-		break;
-	}
-}
-
-static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
-				       struct mlxsw_sp_nexthop *nh,
-				       struct fib_nh *fib_nh)
+static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_nexthop *nh,
+				      const struct net_device *dev)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
-	struct net_device *dev = fib_nh->fib_nh_dev;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct mlxsw_sp_rif *rif;
 	int err;
@@ -3835,8 +3918,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
 	if (ipip_entry) {
 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
-		if (ipip_ops->can_offload(mlxsw_sp, dev,
-					  MLXSW_SP_L3_PROTO_IPV4)) {
+		if (ipip_ops->can_offload(mlxsw_sp, dev)) {
 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
 			return 0;
@@ -3860,10 +3942,19 @@ err_neigh_init:
 	return err;
 }
 
-static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_nexthop *nh)
+static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_nexthop *nh)
 {
-	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+	switch (nh->type) {
+	case MLXSW_SP_NEXTHOP_TYPE_ETH:
+		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+		mlxsw_sp_nexthop_rif_fini(nh);
+		break;
+	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+		mlxsw_sp_nexthop_rif_fini(nh);
+		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
+		break;
+	}
 }
 
 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
@@ -3875,7 +3966,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
 	struct in_device *in_dev;
 	int err;
 
-	nh->nh_grp = nh_grp;
+	nh->nhgi = nh_grp->nhgi;
 	nh->key.fib_nh = fib_nh;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	nh->nh_weight = fib_nh->fib_nh_weight;
@@ -3883,6 +3974,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
 	nh->nh_weight = 1;
 #endif
 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
+	nh->neigh_tbl = &arp_tbl;
 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
 	if (err)
 		return err;
@@ -3892,6 +3984,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
 	if (!dev)
 		return 0;
 
+	nh->ifindex = dev->ifindex;
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
@@ -3902,7 +3995,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
 	}
 	rcu_read_unlock();
 
-	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
+	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
 	if (err)
 		goto err_nexthop_neigh_init;
 
@@ -3916,7 +4009,7 @@ err_nexthop_neigh_init:
 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_nexthop *nh)
 {
-	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 	list_del(&nh->router_list_node);
 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
@@ -3938,14 +4031,14 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
 
 	switch (event) {
 	case FIB_EVENT_NH_ADD:
-		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
+		mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
 		break;
 	case FIB_EVENT_NH_DEL:
-		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 		break;
 	}
 
-	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 }
 
 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
@@ -3968,7 +4061,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
 		}
 
 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
-		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 	}
 }
 
@@ -3991,7 +4084,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 
 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
-		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
 	}
 }
 
@@ -4004,45 +4097,94 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
 }
 
+static int
+mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	struct mlxsw_sp_nexthop *nh;
+	int err, i;
+
+	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
+	if (!nhgi)
+		return -ENOMEM;
+	nh_grp->nhgi = nhgi;
+	nhgi->nh_grp = nh_grp;
+	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
+	nhgi->count = nhs;
+	for (i = 0; i < nhgi->count; i++) {
+		struct fib_nh *fib_nh;
+
+		nh = &nhgi->nexthops[i];
+		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
+		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
+		if (err)
+			goto err_nexthop4_init;
+	}
+	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+	if (err)
+		goto err_group_refresh;
+
+	return 0;
+
+err_group_refresh:
+	i = nhgi->count;
+err_nexthop4_init:
+	for (i--; i >= 0; i--) {
+		nh = &nhgi->nexthops[i];
+		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
+	}
+	kfree(nhgi);
+	return err;
+}
+
+static void
+mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
+	int i;
+
+	for (i = nhgi->count - 1; i >= 0; i--) {
+		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
+
+		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
+	}
+	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+	WARN_ON_ONCE(nhgi->adj_index_valid);
+	kfree(nhgi);
+}
+
 static struct mlxsw_sp_nexthop_group *
 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
-	unsigned int nhs = fib_info_num_path(fi);
 	struct mlxsw_sp_nexthop_group *nh_grp;
-	struct mlxsw_sp_nexthop *nh;
-	struct fib_nh *fib_nh;
-	int i;
 	int err;
 
-	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
+	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
 	if (!nh_grp)
 		return ERR_PTR(-ENOMEM);
-	nh_grp->priv = fi;
 	INIT_LIST_HEAD(&nh_grp->fib_list);
-	nh_grp->neigh_tbl = &arp_tbl;
-
-	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
-	nh_grp->count = nhs;
+	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
+	nh_grp->ipv4.fi = fi;
 	fib_info_hold(fi);
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
-		fib_nh = fib_info_nh(fi, i);
-		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
-		if (err)
-			goto err_nexthop4_init;
-	}
+
+	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
+	if (err)
+		goto err_nexthop_group_info_init;
+
 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
 	if (err)
 		goto err_nexthop_group_insert;
-	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+
+	nh_grp->can_destroy = true;
+
 	return nh_grp;
 
 err_nexthop_group_insert:
-err_nexthop4_init:
-	for (i--; i >= 0; i--) {
-		nh = &nh_grp->nexthops[i];
-		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
-	}
+	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
+err_nexthop_group_info_init:
 	fib_info_put(fi);
 	kfree(nh_grp);
 	return ERR_PTR(err);
@@ -4052,17 +4194,11 @@ static void
 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_nexthop_group *nh_grp)
 {
-	struct mlxsw_sp_nexthop *nh;
-	int i;
-
+	if (!nh_grp->can_destroy)
+		return;
 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
-	for (i = 0; i < nh_grp->count; i++) {
-		nh = &nh_grp->nexthops[i];
-		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
-	}
-	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
-	WARN_ON_ONCE(nh_grp->adj_index_valid);
-	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
+	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
+	fib_info_put(nh_grp->ipv4.fi);
 	kfree(nh_grp);
 }
 
@@ -4120,9 +4256,9 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
 
 	switch (fib_entry->type) {
 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
-		return !!nh_group->adj_index_valid;
+		return !!nh_group->nhgi->adj_index_valid;
 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
-		return !!nh_group->nh_rif;
+		return !!nh_group->nhgi->nh_rif;
 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
@@ -4138,8 +4274,8 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 {
 	int i;
 
-	for (i = 0; i < nh_grp->count; i++) {
-		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
 
 		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
@@ -4156,7 +4292,6 @@ static void
 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_fib_entry *fib_entry)
 {
-	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
 	int dst_len = fib_entry->fib_node->key.prefix_len;
 	struct mlxsw_sp_fib4_entry *fib4_entry;
@@ -4166,7 +4301,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
 				  common);
-	fri.fi = fi;
+	fri.fi = fib4_entry->fi;
 	fri.tb_id = fib4_entry->tb_id;
 	fri.dst = cpu_to_be32(*p_dst);
 	fri.dst_len = dst_len;
@@ -4181,7 +4316,6 @@ static void
 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_fib_entry *fib_entry)
 {
-	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
 	int dst_len = fib_entry->fib_node->key.prefix_len;
 	struct mlxsw_sp_fib4_entry *fib4_entry;
@@ -4189,7 +4323,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
 
 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
 				  common);
-	fri.fi = fi;
+	fri.fi = fib4_entry->fi;
 	fri.tb_id = fib4_entry->tb_id;
 	fri.dst = cpu_to_be32(*p_dst);
 	fri.dst_len = dst_len;
@@ -4264,13 +4398,14 @@ mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
 static void
 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_fib_entry *fib_entry,
-				    enum mlxsw_reg_ralue_op op)
+				    enum mlxsw_sp_fib_entry_op op)
 {
 	switch (op) {
-	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
+	case MLXSW_SP_FIB_ENTRY_OP_WRITE:
+	case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
 		break;
-	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
+	case MLXSW_SP_FIB_ENTRY_OP_DELETE:
 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
 		break;
 	default:
@@ -4278,32 +4413,132 @@ mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+struct mlxsw_sp_fib_entry_op_ctx_basic {
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+};
+
 static void
-mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
-			      const struct mlxsw_sp_fib_entry *fib_entry,
-			      enum mlxsw_reg_ralue_op op)
+mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+					enum mlxsw_sp_l3proto proto,
+					enum mlxsw_sp_fib_entry_op op,
+					u16 virtual_router, u8 prefix_len,
+					unsigned char *addr,
+					struct mlxsw_sp_fib_entry_priv *priv)
 {
-	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
-	enum mlxsw_reg_ralxx_protocol proto;
-	u32 *p_dip;
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+	enum mlxsw_reg_ralxx_protocol ralxx_proto;
+	char *ralue_pl = op_ctx_basic->ralue_pl;
+	enum mlxsw_reg_ralue_op ralue_op;
 
-	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
+	ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto;
 
-	switch (fib->proto) {
+	switch (op) {
+	case MLXSW_SP_FIB_ENTRY_OP_WRITE:
+	case MLXSW_SP_FIB_ENTRY_OP_UPDATE:
+		ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
+		break;
+	case MLXSW_SP_FIB_ENTRY_OP_DELETE:
+		ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	switch (proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
-		p_dip = (u32 *) fib_entry->fib_node->key.addr;
-		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
-				      fib_entry->fib_node->key.prefix_len,
-				      *p_dip);
+		mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op,
				      virtual_router, prefix_len, (u32 *) addr);
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
-		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
-				      fib_entry->fib_node->key.prefix_len,
-				      fib_entry->fib_node->key.addr);
+		mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op,
+				      virtual_router, prefix_len, addr);
 		break;
 	}
 }
 
+static void
+mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+						   enum mlxsw_reg_ralue_trap_action trap_action,
+						   u16 trap_id, u32 adjacency_index, u16 ecmp_size)
+{
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+
+	mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action,
+					trap_id, adjacency_index, ecmp_size);
+}
+
+static void
+mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+						  enum mlxsw_reg_ralue_trap_action trap_action,
+						  u16 trap_id, u16 local_erif)
+{
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+
+	mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action,
+				       trap_id, local_erif);
+}
+
+static void
+mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx)
+{
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+
+	mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl);
+}
+
+static void
+mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+						      u32 tunnel_ptr)
+{
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+
+	mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr);
+}
+
+static int
+mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
+					  struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+					  bool *postponed_for_bulk)
+{
+	struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv;
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
+			       op_ctx_basic->ralue_pl);
+}
+
+static bool
+mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv)
+{
+	return true;
+}
+
+static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+				    struct mlxsw_sp_fib_entry *fib_entry,
+				    enum mlxsw_sp_fib_entry_op op)
+{
+	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
+
+	mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv);
+	fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id,
+				    fib_entry->fib_node->key.prefix_len,
+				    fib_entry->fib_node->key.addr,
+				    fib_entry->priv);
+}
+
+int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
+			      const struct mlxsw_sp_router_ll_ops *ll_ops)
+{
+	bool postponed_for_bulk = false;
+	int err;
+
+	err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk);
+	if (!postponed_for_bulk)
+		mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
+	return err;
+}
+
 static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
 {
 	enum mlxsw_reg_ratr_trap_action trap_action;
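In the basic flavor just added, `fib_entry_commit` writes a single RALUE payload immediately and never sets `*postponed_for_bulk`, so the op context's priv references are dropped right away; a bulk-capable implementation could instead leave several packed operations in the context and flush them in one go. A rough caller-side model of that contract (all names and the flush threshold are hypothetical, not the driver's API):

	#include <stdbool.h>
	#include <stdio.h>

	/* Pretend op context: counts packed-but-unflushed operations. */
	struct op_ctx {
		int queued;
	};

	/* A bulk-capable backend may postpone the actual device write. */
	static int commit(struct op_ctx *ctx, bool *postponed)
	{
		*postponed = ctx->queued < 4;	/* flush threshold, illustrative */
		if (*postponed)
			return 0;		/* keep references, write later */
		printf("flushing %d ops\n", ctx->queued);
		ctx->queued = 0;
		return 0;
	}

	int main(void)
	{
		struct op_ctx ctx = { 0 };
		bool postponed;

		for (int i = 0; i < 5; i++) {
			ctx.queued++;		/* ...pack one FIB operation... */
			commit(&ctx, &postponed);
			if (!postponed) {
				/* only now is it safe to drop op references */
			}
		}
		return 0;
	}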
MLXSW_REG_RALUE_TRAP_ACTION_NOP; @@ -4371,19 +4607,20 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, - adjacency_index, ecmp_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; u16 rif_index = 0; @@ -4395,58 +4632,62 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, - rif_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_ip2me_pack(op_ctx); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id; trap_action = 
MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -4454,52 +4695,53 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, return -EINVAL; ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, - fib_entry->decap.tunnel_index); + return ipip_ops->fib_entry_op(mlxsw_sp, ll_ops, op_ctx, ipip_entry, op, + fib_entry->decap.tunnel_index, fib_entry->priv); } static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, - fib_entry->decap.tunnel_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, + fib_entry->decap.tunnel_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: - return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: - return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, - op); + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: - return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, - fib_entry, op); + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: - return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op); } return -EINVAL; } static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + 
struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op); if (err) return err; @@ -4509,18 +4751,35 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return err; } +static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry *fib_entry, + bool is_new) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, + is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE : + MLXSW_SP_FIB_ENTRY_OP_UPDATE); +} + static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, - MLXSW_REG_RALUE_OP_WRITE_WRITE); + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; + + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false); } static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { - return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, - MLXSW_REG_RALUE_OP_WRITE_DELETE); + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + + if (!ll_ops->fib_entry_is_committed(fib_entry->priv)) + return 0; + return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, + MLXSW_SP_FIB_ENTRY_OP_DELETE); } static int @@ -4528,17 +4787,17 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { - struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; struct mlxsw_sp_router *router = mlxsw_sp->router; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); + int ifindex = nhgi->nexthops[0].ifindex; struct mlxsw_sp_ipip_entry *ipip_entry; - struct fib_info *fi = fen_info->fi; switch (fen_info->type) { case RTN_LOCAL: - ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4, dip); + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV4, dip); if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, @@ -4571,7 +4830,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; return 0; case RTN_UNICAST: - if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) + if (nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -4608,15 +4867,22 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib4_entry->common; - err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); - if (err) - goto err_fib4_entry_type_set; + fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); + if (IS_ERR(fib_entry->priv)) { + err = PTR_ERR(fib_entry->priv); + goto err_fib_entry_priv_create; + } err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) goto err_nexthop4_group_get; - fib4_entry->prio = fen_info->fi->fib_priority; + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); + 
if (err) + goto err_fib4_entry_type_set; + + fib4_entry->fi = fen_info->fi; + fib_info_hold(fib4_entry->fi); fib4_entry->tb_id = fen_info->tb_id; fib4_entry->type = fen_info->type; fib4_entry->tos = fen_info->tos; @@ -4625,9 +4891,11 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return fib4_entry; -err_nexthop4_group_get: - mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry); err_fib4_entry_type_set: + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); +err_nexthop4_group_get: + mlxsw_sp_fib_entry_priv_put(fib_entry->priv); +err_fib_entry_priv_create: kfree(fib4_entry); return ERR_PTR(err); } @@ -4635,8 +4903,10 @@ err_fib4_entry_type_set: static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + fib_info_put(fib4_entry->fi); mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv); kfree(fib4_entry); } @@ -4665,8 +4935,7 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (fib4_entry->tb_id == fen_info->tb_id && fib4_entry->tos == fen_info->tos && fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) + fib4_entry->fi == fen_info->fi) return fib4_entry; return NULL; @@ -4875,14 +5144,16 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + bool is_new = !fib_node->fib_entry; int err; fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new); if (err) goto err_fib_entry_update; @@ -4893,14 +5164,25 @@ err_fib_entry_update: return err; } -static void -mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + int err; - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry); fib_node->fib_entry = NULL; + return err; +} + +static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; + + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry); } static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) @@ -4922,6 +5204,7 @@ static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) static int mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; @@ -4955,7 +5238,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto 
err_fib_node_entry_link; @@ -4980,23 +5263,26 @@ err_fib4_entry_create: return err; } -static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, - struct fib_entry_notifier_info *fen_info) +static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + int err; if (mlxsw_sp->router->aborted) - return; + return 0; fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); if (!fib4_entry) - return; + return 0; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); + err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; } static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) @@ -5047,7 +5333,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) { struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; - fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + if (!mlxsw_sp_rt6->rt->nh) + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); kfree(mlxsw_sp_rt6); } @@ -5081,51 +5368,6 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } -static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - const struct fib6_info *rt) -{ - const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh->fib_nh_dev; - struct mlxsw_sp_rif *rif; - int err; - - ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); - if (ipip_entry) { - ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - if (ipip_ops->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); - return 0; - } - } - - nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); - if (err) - goto err_nexthop_neigh_init; - - return 0; - -err_nexthop_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); - return err; -} - -static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) -{ - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); -} - static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, @@ -5133,9 +5375,12 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = rt->fib6_nh->fib_nh_dev; - nh->nh_grp = nh_grp; + nh->nhgi = nh_grp->nhgi; nh->nh_weight = rt->fib6_nh->fib_nh_weight; memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5144,13 +5389,13 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); + return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct 
mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } @@ -5162,51 +5407,98 @@ static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } -static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int +mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) { - struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - int i = 0; - int err; + int err, i; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6), - GFP_KERNEL); - if (!nh_grp) - return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&nh_grp->fib_list); -#if IS_ENABLED(CONFIG_IPV6) - nh_grp->neigh_tbl = &nd_tbl; -#endif + nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6), + GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list); - nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); - nh_grp->count = fib6_entry->nrt6; - for (i = 0; i < nh_grp->count; i++) { + nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nhgi->count = fib6_entry->nrt6; + for (i = 0; i < nhgi->count; i++) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - nh = &nh_grp->nexthops[i]; + nh = &nhgi->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); if (err) goto err_nexthop6_init; mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); } + nh_grp->nhgi = nhgi; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + i = nhgi->count; +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; + + err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry); + if (err) + goto err_nexthop_group_info_init; err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) goto err_nexthop_group_insert; - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + nh_grp->can_destroy = true; + return nh_grp; err_nexthop_group_insert: -err_nexthop6_init: - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: kfree(nh_grp); return ERR_PTR(err); } 
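Both the IPv4 and IPv6 group-info paths above share one idiom: the nexthop array is a flexible array member sized at allocation time (kzalloc(struct_size(nhgi, nexthops, ...))), and on failure only the steps already completed are rolled back, in reverse order — note how err_group_refresh resets i to nhgi->count before falling through to the unwind loop. The following is a minimal standalone sketch of that pattern, not part of the driver: the demo_* names are illustrative, and plain calloc()/offsetof() stand in for the kernel's kzalloc()/struct_size().

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_nexthop {
	int ifindex;
};

/* Mirrors the nh_grp -> nhgi split: the group info ends in a
 * flexible array of nexthops, sized once at allocation time.
 */
struct demo_group_info {
	unsigned int count;
	struct demo_nexthop nexthops[];
};

static int demo_nexthop_init(struct demo_nexthop *nh, int ifindex)
{
	if (ifindex < 0)	/* simulate a failing setup step */
		return -1;
	nh->ifindex = ifindex;
	return 0;
}

static void demo_nexthop_fini(struct demo_nexthop *nh)
{
	nh->ifindex = 0;
}

static struct demo_group_info *demo_group_info_create(const int *ifindexes,
						      unsigned int count)
{
	struct demo_group_info *gi;
	int i;

	/* struct_size()-style allocation: header plus count array slots */
	gi = calloc(1, offsetof(struct demo_group_info, nexthops) +
		       count * sizeof(struct demo_nexthop));
	if (!gi)
		return NULL;
	gi->count = count;

	for (i = 0; i < (int)count; i++) {
		if (demo_nexthop_init(&gi->nexthops[i], ifindexes[i]))
			goto err_nexthop_init;
	}

	return gi;

err_nexthop_init:
	/* Roll back only the entries already initialized, newest first */
	for (i--; i >= 0; i--)
		demo_nexthop_fini(&gi->nexthops[i]);
	free(gi);
	return NULL;
}

int main(void)
{
	int good[] = { 2, 3 }, bad[] = { 2, -1 };
	struct demo_group_info *gi;

	gi = demo_group_info_create(good, 2);
	printf("good: %s\n", gi ? "created" : "failed");
	free(gi);

	gi = demo_group_info_create(bad, 2);
	printf("bad: %s\n", gi ? "created" : "failed");
	free(gi);
	return 0;
}

Unwinding from the current i rather than from count - 1 is what lets the two error labels share one loop: a mid-loop failure rolls back exactly the entries that were initialized, while a refresh failure first sets i to the full count.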
@@ -5215,16 +5507,10 @@ static void mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - struct mlxsw_sp_nexthop *nh; - int i = nh_grp->count; - + if (!nh_grp->can_destroy) + return; mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - WARN_ON(nh_grp->adj_index_valid); + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); kfree(nh_grp); } @@ -5240,15 +5526,15 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(nh_grp); } - list_add_tail(&fib6_entry->common.nexthop_group_node, - &nh_grp->fib_list); - fib6_entry->common.nh_group = nh_grp; - /* The route and the nexthop are described by the same struct, so we * need to update the nexthop offload indication for the new route. */ __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + return 0; } @@ -5263,9 +5549,9 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); } -static int -mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; int err; @@ -5281,7 +5567,8 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. */ - err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); + err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, + &fib6_entry->common, false); if (err) goto err_fib_entry_update; @@ -5301,6 +5588,7 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -5318,7 +5606,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, fib6_entry->nrt6++; } - err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); if (err) goto err_nexthop6_group_update; @@ -5339,6 +5627,7 @@ err_rt6_create: static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -5356,26 +5645,20 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } - mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, const struct fib6_info *rt) { - /* Packets hitting RTF_REJECT routes need to be discarded by the - * stack. We can rely on their destination device not having a - * RIF (it's the loopback device) and can thus use action type - * local, which will cause them to be trapped with a lower - * priority than packets that need to be locally received. 
- */ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; else if (rt->fib6_type == RTN_BLACKHOLE) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; - else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + else if (fib_entry->nh_group->nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -5409,6 +5692,12 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; + fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); + if (IS_ERR(fib_entry->priv)) { + err = PTR_ERR(fib_entry->priv); + goto err_fib_entry_priv_create; + } + INIT_LIST_HEAD(&fib6_entry->rt6_list); for (i = 0; i < nrt6; i++) { @@ -5421,12 +5710,12 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); + fib_entry->fib_node = fib_node; return fib6_entry; @@ -5441,6 +5730,8 @@ err_rt6_create: list_del(&mlxsw_sp_rt6->list); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } + mlxsw_sp_fib_entry_priv_put(fib_entry->priv); +err_fib_entry_priv_create: kfree(fib6_entry); return ERR_PTR(err); } @@ -5451,6 +5742,7 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); WARN_ON(fib6_entry->nrt6); + mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv); kfree(fib6_entry); } @@ -5508,8 +5800,8 @@ static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) } static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; struct mlxsw_sp_fib_entry *replaced; @@ -5548,7 +5840,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common); if (err) goto err_fib_node_entry_link; @@ -5572,8 +5864,8 @@ err_fib6_entry_create: } static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5604,8 +5896,7 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, fib6_entry = container_of(fib_node->fib_entry, struct mlxsw_sp_fib6_entry, common); - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, - nrt6); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; @@ -5616,19 +5907,20 @@ err_fib6_entry_nexthop_add: return err; } -static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) +static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct 
mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct fib6_info *rt = rt_arr[0]; + int err; if (mlxsw_sp->router->aborted) - return; + return 0; if (mlxsw_sp_fib6_rt_should_ignore(rt)) - return; + return 0; /* Multipath routes are first added to the FIB trie and only then * notified. If we vetoed the addition, we will get a delete @@ -5637,58 +5929,66 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); if (!fib6_entry) - return; + return 0; /* If not all the nexthops are deleted, then only reduce the nexthop * group. */ if (nrt6 != fib6_entry->nrt6) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, - nrt6); - return; + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); + return 0; } fib_node = fib6_entry->common.fib_node; - mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); + err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; } static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_reg_ralxx_protocol proto, + enum mlxsw_sp_l3proto proto, u8 tree_id) { - char ralta_pl[MLXSW_REG_RALTA_LEN]; - char ralst_pl[MLXSW_REG_RALST_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; + enum mlxsw_reg_ralxx_protocol ralxx_proto = + (enum mlxsw_reg_ralxx_protocol) proto; + struct mlxsw_sp_fib_entry_priv *priv; + char xralta_pl[MLXSW_REG_XRALTA_LEN]; + char xralst_pl[MLXSW_REG_XRALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id); + err = ll_ops->ralta_write(mlxsw_sp, xralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id); + err = ll_ops->ralst_write(mlxsw_sp, xralst_pl); if (err) return err; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; - char raltb_pl[MLXSW_REG_RALTB_LEN]; - char ralue_pl[MLXSW_REG_RALUE_LEN]; + char xraltb_pl[MLXSW_REG_XRALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), - raltb_pl); + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id); + err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl); if (err) return err; - mlxsw_reg_ralue_pack(ralue_pl, proto, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), - ralue_pl); + priv = mlxsw_sp_fib_entry_priv_create(ll_ops); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE, + vr->id, 0, NULL, priv); + ll_ops->fib_entry_act_ip2me_pack(op_ctx); + err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL); + mlxsw_sp_fib_entry_priv_put(priv); if (err) return err; } @@ -5784,7 +6084,7 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { - enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; 
+ enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4; int err; err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, @@ -5796,7 +6096,7 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) * packets that don't match any routes are trapped to the CPU. */ - proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + proto = MLXSW_SP_L3_PROTO_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); } @@ -5901,15 +6201,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } -struct mlxsw_sp_fib6_event_work { +struct mlxsw_sp_fib6_event { struct fib6_info **rt_arr; unsigned int nrt6; }; -struct mlxsw_sp_fib_event_work { - struct work_struct work; +struct mlxsw_sp_fib_event { + struct list_head list; /* node in fib queue */ union { - struct mlxsw_sp_fib6_event_work fib6_work; + struct mlxsw_sp_fib6_event fib6_event; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5918,11 +6218,12 @@ struct mlxsw_sp_fib_event_work { }; struct mlxsw_sp *mlxsw_sp; unsigned long event; + int family; }; static int -mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, - struct fib6_entry_notifier_info *fen6_info) +mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event, + struct fib6_entry_notifier_info *fen6_info) { struct fib6_info *rt = fen6_info->rt; struct fib6_info **rt_arr; @@ -5936,8 +6237,8 @@ mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, if (!rt_arr) return -ENOMEM; - fib6_work->rt_arr = rt_arr; - fib6_work->nrt6 = nrt6; + fib6_event->rt_arr = rt_arr; + fib6_event->nrt6 = nrt6; rt_arr[0] = rt; fib6_info_hold(rt); @@ -5959,170 +6260,232 @@ mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, } static void -mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event) { int i; - for (i = 0; i < fib6_work->nrt6; i++) - mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); - kfree(fib6_work->rt_arr); + for (i = 0; i < fib6_event->nrt6; i++) + mlxsw_sp_rt6_release(fib6_event->rt_arr[i]); + kfree(fib6_event->rt_arr); } -static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib4_replace(mlxsw_sp, - &fib_work->fen_info); - if (err) + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_info_put(fib_work->fen_info.fi); + } + fib_info_put(fib_event->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); - fib_info_put(fib_work->fen_info.fi); + err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info); + if (err) + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); + fib_info_put(fib_event->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: - 
mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); - fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh); + fib_info_put(fib_event->fnh_info.fib_nh->nh_parent); break; } - mutex_unlock(&mlxsw_sp->router->lock); - kfree(fib_work); } -static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib6_replace(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - if (err) + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + } + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; case FIB_EVENT_ENTRY_APPEND: - err = mlxsw_sp_router_fib6_append(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - if (err) + err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + } + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; } - mutex_unlock(&mlxsw_sp->router->lock); - kfree(fib_work); } -static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; bool replace; int err; rtnl_lock(); mutex_lock(&mlxsw_sp->router->lock); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, - replace); + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mr_cache_put(fib_work->men_info.mfc); + mr_cache_put(fib_event->men_info.mfc); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); - mr_cache_put(fib_work->men_info.mfc); + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info); + mr_cache_put(fib_event->men_info.mfc); break; case FIB_EVENT_VIF_ADD: err = 
mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, - &fib_work->ven_info); + &fib_event->ven_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - dev_put(fib_work->ven_info.dev); + dev_put(fib_event->ven_info.dev); break; case FIB_EVENT_VIF_DEL: - mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, - &fib_work->ven_info); - dev_put(fib_work->ven_info.dev); + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info); + dev_put(fib_event->ven_info.dev); break; } mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); - kfree(fib_work); } -static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, +static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +{ + struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work); + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx; + struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp; + struct mlxsw_sp_fib_event *next_fib_event; + struct mlxsw_sp_fib_event *fib_event; + int last_family = AF_UNSPEC; + LIST_HEAD(fib_event_queue); + + spin_lock_bh(&router->fib_event_queue_lock); + list_splice_init(&router->fib_event_queue, &fib_event_queue); + spin_unlock_bh(&router->fib_event_queue_lock); + + /* Router lock is held here to make sure per-instance + * operation context is not used in between FIB4/6 event + * processing. + */ + mutex_lock(&router->lock); + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + list_for_each_entry_safe(fib_event, next_fib_event, + &fib_event_queue, list) { + /* Check if the next entry in the queue exists and it is + * of the same type (family and event) as the current one. + * In that case it is permitted to do the bulking + * of multiple FIB entries to a single register write. + */ + op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) && + fib_event->family == next_fib_event->family && + fib_event->event == next_fib_event->event; + + /* In case the family of this entry differs from the previous one, context + * reinitialization is going to be needed now, indicate that. + * Note that since last_family is initialized to AF_UNSPEC, this is always + * going to happen for the first entry processed in the work. + */ + if (fib_event->family != last_family) + op_ctx->initialized = false; + + switch (fib_event->family) { + case AF_INET: + mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx, + fib_event); + break; + case AF_INET6: + mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx, + fib_event); + break; + case RTNL_FAMILY_IP6MR: + case RTNL_FAMILY_IPMR: + /* Unlock here as inside FIBMR the lock is taken again + * under RTNL. The per-instance operation context + * is not used by FIBMR. + */ + mutex_unlock(&router->lock); + mlxsw_sp_router_fibmr_event_process(mlxsw_sp, + fib_event); + mutex_lock(&router->lock); + break; + default: + WARN_ON_ONCE(1); + } + last_family = fib_event->family; + kfree(fib_event); + cond_resched(); + } + WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); + mutex_unlock(&router->lock); +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); - fib_work->fen_info = *fen_info; + fib_event->fen_info = *fen_info; /* Take reference on fib_info to prevent it from being - * freed while work is queued. Release it afterwards. 
+ * freed while event is queued. Release it afterwards. */ - fib_info_hold(fib_work->fen_info.fi); + fib_info_hold(fib_event->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: fnh_info = container_of(info, struct fib_nh_notifier_info, info); - fib_work->fnh_info = *fnh_info; - fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + fib_event->fnh_info = *fnh_info; + fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent); break; } } -static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; int err; - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, - fen6_info); + err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event, + fen6_info); if (err) return err; break; @@ -6132,20 +6495,20 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, } static void -mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); - mr_cache_hold(fib_work->men_info.mfc); + memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info)); + mr_cache_hold(fib_event->men_info.mfc); break; case FIB_EVENT_VIF_ADD: case FIB_EVENT_VIF_DEL: - memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); - dev_hold(fib_work->ven_info.dev); + memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info)); + dev_hold(fib_event->ven_info.dev); break; } } @@ -6202,7 +6565,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_fib_event_work *fib_work; + struct mlxsw_sp_fib_event *fib_event; struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; int err; @@ -6252,37 +6615,39 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; } - fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); - if (!fib_work) + fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); + if (!fib_event) return NOTIFY_BAD; - fib_work->mlxsw_sp = router->mlxsw_sp; - fib_work->event = event; + fib_event->mlxsw_sp = router->mlxsw_sp; + fib_event->event = event; + fib_event->family = info->family; switch (info->family) { case AF_INET: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); - mlxsw_sp_router_fib4_event(fib_work, info); + mlxsw_sp_router_fib4_event(fib_event, info); break; case AF_INET6: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - err = mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_event, info); if (err) goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); - mlxsw_sp_router_fibmr_event(fib_work, info); + mlxsw_sp_router_fibmr_event(fib_event, info); break; } - mlxsw_core_schedule_work(&fib_work->work); + /* Enqueue the event and trigger the work */ + 
spin_lock_bh(&router->fib_event_queue_lock); + list_add_tail(&fib_event->list, &router->fib_event_queue); + spin_unlock_bh(&router->fib_event_queue_lock); + mlxsw_core_schedule_work(&router->fib_event_work); return NOTIFY_DONE; err_fib_event: - kfree(fib_work); + kfree(fib_event); return NOTIFY_BAD; } @@ -8057,6 +8422,45 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } +static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = { + .ralta_write = mlxsw_sp_router_ll_basic_ralta_write, + .ralst_write = mlxsw_sp_router_ll_basic_ralst_write, + .raltb_write = mlxsw_sp_router_ll_basic_raltb_write, + .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic), + .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack, + .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack, + .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack, + .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack, + .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack, + .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit, + .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed, +}; + +static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router) +{ + size_t max_size = 0; + int i; + + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) { + size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size; + + if (size > max_size) + max_size = size; + } + router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size, + GFP_KERNEL); + if (!router->ll_op_ctx) + return -ENOMEM; + INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list); + return 0; +} + +static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router) +{ + WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); + kfree(router->ll_op_ctx); +} + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { @@ -8070,6 +8474,13 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; + router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_router_ll_basic_ops; + router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops; + + err = mlxsw_sp_router_ll_op_ctx_init(router); + if (err) + goto err_ll_op_ctx_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -8118,6 +8529,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_dscp_init; + INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work); + INIT_LIST_HEAD(&router->fib_event_queue); + spin_lock_init(&router->fib_event_queue_lock); + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; err = register_inetaddr_notifier(&router->inetaddr_nb); if (err) @@ -8151,6 +8566,7 @@ err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: mlxsw_core_flush_owq(); + WARN_ON(!list_empty(&router->fib_event_queue)); err_dscp_init: err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); @@ -8171,6 +8587,8 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_router_ll_op_ctx_fini(router); +err_ll_op_ctx_init: mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; @@ -8184,6 +8602,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) 
unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); mlxsw_core_flush_owq(); + WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue)); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); @@ -8193,6 +8612,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router); mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 8418dc3ae967..8230f6ff02ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -15,6 +15,26 @@ struct mlxsw_sp_router_nve_decap { u8 valid:1; }; +struct mlxsw_sp_fib_entry_op_ctx { + u8 bulk_ok:1, /* Indicate to the low-level op it is ok to bulk + * the actual entry with the one that is the next + * in queue. + */ + initialized:1; /* Bit that the low-level op sets in case + * the context priv is initialized. + */ + struct list_head fib_entry_priv_list; + unsigned long ll_priv[]; +}; + +static inline void +mlxsw_sp_fib_entry_op_ctx_clear(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) +{ + WARN_ON_ONCE(!list_empty(&op_ctx->fib_entry_priv_list)); + memset(op_ctx, 0, sizeof(*op_ctx)); + INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list); +} + struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif **rifs; @@ -48,8 +68,58 @@ struct mlxsw_sp_router { bool adj_discard_index_valid; struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ + struct work_struct fib_event_work; + struct list_head fib_event_queue; + spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ + /* One set of ops for each protocol: IPv4 and IPv6 */ + const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX]; + struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; +}; + +struct mlxsw_sp_fib_entry_priv { + refcount_t refcnt; + struct list_head list; /* Member in op_ctx->fib_entry_priv_list */ + unsigned long priv[]; +}; + +enum mlxsw_sp_fib_entry_op { + MLXSW_SP_FIB_ENTRY_OP_WRITE, + MLXSW_SP_FIB_ENTRY_OP_UPDATE, + MLXSW_SP_FIB_ENTRY_OP_DELETE, }; +/* Low-level router ops. Basically this is to handle the different + * register sets to work with ordinary and XM trees and FIB entries. 
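+ * mlxsw_sp_router_ll_basic_ops provides the default implementation, + * registered for both IPv4 and IPv6.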
+ */ +struct mlxsw_sp_router_ll_ops { + int (*ralta_write)(struct mlxsw_sp *mlxsw_sp, char *xralta_pl); + int (*ralst_write)(struct mlxsw_sp *mlxsw_sp, char *xralst_pl); + int (*raltb_write)(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl); + size_t fib_entry_op_ctx_size; + size_t fib_entry_priv_size; + void (*fib_entry_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_sp_l3proto proto, enum mlxsw_sp_fib_entry_op op, + u16 virtual_router, u8 prefix_len, unsigned char *addr, + struct mlxsw_sp_fib_entry_priv *priv); + void (*fib_entry_act_remote_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size); + void (*fib_entry_act_local_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif); + void (*fib_entry_act_ip2me_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx); + void (*fib_entry_act_ip2me_tun_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + u32 tunnel_ptr); + int (*fib_entry_commit)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + bool *postponed_for_bulk); + bool (*fib_entry_is_committed)(struct mlxsw_sp_fib_entry_priv *priv); +}; + +int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + const struct mlxsw_sp_router_ll_ops *ll_ops); + struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index dcde496da7fb..c5de8f46cdd3 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -780,7 +780,9 @@ static void lan743x_ethtool_get_wol(struct net_device *netdev, wol->supported = 0; wol->wolopts = 0; - phy_ethtool_get_wol(netdev->phydev, wol); + + if (netdev->phydev) + phy_ethtool_get_wol(netdev->phydev, wol); wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST | WAKE_MAGIC | WAKE_PHY | WAKE_ARP; @@ -809,9 +811,8 @@ static int lan743x_ethtool_set_wol(struct net_device *netdev, device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts); - phy_ethtool_set_wol(netdev->phydev, wol); - - return 0; + return netdev->phydev ? 
phy_ethtool_set_wol(netdev->phydev, wol) + : -ENETDOWN; } #endif /* CONFIG_PM */ diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index b319c22c211c..794510cba51f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -830,14 +830,13 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter) static int lan743x_mac_open(struct lan743x_adapter *adapter) { - int ret = 0; u32 temp; temp = lan743x_csr_read(adapter, MAC_RX); lan743x_csr_write(adapter, MAC_RX, temp | MAC_RX_RXEN_); temp = lan743x_csr_read(adapter, MAC_TX); lan743x_csr_write(adapter, MAC_TX, temp | MAC_TX_TXEN_); - return ret; + return 0; } static void lan743x_mac_close(struct lan743x_adapter *adapter) @@ -958,7 +957,7 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) data = lan743x_csr_read(adapter, MAC_CR); /* set interface mode */ - if (phy_interface_mode_is_rgmii(adapter->phy_mode)) + if (phy_interface_is_rgmii(phydev)) /* RGMII */ data &= ~MAC_CR_MII_EN_; else @@ -1014,33 +1013,14 @@ static void lan743x_phy_close(struct lan743x_adapter *adapter) static int lan743x_phy_open(struct lan743x_adapter *adapter) { + struct net_device *netdev = adapter->netdev; struct lan743x_phy *phy = &adapter->phy; - struct phy_device *phydev = NULL; - struct device_node *phynode; - struct net_device *netdev; + struct phy_device *phydev; int ret = -EIO; - netdev = adapter->netdev; - phynode = of_node_get(adapter->pdev->dev.of_node); - - if (phynode) { - /* try devicetree phy, or fixed link */ - of_get_phy_mode(phynode, &adapter->phy_mode); - - if (of_phy_is_fixed_link(phynode)) { - ret = of_phy_register_fixed_link(phynode); - if (ret) { - netdev_err(netdev, - "cannot register fixed PHY\n"); - of_node_put(phynode); - goto return_error; - } - } - phydev = of_phy_connect(netdev, phynode, - lan743x_phy_link_status_change, 0, - adapter->phy_mode); - of_node_put(phynode); - } + /* try devicetree phy, or fixed link */ + phydev = of_phy_get_and_connect(netdev, adapter->pdev->dev.of_node, + lan743x_phy_link_status_change); if (!phydev) { /* try internal phy */ @@ -1048,10 +1028,9 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter) if (!phydev) goto return_error; - adapter->phy_mode = PHY_INTERFACE_MODE_GMII; ret = phy_connect_direct(netdev, phydev, lan743x_phy_link_status_change, - adapter->phy_mode); + PHY_INTERFACE_MODE_GMII); if (ret) goto return_error; } @@ -1066,6 +1045,7 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter) phy_start(phydev); phy_start_aneg(phydev); + phy_attached_info(phydev); return 0; return_error: diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index a536f4a4994d..3a0e70daa88f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -703,7 +703,6 @@ struct lan743x_rx { struct lan743x_adapter { struct net_device *netdev; struct mii_bus *mdiobus; - phy_interface_t phy_mode; int msg_enable; #ifdef CONFIG_PM u32 wolopts; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 70bf8c67d7ef..2632fe2d2448 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -147,42 +147,20 @@ static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask) return ocelot_vlant_wait_for_completion(ocelot); } -static int ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, - u16 vid) 
+static void ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, + struct ocelot_vlan native_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u32 val = 0; - if (ocelot_port->vid != vid) { - /* Always permit deleting the native VLAN (vid = 0) */ - if (ocelot_port->vid && vid) { - dev_err(ocelot->dev, - "Port already has a native VLAN: %d\n", - ocelot_port->vid); - return -EBUSY; - } - ocelot_port->vid = vid; - } + ocelot_port->native_vlan = native_vlan; - ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(vid), + ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(native_vlan.vid), REW_PORT_VLAN_CFG_PORT_VID_M, REW_PORT_VLAN_CFG, port); - if (ocelot_port->vlan_aware && !ocelot_port->vid) - /* If port is vlan-aware and tagged, drop untagged and priority - * tagged frames. - */ - val = ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; - ocelot_rmw_gix(ocelot, val, - ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, - ANA_PORT_DROP_CFG, port); - if (ocelot_port->vlan_aware) { - if (ocelot_port->vid) + if (native_vlan.valid) /* Tag all frames except when VID == DEFAULT_VLAN */ val = REW_TAG_CFG_TAG_CFG(1); else @@ -195,8 +173,38 @@ static int ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, ocelot_rmw_gix(ocelot, val, REW_TAG_CFG_TAG_CFG_M, REW_TAG_CFG, port); +} - return 0; +/* Default vlan to classify for untagged frames (may be zero) */ +static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, + struct ocelot_vlan pvid_vlan) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + u32 val = 0; + + ocelot_port->pvid_vlan = pvid_vlan; + + if (!ocelot_port->vlan_aware) + pvid_vlan.vid = 0; + + ocelot_rmw_gix(ocelot, + ANA_PORT_VLAN_CFG_VLAN_VID(pvid_vlan.vid), + ANA_PORT_VLAN_CFG_VLAN_VID_M, + ANA_PORT_VLAN_CFG, port); + + /* If there's no pvid, we should drop not only untagged traffic (which + * happens automatically), but also 802.1p traffic which gets + * classified to VLAN 0, but that is always in our RX filter, so it + * would get accepted were it not for this setting. 
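+ * (802.1p priority-tagged frames carry a VLAN tag with VID 0, which is + * why they are classified to VLAN 0 rather than treated as untagged.)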
+ */ + if (!pvid_vlan.valid && ocelot_port->vlan_aware) + val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + + ocelot_rmw_gix(ocelot, val, + ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG, port); } int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, @@ -233,24 +241,30 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_native_vlan(ocelot, port, ocelot_port->vid); + ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + ocelot_port_set_native_vlan(ocelot, port, ocelot_port->native_vlan); return 0; } EXPORT_SYMBOL(ocelot_port_vlan_filtering); -/* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, u16 pvid) +int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, + bool untagged) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - ocelot_rmw_gix(ocelot, - ANA_PORT_VLAN_CFG_VLAN_VID(pvid), - ANA_PORT_VLAN_CFG_VLAN_VID_M, - ANA_PORT_VLAN_CFG, port); + /* Deny changing the native VLAN, but always permit deleting it */ + if (untagged && ocelot_port->native_vlan.vid != vid && + ocelot_port->native_vlan.valid) { + dev_err(ocelot->dev, + "Port already has a native VLAN: %d\n", + ocelot_port->native_vlan.vid); + return -EBUSY; + } - ocelot_port->pvid = pvid; + return 0; } +EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) @@ -264,14 +278,21 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return ret; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, vid); + if (pvid) { + struct ocelot_vlan pvid_vlan; + + pvid_vlan.vid = vid; + pvid_vlan.valid = true; + ocelot_port_set_pvid(ocelot, port, pvid_vlan); + } /* Untagged egress vlan classification */ if (untagged) { - ret = ocelot_port_set_native_vlan(ocelot, port, vid); - if (ret) - return ret; + struct ocelot_vlan native_vlan; + + native_vlan.vid = vid; + native_vlan.valid = true; + ocelot_port_set_native_vlan(ocelot, port, native_vlan); } return 0; @@ -290,12 +311,18 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return ret; /* Ingress */ - if (ocelot_port->pvid == vid) - ocelot_port_set_pvid(ocelot, port, 0); + if (ocelot_port->pvid_vlan.vid == vid) { + struct ocelot_vlan pvid_vlan = {0}; + + ocelot_port_set_pvid(ocelot, port, pvid_vlan); + } /* Egress */ - if (ocelot_port->vid == vid) - ocelot_port_set_native_vlan(ocelot, port, 0); + if (ocelot_port->native_vlan.vid == vid) { + struct ocelot_vlan native_vlan = {0}; + + ocelot_port_set_native_vlan(ocelot, port, native_vlan); + } return 0; } @@ -542,26 +569,11 @@ EXPORT_SYMBOL(ocelot_get_txtstamp); int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; int pgid = port; if (port == ocelot->npi) pgid = PGID_CPU; - if (!vid) { - if (!ocelot_port->vlan_aware) - /* If the bridge is not VLAN aware and no VID was - * provided, set it to pvid to ensure the MAC entry - * matches incoming untagged packets - */ - vid = ocelot_port->pvid; - else - /* If the bridge is VLAN aware a VID must be provided as - * otherwise the learnt entry wouldn't match any frame. 
- */ - return -EINVAL; - } - return ocelot_mact_learn(ocelot, pgid, addr, vid, ENTRYTYPE_LOCKED); } EXPORT_SYMBOL(ocelot_fdb_add); @@ -958,52 +970,88 @@ static enum macaccess_entry_type ocelot_classify_mdb(const unsigned char *addr) return ENTRYTYPE_MACv4; if (addr[0] == 0x33 && addr[1] == 0x33) return ENTRYTYPE_MACv6; - return ENTRYTYPE_NORMAL; + return ENTRYTYPE_LOCKED; +} + +static struct ocelot_pgid *ocelot_pgid_alloc(struct ocelot *ocelot, int index, + unsigned long ports) +{ + struct ocelot_pgid *pgid; + + pgid = kzalloc(sizeof(*pgid), GFP_KERNEL); + if (!pgid) + return ERR_PTR(-ENOMEM); + + pgid->ports = ports; + pgid->index = index; + refcount_set(&pgid->refcount, 1); + list_add_tail(&pgid->list, &ocelot->pgids); + + return pgid; +} + +static void ocelot_pgid_free(struct ocelot *ocelot, struct ocelot_pgid *pgid) +{ + if (!refcount_dec_and_test(&pgid->refcount)) + return; + + list_del(&pgid->list); + kfree(pgid); } -static int ocelot_mdb_get_pgid(struct ocelot *ocelot, - enum macaccess_entry_type entry_type) +static struct ocelot_pgid *ocelot_mdb_get_pgid(struct ocelot *ocelot, + const struct ocelot_multicast *mc) { - int pgid; + struct ocelot_pgid *pgid; + int index; /* According to VSC7514 datasheet 3.9.1.5 IPv4 Multicast Entries and * 3.9.1.6 IPv6 Multicast Entries, "Instead of a lookup in the * destination mask table (PGID), the destination set is programmed as * part of the entry MAC address.", and the DEST_IDX is set to 0. */ - if (entry_type == ENTRYTYPE_MACv4 || - entry_type == ENTRYTYPE_MACv6) - return 0; + if (mc->entry_type == ENTRYTYPE_MACv4 || + mc->entry_type == ENTRYTYPE_MACv6) + return ocelot_pgid_alloc(ocelot, 0, mc->ports); - for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) { - struct ocelot_multicast *mc; + list_for_each_entry(pgid, &ocelot->pgids, list) { + /* When searching for a nonreserved multicast PGID, ignore the + * dummy PGID of zero that we have for MACv4/MACv6 entries + */ + if (pgid->index && pgid->ports == mc->ports) { + refcount_inc(&pgid->refcount); + return pgid; + } + } + + /* Search for a free index in the nonreserved multicast PGID area */ + for_each_nonreserved_multicast_dest_pgid(ocelot, index) { bool used = false; - list_for_each_entry(mc, &ocelot->multicast, list) { - if (mc->pgid == pgid) { + list_for_each_entry(pgid, &ocelot->pgids, list) { + if (pgid->index == index) { used = true; break; } } if (!used) - return pgid; + return ocelot_pgid_alloc(ocelot, index, mc->ports); } - return -1; + return ERR_PTR(-ENOSPC); } static void ocelot_encode_ports_to_mdb(unsigned char *addr, - struct ocelot_multicast *mc, - enum macaccess_entry_type entry_type) + struct ocelot_multicast *mc) { - memcpy(addr, mc->addr, ETH_ALEN); + ether_addr_copy(addr, mc->addr); - if (entry_type == ENTRYTYPE_MACv4) { + if (mc->entry_type == ENTRYTYPE_MACv4) { addr[0] = 0; addr[1] = mc->ports >> 8; addr[2] = mc->ports & 0xff; - } else if (entry_type == ENTRYTYPE_MACv6) { + } else if (mc->entry_type == ENTRYTYPE_MACv6) { addr[0] = mc->ports >> 8; addr[1] = mc->ports & 0xff; } @@ -1012,80 +1060,78 @@ static void ocelot_encode_ports_to_mdb(unsigned char *addr, int ocelot_port_mdb_add(struct ocelot *ocelot, int port, const struct switchdev_obj_port_mdb *mdb) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; - enum macaccess_entry_type entry_type; unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; + struct ocelot_pgid *pgid; u16 vid = mdb->vid; - bool new = false; if (port == ocelot->npi) port = ocelot->num_phys_ports; - if (!vid) - vid = 
ocelot_port->pvid; - - entry_type = ocelot_classify_mdb(mdb->addr); - mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) { - int pgid = ocelot_mdb_get_pgid(ocelot, entry_type); - - if (pgid < 0) { - dev_err(ocelot->dev, - "No more PGIDs available for mdb %pM vid %d\n", - mdb->addr, vid); - return -ENOSPC; - } - + /* New entry */ mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; - memcpy(mc->addr, mdb->addr, ETH_ALEN); + mc->entry_type = ocelot_classify_mdb(mdb->addr); + ether_addr_copy(mc->addr, mdb->addr); mc->vid = vid; - mc->pgid = pgid; list_add_tail(&mc->list, &ocelot->multicast); - new = true; - } - - if (!new) { - ocelot_encode_ports_to_mdb(addr, mc, entry_type); + } else { + /* Existing entry. Clean up the current port mask from + * hardware now, because we'll be modifying it. + */ + ocelot_pgid_free(ocelot, mc->pgid); + ocelot_encode_ports_to_mdb(addr, mc); ocelot_mact_forget(ocelot, addr, vid); } mc->ports |= BIT(port); - ocelot_encode_ports_to_mdb(addr, mc, entry_type); - return ocelot_mact_learn(ocelot, mc->pgid, addr, vid, entry_type); + pgid = ocelot_mdb_get_pgid(ocelot, mc); + if (IS_ERR(pgid)) { + dev_err(ocelot->dev, + "Cannot allocate PGID for mdb %pM vid %d\n", + mc->addr, mc->vid); + devm_kfree(ocelot->dev, mc); + return PTR_ERR(pgid); + } + mc->pgid = pgid; + + ocelot_encode_ports_to_mdb(addr, mc); + + if (mc->entry_type != ENTRYTYPE_MACv4 && + mc->entry_type != ENTRYTYPE_MACv6) + ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID, + pgid->index); + + return ocelot_mact_learn(ocelot, pgid->index, addr, vid, + mc->entry_type); } EXPORT_SYMBOL(ocelot_port_mdb_add); int ocelot_port_mdb_del(struct ocelot *ocelot, int port, const struct switchdev_obj_port_mdb *mdb) { - struct ocelot_port *ocelot_port = ocelot->ports[port]; - enum macaccess_entry_type entry_type; unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; + struct ocelot_pgid *pgid; u16 vid = mdb->vid; if (port == ocelot->npi) port = ocelot->num_phys_ports; - if (!vid) - vid = ocelot_port->pvid; - mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) return -ENOENT; - entry_type = ocelot_classify_mdb(mdb->addr); - - ocelot_encode_ports_to_mdb(addr, mc, entry_type); + ocelot_encode_ports_to_mdb(addr, mc); ocelot_mact_forget(ocelot, addr, vid); + ocelot_pgid_free(ocelot, mc->pgid); mc->ports &= ~BIT(port); if (!mc->ports) { list_del(&mc->list); @@ -1093,9 +1139,21 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, return 0; } - ocelot_encode_ports_to_mdb(addr, mc, entry_type); + /* We have a PGID with fewer ports now */ + pgid = ocelot_mdb_get_pgid(ocelot, mc); + if (IS_ERR(pgid)) + return PTR_ERR(pgid); + mc->pgid = pgid; + + ocelot_encode_ports_to_mdb(addr, mc); + + if (mc->entry_type != ENTRYTYPE_MACv4 && + mc->entry_type != ENTRYTYPE_MACv6) + ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID, + pgid->index); - return ocelot_mact_learn(ocelot, mc->pgid, addr, vid, entry_type); + return ocelot_mact_learn(ocelot, pgid->index, addr, vid, + mc->entry_type); } EXPORT_SYMBOL(ocelot_port_mdb_del); @@ -1120,6 +1178,7 @@ EXPORT_SYMBOL(ocelot_port_bridge_join); int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge) { + struct ocelot_vlan pvid = {0}, native_vlan = {0}; struct switchdev_trans trans; int ret; @@ -1138,8 +1197,10 @@ int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, if (ret) return ret; - ocelot_port_set_pvid(ocelot, port, 0); - return ocelot_port_set_native_vlan(ocelot, port, 0); + 
ocelot_port_set_pvid(ocelot, port, pvid); + ocelot_port_set_native_vlan(ocelot, port, native_vlan); + + return 0; } EXPORT_SYMBOL(ocelot_port_bridge_leave); @@ -1453,6 +1514,7 @@ int ocelot_init(struct ocelot *ocelot) return -ENOMEM; INIT_LIST_HEAD(&ocelot->multicast); + INIT_LIST_HEAD(&ocelot->pgids); ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); ocelot_vcap_init(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index abb407dff93c..291d39d49c4e 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -41,14 +41,6 @@ struct frame_info { u32 timestamp; /* rew_val */ }; -struct ocelot_multicast { - struct list_head list; - unsigned char addr[ETH_ALEN]; - u16 vid; - u16 ports; - int pgid; -}; - struct ocelot_port_tc { bool block_shared; unsigned long offload_cnt; @@ -87,6 +79,29 @@ enum macaccess_entry_type { ENTRYTYPE_MACv6, }; +/* A (PGID) port mask structure, encoding the 2^ocelot->num_phys_ports + * possibilities of egress port masks for L2 multicast traffic. + * For a switch with 9 user ports, there are 512 possible port masks, but the + * hardware only has 46 individual PGIDs that it can forward multicast traffic + * to. So we need a structure that maps the limited PGID indices to the port + * destinations requested by the user for L2 multicast. + */ +struct ocelot_pgid { + unsigned long ports; + int index; + refcount_t refcount; + struct list_head list; +}; + +struct ocelot_multicast { + struct list_head list; + enum macaccess_entry_type entry_type; + unsigned char addr[ETH_ALEN]; + u16 vid; + u16 ports; + struct ocelot_pgid *pgid; +}; + int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data); int ocelot_mact_learn(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index b34da11acf65..c65ae6f75a16 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -206,6 +206,17 @@ static void ocelot_port_adjust_link(struct net_device *dev) ocelot_adjust_link(ocelot, port, dev->phydev); } +static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid, + bool untagged) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; + int port = priv->chip_port; + + return ocelot_vlan_prepare(ocelot, port, vid, pvid, untagged); +} + static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid, bool untagged) { @@ -409,7 +420,7 @@ static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr) struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; - return ocelot_mact_forget(ocelot, addr, ocelot_port->pvid); + return ocelot_mact_forget(ocelot, addr, ocelot_port->pvid_vlan.vid); } static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr) @@ -418,8 +429,8 @@ static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr) struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; - return ocelot_mact_learn(ocelot, PGID_CPU, addr, ocelot_port->pvid, - ENTRYTYPE_LOCKED); + return ocelot_mact_learn(ocelot, PGID_CPU, addr, + ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); } static void ocelot_set_rx_mode(struct net_device *dev) @@ -462,10 +473,10 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p) const struct sockaddr 
*addr = p; /* Learn the new net device MAC address in the mac table. */ - ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, ocelot_port->pvid, - ENTRYTYPE_LOCKED); + ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, + ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); /* Then forget the previous one. */ - ocelot_mact_forget(ocelot, dev->dev_addr, ocelot_port->pvid); + ocelot_mact_forget(ocelot, dev->dev_addr, ocelot_port->pvid_vlan.vid); ether_addr_copy(dev->dev_addr, addr->sa_data); return 0; @@ -812,9 +823,14 @@ static int ocelot_port_obj_add_vlan(struct net_device *dev, u16 vid; for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - ret = ocelot_vlan_vid_add(dev, vid, - vlan->flags & BRIDGE_VLAN_INFO_PVID, - vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); + bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + + if (switchdev_trans_ph_prepare(trans)) + ret = ocelot_vlan_vid_prepare(dev, vid, pvid, + untagged); + else + ret = ocelot_vlan_vid_add(dev, vid, pvid, untagged); if (ret) return ret; } @@ -1074,8 +1090,8 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN); dev->dev_addr[ETH_ALEN - 1] += port; - ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid, - ENTRYTYPE_LOCKED); + ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, + ocelot_port->pvid_vlan.vid, ENTRYTYPE_LOCKED); ocelot_init_port(ocelot, port); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index d13d92bf7447..8f2f091bce89 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -1106,7 +1106,7 @@ static int s2io_print_pci_mode(struct s2io_nic *nic) * '-1' on failure */ -static int init_tti(struct s2io_nic *nic, int link) +static int init_tti(struct s2io_nic *nic, int link, bool may_sleep) { struct XENA_dev_config __iomem *bar0 = nic->bar0; register u64 val64 = 0; @@ -1166,7 +1166,7 @@ static int init_tti(struct s2io_nic *nic, int link) if (wait_for_cmd_complete(&bar0->tti_command_mem, TTI_CMD_MEM_STROBE_NEW_CMD, - S2IO_BIT_RESET) != SUCCESS) + S2IO_BIT_RESET, may_sleep) != SUCCESS) return FAILURE; } @@ -1659,7 +1659,7 @@ static int init_nic(struct s2io_nic *nic) */ /* Initialize TTI */ - if (SUCCESS != init_tti(nic, nic->last_link_state)) + if (SUCCESS != init_tti(nic, nic->last_link_state, true)) return -ENODEV; /* RTI Initialization */ @@ -3331,7 +3331,7 @@ static void s2io_updt_xpak_counter(struct net_device *dev) */ static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit, - int bit_state) + int bit_state, bool may_sleep) { int ret = FAILURE, cnt = 0, delay = 1; u64 val64; @@ -3353,7 +3353,7 @@ static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit, } } - if (in_interrupt()) + if (!may_sleep) mdelay(delay); else msleep(delay); @@ -4877,8 +4877,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev) * Return value: * void. 
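 *
 * Editor's note: this function is part of the patch-wide retreat from
 * in_interrupt(); callers now pass an explicit may_sleep flag that is
 * threaded down to wait_for_cmd_complete(). The resulting delay logic,
 * as changed earlier in this patch:
 *
 *   if (!may_sleep)
 *           mdelay(delay);  // atomic caller: busy-wait
 *   else
 *           msleep(delay);  // process context: allow scheduling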
*/ - -static void s2io_set_multicast(struct net_device *dev) +static void s2io_set_multicast(struct net_device *dev, bool may_sleep) { int i, j, prev_cnt; struct netdev_hw_addr *ha; @@ -4903,7 +4902,7 @@ static void s2io_set_multicast(struct net_device *dev) /* Wait till command completes */ wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET); + S2IO_BIT_RESET, may_sleep); sp->m_cast_flg = 1; sp->all_multi_pos = config->max_mc_addr - 1; @@ -4920,7 +4919,7 @@ static void s2io_set_multicast(struct net_device *dev) /* Wait till command completes */ wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET); + S2IO_BIT_RESET, may_sleep); sp->m_cast_flg = 0; sp->all_multi_pos = 0; @@ -5000,7 +4999,7 @@ static void s2io_set_multicast(struct net_device *dev) /* Wait for command completes */ if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET)) { + S2IO_BIT_RESET, may_sleep)) { DBG_PRINT(ERR_DBG, "%s: Adding Multicasts failed\n", dev->name); @@ -5030,7 +5029,7 @@ static void s2io_set_multicast(struct net_device *dev) /* Wait for command completes */ if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET)) { + S2IO_BIT_RESET, may_sleep)) { DBG_PRINT(ERR_DBG, "%s: Adding Multicasts failed\n", dev->name); @@ -5041,6 +5040,12 @@ static void s2io_set_multicast(struct net_device *dev) } } +/* NDO wrapper for s2io_set_multicast */ +static void s2io_ndo_set_multicast(struct net_device *dev) +{ + s2io_set_multicast(dev, false); +} + /* read from CAM unicast & multicast addresses and store it in * def_mac_addr structure */ @@ -5127,7 +5132,7 @@ static int do_s2io_add_mac(struct s2io_nic *sp, u64 addr, int off) /* Wait till command completes */ if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET)) { + S2IO_BIT_RESET, true)) { DBG_PRINT(INFO_DBG, "do_s2io_add_mac failed\n"); return FAILURE; } @@ -5171,7 +5176,7 @@ static u64 do_s2io_read_unicast_mc(struct s2io_nic *sp, int offset) /* Wait till command completes */ if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET)) { + S2IO_BIT_RESET, true)) { DBG_PRINT(INFO_DBG, "do_s2io_read_unicast_mc failed\n"); return FAILURE; } @@ -7141,7 +7146,7 @@ static int s2io_card_up(struct s2io_nic *sp) } /* Setting its receive mode */ - s2io_set_multicast(dev); + s2io_set_multicast(dev, true); if (dev->features & NETIF_F_LRO) { /* Initialize max aggregatable pkts per session based on MTU */ @@ -7447,7 +7452,7 @@ static void s2io_link(struct s2io_nic *sp, int link) struct swStat *swstats = &sp->mac_control.stats_info->sw_stat; if (link != sp->last_link_state) { - init_tti(sp, link); + init_tti(sp, link, false); if (link == LINK_DOWN) { DBG_PRINT(ERR_DBG, "%s: Link down\n", dev->name); s2io_stop_all_tx_queue(sp); @@ -7604,7 +7609,7 @@ static int rts_ds_steer(struct s2io_nic *nic, u8 ds_codepoint, u8 ring) return wait_for_cmd_complete(&bar0->rts_ds_mem_ctrl, RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED, - S2IO_BIT_RESET); + S2IO_BIT_RESET, true); } static const struct net_device_ops s2io_netdev_ops = { @@ -7613,7 +7618,7 @@ static const struct net_device_ops s2io_netdev_ops = { .ndo_get_stats = s2io_get_stats, .ndo_start_xmit = s2io_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_set_rx_mode = s2io_set_multicast, + .ndo_set_rx_mode = s2io_ndo_set_multicast, 
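	/* Editor's note: .ndo_set_rx_mode is called with the netdev address
	 * list lock held in softirq context, so the wrapper above pins
	 * may_sleep=false:
	 *
	 *   static void s2io_ndo_set_multicast(struct net_device *dev)
	 *   {
	 *           s2io_set_multicast(dev, false);
	 *   }
	 *
	 * while process-context callers such as s2io_card_up() pass true.
	 */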
.ndo_do_ioctl = s2io_ioctl, .ndo_set_mac_address = s2io_set_mac_addr, .ndo_change_mtu = s2io_change_mtu, @@ -7929,7 +7934,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) writeq(val64, &bar0->rmac_addr_cmd_mem); wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem, RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING, - S2IO_BIT_RESET); + S2IO_BIT_RESET, true); tmp64 = readq(&bar0->rmac_addr_data0_mem); mac_down = (u32)tmp64; mac_up = (u32) (tmp64 >> 32); diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 6fa3159a977f..5a6032212c19 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1066,7 +1066,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue); -static void s2io_set_multicast(struct net_device *dev); +static void s2io_set_multicast(struct net_device *dev, bool may_sleep); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); static void s2io_reset(struct s2io_nic * sp); @@ -1087,7 +1087,7 @@ static int s2io_set_swapper(struct s2io_nic * sp); static void s2io_card_down(struct s2io_nic *nic); static int s2io_card_up(struct s2io_nic *nic); static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit, - int bit_state); + int bit_state, bool may_sleep); static int s2io_add_isr(struct s2io_nic * sp); static void s2io_rem_isr(struct s2io_nic * sp); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index f5d48d7c4ce2..da48dd85770c 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -1121,7 +1121,7 @@ static void __vxge_hw_blockpool_destroy(struct __vxge_hw_blockpool *blockpool) list_for_each_safe(p, n, &blockpool->free_entry_list) { list_del(&((struct __vxge_hw_blockpool_entry *)p)->item); - kfree((void *)p); + kfree(p); } return; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 76c51da5b66f..9b32ae46011c 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -295,8 +295,8 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, ipv6 = true; break; } -#endif fallthrough; +#endif case AF_INET: req_sz = sizeof(struct nfp_crypto_req_add_v4); ipv6 = false; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 7ff2ccbd43b0..e672614d2906 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -724,10 +724,8 @@ static int nfp_pci_probe(struct pci_dev *pdev, } pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev); - if (IS_ERR_OR_NULL(pf->cpp)) { + if (IS_ERR(pf->cpp)) { err = PTR_ERR(pf->cpp); - if (err >= 0) - err = -ENOMEM; goto err_disable_msix; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 2fc10a36afa4..8724d6a9ed02 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1043,8 +1043,7 @@ static int using_multi_irqs(struct net_device *dev) struct fe_priv *np = get_nvpriv(dev); if (!(np->msi_flags & NV_MSI_X_ENABLED) || - ((np->msi_flags & NV_MSI_X_ENABLED) && - ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) + 
((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1)) return 0; else return 1; @@ -1666,11 +1665,7 @@ static void nv_update_stats(struct net_device *dev) struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - /* If it happens that this is run in top-half context, then - * replace the spin_lock of hwstats_lock with - * spin_lock_irqsave() in calling functions. */ - WARN_ONCE(in_irq(), "forcedeth: estats spin_lock(_bh) from top-half"); - assert_spin_locked(&np->hwstats_lock); + lockdep_assert_held(&np->hwstats_lock); /* query hardware */ np->estats.tx_bytes += readl(base + NvRegTxCnt); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index ade8c44c01cd..b9e32e4478a8 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2412,7 +2412,6 @@ static int __pch_gbe_suspend(struct pci_dev *pdev) struct pch_gbe_adapter *adapter = netdev_priv(netdev); struct pch_gbe_hw *hw = &adapter->hw; u32 wufc = adapter->wake_up_evt; - int retval = 0; netif_device_detach(netdev); if (netif_running(netdev)) @@ -2432,7 +2431,7 @@ static int __pch_gbe_suspend(struct pci_dev *pdev) pch_gbe_mac_set_wol_event(hw, wufc); pci_disable_device(pdev); } - return retval; + return 0; } #ifdef CONFIG_PM diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index dc5fbc2704f3..318db5f77fdb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -25,7 +25,7 @@ static void ionic_watchdog_cb(struct timer_list *t) hb = ionic_heartbeat_check(ionic); if (hb >= 0) - ionic_link_status_check_request(ionic->lif, false); + ionic_link_status_check_request(ionic->lif, CAN_NOT_SLEEP); } void ionic_init_devinfo(struct ionic *ionic) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 6c243b17312c..690768ff0143 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -12,8 +12,10 @@ #define IONIC_MAX_TX_DESC 8192 #define IONIC_MAX_RX_DESC 16384 -#define IONIC_MIN_TXRX_DESC 16 +#define IONIC_MIN_TXRX_DESC 64 #define IONIC_DEF_TXRX_DESC 4096 +#define IONIC_RX_FILL_THRESHOLD 16 +#define IONIC_RX_FILL_DIV 8 #define IONIC_LIFS_MAX 1024 #define IONIC_WATCHDOG_SECS 5 #define IONIC_ITR_COAL_USEC_DEFAULT 64 diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index a12df3946a07..deabd813e3fe 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -123,6 +123,12 @@ static void ionic_link_status_check(struct ionic_lif *lif) link_up = link_status == IONIC_PORT_OPER_STATUS_UP; if (link_up) { + if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + mutex_lock(&lif->queue_lock); + ionic_start_queues(lif); + mutex_unlock(&lif->queue_lock); + } + if (!netif_carrier_ok(netdev)) { u32 link_speed; @@ -132,12 +138,6 @@ static void ionic_link_status_check(struct ionic_lif *lif) link_speed / 1000); netif_carrier_on(netdev); } - - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { - mutex_lock(&lif->queue_lock); - ionic_start_queues(lif); - mutex_unlock(&lif->queue_lock); - } } else { if (netif_carrier_ok(netdev)) { netdev_info(netdev, "Link down\n"); @@ -1074,22 +1074,22 @@ static int ionic_lif_addr(struct ionic_lif *lif, 
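	/* Editor's note: the boolean arguments at these call sites are being
	 * replaced by the self-documenting macros this patch adds to
	 * ionic_lif.h (ADD_ADDR/DEL_ADDR, CAN_SLEEP/CAN_NOT_SLEEP), so that
	 * a call reads
	 *
	 *   ionic_lif_addr(lif, addr, ADD_ADDR, CAN_SLEEP);
	 *
	 * instead of the opaque (lif, addr, true, true).
	 */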
const u8 *addr, bool add, static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, true, true); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP); } static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, true, false); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, false, true); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP); } static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, false, false); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP); } static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) @@ -1129,38 +1129,10 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) lif->rx_mode = rx_mode; } -static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode, - bool from_ndo) -{ - struct ionic_deferred_work *work; - - if (from_ndo) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "%s OOM\n", __func__); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); - } -} - -static void ionic_dev_uc_sync(struct net_device *netdev, bool from_ndo) -{ - if (from_ndo) - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - else - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - -} - -static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) +static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_deferred_work *work; unsigned int nfilters; unsigned int rx_mode; @@ -1177,7 +1149,10 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) * we remove our overflow flag and check the netdev flags * to see if we can disable NIC PROMISC */ - ionic_dev_uc_sync(netdev, from_ndo); + if (can_sleep) + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + else + __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if (netdev_uc_count(netdev) + 1 > nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; @@ -1189,7 +1164,10 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) } /* same for multicast */ - ionic_dev_uc_sync(netdev, from_ndo); + if (can_sleep) + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); + else + __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); if (netdev_mc_count(netdev) > nfilters) { rx_mode |= IONIC_RX_MODE_F_ALLMULTI; @@ -1200,13 +1178,26 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool from_ndo) rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; } - if (lif->rx_mode != rx_mode) - _ionic_lif_rx_mode(lif, rx_mode, from_ndo); + if (lif->rx_mode != rx_mode) { + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "%s OOM\n", __func__); + return; + } + work->type = IONIC_DW_TYPE_RX_MODE; + work->rx_mode = rx_mode; + netdev_dbg(lif->netdev, "deferred: 
rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif, rx_mode); + } + } } static void ionic_ndo_set_rx_mode(struct net_device *netdev) { - ionic_set_rx_mode(netdev, true); + ionic_set_rx_mode(netdev, CAN_NOT_SLEEP); } static __le64 ionic_netdev_features_to_nic(netdev_features_t features) @@ -1625,6 +1616,24 @@ static void ionic_lif_rss_deinit(struct ionic_lif *lif) ionic_lif_rss_config(lif, 0x0, NULL, NULL); } +static void ionic_lif_quiesce(struct ionic_lif *lif) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_STATE, + .state = IONIC_LIF_QUIESCE, + }, + }; + int err; + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_err(lif->netdev, "lif quiesce failed %d\n", err); +} + static void ionic_txrx_disable(struct ionic_lif *lif) { unsigned int i; @@ -1639,6 +1648,8 @@ static void ionic_txrx_disable(struct ionic_lif *lif) for (i = 0; i < lif->nxqs; i++) err = ionic_qcq_disable(lif->rxqcqs[i], (err != -ETIMEDOUT)); } + + ionic_lif_quiesce(lif); } static void ionic_txrx_deinit(struct ionic_lif *lif) @@ -1773,7 +1784,7 @@ static int ionic_txrx_init(struct ionic_lif *lif) if (lif->netdev->features & NETIF_F_RXHASH) ionic_lif_rss_init(lif); - ionic_set_rx_mode(lif->netdev, false); + ionic_set_rx_mode(lif->netdev, CAN_SLEEP); return 0; @@ -2781,7 +2792,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, true, true); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -2798,7 +2809,7 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, true, true); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); return 0; } @@ -2959,6 +2970,8 @@ int ionic_lif_register(struct ionic_lif *lif) dev_err(lif->ionic->dev, "Cannot register net device, aborting\n"); return err; } + + ionic_link_status_check_request(lif, true); lif->registered = true; ionic_lif_set_netdev_info(lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 0224dfd24b8a..9bed42719ae5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -13,6 +13,12 @@ #define IONIC_MAX_NUM_NAPI_CNTR (NAPI_POLL_WEIGHT + 1) #define IONIC_MAX_NUM_SG_CNTR (IONIC_TX_MAX_SG_ELEMS + 1) + +#define ADD_ADDR true +#define DEL_ADDR false +#define CAN_SLEEP true +#define CAN_NOT_SLEEP false + #define IONIC_RX_COPYBREAK_DEFAULT 256 #define IONIC_TX_BUDGET_DEFAULT 256 diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index b3d2250c77d0..9156c9825a16 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -392,11 +392,6 @@ void ionic_rx_fill(struct ionic_queue *q) q->dbval | q->head_idx); } -static void ionic_rx_fill_cb(void *arg) -{ - ionic_rx_fill(arg); -} - void ionic_rx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; @@ -480,6 +475,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) struct ionic_cq *cq = 
napi_to_cq(napi); struct ionic_dev *idev; struct ionic_lif *lif; + u16 rx_fill_threshold; u32 work_done = 0; u32 flags = 0; @@ -489,7 +485,9 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) work_done = ionic_cq_service(cq, budget, ionic_rx_service, NULL, NULL); - if (work_done) + rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, + cq->num_descs / IONIC_RX_FILL_DIV); + if (work_done && ionic_q_space_avail(cq->bound_q) >= rx_fill_threshold) ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -518,6 +516,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) struct ionic_dev *idev; struct ionic_lif *lif; struct ionic_cq *txcq; + u16 rx_fill_threshold; u32 rx_work_done = 0; u32 tx_work_done = 0; u32 flags = 0; @@ -531,8 +530,11 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) rx_work_done = ionic_cq_service(rxcq, budget, ionic_rx_service, NULL, NULL); - if (rx_work_done) - ionic_rx_fill_cb(rxcq->bound_q); + + rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, + rxcq->num_descs / IONIC_RX_FILL_DIV); + if (rx_work_done && ionic_q_space_avail(rxcq->bound_q) >= rx_fill_threshold) + ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { ionic_dim_update(qcq); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 85d9c3e30c69..1acf7128c146 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -67,7 +67,7 @@ #define R8169_REGS_SIZE 256 #define R8169_RX_BUF_SIZE (SZ_16K - 1) -#define NUM_TX_DESC 64 /* Number of Tx descriptor registers */ +#define NUM_TX_DESC 256 /* Number of Tx descriptor registers */ #define NUM_RX_DESC 256U /* Number of Rx descriptor registers */ #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) @@ -584,12 +584,6 @@ enum rtl_flag { RTL_FLAG_MAX }; -struct rtl8169_stats { - u64 packets; - u64 bytes; - struct u64_stats_sync syncp; -}; - struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; @@ -600,8 +594,6 @@ struct rtl8169_private { u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. 
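 * (editor's note: cur_tx and dirty_tx become the lockless producer and
 * consumer indices of this patch; the xmit path publishes cur_tx with
 * WRITE_ONCE() and the completion path samples it with READ_ONCE(), so
 *
 *   slots_avail = READ_ONCE(dirty_tx) + NUM_TX_DESC - READ_ONCE(cur_tx);
 *
 * stays safe without a lock even though both indices move concurrently)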
*/ u32 dirty_tx; - struct rtl8169_stats rx_stats; - struct rtl8169_stats tx_stats; struct TxDesc *TxDescArray; /* 256-aligned Tx descriptor ring */ struct RxDesc *RxDescArray; /* 256-aligned Rx descriptor ring */ dma_addr_t TxPhyAddr; @@ -700,27 +692,6 @@ static bool rtl_supports_eee(struct rtl8169_private *tp) tp->mac_version != RTL_GIGA_MAC_VER_39; } -static void rtl_get_priv_stats(struct rtl8169_stats *stats, - u64 *pkts, u64 *bytes) -{ - unsigned int start; - - do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - *pkts = stats->packets; - *bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); -} - -static void rtl_inc_priv_stats(struct rtl8169_stats *stats, - u64 pkts, u64 bytes) -{ - u64_stats_update_begin(&stats->syncp); - stats->packets += pkts; - stats->bytes += bytes; - u64_stats_update_end(&stats->syncp); -} - static void rtl_read_mac_from_reg(struct rtl8169_private *tp, u8 *mac, int reg) { int i; @@ -4170,13 +4141,13 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, return true; } -static bool rtl_tx_slots_avail(struct rtl8169_private *tp, - unsigned int nr_frags) +static bool rtl_tx_slots_avail(struct rtl8169_private *tp) { - unsigned int slots_avail = tp->dirty_tx + NUM_TX_DESC - tp->cur_tx; + unsigned int slots_avail = READ_ONCE(tp->dirty_tx) + NUM_TX_DESC + - READ_ONCE(tp->cur_tx); /* A skbuff with nr_frags needs nr_frags+1 entries in the tx queue */ - return slots_avail > nr_frags; + return slots_avail > MAX_SKB_FRAGS; } /* Versions RTL8102e and from RTL8168c onwards support csum_v2 */ @@ -4209,17 +4180,12 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, bool stop_queue, door_bell; u32 opts[2]; - txd_first = tp->TxDescArray + entry; - - if (unlikely(!rtl_tx_slots_avail(tp, frags))) { + if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); goto err_stop_0; } - if (unlikely(le32_to_cpu(txd_first->opts1) & DescOwn)) - goto err_stop_0; - opts[1] = rtl8169_tx_vlan_tag(skb); opts[0] = 0; @@ -4232,6 +4198,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, entry, false))) goto err_dma_0; + txd_first = tp->TxDescArray + entry; + if (frags) { if (rtl8169_xmit_frags(tp, skb, opts, entry)) goto err_dma_1; @@ -4254,22 +4222,15 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, /* rtl_tx needs to see descriptor changes before updated tp->cur_tx */ smp_wmb(); - tp->cur_tx += frags + 1; + WRITE_ONCE(tp->cur_tx, tp->cur_tx + frags + 1); - stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS); + stop_queue = !rtl_tx_slots_avail(tp); if (unlikely(stop_queue)) { /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must * not miss a ring update when it notices a stopped queue. */ smp_wmb(); netif_stop_queue(dev); - door_bell = true; - } - - if (door_bell) - rtl8169_doorbell(tp); - - if (unlikely(stop_queue)) { /* Sync with rtl_tx: * - publish queue status and cur_tx ring index (write barrier) * - refresh dirty_tx ring index (read barrier). @@ -4277,11 +4238,15 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, * status and forget to wake up queue, a racing rtl_tx thread * can't. 
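 *
 * Editor's note: netif_stop_queue() ends in an atomic set_bit(), which is
 * what lets the full smp_mb() here be relaxed to smp_mb__after_atomic();
 * the completion side keeps the pairing by folding the dirty_tx store and
 * barrier into a single smp_store_mb(tp->dirty_tx, dirty_tx).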
*/ - smp_mb(); - if (rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) + smp_mb__after_atomic(); + if (rtl_tx_slots_avail(tp)) netif_start_queue(dev); + door_bell = true; } + if (door_bell) + rtl8169_doorbell(tp); + return NETDEV_TX_OK; err_dma_1: @@ -4390,12 +4355,11 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, int budget) { - unsigned int dirty_tx, tx_left, bytes_compl = 0, pkts_compl = 0; + unsigned int dirty_tx, bytes_compl = 0, pkts_compl = 0; dirty_tx = tp->dirty_tx; - smp_rmb(); - for (tx_left = tp->cur_tx - dirty_tx; tx_left > 0; tx_left--) { + while (READ_ONCE(tp->cur_tx) != dirty_tx) { unsigned int entry = dirty_tx % NUM_TX_DESC; struct sk_buff *skb = tp->tx_skb[entry].skb; u32 status; @@ -4416,10 +4380,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, if (tp->dirty_tx != dirty_tx) { netdev_completed_queue(dev, pkts_compl, bytes_compl); + dev_sw_netstats_tx_add(dev, pkts_compl, bytes_compl); - rtl_inc_priv_stats(&tp->tx_stats, pkts_compl, bytes_compl); - - tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: * - publish dirty_tx ring index (write barrier) * - refresh cur_tx ring index and queue status (read barrier) @@ -4427,11 +4389,9 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp, * a racing xmit thread can only have a right view of the * ring status. */ - smp_mb(); - if (netif_queue_stopped(dev) && - rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) { + smp_store_mb(tp->dirty_tx, dirty_tx); + if (netif_queue_stopped(dev) && rtl_tx_slots_avail(tp)) netif_wake_queue(dev); - } /* * 8168 hack: TxPoll requests are lost when the Tx packets are * too close. Let's kick an extra TxPoll request when a burst @@ -4539,7 +4499,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget napi_gro_receive(&tp->napi, skb); - rtl_inc_priv_stats(&tp->rx_stats, 1, pkt_size); + dev_sw_netstats_rx_add(dev, pkt_size); release_descriptor: rtl8169_mark_to_asic(desc); } @@ -4721,6 +4681,7 @@ static int rtl_open(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); struct pci_dev *pdev = tp->pci_dev; + unsigned long irqflags; int retval = -ENOMEM; pm_runtime_get_sync(&pdev->dev); @@ -4732,7 +4693,7 @@ static int rtl_open(struct net_device *dev) tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES, &tp->TxPhyAddr, GFP_KERNEL); if (!tp->TxDescArray) - goto err_pm_runtime_put; + goto out; tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES, &tp->RxPhyAddr, GFP_KERNEL); @@ -4745,8 +4706,9 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); + irqflags = pci_dev_msi_enabled(pdev) ? 
IRQF_NO_THREAD : IRQF_SHARED; retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt, - IRQF_SHARED, dev->name, tp); + irqflags, dev->name, tp); if (retval < 0) goto err_release_fw_2; @@ -4757,9 +4719,9 @@ static int rtl_open(struct net_device *dev) rtl8169_up(tp); rtl8169_init_counter_offsets(tp); netif_start_queue(dev); - - pm_runtime_put_sync(&pdev->dev); out: + pm_runtime_put_sync(&pdev->dev); + return retval; err_free_irq: @@ -4775,8 +4737,6 @@ err_free_tx_0: dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, tp->TxPhyAddr); tp->TxDescArray = NULL; -err_pm_runtime_put: - pm_runtime_put_noidle(&pdev->dev); goto out; } @@ -4790,9 +4750,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) pm_runtime_get_noresume(&pdev->dev); netdev_stats_to_stats64(stats, &dev->stats); - - rtl_get_priv_stats(&tp->rx_stats, &stats->rx_packets, &stats->rx_bytes); - rtl_get_priv_stats(&tp->tx_stats, &stats->tx_packets, &stats->tx_bytes); + dev_fetch_sw_netstats(stats, dev->tstats); /* * Fetch additional counter values missing in stats collected by driver @@ -5263,6 +5221,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; + dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, + struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + /* Get the *optional* external "ether_clk" used on some boards */ rc = rtl_get_ether_clk(tp); if (rc) @@ -5340,8 +5303,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } INIT_WORK(&tp->wk.work, rtl_task); - u64_stats_init(&tp->rx_stats.syncp); - u64_stats_init(&tp->tx_stats.syncp); rtl_init_mac_address(tp); diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index 2590cab53e54..1f981dfe4bdc 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -285,7 +285,13 @@ typedef union efx_oword { field10, value10, \ field11, value11, \ field12, value12, \ - field13, value13) \ + field13, value13, \ + field14, value14, \ + field15, value15, \ + field16, value16, \ + field17, value17, \ + field18, value18, \ + field19, value19) \ (EFX_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) | \ EFX_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) | \ EFX_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) | \ @@ -298,7 +304,13 @@ typedef union efx_oword { EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)) | \ EFX_INSERT_FIELD_NATIVE((min), (max), field11, (value11)) | \ EFX_INSERT_FIELD_NATIVE((min), (max), field12, (value12)) | \ - EFX_INSERT_FIELD_NATIVE((min), (max), field13, (value13))) + EFX_INSERT_FIELD_NATIVE((min), (max), field13, (value13)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field14, (value14)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field15, (value15)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field16, (value16)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field17, (value17)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field18, (value18)) | \ + EFX_INSERT_FIELD_NATIVE((min), (max), field19, (value19))) #define EFX_INSERT_FIELDS64(...) \ cpu_to_le64(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__)) @@ -340,7 +352,19 @@ typedef union efx_oword { #endif /* Populate an octword field with various numbers of arguments */ -#define EFX_POPULATE_OWORD_13 EFX_POPULATE_OWORD +#define EFX_POPULATE_OWORD_19 EFX_POPULATE_OWORD +#define EFX_POPULATE_OWORD_18(oword, ...) 
\ + EFX_POPULATE_OWORD_19(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_17(oword, ...) \ + EFX_POPULATE_OWORD_18(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_16(oword, ...) \ + EFX_POPULATE_OWORD_17(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_15(oword, ...) \ + EFX_POPULATE_OWORD_16(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_14(oword, ...) \ + EFX_POPULATE_OWORD_15(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_OWORD_13(oword, ...) \ + EFX_POPULATE_OWORD_14(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_OWORD_12(oword, ...) \ EFX_POPULATE_OWORD_13(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_OWORD_11(oword, ...) \ @@ -375,7 +399,19 @@ typedef union efx_oword { EFX_DWORD_3, 0xffffffff) /* Populate a quadword field with various numbers of arguments */ -#define EFX_POPULATE_QWORD_13 EFX_POPULATE_QWORD +#define EFX_POPULATE_QWORD_19 EFX_POPULATE_QWORD +#define EFX_POPULATE_QWORD_18(qword, ...) \ + EFX_POPULATE_QWORD_19(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_17(qword, ...) \ + EFX_POPULATE_QWORD_18(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_16(qword, ...) \ + EFX_POPULATE_QWORD_17(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_15(qword, ...) \ + EFX_POPULATE_QWORD_16(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_14(qword, ...) \ + EFX_POPULATE_QWORD_15(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_QWORD_13(qword, ...) \ + EFX_POPULATE_QWORD_14(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_QWORD_12(qword, ...) \ EFX_POPULATE_QWORD_13(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_QWORD_11(qword, ...) \ @@ -408,7 +444,19 @@ typedef union efx_oword { EFX_DWORD_1, 0xffffffff) /* Populate a dword field with various numbers of arguments */ -#define EFX_POPULATE_DWORD_13 EFX_POPULATE_DWORD +#define EFX_POPULATE_DWORD_19 EFX_POPULATE_DWORD +#define EFX_POPULATE_DWORD_18(dword, ...) \ + EFX_POPULATE_DWORD_19(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_17(dword, ...) \ + EFX_POPULATE_DWORD_18(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_16(dword, ...) \ + EFX_POPULATE_DWORD_17(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_15(dword, ...) \ + EFX_POPULATE_DWORD_16(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_14(dword, ...) \ + EFX_POPULATE_DWORD_15(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) +#define EFX_POPULATE_DWORD_13(dword, ...) \ + EFX_POPULATE_DWORD_14(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_DWORD_12(dword, ...) \ EFX_POPULATE_DWORD_13(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__) #define EFX_POPULATE_DWORD_11(dword, ...) 
\ diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 3148fe770356..518268ce2064 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -182,8 +182,20 @@ static int efx_ef100_init_datapath_caps(struct efx_nic *efx) if (rc) return rc; - if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) - efx->net_dev->features |= NETIF_F_TSO | NETIF_F_TSO6; + if (efx_ef100_has_cap(nic_data->datapath_caps2, TX_TSO_V3)) { + struct net_device *net_dev = efx->net_dev; + netdev_features_t tso = NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM; + + net_dev->features |= tso; + net_dev->hw_features |= tso; + net_dev->hw_enc_features |= tso; + /* EF100 HW can only offload outer checksums if they are UDP, + * so for GRE_CSUM we have to use GSO_PARTIAL. + */ + net_dev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; + } efx->num_mac_stats = MCDI_WORD(outbuf, GET_CAPABILITIES_V4_OUT_MAC_STATS_NUM_STATS); netif_dbg(efx, probe, efx->net_dev, @@ -686,7 +698,7 @@ static unsigned int ef100_check_caps(const struct efx_nic *efx, #define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | \ NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_NTUPLE | \ NETIF_F_RXHASH | NETIF_F_RXFCS | NETIF_F_TSO_ECN | NETIF_F_RXALL | \ - NETIF_F_TSO_MANGLEID | NETIF_F_HW_VLAN_CTAG_TX) + NETIF_F_HW_VLAN_CTAG_TX) const struct efx_nic_type ef100_pf_nic_type = { .revision = EFX_REV_EF100, @@ -1101,6 +1113,9 @@ static int ef100_probe_main(struct efx_nic *efx) nic_data->efx = efx; net_dev->features |= efx->type->offload_features; net_dev->hw_features |= efx->type->offload_features; + net_dev->hw_enc_features |= efx->type->offload_features; + net_dev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_SG | + NETIF_F_HIGHDMA | NETIF_F_ALL_TSO; /* Populate design-parameter defaults */ nic_data->tso_max_hdr_len = ESE_EF100_DP_GZ_TSO_MAX_HDR_LEN_DEFAULT; diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index a90e5a9d2a37..26ef51d6b542 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -54,8 +54,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct efx_nic *efx = tx_queue->efx; struct ef100_nic_data *nic_data; struct efx_tx_buffer *buffer; - struct tcphdr *tcphdr; - struct iphdr *iphdr; size_t header_len; u32 mss; @@ -98,20 +96,6 @@ static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) buffer->unmap_len = 0; buffer->skb = skb; ++tx_queue->insert_count; - - /* Adjust the TCP checksum to exclude the total length, since we set - * ED_INNER_IP_LEN in the descriptor. - */ - tcphdr = tcp_hdr(skb); - if (skb_is_gso_v6(skb)) { - tcphdr->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - 0, IPPROTO_TCP, 0); - } else { - iphdr = ip_hdr(skb); - tcphdr->check = ~csum_tcpudp_magic(iphdr->saddr, iphdr->daddr, - 0, IPPROTO_TCP, 0); - } return true; } @@ -203,34 +187,66 @@ static void ef100_make_tso_desc(struct efx_nic *efx, struct efx_tx_buffer *buffer, efx_oword_t *txd, unsigned int segment_count) { - u32 mangleid = (efx->net_dev->features & NETIF_F_TSO_MANGLEID) || - skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID ? - ESE_GZ_TX_DESC_IP4_ID_NO_OP : - ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; - u16 vlan_enable = efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX ? 
- skb_vlan_tag_present(skb) : 0; + bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; unsigned int len, ip_offset, tcp_offset, payload_segs; + u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + unsigned int outer_ip_offset, outer_l4_offset; u16 vlan_tci = skb_vlan_tag_get(skb); u32 mss = skb_shinfo(skb)->gso_size; + bool encap = skb->encapsulation; + bool udp_encap = false; + u16 vlan_enable = 0; + struct tcphdr *tcp; + bool outer_csum; + u32 paylen; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) + mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) + vlan_enable = skb_vlan_tag_present(skb); len = skb->len - buffer->len; /* We use 1 for the TSO descriptor and 1 for the header */ payload_segs = segment_count - 2; - ip_offset = skb_network_offset(skb); - tcp_offset = skb_transport_offset(skb); + if (encap) { + outer_ip_offset = skb_network_offset(skb); + outer_l4_offset = skb_transport_offset(skb); + ip_offset = skb_inner_network_offset(skb); + tcp_offset = skb_inner_transport_offset(skb); + if (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)) + udp_encap = true; + } else { + ip_offset = skb_network_offset(skb); + tcp_offset = skb_transport_offset(skb); + outer_ip_offset = outer_l4_offset = 0; + } + outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM; + + /* subtract TCP payload length from inner checksum */ + tcp = (void *)skb->data + tcp_offset; + paylen = skb->len - tcp_offset; + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); - EFX_POPULATE_OWORD_13(*txd, + EFX_POPULATE_OWORD_19(*txd, ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO, ESF_GZ_TX_TSO_MSS, mss, ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1, ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs, ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1, ESF_GZ_TX_TSO_PAYLOAD_LEN, len, + ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum, ESF_GZ_TX_TSO_CSO_INNER_L4, 1, ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1, ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1, ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid, ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, + ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1, + ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, + ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, + ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid : + ESE_GZ_TX_DESC_IP4_ID_NO_OP, ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci ); diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index b77e427e6729..c52a38df0e0d 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -8,7 +8,7 @@ config NET_VENDOR_SMSC default y depends on ARM || ARM64 || ATARI_ETHERNAT || COLDFIRE || \ ISA || MAC || MIPS || NIOS2 || PCI || \ - PCMCIA || SUPERH || XTENSA || H8300 + PCMCIA || SUPERH || XTENSA || H8300 || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. @@ -39,7 +39,7 @@ config SMC91X select MII depends on !OF || GPIOLIB depends on ARM || ARM64 || ATARI_ETHERNAT || COLDFIRE || \ - MIPS || NIOS2 || SUPERH || XTENSA || H8300 + MIPS || NIOS2 || SUPERH || XTENSA || H8300 || COMPILE_TEST help This is a driver for SMC's 91x series of Ethernet chipsets, including the SMC91C94 and the SMC91C111. 
Say Y if you want it @@ -78,7 +78,7 @@ config SMC911X tristate "SMSC LAN911[5678] support" select CRC32 select MII - depends on (ARM || SUPERH) + depends on (ARM || SUPERH || COMPILE_TEST) help This is a driver for SMSC's LAN911x series of Ethernet chipsets including the new LAN9115, LAN9116, LAN9117, and LAN9118. diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 01069dfaf75c..22cdbf12c823 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -102,7 +102,10 @@ MODULE_ALIAS("platform:smc911x"); #define PRINTK(dev, args...) netdev_info(dev, args) #else -#define DBG(n, dev, args...) do { } while (0) +#define DBG(n, dev, args...) \ + while (0) { \ + netdev_dbg(dev, args); \ + } #define PRINTK(dev, args...) netdev_dbg(dev, args) #endif @@ -462,9 +465,9 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) TX_CMD_A_INT_FIRST_SEG_ | TX_CMD_A_INT_LAST_SEG_ | skb->len; #else - buf = (char*)((u32)skb->data & ~0x3); - len = (skb->len + 3 + ((u32)skb->data & 3)) & ~0x3; - cmdA = (((u32)skb->data & 0x3) << 16) | + buf = (char *)((uintptr_t)skb->data & ~0x3); + len = (skb->len + 3 + ((uintptr_t)skb->data & 3)) & ~0x3; + cmdA = (((uintptr_t)skb->data & 0x3) << 16) | TX_CMD_A_INT_FIRST_SEG_ | TX_CMD_A_INT_LAST_SEG_ | skb->len; #endif @@ -879,7 +882,7 @@ static void smc911x_phy_configure(struct work_struct *work) int phyaddr = lp->mii.phy_id; int my_phy_caps; /* My PHY capabilities */ int my_ad_caps; /* My Advertised capabilities */ - int status; + int status __always_unused; unsigned long flags; DBG(SMC_DEBUG_FUNC, dev, "--> %s()\n", __func__); @@ -973,7 +976,7 @@ static void smc911x_phy_interrupt(struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); int phyaddr = lp->mii.phy_id; - int status; + int status __always_unused; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); @@ -2044,8 +2047,6 @@ static int smc911x_drv_probe(struct platform_device *pdev) void __iomem *addr; int ret; - /* ndev is not valid yet, so avoid passing it in. 
*/ - DBG(SMC_DEBUG_FUNC, "--> %s\n", __func__); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { ret = -ENODEV; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index f6b73afd1879..742a1f7a838c 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -703,7 +703,8 @@ static void smc_tx(struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - unsigned int saved_packet, packet_no, tx_status, pkt_len; + unsigned int saved_packet, packet_no, tx_status; + unsigned int pkt_len __always_unused; DBG(3, dev, "%s\n", __func__); @@ -2191,6 +2192,12 @@ MODULE_DEVICE_TABLE(of, smc91x_match); /** * of_try_set_control_gpio - configure a gpio if it exists + * @dev: net device + * @desc: where to store the GPIO descriptor, if it exists + * @name: name of the GPIO in DT + * @index: index of the GPIO in DT + * @value: set the GPIO to this value + * @nsdelay: delay before setting the GPIO */ static int try_toggle_control_gpio(struct device *dev, struct gpio_desc **desc, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index df7de50497a0..6f271c46368d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -402,6 +402,7 @@ struct dma_features { /* Default LPI timers */ #define STMMAC_DEFAULT_LIT_LS 0x3E8 #define STMMAC_DEFAULT_TWT_LS 0x1E +#define STMMAC_ET_MAX 0xFFFFF #define STMMAC_CHAIN_MODE 0x1 #define STMMAC_RING_MODE 0x2 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 2342d497348e..27254b27d7ed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -119,23 +119,23 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, return 0; } -static void *dwc_qos_probe(struct platform_device *pdev, - struct plat_stmmacenet_data *plat_dat, - struct stmmac_resources *stmmac_res) +static int dwc_qos_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat_dat, + struct stmmac_resources *stmmac_res) { int err; plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk"); if (IS_ERR(plat_dat->stmmac_clk)) { dev_err(&pdev->dev, "apb_pclk clock not found.\n"); - return ERR_CAST(plat_dat->stmmac_clk); + return PTR_ERR(plat_dat->stmmac_clk); } err = clk_prepare_enable(plat_dat->stmmac_clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable apb_pclk clock: %d\n", err); - return ERR_PTR(err); + return err; } plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk"); @@ -152,11 +152,11 @@ static void *dwc_qos_probe(struct platform_device *pdev, goto disable; } - return NULL; + return 0; disable: clk_disable_unprepare(plat_dat->stmmac_clk); - return ERR_PTR(err); + return err; } static int dwc_qos_remove(struct platform_device *pdev) @@ -267,19 +267,17 @@ static int tegra_eqos_init(struct platform_device *pdev, void *priv) return 0; } -static void *tegra_eqos_probe(struct platform_device *pdev, - struct plat_stmmacenet_data *data, - struct stmmac_resources *res) +static int tegra_eqos_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *data, + struct stmmac_resources *res) { struct device *dev = &pdev->dev; struct tegra_eqos *eqos; int err; eqos = devm_kzalloc(&pdev->dev, sizeof(*eqos), GFP_KERNEL); - if (!eqos) { - err = -ENOMEM; - goto error; - } + if (!eqos) + return -ENOMEM; 
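	/* Editor's note: both dwc_qos_probe() and tegra_eqos_probe() used to
	 * return a void pointer (NULL on success, ERR_PTR(err) on failure)
	 * that the caller immediately converted back with PTR_ERR(); this
	 * patch has them return a plain int, so the caller's error handling
	 * collapses to the usual
	 *
	 *   ret = data->probe(pdev, plat_dat, &stmmac_res);
	 *   if (ret < 0)
	 *           goto err;   // label elided; see the caller hunk below
	 */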
eqos->dev = &pdev->dev; eqos->regs = res->addr; @@ -368,9 +366,7 @@ bypass_clk_reset_gpio: if (err < 0) goto reset; -out: - return eqos; - + return 0; reset: reset_control_assert(eqos->rst); reset_phy: @@ -384,8 +380,7 @@ disable_slave: disable_master: clk_disable_unprepare(eqos->clk_master); error: - eqos = ERR_PTR(err); - goto out; + return err; } static int tegra_eqos_remove(struct platform_device *pdev) @@ -403,9 +398,9 @@ static int tegra_eqos_remove(struct platform_device *pdev) } struct dwc_eth_dwmac_data { - void *(*probe)(struct platform_device *pdev, - struct plat_stmmacenet_data *data, - struct stmmac_resources *res); + int (*probe)(struct platform_device *pdev, + struct plat_stmmacenet_data *data, + struct stmmac_resources *res); int (*remove)(struct platform_device *pdev); }; @@ -424,7 +419,6 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) const struct dwc_eth_dwmac_data *data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - void *priv; int ret; data = device_get_match_data(&pdev->dev); @@ -448,10 +442,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - priv = data->probe(pdev, plat_dat, &stmmac_res); - if (IS_ERR(priv)) { - ret = PTR_ERR(priv); - + ret = data->probe(pdev, plat_dat, &stmmac_res); + if (ret < 0) { if (ret != -EPROBE_DEFER) dev_err(&pdev->dev, "failed to probe subdriver: %d\n", ret); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 81ee0a071b4e..a2e80c89de2d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -236,6 +236,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, int ret; int i; + plat->phy_addr = -1; plat->clk_csr = 5; plat->has_gmac = 0; plat->has_gmac4 = 1; @@ -345,7 +346,6 @@ static int ehl_sgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; @@ -362,7 +362,6 @@ static int ehl_rgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_RGMII; return ehl_common_data(pdev, plat); @@ -376,7 +375,6 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 2; - plat->phy_addr = 1; return ehl_common_data(pdev, plat); } @@ -408,7 +406,6 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 3; - plat->phy_addr = 1; return ehl_common_data(pdev, plat); } @@ -450,7 +447,6 @@ static int tgl_sgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 1; - plat->phy_addr = 0; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 5afcf05bbf9c..dc0b8b6d180d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -299,7 +299,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) dev_err(dwmac->dev, "unsupported phy-mode %s\n", phy_modes(dwmac->phy_mode)); return -EINVAL; - }; + } if (rx_dly_config & PRG_ETH0_ADJ_ENABLE) { if (!dwmac->timing_adj_clk) { diff 
--git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 592b043f9676..82df91c130f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -176,9 +176,11 @@ enum power_event { */ #define GMAC4_LPI_CTRL_STATUS 0xd0 #define GMAC4_LPI_TIMER_CTRL 0xd4 +#define GMAC4_LPI_ENTRY_TIMER 0xd8 /* LPI control and status defines */ #define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */ +#define GMAC4_LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable */ #define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ #define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ #define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 002791b77356..3ed4f4cda7f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -379,6 +379,27 @@ static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link) writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); } +static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, int et) +{ + void __iomem *ioaddr = hw->pcsr; + int value = et & STMMAC_ET_MAX; + int regval; + + /* Program LPI entry timer value into register */ + writel(value, ioaddr + GMAC4_LPI_ENTRY_TIMER); + + /* Enable/disable LPI entry timer */ + regval = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); + regval |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA; + + if (et) + regval |= GMAC4_LPI_CTRL_STATUS_LPIATE; + else + regval &= ~GMAC4_LPI_CTRL_STATUS_LPIATE; + + writel(regval, ioaddr + GMAC4_LPI_CTRL_STATUS); +} + static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw) { void __iomem *ioaddr = hw->pcsr; @@ -1164,6 +1185,7 @@ const struct stmmac_ops dwmac4_ops = { .get_umac_addr = dwmac4_get_umac_addr, .set_eee_mode = dwmac4_set_eee_mode, .reset_eee_mode = dwmac4_reset_eee_mode, + .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1206,6 +1228,7 @@ const struct stmmac_ops dwmac410_ops = { .get_umac_addr = dwmac4_get_umac_addr, .set_eee_mode = dwmac4_set_eee_mode, .reset_eee_mode = dwmac4_reset_eee_mode, + .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1249,6 +1272,7 @@ const struct stmmac_ops dwmac510_ops = { .get_umac_addr = dwmac4_get_umac_addr, .set_eee_mode = dwmac4_set_eee_mode, .reset_eee_mode = dwmac4_reset_eee_mode, + .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index e2dca9b6e992..b40b2e0667bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -337,6 +337,7 @@ struct stmmac_ops { void (*set_eee_mode)(struct mac_device_info *hw, bool en_tx_lpi_clockgating); void (*reset_eee_mode)(struct mac_device_info *hw); + void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, int et); void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw); void (*set_eee_pls)(struct mac_device_info *hw, int link); void (*debug)(void 
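	/* Editor's note: the set_eee_lpi_entry_timer hook added above gets a
	 * stmmac_do_void_callback() wrapper like every other stmmac_ops
	 * member (see the macro a few hunks below), so core code simply does
	 *
	 *   stmmac_set_eee_lpi_timer(priv, priv->hw, tx_lpi_timer);
	 *
	 * and the call is skipped on MAC cores that do not implement it.
	 */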
__iomem *ioaddr, struct stmmac_extra_stats *x, @@ -439,6 +440,8 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, set_eee_mode, __args) #define stmmac_reset_eee_mode(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args) +#define stmmac_set_eee_lpi_timer(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_eee_lpi_entry_timer, __args) #define stmmac_set_eee_timer(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_eee_timer, __args) #define stmmac_set_eee_pls(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 727e68dfaf1c..c88ee8ea4245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -207,6 +207,7 @@ struct stmmac_priv { int tx_lpi_timer; int tx_lpi_enabled; int eee_tw_timer; + bool eee_sw_timer_en; unsigned int mode; unsigned int chain_mode; int extend_desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ba45fe237512..c8770e9668a1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -294,6 +294,16 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) return dirty; } +static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en) +{ + int tx_lpi_timer; + + /* Clear/set the SW EEE timer flag based on LPI ET enablement */ + priv->eee_sw_timer_en = en ? 0 : 1; + tx_lpi_timer = en ? priv->tx_lpi_timer : 0; + stmmac_set_eee_lpi_timer(priv, priv->hw, tx_lpi_timer); +} + /** * stmmac_enable_eee_mode - check and enter in LPI mode * @priv: driver private structure @@ -327,6 +337,11 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) */ void stmmac_disable_eee_mode(struct stmmac_priv *priv) { + if (!priv->eee_sw_timer_en) { + stmmac_lpi_entry_timer_config(priv, 0); + return; + } + stmmac_reset_eee_mode(priv, priv->hw); del_timer_sync(&priv->eee_ctrl_timer); priv->tx_path_in_lpi_mode = false; @@ -376,6 +391,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (!priv->eee_active) { if (priv->eee_enabled) { netdev_dbg(priv->dev, "disable EEE\n"); + stmmac_lpi_entry_timer_config(priv, 0); del_timer_sync(&priv->eee_ctrl_timer); stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer); } @@ -389,7 +405,15 @@ bool stmmac_eee_init(struct stmmac_priv *priv) eee_tw_timer); } - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) { + del_timer_sync(&priv->eee_ctrl_timer); + priv->tx_path_in_lpi_mode = false; + stmmac_lpi_entry_timer_config(priv, 1); + } else { + stmmac_lpi_entry_timer_config(priv, 0); + mod_timer(&priv->eee_ctrl_timer, + STMMAC_LPI_T(priv->tx_lpi_timer)); + } mutex_unlock(&priv->lock); netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); @@ -2044,7 +2068,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue)); } - if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) { + if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && + priv->eee_sw_timer_en) { stmmac_enable_eee_mode(priv); mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } @@ -3306,7 +3331,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q = &priv->tx_queue[queue]; first_tx = tx_q->cur_tx; - if 
(priv->tx_path_in_lpi_mode) + if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) stmmac_disable_eee_mode(priv); /* Manage oversized TCP frames for GMAC4 device */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index af34a4cadbb0..6dc9f10414e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -399,6 +399,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; + void *ret; int rc; plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); @@ -576,12 +577,10 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) clk_prepare_enable(plat->stmmac_clk); } - plat->pclk = devm_clk_get(&pdev->dev, "pclk"); + plat->pclk = devm_clk_get_optional(&pdev->dev, "pclk"); if (IS_ERR(plat->pclk)) { - if (PTR_ERR(plat->pclk) == -EPROBE_DEFER) - goto error_pclk_get; - - plat->pclk = NULL; + ret = plat->pclk; + goto error_pclk_get; } clk_prepare_enable(plat->pclk); @@ -596,14 +595,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate); } - plat->stmmac_rst = devm_reset_control_get(&pdev->dev, - STMMAC_RESOURCE_NAME); + plat->stmmac_rst = devm_reset_control_get_optional(&pdev->dev, + STMMAC_RESOURCE_NAME); if (IS_ERR(plat->stmmac_rst)) { - if (PTR_ERR(plat->stmmac_rst) == -EPROBE_DEFER) - goto error_hw_init; - - dev_info(&pdev->dev, "no reset control found\n"); - plat->stmmac_rst = NULL; + ret = plat->stmmac_rst; + goto error_hw_init; } return plat; @@ -613,7 +609,7 @@ error_hw_init: error_pclk_get: clk_disable_unprepare(plat->stmmac_clk); - return ERR_PTR(-EPROBE_DEFER); + return ret; } /** diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 501d676fd88b..766e8866bbef 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -241,8 +241,8 @@ static int am65_cpsw_nuss_ndo_slave_add_vid(struct net_device *ndev, if (!vid) unreg_mcast = port_mask; dev_info(common->dev, "Adding vlan %d to vlan filter\n", vid); - ret = cpsw_ale_add_vlan(common->ale, vid, port_mask, - unreg_mcast, port_mask, 0); + ret = cpsw_ale_vlan_add_modify(common->ale, vid, port_mask, + unreg_mcast, port_mask, 0); pm_runtime_put(common->dev); return ret; @@ -252,6 +252,7 @@ static int am65_cpsw_nuss_ndo_slave_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); int ret; if (!netif_running(ndev) || !vid) @@ -264,14 +265,15 @@ static int am65_cpsw_nuss_ndo_slave_kill_vid(struct net_device *ndev, } dev_info(common->dev, "Removing vlan %d from vlan filter\n", vid); - ret = cpsw_ale_del_vlan(common->ale, vid, 0); + ret = cpsw_ale_del_vlan(common->ale, vid, + BIT(port->port_id) | ALE_PORT_HOST); pm_runtime_put(common->dev); return ret; } -static void am65_cpsw_slave_set_promisc_2g(struct am65_cpsw_port *port, - bool promisc) +static void am65_cpsw_slave_set_promisc(struct am65_cpsw_port *port, + bool promisc) { struct am65_cpsw_common *common = port->common; @@ -296,7 +298,7 @@ static void am65_cpsw_nuss_ndo_slave_set_rx_mode(struct net_device *ndev) bool promisc; promisc = !!(ndev->flags & IFF_PROMISC); - am65_cpsw_slave_set_promisc_2g(port, promisc); 
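The stmmac_eee_init() hunk a little above arms the new GMAC4 hardware LPI entry timer only when tx_lpi_timer fits the register's range, and otherwise falls back to the software eee_ctrl_timer. A minimal standalone sketch of that decision, assuming an illustrative ET_MAX bound in place of the driver's STMMAC_ET_MAX and printf() in place of real register writes and timer arming:

```c
/* Standalone sketch of the HW-vs-SW LPI timer choice made in
 * stmmac_eee_init() above.  ET_MAX is an assumed stand-in for
 * STMMAC_ET_MAX (the widest value the GMAC4 entry-timer register
 * can hold); the real driver arms or cancels a kernel timer.
 */
#include <stdbool.h>
#include <stdio.h>

#define ET_MAX 0xFFFFF /* assumed width of the LPI entry-timer field */

struct eee_state {
	bool has_gmac4;    /* core has the GMAC4 LPI entry-timer register */
	int  tx_lpi_timer; /* requested LPI entry delay */
	bool sw_timer_en;  /* mirrors priv->eee_sw_timer_en */
};

/* Returns the value to program into the entry-timer register:
 * non-zero enables the hardware timer (LPIATE); zero disables it
 * and leaves LPI entry to the software eee_ctrl_timer.
 */
static int pick_lpi_entry_timer(struct eee_state *s)
{
	if (s->has_gmac4 && s->tx_lpi_timer <= ET_MAX) {
		s->sw_timer_en = false; /* HW enters LPI on its own */
		return s->tx_lpi_timer;
	}
	s->sw_timer_en = true;          /* fall back to the SW timer */
	return 0;
}

int main(void)
{
	struct eee_state s = { .has_gmac4 = true, .tx_lpi_timer = 1000 };

	printf("entry timer = %d, sw timer %s\n",
	       pick_lpi_entry_timer(&s),
	       s.sw_timer_en ? "enabled" : "disabled");
	return 0;
}
```

Keeping priv->eee_sw_timer_en in step with this choice is what lets the TX-clean and xmit paths in the same patch skip the software LPI bookkeeping whenever the hardware timer owns LPI entry.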
+ am65_cpsw_slave_set_promisc(port, promisc); if (promisc) return; @@ -373,7 +375,7 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, AM65_CPSW_NAV_PS_DATA_SIZE); - cppi5_hdesc_attach_buf(desc_rx, 0, 0, buf_dma, skb_tailroom(skb)); + cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); swdata = cppi5_hdesc_get_swdata(desc_rx); *((void **)swdata) = skb; @@ -426,9 +428,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, writel(common->rx_flow_id_base, host_p->port_base + AM65_CPSW_PORT0_REG_FLOW_ID_OFFSET); /* en tx crc offload */ - if (features & NETIF_F_HW_CSUM) - writel(AM65_CPSW_P0_REG_CTL_RX_CHECKSUM_EN, - host_p->port_base + AM65_CPSW_P0_REG_CTL); + writel(AM65_CPSW_P0_REG_CTL_RX_CHECKSUM_EN, host_p->port_base + AM65_CPSW_P0_REG_CTL); am65_cpsw_nuss_set_p0_ptype(common); @@ -629,13 +629,13 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) am65_cpsw_port_set_sl_mac(port, ndev->dev_addr); - if (port->slave.mac_only) + if (port->slave.mac_only) { /* enable mac-only mode on port */ cpsw_ale_control_set(common->ale, port->port_id, ALE_PORT_MACONLY, 1); - if (AM65_CPSW_IS_CPSW2G(common)) cpsw_ale_control_set(common->ale, port->port_id, ALE_PORT_NOLEARN, 1); + } port_mask = BIT(port->port_id) | ALE_PORT_HOST; cpsw_ale_add_ucast(common->ale, ndev->dev_addr, @@ -767,7 +767,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, return ret; } - if (desc_dma & 0x1) { + if (cppi5_desc_is_tdcm(desc_dma)) { dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx); return 0; } @@ -911,10 +911,57 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) dev_kfree_skb_any(skb); } +static struct sk_buff * +am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, + dma_addr_t desc_dma) +{ + struct am65_cpsw_ndev_priv *ndev_priv; + struct am65_cpsw_ndev_stats *stats; + struct cppi5_host_desc_t *desc_tx; + struct net_device *ndev; + struct sk_buff *skb; + void **swdata; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + skb = *(swdata); + am65_cpsw_nuss_xmit_free(tx_chn, tx_chn->common->dev, desc_tx); + + ndev = skb->dev; + + am65_cpts_tx_timestamp(tx_chn->common->cpts, skb); + + ndev_priv = netdev_priv(ndev); + stats = this_cpu_ptr(ndev_priv->stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + return skb; +} + +static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_device *ndev, + struct netdev_queue *netif_txq) +{ + if (netif_tx_queue_stopped(netif_txq)) { + /* Check whether the queue is stopped due to stalled + * tx dma, if the queue is stopped then wake the queue + * as we have free desc for tx + */ + __netif_tx_lock(netif_txq, smp_processor_id()); + if (netif_running(ndev) && + (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= MAX_SKB_FRAGS)) + netif_tx_wake_queue(netif_txq); + + __netif_tx_unlock(netif_txq); + } +} + static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, int chn, unsigned int budget) { - struct cppi5_host_desc_t *desc_tx; struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; struct netdev_queue *netif_txq; @@ -923,41 +970,68 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, struct sk_buff *skb; dma_addr_t desc_dma; int res, num_tx = 0; - void 
**swdata; tx_chn = &common->tx_chns[chn]; while (true) { - struct am65_cpsw_ndev_priv *ndev_priv; - struct am65_cpsw_ndev_stats *stats; - + spin_lock(&tx_chn->lock); res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); + spin_unlock(&tx_chn->lock); if (res == -ENODATA) break; - if (desc_dma & 0x1) { + if (cppi5_desc_is_tdcm(desc_dma)) { if (atomic_dec_and_test(&common->tdown_cnt)) complete(&common->tdown_complete); break; } - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, dev, desc_tx); - + skb = am65_cpsw_nuss_tx_compl_packet(tx_chn, desc_dma); + total_bytes = skb->len; ndev = skb->dev; + napi_consume_skb(skb, budget); + num_tx++; - am65_cpts_tx_timestamp(common->cpts, skb); + netif_txq = netdev_get_tx_queue(ndev, chn); - ndev_priv = netdev_priv(ndev); - stats = this_cpu_ptr(ndev_priv->stats); - u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += skb->len; - u64_stats_update_end(&stats->syncp); + netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); + am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); + } + + dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx); + + return num_tx; +} + +static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, + int chn, unsigned int budget) +{ + struct device *dev = common->dev; + struct am65_cpsw_tx_chn *tx_chn; + struct netdev_queue *netif_txq; + unsigned int total_bytes = 0; + struct net_device *ndev; + struct sk_buff *skb; + dma_addr_t desc_dma; + int res, num_tx = 0; + + tx_chn = &common->tx_chns[chn]; + + while (true) { + res = k3_udma_glue_pop_tx_chn(tx_chn->tx_chn, &desc_dma); + if (res == -ENODATA) + break; + + if (cppi5_desc_is_tdcm(desc_dma)) { + if (atomic_dec_and_test(&common->tdown_cnt)) + complete(&common->tdown_complete); + break; + } + + skb = am65_cpsw_nuss_tx_compl_packet(tx_chn, desc_dma); + + ndev = skb->dev; total_bytes += skb->len; napi_consume_skb(skb, budget); num_tx++; @@ -970,19 +1044,8 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); - if (netif_tx_queue_stopped(netif_txq)) { - /* Check whether the queue is stopped due to stalled tx dma, - * if the queue is stopped then wake the queue as - * we have free desc for tx - */ - __netif_tx_lock(netif_txq, smp_processor_id()); - if (netif_running(ndev) && - (k3_cppi_desc_pool_avail(tx_chn->desc_pool) >= - MAX_SKB_FRAGS)) - netif_tx_wake_queue(netif_txq); + am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); - __netif_tx_unlock(netif_txq); - } dev_dbg(dev, "%s:%u pkt:%d\n", __func__, chn, num_tx); return num_tx; @@ -993,8 +1056,11 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) struct am65_cpsw_tx_chn *tx_chn = am65_cpsw_napi_to_tx_chn(napi_tx); int num_tx; - num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, - budget); + if (AM65_CPSW_IS_CPSW2G(tx_chn->common)) + num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, budget); + else + num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget); + num_tx = min(num_tx, budget); if (num_tx < budget) { napi_complete(napi_tx); @@ -1139,7 +1205,13 @@ done_tx: cppi5_hdesc_set_pktlen(first_desc, pkt_len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); - ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + if (AM65_CPSW_IS_CPSW2G(common)) { + ret = 
k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + } else { + spin_lock_bh(&tx_chn->lock); + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); + spin_unlock_bh(&tx_chn->lock); + } if (ret) { dev_err(dev, "can't push desc %d\n", ret); /* inform bql */ @@ -1369,32 +1441,7 @@ static void am65_cpsw_nuss_ndo_get_stats(struct net_device *dev, stats->tx_dropped = dev->stats.tx_dropped; } -static int am65_cpsw_nuss_ndo_slave_set_features(struct net_device *ndev, - netdev_features_t features) -{ - struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - netdev_features_t changes = features ^ ndev->features; - struct am65_cpsw_host *host_p; - - host_p = am65_common_get_host(common); - - if (changes & NETIF_F_HW_CSUM) { - bool enable = !!(features & NETIF_F_HW_CSUM); - - dev_info(common->dev, "Turn %s tx-checksum-ip-generic\n", - enable ? "ON" : "OFF"); - if (enable) - writel(AM65_CPSW_P0_REG_CTL_RX_CHECKSUM_EN, - host_p->port_base + AM65_CPSW_P0_REG_CTL); - else - writel(0, - host_p->port_base + AM65_CPSW_P0_REG_CTL); - } - - return 0; -} - -static const struct net_device_ops am65_cpsw_nuss_netdev_ops_2g = { +static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_open = am65_cpsw_nuss_ndo_slave_open, .ndo_stop = am65_cpsw_nuss_ndo_slave_stop, .ndo_start_xmit = am65_cpsw_nuss_ndo_slave_xmit, @@ -1406,7 +1453,6 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops_2g = { .ndo_vlan_rx_add_vid = am65_cpsw_nuss_ndo_slave_add_vid, .ndo_vlan_rx_kill_vid = am65_cpsw_nuss_ndo_slave_kill_vid, .ndo_do_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, - .ndo_set_features = am65_cpsw_nuss_ndo_slave_set_features, .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, }; @@ -1417,7 +1463,6 @@ static void am65_cpsw_nuss_slave_disable_unused(struct am65_cpsw_port *port) if (!port->disabled) return; - common->disabled_ports_mask |= BIT(port->port_id); cpsw_ale_control_set(common->ale, port->port_id, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); @@ -1496,6 +1541,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) snprintf(tx_chn->tx_chn_name, sizeof(tx_chn->tx_chn_name), "tx%d", i); + spin_lock_init(&tx_chn->lock); tx_chn->common = common; tx_chn->id = i; tx_chn->descs_num = max_desc_num; @@ -1515,9 +1561,8 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) tx_chn->tx_chn_name, &tx_cfg); if (IS_ERR(tx_chn->tx_chn)) { - ret = PTR_ERR(tx_chn->tx_chn); - dev_err(dev, "Failed to request tx dma channel %d\n", - ret); + ret = dev_err_probe(dev, PTR_ERR(tx_chn->tx_chn), + "Failed to request tx dma channel\n"); goto err; } @@ -1588,8 +1633,8 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg); if (IS_ERR(rx_chn->rx_chn)) { - ret = PTR_ERR(rx_chn->rx_chn); - dev_err(dev, "Failed to request rx dma channel %d\n", ret); + ret = dev_err_probe(dev, PTR_ERR(rx_chn->rx_chn), + "Failed to request rx dma channel\n"); goto err; } @@ -1606,7 +1651,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) }; struct k3_ring_cfg fdqring_cfg = { .elm_size = K3_RINGACC_RING_ELSIZE_8, - .mode = K3_RINGACC_RING_MODE_MESSAGE, .flags = K3_RINGACC_RING_SHARED, }; struct k3_udma_glue_rx_flow_cfg rx_flow_cfg = { @@ -1620,6 +1664,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) rx_flow_cfg.ring_rxfdq0_id = fdqring_id; rx_flow_cfg.rx_cfg.size = max_desc_num; rx_flow_cfg.rxfdq_cfg.size = max_desc_num; + rx_flow_cfg.rxfdq_cfg.mode = 
common->pdata.fdqring_mode; ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn, i, &rx_flow_cfg); @@ -1725,6 +1770,13 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common) return ret; } common->cpts = cpts; + /* Forbid PM runtime if CPTS is running. + * K3 CPSWxG modules may completely lose context during ON->OFF + * transitions depending on integration. + * AM65x/J721E MCU CPSW2G: false + * J721E MAIN_CPSW9G: true + */ + pm_runtime_forbid(dev); return 0; } @@ -1778,8 +1830,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) return PTR_ERR(port->slave.mac_sl); port->disabled = !of_device_is_available(port_np); - if (port->disabled) + if (port->disabled) { + common->disabled_ports_mask |= BIT(port->port_id); continue; + } port->slave.ifphy = devm_of_phy_get(dev, port_np, NULL); if (IS_ERR(port->slave.ifphy)) { @@ -1795,12 +1849,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* get phy/link info */ if (of_phy_is_fixed_link(port_np)) { ret = of_phy_register_fixed_link(port_np); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "%pOF failed to register fixed-link phy: %d\n", - port_np, ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, + "failed to register fixed-link phy %pOF\n", + port_np); port->slave.phy_node = of_node_get(port_np); } else { port->slave.phy_node = @@ -1833,6 +1885,12 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) } of_node_put(node); + /* is there at least one ext.port */ + if (!(~common->disabled_ports_mask & GENMASK(common->port_num, 1))) { + dev_err(dev, "No Ext. port are available\n"); + return -ENODEV; + } + return 0; } @@ -1843,14 +1901,18 @@ static void am65_cpsw_pcpu_stats_free(void *data) free_percpu(stats); } -static int am65_cpsw_nuss_init_ndev_2g(struct am65_cpsw_common *common) +static int +am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) { struct am65_cpsw_ndev_priv *ndev_priv; struct device *dev = common->dev; struct am65_cpsw_port *port; int ret; - port = am65_common_get_port(common, 1); + port = &common->ports[port_idx]; + + if (port->disabled) + return 0; /* alloc netdev */ port->ndev = devm_alloc_etherdev_mqs(common->dev, @@ -1879,7 +1941,7 @@ static int am65_cpsw_nuss_init_ndev_2g(struct am65_cpsw_common *common) port->ndev->features = port->ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; port->ndev->vlan_features |= NETIF_F_SG; - port->ndev->netdev_ops = &am65_cpsw_nuss_netdev_ops_2g; + port->ndev->netdev_ops = &am65_cpsw_nuss_netdev_ops; port->ndev->ethtool_ops = &am65_cpsw_ethtool_ops_slave; /* Disable TX checksum offload by default due to HW bug */ @@ -1892,29 +1954,41 @@ static int am65_cpsw_nuss_init_ndev_2g(struct am65_cpsw_common *common) ret = devm_add_action_or_reset(dev, am65_cpsw_pcpu_stats_free, ndev_priv->stats); - if (ret) { - dev_err(dev, "Failed to add percpu stat free action %d\n", ret); - return ret; + if (ret) + dev_err(dev, "failed to add percpu stat free action %d\n", ret); + + if (!common->dma_ndev) + common->dma_ndev = port->ndev; + + return ret; +} + +static int am65_cpsw_nuss_init_ndevs(struct am65_cpsw_common *common) +{ + int ret; + int i; + + for (i = 0; i < common->port_num; i++) { + ret = am65_cpsw_nuss_init_port_ndev(common, i); + if (ret) + return ret; } - netif_napi_add(port->ndev, &common->napi_rx, + netif_napi_add(common->dma_ndev, &common->napi_rx, am65_cpsw_nuss_rx_poll, NAPI_POLL_WEIGHT); return ret; } -static int am65_cpsw_nuss_ndev_add_napi_2g(struct 
am65_cpsw_common *common) +static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) { struct device *dev = common->dev; - struct am65_cpsw_port *port; int i, ret = 0; - port = am65_common_get_port(common, 1); - for (i = 0; i < common->tx_ch_num; i++) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; - netif_tx_napi_add(port->ndev, &tx_chn->napi_tx, + netif_tx_napi_add(common->dma_ndev, &tx_chn->napi_tx, am65_cpsw_nuss_tx_poll, NAPI_POLL_WEIGHT); ret = devm_request_irq(dev, tx_chn->irq, @@ -1932,16 +2006,27 @@ err: return ret; } -static int am65_cpsw_nuss_ndev_reg_2g(struct am65_cpsw_common *common) +static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + if (port->ndev) + unregister_netdev(port->ndev); + } +} + +static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; struct am65_cpsw_port *port; - int ret = 0; + int ret = 0, i; - port = am65_common_get_port(common, 1); - ret = am65_cpsw_nuss_ndev_add_napi_2g(common); + ret = am65_cpsw_nuss_ndev_add_tx_napi(common); if (ret) - goto err; + return ret; ret = devm_request_irq(dev, common->rx_chns.irq, am65_cpsw_nuss_rx_irq, @@ -1949,17 +2034,31 @@ static int am65_cpsw_nuss_ndev_reg_2g(struct am65_cpsw_common *common) if (ret) { dev_err(dev, "failure requesting rx irq %u, %d\n", common->rx_chns.irq, ret); - goto err; + return ret; + } + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + + if (!port->ndev) + continue; + + ret = register_netdev(port->ndev); + if (ret) { + dev_err(dev, "error registering slave net device%i %d\n", + i, ret); + goto err_cleanup_ndev; + } } - ret = register_netdev(port->ndev); - if (ret) - dev_err(dev, "error registering slave net device %d\n", ret); /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ -err: + return 0; + +err_cleanup_ndev: + am65_cpsw_nuss_cleanup_ndev(common); return ret; } @@ -1972,19 +2071,7 @@ int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx) if (ret) return ret; - return am65_cpsw_nuss_ndev_add_napi_2g(common); -} - -static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) -{ - struct am65_cpsw_port *port; - int i; - - for (i = 0; i < common->port_num; i++) { - port = &common->ports[i]; - if (port->ndev) - unregister_netdev(port->ndev); - } + return am65_cpsw_nuss_ndev_add_tx_napi(common); } struct am65_cpsw_soc_pdata { @@ -2005,10 +2092,14 @@ static const struct soc_device_attribute am65_cpsw_socinfo[] = { static const struct am65_cpsw_pdata am65x_sr1_0 = { .quirks = AM65_CPSW_QUIRK_I2027_NO_TX_CSUM, + .ale_dev_id = "am65x-cpsw2g", + .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, }; static const struct am65_cpsw_pdata j721e_pdata = { .quirks = 0, + .ale_dev_id = "am65x-cpsw2g", + .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, }; static const struct of_device_id am65_cpsw_nuss_of_mtable[] = { @@ -2068,9 +2159,6 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) return -ENOENT; of_node_put(node); - if (common->port_num != 1) - return -EOPNOTSUPP; - common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); common->tx_ch_num = 1; @@ -2089,13 +2177,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) return -ENOMEM; clk = devm_clk_get(dev, "fck"); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - - if (ret != -EPROBE_DEFER) - 
dev_err(dev, "error getting fck clock %d\n", ret); - return ret; - } + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), "getting fck clock\n"); common->bus_freq = clk_get_rate(clk); pm_runtime_enable(dev); @@ -2145,7 +2228,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) ale_params.ale_ageout = AM65_CPSW_ALE_AGEOUT_DEFAULT; ale_params.ale_ports = common->port_num + 1; ale_params.ale_regs = common->cpsw_base + AM65_CPSW_NU_ALE_BASE; - ale_params.dev_id = "am65x-cpsw2g"; + ale_params.dev_id = common->pdata.ale_dev_id; ale_params.bus_freq = common->bus_freq; common->ale = cpsw_ale_create(&ale_params); @@ -2165,11 +2248,11 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) dev_set_drvdata(dev, common); - ret = am65_cpsw_nuss_init_ndev_2g(common); + ret = am65_cpsw_nuss_init_ndevs(common); if (ret) goto err_of_clear; - ret = am65_cpsw_nuss_ndev_reg_2g(common); + ret = am65_cpsw_nuss_register_ndevs(common); if (ret) goto err_of_clear; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 993e1d4d3222..02aed4c0ceba 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/soc/ti/k3-ringacc.h> #include "am65-cpsw-qos.h" struct am65_cpts; @@ -59,6 +60,7 @@ struct am65_cpsw_tx_chn { struct am65_cpsw_common *common; struct k3_cppi_desc_pool *desc_pool; struct k3_udma_glue_tx_channel *tx_chn; + spinlock_t lock; /* protect TX rings in multi-port mode */ int irq; u32 id; u32 descs_num; @@ -77,6 +79,8 @@ struct am65_cpsw_rx_chn { struct am65_cpsw_pdata { u32 quirks; + enum k3_ring_mode fdqring_mode; + const char *ale_dev_id; }; struct am65_cpsw_common { @@ -91,6 +95,7 @@ struct am65_cpsw_common { struct am65_cpsw_host host; struct am65_cpsw_port *ports; u32 disabled_ports_mask; + struct net_device *dma_ndev; int usage_count; /* number of opened ports */ struct cpsw_ale *ale; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index a6a455c32628..cdc308a2aa3e 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -634,8 +634,8 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port_mask, int untag, return 0; } -static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry, - u16 vid, int port_mask) +static void cpsw_ale_vlan_del_modify_int(struct cpsw_ale *ale, u32 *ale_entry, + u16 vid, int port_mask) { int reg_mcast, unreg_mcast; int members, untag; @@ -644,6 +644,7 @@ static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry, ALE_ENT_VID_MEMBER_LIST); members &= ~port_mask; if (!members) { + cpsw_ale_set_vlan_untag(ale, ale_entry, vid, 0); cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); return; } @@ -673,7 +674,7 @@ static void cpsw_ale_del_vlan_modify(struct cpsw_ale *ale, u32 *ale_entry, ALE_ENT_VID_MEMBER_LIST, members); } -int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) +int cpsw_ale_vlan_del_modify(struct cpsw_ale *ale, u16 vid, int port_mask) { u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0}; int idx; @@ -684,11 +685,39 @@ int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) cpsw_ale_read(ale, idx, ale_entry); - if (port_mask) { - cpsw_ale_del_vlan_modify(ale, ale_entry, vid, port_mask); - } else { + cpsw_ale_vlan_del_modify_int(ale, ale_entry, vid, port_mask); + cpsw_ale_write(ale, idx, ale_entry); + + 
return 0; +} + +int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) +{ + u32 ale_entry[ALE_ENTRY_WORDS] = {0, 0, 0}; + int members, idx; + + idx = cpsw_ale_match_vlan(ale, vid); + if (idx < 0) + return -ENOENT; + + cpsw_ale_read(ale, idx, ale_entry); + + /* if !port_mask - force remove VLAN (legacy). + * Check if there are other VLAN members ports + * if no - remove VLAN. + * if yes it means same VLAN was added to >1 port in multi port mode, so + * remove port_mask ports from VLAN ALE entry excluding Host port. + */ + members = cpsw_ale_vlan_get_fld(ale, ale_entry, ALE_ENT_VID_MEMBER_LIST); + members &= ~port_mask; + + if (!port_mask || !members) { + /* last port or force remove - remove VLAN */ cpsw_ale_set_vlan_untag(ale, ale_entry, vid, 0); cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); + } else { + port_mask &= ~ALE_PORT_HOST; + cpsw_ale_vlan_del_modify_int(ale, ale_entry, vid, port_mask); } cpsw_ale_write(ale, idx, ale_entry); diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 5e4a69662c5f..13fe47687fde 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -134,6 +134,7 @@ static inline int cpsw_ale_get_vlan_p0_untag(struct cpsw_ale *ale, u16 vid) int cpsw_ale_vlan_add_modify(struct cpsw_ale *ale, u16 vid, int port_mask, int untag_mask, int reg_mcast, int unreg_mcast); +int cpsw_ale_vlan_del_modify(struct cpsw_ale *ale, u16 vid, int port_mask); void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, bool add); diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index 985a929bb957..29747da5c514 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -227,7 +227,7 @@ static int cpsw_port_vlan_del(struct cpsw_priv *priv, u16 vid, else port_mask = BIT(priv->emac_port); - ret = cpsw_ale_del_vlan(cpsw->ale, vid, port_mask); + ret = cpsw_ale_vlan_del_modify(cpsw->ale, vid, port_mask); if (ret != 0) return ret; diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 267c080ee084..0b2ce4bdc2c3 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -186,6 +186,7 @@ static void tlan_reset_adapter(struct net_device *); static void tlan_finish_reset(struct net_device *); static void tlan_set_mac(struct net_device *, int areg, char *mac); +static void __tlan_phy_print(struct net_device *); static void tlan_phy_print(struct net_device *); static void tlan_phy_detect(struct net_device *); static void tlan_phy_power_down(struct net_device *); @@ -201,9 +202,11 @@ static void tlan_phy_finish_auto_neg(struct net_device *); static int tlan_phy_dp83840a_check(struct net_device *); */ -static bool tlan_mii_read_reg(struct net_device *, u16, u16, u16 *); +static bool __tlan_mii_read_reg(struct net_device *, u16, u16, u16 *); +static void tlan_mii_read_reg(struct net_device *, u16, u16, u16 *); static void tlan_mii_send_data(u16, u32, unsigned); static void tlan_mii_sync(u16); +static void __tlan_mii_write_reg(struct net_device *, u16, u16, u16); static void tlan_mii_write_reg(struct net_device *, u16, u16, u16); static void tlan_ee_send_start(u16); @@ -242,23 +245,20 @@ static u32 tlan_handle_rx_eoc }; -static inline void +static void tlan_set_timer(struct net_device *dev, u32 ticks, u32 type) { struct tlan_priv *priv = netdev_priv(dev); unsigned long flags = 0; - if (!in_irq()) - spin_lock_irqsave(&priv->lock, flags); + 
spin_lock_irqsave(&priv->lock, flags); if (priv->timer.function != NULL && priv->timer_type != TLAN_TIMER_ACTIVITY) { - if (!in_irq()) - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); return; } priv->timer.function = tlan_timer; - if (!in_irq()) - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); priv->timer_set_at = jiffies; priv->timer_type = type; @@ -1703,22 +1703,22 @@ static u32 tlan_handle_status_check(struct net_device *dev, u16 host_int) dev->name, (unsigned) net_sts); } if ((net_sts & TLAN_NET_STS_MIRQ) && (priv->phy_num == 0)) { - tlan_mii_read_reg(dev, phy, TLAN_TLPHY_STS, &tlphy_sts); - tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl); + __tlan_mii_read_reg(dev, phy, TLAN_TLPHY_STS, &tlphy_sts); + __tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl); if (!(tlphy_sts & TLAN_TS_POLOK) && !(tlphy_ctl & TLAN_TC_SWAPOL)) { tlphy_ctl |= TLAN_TC_SWAPOL; - tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, - tlphy_ctl); + __tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, + tlphy_ctl); } else if ((tlphy_sts & TLAN_TS_POLOK) && (tlphy_ctl & TLAN_TC_SWAPOL)) { tlphy_ctl &= ~TLAN_TC_SWAPOL; - tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, - tlphy_ctl); + __tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL, + tlphy_ctl); } if (debug) - tlan_phy_print(dev); + __tlan_phy_print(dev); } } @@ -2379,7 +2379,7 @@ ThunderLAN driver PHY layer routines /********************************************************************* - * tlan_phy_print + * __tlan_phy_print * * Returns: * Nothing @@ -2391,11 +2391,13 @@ ThunderLAN driver PHY layer routines * ********************************************************************/ -static void tlan_phy_print(struct net_device *dev) +static void __tlan_phy_print(struct net_device *dev) { struct tlan_priv *priv = netdev_priv(dev); u16 i, data0, data1, data2, data3, phy; + lockdep_assert_held(&priv->lock); + phy = priv->phy[priv->phy_num]; if (priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY) { @@ -2404,10 +2406,10 @@ static void tlan_phy_print(struct net_device *dev) netdev_info(dev, "PHY 0x%02x\n", phy); pr_info(" Off. +0 +1 +2 +3\n"); for (i = 0; i < 0x20; i += 4) { - tlan_mii_read_reg(dev, phy, i, &data0); - tlan_mii_read_reg(dev, phy, i + 1, &data1); - tlan_mii_read_reg(dev, phy, i + 2, &data2); - tlan_mii_read_reg(dev, phy, i + 3, &data3); + __tlan_mii_read_reg(dev, phy, i, &data0); + __tlan_mii_read_reg(dev, phy, i + 1, &data1); + __tlan_mii_read_reg(dev, phy, i + 2, &data2); + __tlan_mii_read_reg(dev, phy, i + 3, &data3); pr_info(" 0x%02x 0x%04hx 0x%04hx 0x%04hx 0x%04hx\n", i, data0, data1, data2, data3); } @@ -2417,7 +2419,15 @@ static void tlan_phy_print(struct net_device *dev) } +static void tlan_phy_print(struct net_device *dev) +{ + struct tlan_priv *priv = netdev_priv(dev); + unsigned long flags; + spin_lock_irqsave(&priv->lock, flags); + __tlan_phy_print(dev); + spin_unlock_irqrestore(&priv->lock, flags); +} /********************************************************************* @@ -2795,7 +2805,7 @@ these routines are based on the information in chap. 2 of the /*************************************************************** - * tlan_mii_read_reg + * __tlan_mii_read_reg * * Returns: * false if ack received ok @@ -2819,7 +2829,7 @@ these routines are based on the information in chap. 
2 of the **************************************************************/ static bool -tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val) +__tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val) { u8 nack; u16 sio, tmp; @@ -2827,15 +2837,13 @@ tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val) bool err; int minten; struct tlan_priv *priv = netdev_priv(dev); - unsigned long flags = 0; + + lockdep_assert_held(&priv->lock); err = false; outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR); sio = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO; - if (!in_irq()) - spin_lock_irqsave(&priv->lock, flags); - tlan_mii_sync(dev->base_addr); minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio); @@ -2881,15 +2889,19 @@ tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val) *val = tmp; - if (!in_irq()) - spin_unlock_irqrestore(&priv->lock, flags); - return err; - } +static void tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, + u16 *val) +{ + struct tlan_priv *priv = netdev_priv(dev); + unsigned long flags; - + spin_lock_irqsave(&priv->lock, flags); + __tlan_mii_read_reg(dev, phy, reg, val); + spin_unlock_irqrestore(&priv->lock, flags); +} /*************************************************************** * tlan_mii_send_data @@ -2971,7 +2983,7 @@ static void tlan_mii_sync(u16 base_port) /*************************************************************** - * tlan_mii_write_reg + * __tlan_mii_write_reg * * Returns: * Nothing @@ -2991,19 +3003,17 @@ static void tlan_mii_sync(u16 base_port) **************************************************************/ static void -tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val) +__tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val) { u16 sio; int minten; - unsigned long flags = 0; struct tlan_priv *priv = netdev_priv(dev); + lockdep_assert_held(&priv->lock); + outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR); sio = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO; - if (!in_irq()) - spin_lock_irqsave(&priv->lock, flags); - tlan_mii_sync(dev->base_addr); minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio); @@ -3024,12 +3034,18 @@ tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val) if (minten) tlan_set_bit(TLAN_NET_SIO_MINTEN, sio); - if (!in_irq()) - spin_unlock_irqrestore(&priv->lock, flags); - } +static void +tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val) +{ + struct tlan_priv *priv = netdev_priv(dev); + unsigned long flags; + spin_lock_irqsave(&priv->lock, flags); + __tlan_mii_write_reg(dev, phy, reg, val); + spin_unlock_irqrestore(&priv->lock, flags); +} /***************************************************************************** diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index d0d0d4fe9d40..3b2137d1f4c6 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_XILINX config XILINX_EMACLITE tristate "Xilinx 10/100 Ethernet Lite support" - depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS + depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || COMPILE_TEST select PHYLIB help This driver supports the 10/100 Ethernet Lite from Xilinx. 
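The tlan hunks above replace the old `if (!in_irq()) spin_lock_irqsave(...)` guessing game with the usual kernel split: a double-underscore helper that requires the lock to be held (enforced with lockdep_assert_held()) and a plain-named wrapper that takes it. A runnable userspace model of the same pattern, using a pthread mutex with a flag standing in for lockdep state, and hypothetical names rather than the driver's:

```c
/* Minimal userspace model of the tlan locking refactor above: the
 * double-underscore helper assumes the lock is already held, while
 * the plain-named wrapper takes it for ordinary callers.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;
static int lock_held;          /* stand-in for lockdep state    */
static unsigned short mii_reg; /* stand-in for the MII register */

/* Inner helper: caller must hold dev_lock (cf. __tlan_mii_read_reg) */
static unsigned short __mii_read(int reg)
{
	assert(lock_held); /* cf. lockdep_assert_held(&priv->lock) */
	(void)reg;
	return mii_reg;
}

/* Public wrapper: takes the lock itself (cf. tlan_mii_read_reg) */
static unsigned short mii_read(int reg)
{
	unsigned short val;

	pthread_mutex_lock(&dev_lock);
	lock_held = 1;
	val = __mii_read(reg);
	lock_held = 0;
	pthread_mutex_unlock(&dev_lock);
	return val;
}

int main(void)
{
	mii_reg = 0x1234;
	printf("reg0 = 0x%04x\n", mii_read(0));
	return 0;
}
```

Interrupt-path code such as tlan_handle_status_check(), which already holds priv->lock, calls the __-prefixed helpers directly; everything else goes through the locking wrappers, so the locking rule is visible at the call site instead of being divined from in_irq().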
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index f34c7903ff52..a03c3ca1b28d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -378,6 +378,7 @@ struct axidma_bd { * @dev: Pointer to device structure * @phy_node: Pointer to device node structure * @mii_bus: Pointer to MII bus structure + * @mii_clk_div: MII bus clock divider value * @regs_start: Resource start for axienet device addresses * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space @@ -419,11 +420,15 @@ struct axienet_local { struct phylink *phylink; struct phylink_config phylink_config; + /* Reference to PCS/PMA PHY if used */ + struct mdio_device *pcs_phy; + /* Clock for AXI bus */ struct clk *clk; /* MDIO bus data */ struct mii_bus *mii_bus; /* MII bus reference */ + u8 mii_clk_div; /* MII bus clock divider value */ /* IO registers, dma functions and IRQs */ resource_size_t regs_start; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9aafd3ecdaa4..6fea980acf64 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1049,20 +1049,13 @@ static int axienet_open(struct net_device *ndev) dev_dbg(&ndev->dev, "axienet_open()\n"); - /* Disable the MDIO interface till Axi Ethernet Reset is completed. - * When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting - * and re-enabled afterwards. + /* When we do an Axi Ethernet reset, it resets the complete core + * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
*/ mutex_lock(&lp->mii_bus->mdio_lock); - axienet_mdio_disable(lp); ret = axienet_device_reset(ndev); - if (ret == 0) - ret = axienet_mdio_enable(lp); mutex_unlock(&lp->mii_bus->mdio_lock); - if (ret < 0) - return ret; ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); if (ret) { @@ -1156,9 +1149,7 @@ static int axienet_stop(struct net_device *ndev) /* Do a reset to ensure DMA is really stopped */ mutex_lock(&lp->mii_bus->mdio_lock); - axienet_mdio_disable(lp); __axienet_device_reset(lp); - axienet_mdio_enable(lp); mutex_unlock(&lp->mii_bus->mdio_lock); cancel_work_sync(&lp->dma_err_task); @@ -1517,10 +1508,27 @@ static void axienet_validate(struct phylink_config *config, phylink_set(mask, Asym_Pause); phylink_set(mask, Pause); - phylink_set(mask, 1000baseX_Full); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Full); - phylink_set(mask, 1000baseT_Full); + + switch (state->interface) { + case PHY_INTERFACE_MODE_NA: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_GMII: + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + phylink_set(mask, 1000baseX_Full); + phylink_set(mask, 1000baseT_Full); + if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + break; + fallthrough; + case PHY_INTERFACE_MODE_MII: + phylink_set(mask, 100baseT_Full); + phylink_set(mask, 10baseT_Full); + default: + break; + } bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -1533,38 +1541,46 @@ static void axienet_mac_pcs_get_state(struct phylink_config *config, { struct net_device *ndev = to_net_dev(config->dev); struct axienet_local *lp = netdev_priv(ndev); - u32 emmc_reg, fcc_reg; - - state->interface = lp->phy_mode; - emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET); - if (emmc_reg & XAE_EMMC_LINKSPD_1000) - state->speed = SPEED_1000; - else if (emmc_reg & XAE_EMMC_LINKSPD_100) - state->speed = SPEED_100; - else - state->speed = SPEED_10; - - state->pause = 0; - fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET); - if (fcc_reg & XAE_FCC_FCTX_MASK) - state->pause |= MLO_PAUSE_TX; - if (fcc_reg & XAE_FCC_FCRX_MASK) - state->pause |= MLO_PAUSE_RX; - - state->an_complete = 0; - state->duplex = 1; + switch (state->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + phylink_mii_c22_pcs_get_state(lp->pcs_phy, state); + break; + default: + break; + } } static void axienet_mac_an_restart(struct phylink_config *config) { - /* Unsupported, do nothing */ + struct net_device *ndev = to_net_dev(config->dev); + struct axienet_local *lp = netdev_priv(ndev); + + phylink_mii_c22_pcs_an_restart(lp->pcs_phy); } static void axienet_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { - /* nothing meaningful to do */ + struct net_device *ndev = to_net_dev(config->dev); + struct axienet_local *lp = netdev_priv(ndev); + int ret; + + switch (state->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode, + state->interface, + state->advertising); + if (ret < 0) + netdev_warn(ndev, "Failed to configure PCS: %d\n", + ret); + break; + + default: + break; + } } static void axienet_mac_link_down(struct phylink_config *config, @@ -1644,16 +1660,12 @@ static void axienet_dma_err_handler(struct work_struct *work) axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - /* Disable the MDIO 
interface till Axi Ethernet Reset is completed. - * When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting - * and re-enabled afterwards. + /* When we do an Axi Ethernet reset, it resets the complete core + * including the MDIO. MDIO must be disabled before resetting. * Hold MDIO bus lock to avoid MDIO accesses during the reset. */ mutex_lock(&lp->mii_bus->mdio_lock); - axienet_mdio_disable(lp); __axienet_device_reset(lp); - axienet_mdio_enable(lp); mutex_unlock(&lp->mii_bus->mdio_lock); for (i = 0; i < lp->tx_bd_num; i++) { @@ -1999,6 +2011,20 @@ static int axienet_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "error registering MDIO bus: %d\n", ret); } + if (lp->phy_mode == PHY_INTERFACE_MODE_SGMII || + lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) { + if (!lp->phy_node) { + dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); + ret = -EINVAL; + goto free_netdev; + } + lp->pcs_phy = of_mdio_find_device(lp->phy_node); + if (!lp->pcs_phy) { + ret = -EPROBE_DEFER; + goto free_netdev; + } + lp->phylink_config.pcs_poll = true; + } lp->phylink_config.dev = &ndev->dev; lp->phylink_config.type = PHYLINK_NETDEV; @@ -2036,6 +2062,9 @@ static int axienet_remove(struct platform_device *pdev) if (lp->phylink) phylink_destroy(lp->phylink); + if (lp->pcs_phy) + put_device(&lp->pcs_phy->dev); + axienet_mdio_teardown(lp); clk_disable_unprepare(lp->clk); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 435ed308d990..9c014cee34b2 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -30,6 +30,23 @@ static int axienet_mdio_wait_until_ready(struct axienet_local *lp) 1, 20000); } +/* Enable the MDIO MDC. Called prior to a read/write operation */ +static void axienet_mdio_mdc_enable(struct axienet_local *lp) +{ + axienet_iow(lp, XAE_MDIO_MC_OFFSET, + ((u32)lp->mii_clk_div | XAE_MDIO_MC_MDIOEN_MASK)); +} + +/* Disable the MDIO MDC. 
Called after a read/write operation*/ +static void axienet_mdio_mdc_disable(struct axienet_local *lp) +{ + u32 mc_reg; + + mc_reg = axienet_ior(lp, XAE_MDIO_MC_OFFSET); + axienet_iow(lp, XAE_MDIO_MC_OFFSET, + (mc_reg & ~XAE_MDIO_MC_MDIOEN_MASK)); +} + /** * axienet_mdio_read - MDIO interface read function * @bus: Pointer to mii bus structure @@ -48,9 +65,13 @@ static int axienet_mdio_read(struct mii_bus *bus, int phy_id, int reg) int ret; struct axienet_local *lp = bus->priv; + axienet_mdio_mdc_enable(lp); + ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) + if (ret < 0) { + axienet_mdio_mdc_disable(lp); return ret; + } axienet_iow(lp, XAE_MDIO_MCR_OFFSET, (((phy_id << XAE_MDIO_MCR_PHYAD_SHIFT) & @@ -61,14 +82,17 @@ static int axienet_mdio_read(struct mii_bus *bus, int phy_id, int reg) XAE_MDIO_MCR_OP_READ_MASK)); ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) + if (ret < 0) { + axienet_mdio_mdc_disable(lp); return ret; + } rc = axienet_ior(lp, XAE_MDIO_MRD_OFFSET) & 0x0000FFFF; dev_dbg(lp->dev, "axienet_mdio_read(phy_id=%i, reg=%x) == %x\n", phy_id, reg, rc); + axienet_mdio_mdc_disable(lp); return rc; } @@ -94,9 +118,13 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, dev_dbg(lp->dev, "axienet_mdio_write(phy_id=%i, reg=%x, val=%x)\n", phy_id, reg, val); + axienet_mdio_mdc_enable(lp); + ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) + if (ret < 0) { + axienet_mdio_mdc_disable(lp); return ret; + } axienet_iow(lp, XAE_MDIO_MWD_OFFSET, (u32) val); axienet_iow(lp, XAE_MDIO_MCR_OFFSET, @@ -108,8 +136,11 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, XAE_MDIO_MCR_OP_WRITE_MASK)); ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) + if (ret < 0) { + axienet_mdio_mdc_disable(lp); return ret; + } + axienet_mdio_mdc_disable(lp); return 0; } @@ -124,7 +155,9 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, **/ int axienet_mdio_enable(struct axienet_local *lp) { - u32 clk_div, host_clock; + u32 host_clock; + + lp->mii_clk_div = 0; if (lp->clk) { host_clock = clk_get_rate(lp->clk); @@ -176,19 +209,19 @@ int axienet_mdio_enable(struct axienet_local *lp) * "clock-frequency" from the CPU */ - clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1; + lp->mii_clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1; /* If there is any remainder from the division of * fHOST / (MAX_MDIO_FREQ * 2), then we need to add * 1 to the clock divisor or we will surely be above 2.5 MHz */ if (host_clock % (MAX_MDIO_FREQ * 2)) - clk_div++; + lp->mii_clk_div++; netdev_dbg(lp->ndev, "Setting MDIO clock divisor to %u/%u Hz host clock.\n", - clk_div, host_clock); + lp->mii_clk_div, host_clock); - axienet_iow(lp, XAE_MDIO_MC_OFFSET, clk_div | XAE_MDIO_MC_MDIOEN_MASK); + axienet_iow(lp, XAE_MDIO_MC_OFFSET, lp->mii_clk_div | XAE_MDIO_MC_MDIOEN_MASK); return axienet_mdio_wait_until_ready(lp); } @@ -211,8 +244,8 @@ void axienet_mdio_disable(struct axienet_local *lp) * Return: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when * mdiobus_alloc (to allocate memory for mii bus structure) fails. * - * Sets up the MDIO interface by initializing the MDIO clock and enabling the - * MDIO interface in hardware. Register the MDIO interface. + * Sets up the MDIO interface by initializing the MDIO clock. + * Register the MDIO interface. 
**/ int axienet_mdio_setup(struct axienet_local *lp) { @@ -246,6 +279,7 @@ int axienet_mdio_setup(struct axienet_local *lp) lp->mii_bus = NULL; return ret; } + axienet_mdio_mdc_disable(lp); return 0; } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 0c26f5bcc523..008b9a40faad 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -97,7 +97,7 @@ #define ALIGNMENT 4 /* BUFFER_ALIGN(adr) calculates the number of bytes to the next alignment. */ -#define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32)adr)) % ALIGNMENT) +#define BUFFER_ALIGN(adr) ((ALIGNMENT - ((uintptr_t)adr)) % ALIGNMENT) #ifdef __BIG_ENDIAN #define xemaclite_readl ioread32be @@ -338,7 +338,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, * if it is configured in HW */ - addr = (void __iomem __force *)((u32 __force)addr ^ + addr = (void __iomem __force *)((uintptr_t __force)addr ^ XEL_BUFFER_OFFSET); reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); @@ -399,8 +399,9 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) * will correct on subsequent calls */ if (drvdata->rx_ping_pong != 0) - addr = (void __iomem __force *)((u32 __force)addr ^ - XEL_BUFFER_OFFSET); + addr = (void __iomem __force *) + ((uintptr_t __force)addr ^ + XEL_BUFFER_OFFSET); else return 0; /* No data was available */ @@ -518,6 +519,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address) /** * xemaclite_tx_timeout - Callback for Tx Timeout * @dev: Pointer to the network device + * @txqueue: Unused * * This function is called when Tx time out occurs for Emaclite device. */ @@ -1191,9 +1193,9 @@ static int xemaclite_of_probe(struct platform_device *ofdev) } dev_info(dev, - "Xilinx EmacLite at 0x%08X mapped to 0x%08X, irq=%d\n", + "Xilinx EmacLite at 0x%08X mapped to 0x%08lX, irq=%d\n", (unsigned int __force)ndev->mem_start, - (unsigned int __force)lp->base_addr, ndev->irq); + (unsigned long __force)lp->base_addr, ndev->irq); return 0; error: diff --git a/drivers/net/fddi/skfp/drvfbi.c b/drivers/net/fddi/skfp/drvfbi.c index cc9ac572423e..e9b9614639cd 100644 --- a/drivers/net/fddi/skfp/drvfbi.c +++ b/drivers/net/fddi/skfp/drvfbi.c @@ -22,10 +22,6 @@ #include <linux/bitrev.h> #include <linux/pci.h> -#ifndef lint -static const char ID_sccs[] = "@(#)drvfbi.c 1.63 99/02/11 (C) SK " ; -#endif - /* * PCM active state */ diff --git a/drivers/net/fddi/skfp/ecm.c b/drivers/net/fddi/skfp/ecm.c index 15c503f43727..2f5f5f26bb43 100644 --- a/drivers/net/fddi/skfp/ecm.c +++ b/drivers/net/fddi/skfp/ecm.c @@ -40,10 +40,6 @@ #define KERNEL #include "h/smtstate.h" -#ifndef lint -static const char ID_sccs[] = "@(#)ecm.c 2.7 99/08/05 (C) SK " ; -#endif - /* * FSM Macros */ @@ -147,10 +143,11 @@ static void ecm_fsm(struct s_smc *smc, int cmd) /* For AIX event notification: */ /* Is a disconnect command remotely issued ? 
*/ if (cmd == EC_DISCONNECT && - smc->mib.fddiSMTRemoteDisconnectFlag == TRUE) + smc->mib.fddiSMTRemoteDisconnectFlag == TRUE) { AIX_EVENT (smc, (u_long) CIO_HARD_FAIL, (u_long) FDDI_REMOTE_DISCONNECT, smt_get_event_word(smc), smt_get_error_word(smc) ); + } /*jd 05-Aug-1999 Bug #10419 "Port Disconnect fails at Dup MAc Cond."*/ if (cmd == EC_CONNECT) { diff --git a/drivers/net/fddi/skfp/ess.c b/drivers/net/fddi/skfp/ess.c index afd5ca39f43b..35110c0c00a0 100644 --- a/drivers/net/fddi/skfp/ess.c +++ b/drivers/net/fddi/skfp/ess.c @@ -40,7 +40,6 @@ #ifdef ESS #ifndef lint -static const char ID_sccs[] = "@(#)ess.c 1.10 96/02/23 (C) SK" ; #define LINT_USE(x) #else #define LINT_USE(x) (x)=(x) diff --git a/drivers/net/fddi/skfp/hwt.c b/drivers/net/fddi/skfp/hwt.c index 32804ed049cd..5577b8e14b73 100644 --- a/drivers/net/fddi/skfp/hwt.c +++ b/drivers/net/fddi/skfp/hwt.c @@ -27,10 +27,6 @@ #include "h/fddi.h" #include "h/smc.h" -#ifndef lint -static const char ID_sccs[] = "@(#)hwt.c 1.13 97/04/23 (C) SK " ; -#endif - /* * Prototypes of local functions. */ diff --git a/drivers/net/fddi/skfp/pcmplc.c b/drivers/net/fddi/skfp/pcmplc.c index 554cde8d6073..90e8df6d9a88 100644 --- a/drivers/net/fddi/skfp/pcmplc.c +++ b/drivers/net/fddi/skfp/pcmplc.c @@ -45,10 +45,6 @@ #define KERNEL #include "h/smtstate.h" -#ifndef lint -static const char ID_sccs[] = "@(#)pcmplc.c 2.55 99/08/05 (C) SK " ; -#endif - #ifdef FDDI_MIB extern int snmp_fddi_trap( #ifdef ANSIC diff --git a/drivers/net/fddi/skfp/pmf.c b/drivers/net/fddi/skfp/pmf.c index 14f10b4cab0f..563fb7f0b327 100644 --- a/drivers/net/fddi/skfp/pmf.c +++ b/drivers/net/fddi/skfp/pmf.c @@ -24,10 +24,6 @@ #ifndef SLIM_SMT -#ifndef lint -static const char ID_sccs[] = "@(#)pmf.c 1.37 97/08/04 (C) SK " ; -#endif - static int smt_authorize(struct s_smc *smc, struct smt_header *sm); static int smt_check_set_count(struct s_smc *smc, struct smt_header *sm); static const struct s_p_tab* smt_get_ptab(u_short para); diff --git a/drivers/net/fddi/skfp/queue.c b/drivers/net/fddi/skfp/queue.c index ba022f723bd7..abe155ad777f 100644 --- a/drivers/net/fddi/skfp/queue.c +++ b/drivers/net/fddi/skfp/queue.c @@ -18,10 +18,6 @@ #include "h/fddi.h" #include "h/smc.h" -#ifndef lint -static const char ID_sccs[] = "@(#)queue.c 2.9 97/08/04 (C) SK " ; -#endif - #define PRINTF(a,b,c) /* diff --git a/drivers/net/fddi/skfp/rmt.c b/drivers/net/fddi/skfp/rmt.c index c0e62c25332c..37a89675dbeb 100644 --- a/drivers/net/fddi/skfp/rmt.c +++ b/drivers/net/fddi/skfp/rmt.c @@ -45,10 +45,6 @@ #define KERNEL #include "h/smtstate.h" -#ifndef lint -static const char ID_sccs[] = "@(#)rmt.c 2.13 99/07/02 (C) SK " ; -#endif - /* * FSM Macros */ diff --git a/drivers/net/fddi/skfp/smtdef.c b/drivers/net/fddi/skfp/smtdef.c index 0bebde3c6cb9..99cc9a549bd7 100644 --- a/drivers/net/fddi/skfp/smtdef.c +++ b/drivers/net/fddi/skfp/smtdef.c @@ -22,10 +22,6 @@ #define OEM_USER_DATA "SK-NET FDDI V2.0 Userdata" #endif -#ifndef lint -static const char ID_sccs[] = "@(#)smtdef.c 2.53 99/08/11 (C) SK " ; -#endif - /* * defaults */ diff --git a/drivers/net/fddi/skfp/smtinit.c b/drivers/net/fddi/skfp/smtinit.c index 01f6c75cbea8..c9898c83fe30 100644 --- a/drivers/net/fddi/skfp/smtinit.c +++ b/drivers/net/fddi/skfp/smtinit.c @@ -19,10 +19,6 @@ #include "h/fddi.h" #include "h/smc.h" -#ifndef lint -static const char ID_sccs[] = "@(#)smtinit.c 1.15 97/05/06 (C) SK " ; -#endif - void init_fddi_driver(struct s_smc *smc, u_char *mac_addr); /* define global debug variable */ diff --git a/drivers/net/fddi/skfp/smttimer.c 
b/drivers/net/fddi/skfp/smttimer.c index 9d549bb14f07..5f3e5d7bf415 100644 --- a/drivers/net/fddi/skfp/smttimer.c +++ b/drivers/net/fddi/skfp/smttimer.c @@ -18,10 +18,6 @@ #include "h/fddi.h" #include "h/smc.h" -#ifndef lint -static const char ID_sccs[] = "@(#)smttimer.c 2.4 97/08/04 (C) SK " ; -#endif - static void timer_done(struct s_smc *smc, int restart); void smt_timer_init(struct s_smc *smc) diff --git a/drivers/net/fddi/skfp/srf.c b/drivers/net/fddi/skfp/srf.c index f98d060b0f5b..4cad68c3f49b 100644 --- a/drivers/net/fddi/skfp/srf.c +++ b/drivers/net/fddi/skfp/srf.c @@ -26,11 +26,6 @@ #ifndef SLIM_SMT #ifndef BOOT -#ifndef lint -static const char ID_sccs[] = "@(#)srf.c 1.18 97/08/04 (C) SK " ; -#endif - - /* * function declarations */ diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 1426bfc009bc..ef9b5ea9073b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1137,7 +1137,7 @@ static const struct net_device_ops geneve_netdev_ops = { .ndo_open = geneve_open, .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index dc668ed280b9..4c04e271f184 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -607,7 +607,7 @@ static const struct net_device_ops gtp_netdev_ops = { .ndo_init = gtp_dev_init, .ndo_uninit = gtp_dev_uninit, .ndo_start_xmit = gtp_dev_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, }; static void gtp_link_setup(struct net_device *dev) diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index e7413a643929..9e0058154ac3 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -597,7 +597,7 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case HDLCDRVCTL_DRIVERNAME: if (s->ops && s->ops->drvname) { - strncpy(bi.data.drivername, s->ops->drvname, + strlcpy(bi.data.drivername, s->ops->drvname, sizeof(bi.data.drivername)); break; } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 4eb64709d44c..3a2824f24caa 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -316,6 +316,7 @@ struct cas_control { * struct ca8210_test - ca8210 test interface structure * @ca8210_dfs_spi_int: pointer to the entry in the debug fs for this device * @up_fifo: fifo for upstream messages + * @readq: read wait queue * * This structure stores all the data pertaining to the debug interface */ @@ -346,12 +347,12 @@ struct ca8210_test { * @ca8210_is_awake: nonzero if ca8210 is initialised, ready for comms * @sync_down: counts number of downstream synchronous commands * @sync_up: counts number of upstream synchronous commands - * @spi_transfer_complete completion object for a single spi_transfer - * @sync_exchange_complete completion object for a complete synchronous API - * exchange - * @promiscuous whether the ca8210 is in promiscuous mode or not + * @spi_transfer_complete: completion object for a single spi_transfer + * @sync_exchange_complete: completion object for a complete synchronous API + * exchange + * @promiscuous: whether the ca8210 is in promiscuous mode or not * @retries: records how many times the current pending spi - * transfer has been retried + * transfer has been retried */ struct ca8210_priv { struct spi_device *spi; @@ 
-420,8 +421,8 @@ struct fulladdr { /** * union macaddr: generic MAC address container - * @short_addr: 16-bit short address - * @ieee_address: 64-bit extended address as LE byte array + * @short_address: 16-bit short address + * @ieee_address: 64-bit extended address as LE byte array * */ union macaddr { @@ -714,7 +715,7 @@ static void ca8210_mlme_reset_worker(struct work_struct *work) /** * ca8210_rx_done() - Calls various message dispatches responding to a received * command - * @arg: Pointer to the cas_control object for the relevant spi transfer + * @cas_ctl: Pointer to the cas_control object for the relevant spi transfer * * Presents a received SAP command from the ca8210 to the Cascoda EVBME, test * interface and network driver. @@ -1277,7 +1278,6 @@ static u8 tdme_channelinit(u8 channel, void *device_ref) * @pib_attribute: Attribute Number * @pib_attribute_length: Attribute length * @pib_attribute_value: Pointer to Attribute Value - * @device_ref: Nondescript pointer to target device * * Return: 802.15.4 status code of checks */ @@ -3046,7 +3046,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) /** * ca8210_remove() - Shut down a ca8210 upon being disconnected - * @priv: Pointer to private data structure + * @spi_device: Pointer to spi device data structure * * Return: 0 or linux error code */ @@ -3096,7 +3096,7 @@ static int ca8210_remove(struct spi_device *spi_device) /** * ca8210_probe() - Set up a connected ca8210 upon being detected by the system - * @priv: Pointer to private data structure + * @spi_device: Pointer to spi device data structure * * Return: 0 or linux error code */ diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 7fe306e76281..fa63d4dee0ba 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -187,8 +187,7 @@ static const struct net_device_ops ifb_netdev_ops = { }; #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ - NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6 | \ - NETIF_F_GSO_ENCAP_ALL | \ + NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_HW_VLAN_STAG_TX) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 6bfac1efe037..55151960a698 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -21,6 +21,7 @@ #include "gsi_trans.h" #include "ipa_gsi.h" #include "ipa_data.h" +#include "ipa_version.h" /** * DOC: The IPA Generic Software Interface @@ -108,62 +109,6 @@ struct gsi_event { u8 chid; }; -/* Hardware values from the error log register error code field */ -enum gsi_err_code { - GSI_INVALID_TRE_ERR = 0x1, - GSI_OUT_OF_BUFFERS_ERR = 0x2, - GSI_OUT_OF_RESOURCES_ERR = 0x3, - GSI_UNSUPPORTED_INTER_EE_OP_ERR = 0x4, - GSI_EVT_RING_EMPTY_ERR = 0x5, - GSI_NON_ALLOCATED_EVT_ACCESS_ERR = 0x6, - GSI_HWO_1_ERR = 0x8, -}; - -/* Hardware values from the error log register error type field */ -enum gsi_err_type { - GSI_ERR_TYPE_GLOB = 0x1, - GSI_ERR_TYPE_CHAN = 0x2, - GSI_ERR_TYPE_EVT = 0x3, -}; - -/* Hardware values used when programming an event ring */ -enum gsi_evt_chtype { - GSI_EVT_CHTYPE_MHI_EV = 0x0, - GSI_EVT_CHTYPE_XHCI_EV = 0x1, - GSI_EVT_CHTYPE_GPI_EV = 0x2, - GSI_EVT_CHTYPE_XDCI_EV = 0x3, -}; - -/* Hardware values used when programming a channel */ -enum gsi_channel_protocol { - GSI_CHANNEL_PROTOCOL_MHI = 0x0, - GSI_CHANNEL_PROTOCOL_XHCI = 0x1, - GSI_CHANNEL_PROTOCOL_GPI = 0x2, - GSI_CHANNEL_PROTOCOL_XDCI = 0x3, -}; - -/* Hardware values representing an event ring immediate command opcode */ -enum gsi_evt_cmd_opcode { - 
GSI_EVT_ALLOCATE = 0x0, - GSI_EVT_RESET = 0x9, - GSI_EVT_DE_ALLOC = 0xa, -}; - -/* Hardware values representing a generic immediate command opcode */ -enum gsi_generic_cmd_opcode { - GSI_GENERIC_HALT_CHANNEL = 0x1, - GSI_GENERIC_ALLOCATE_CHANNEL = 0x2, -}; - -/* Hardware values representing a channel immediate command opcode */ -enum gsi_ch_cmd_opcode { - GSI_CH_ALLOCATE = 0x0, - GSI_CH_START = 0x1, - GSI_CH_STOP = 0x2, - GSI_CH_RESET = 0x9, - GSI_CH_DE_ALLOC = 0xa, -}; - /** gsi_channel_scratch_gpi - GPI protocol scratch register * @max_outstanding_tre: * Defines the maximum number of TREs allowed in a single transaction @@ -229,21 +174,70 @@ static u32 gsi_channel_id(struct gsi_channel *channel) return channel - &channel->gsi->channel[0]; } +/* Update the GSI IRQ type register with the cached value */ +static void gsi_irq_type_update(struct gsi *gsi, u32 val) +{ + gsi->type_enabled_bitmap = val; + iowrite32(val, gsi->virt + GSI_CNTXT_TYPE_IRQ_MSK_OFFSET); +} + +static void gsi_irq_type_enable(struct gsi *gsi, enum gsi_irq_type_id type_id) +{ + gsi_irq_type_update(gsi, gsi->type_enabled_bitmap | BIT(type_id)); +} + +static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id) +{ + gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id)); +} + +/* Turn off all GSI interrupts initially */ +static void gsi_irq_setup(struct gsi *gsi) +{ + /* Disable all interrupt types */ + gsi_irq_type_update(gsi, 0); + + /* Clear all type-specific interrupt masks */ + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_INTER_EE_SRC_CH_IRQ_OFFSET); + iowrite32(0, gsi->virt + GSI_INTER_EE_SRC_EV_CH_IRQ_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); +} + +/* Turn off all GSI interrupts when we're all done */ +static void gsi_irq_teardown(struct gsi *gsi) +{ + /* Nothing to do */ +} + static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) { + bool enable_ieob = !gsi->ieob_enabled_bitmap; u32 val; - gsi->event_enable_bitmap |= BIT(evt_ring_id); - val = gsi->event_enable_bitmap; + gsi->ieob_enabled_bitmap |= BIT(evt_ring_id); + val = gsi->ieob_enabled_bitmap; iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); + + /* Enable the interrupt type if this is the first channel enabled */ + if (enable_ieob) + gsi_irq_type_enable(gsi, GSI_IEOB); } static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) { u32 val; - gsi->event_enable_bitmap &= ~BIT(evt_ring_id); - val = gsi->event_enable_bitmap; + gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id); + + /* Disable the interrupt type if this was the last enabled channel */ + if (!gsi->ieob_enabled_bitmap) + gsi_irq_type_disable(gsi, GSI_IEOB); + + val = gsi->ieob_enabled_bitmap; iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); } @@ -252,38 +246,32 @@ static void gsi_irq_enable(struct gsi *gsi) { u32 val; - /* We don't use inter-EE channel or event interrupts */ - val = GSI_CNTXT_TYPE_IRQ_MSK_ALL; - val &= ~INTER_EE_CH_CTRL_FMASK; - val &= ~INTER_EE_EV_CTRL_FMASK; - iowrite32(val, gsi->virt + GSI_CNTXT_TYPE_IRQ_MSK_OFFSET); - - val = GENMASK(gsi->channel_count - 1, 0); - iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - - val = GENMASK(gsi->evt_ring_count - 1, 0); - iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - - /* Each IEOB interrupt 
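The new gsi_irq_type_update() helper above keeps a cached copy of the value last written to the type-mask register, so gsi_irq_type_enable() and gsi_irq_type_disable() can flip a single bit without reading device memory back. A runnable sketch of that shadow-register pattern, with the iowrite32() stubbed by a plain variable (names are illustrative, not the driver's API):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))

static uint32_t fake_irq_msk_reg;	/* stands in for the mmio register */

struct dev_state {
	uint32_t type_enabled_bitmap;	/* cached register contents */
};

static void irq_type_update(struct dev_state *s, uint32_t val)
{
	s->type_enabled_bitmap = val;
	fake_irq_msk_reg = val;		/* iowrite32() in the real driver */
}

static void irq_type_enable(struct dev_state *s, unsigned int type_id)
{
	irq_type_update(s, s->type_enabled_bitmap | BIT(type_id));
}

static void irq_type_disable(struct dev_state *s, unsigned int type_id)
{
	irq_type_update(s, s->type_enabled_bitmap & ~BIT(type_id));
}

int main(void)
{
	struct dev_state s = { 0 };

	irq_type_enable(&s, 2);		/* e.g. global EE events */
	irq_type_enable(&s, 6);		/* e.g. general events */
	irq_type_disable(&s, 2);

	printf("mask = 0x%08x\n", fake_irq_msk_reg);	/* 0x00000040 */
	return 0;
}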
is enabled (later) as needed by channels */ - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); - - val = GSI_CNTXT_GLOB_IRQ_ALL; - iowrite32(val, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + /* Global interrupts include hardware error reports. Enable + * that so we can at least report the error should it occur. + */ + iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + gsi_irq_type_update(gsi, gsi->type_enabled_bitmap | BIT(GSI_GLOB_EE)); - /* Never enable GSI_BREAK_POINT */ - val = GSI_CNTXT_GSI_IRQ_ALL & ~BREAK_POINT_FMASK; + /* General GSI interrupts are reported to all EEs; if they occur + * they are unrecoverable (without reset). A breakpoint interrupt + * also exists, but we don't support that. We want to be notified + * of errors so we can report them, even if they can't be handled. + */ + val = BIT(BUS_ERROR); + val |= BIT(CMD_FIFO_OVRFLOW); + val |= BIT(MCS_STACK_OVRFLOW); iowrite32(val, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); + gsi_irq_type_update(gsi, gsi->type_enabled_bitmap | BIT(GSI_GENERAL)); } -/* Disable all GSI_interrupt types */ +/* Disable all GSI interrupt types */ static void gsi_irq_disable(struct gsi *gsi) { + gsi_irq_type_update(gsi, 0); + + /* Clear the type-specific interrupt masks set by gsi_irq_enable() */ iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_TYPE_IRQ_MSK_OFFSET); } /* Return the virtual address associated with a ring index */ @@ -337,13 +325,30 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; struct completion *completion = &evt_ring->completion; struct device *dev = gsi->dev; + bool success; u32 val; + /* We only perform one event ring command at a time, and event + * control interrupts should only occur when such a command + * is issued here. Only permit *this* event ring to trigger + * an interrupt, and only enable the event control IRQ type + * when we expect it to occur. + */ + val = BIT(evt_ring_id); + iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + gsi_irq_type_enable(gsi, GSI_EV_CTRL); + val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK); val |= u32_encode_bits(opcode, EV_OPCODE_FMASK); - if (gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion)) - return 0; /* Success! */ + success = gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion); + + /* Disable the interrupt again */ + gsi_irq_type_disable(gsi, GSI_EV_CTRL); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + + if (success) + return 0; dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n", opcode, evt_ring_id, evt_ring->state); @@ -433,13 +438,29 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) u32 channel_id = gsi_channel_id(channel); struct gsi *gsi = channel->gsi; struct device *dev = gsi->dev; + bool success; u32 val; + /* We only perform one channel command at a time, and channel + * control interrupts should only occur when such a command is + * issued here. So we only permit *this* channel to trigger + * an interrupt and only enable the channel control IRQ type + * when we expect it to occur. 
+ */ + val = BIT(channel_id); + iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + gsi_irq_type_enable(gsi, GSI_CH_CTRL); + val = u32_encode_bits(channel_id, CH_CHID_FMASK); val |= u32_encode_bits(opcode, CH_OPCODE_FMASK); + success = gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion); - if (gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion)) - return 0; /* Success! */ + /* Disable the interrupt again */ + gsi_irq_type_disable(gsi, GSI_CH_CTRL); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + + if (success) + return 0; dev_err(dev, "GSI command %u for channel %u timed out, state %u\n", opcode, channel_id, gsi_channel_state(channel)); @@ -607,7 +628,8 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id) size_t size = evt_ring->ring.count * GSI_RING_ELEMENT_SIZE; u32 val; - val = u32_encode_bits(GSI_EVT_CHTYPE_GPI_EV, EV_CHTYPE_FMASK); + /* We program all event rings as GPI type/protocol */ + val = u32_encode_bits(GSI_CHANNEL_TYPE_GPI, EV_CHTYPE_FMASK); val |= EV_INTYPE_FMASK; val |= u32_encode_bits(GSI_RING_ELEMENT_SIZE, EV_ELEMENT_SIZE_FMASK); iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_0_OFFSET(evt_ring_id)); @@ -714,8 +736,8 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* Arbitrarily pick TRE 0 as the first channel element to use */ channel->tre_ring.index = 0; - /* We program all channels to use GPI protocol */ - val = u32_encode_bits(GSI_CHANNEL_PROTOCOL_GPI, CHTYPE_PROTOCOL_FMASK); + /* We program all channels as GPI type/protocol */ + val = u32_encode_bits(GSI_CHANNEL_TYPE_GPI, CHTYPE_PROTOCOL_FMASK); if (channel->toward_ipa) val |= CHTYPE_DIR_FMASK; val |= u32_encode_bits(channel->evt_ring_id, ERINDEX_FMASK); @@ -742,11 +764,12 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* Max prefetch is 1 segment (do not set MAX_PREFETCH_FMASK) */ - /* Enable the doorbell engine if requested */ - if (doorbell) + /* We enable the doorbell engine for IPA v3.5.1 */ + if (gsi->version == IPA_VERSION_3_5_1 && doorbell) val |= USE_DB_ENG_FMASK; - if (!channel->use_prefetch) + /* Starting with IPA v4.0 the command channel uses the escape buffer */ + if (gsi->version != IPA_VERSION_3_5_1 && channel->command) val |= USE_ESCAPE_BUF_ONLY_FMASK; iowrite32(val, gsi->virt + GSI_CH_C_QOS_OFFSET(channel_id)); @@ -829,8 +852,8 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) return ret; } -/* Reset and reconfigure a channel (possibly leaving doorbell disabled) */ -void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy) +/* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */ +void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) { struct gsi_channel *channel = &gsi->channel[channel_id]; @@ -838,10 +861,10 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy) gsi_channel_reset_command(channel); /* Due to a hardware quirk we may need to reset RX channels twice. 
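Both evt_ring_command() and gsi_channel_command() above now follow the same discipline: unmask exactly one interrupt source, enable its IRQ type, issue the command, then mask everything again whether or not the command succeeded, so a stray completion can never fire outside the window. A condensed sketch of that enable-issue-disable shape, with hardware access stubbed out:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))

static uint32_t src_irq_msk;	/* stands in for the per-source mask reg */

/* Pretend to send the command and wait on its completion interrupt */
static bool send_and_wait(uint32_t cmd)
{
	(void)cmd;
	return true;	/* the real code blocks on a struct completion */
}

static int scoped_command(unsigned int id, uint32_t opcode)
{
	bool ok;

	/* Permit only *this* source to interrupt during the command */
	src_irq_msk = BIT(id);

	ok = send_and_wait(opcode << 24 | id);

	/* Always mask again, success or not */
	src_irq_msk = 0;

	return ok ? 0 : -1;
}

int main(void)
{
	int ret = scoped_command(3, 0x1);

	printf("cmd -> %d, mask after = 0x%x\n", ret, src_irq_msk);
	return 0;
}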
*/ - if (legacy && !channel->toward_ipa) + if (gsi->version == IPA_VERSION_3_5_1 && !channel->toward_ipa) gsi_channel_reset_command(channel); - gsi_channel_program(channel, legacy); + gsi_channel_program(channel, doorbell); gsi_channel_trans_cancel_pending(channel); mutex_unlock(&gsi->mutex); @@ -989,7 +1012,7 @@ static void gsi_isr_evt_ctrl(struct gsi *gsi) static void gsi_isr_glob_chan_err(struct gsi *gsi, u32 err_ee, u32 channel_id, u32 code) { - if (code == GSI_OUT_OF_RESOURCES_ERR) { + if (code == GSI_OUT_OF_RESOURCES) { dev_err(gsi->dev, "channel %u out of resources\n", channel_id); complete(&gsi->channel[channel_id].completion); return; @@ -1004,7 +1027,7 @@ gsi_isr_glob_chan_err(struct gsi *gsi, u32 err_ee, u32 channel_id, u32 code) static void gsi_isr_glob_evt_err(struct gsi *gsi, u32 err_ee, u32 evt_ring_id, u32 code) { - if (code == GSI_OUT_OF_RESOURCES_ERR) { + if (code == GSI_OUT_OF_RESOURCES) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; u32 channel_id = gsi_channel_id(evt_ring->channel); @@ -1034,8 +1057,8 @@ static void gsi_isr_glob_err(struct gsi *gsi) iowrite32(~0, gsi->virt + GSI_ERROR_LOG_CLR_OFFSET); ee = u32_get_bits(val, ERR_EE_FMASK); - which = u32_get_bits(val, ERR_VIRT_IDX_FMASK); type = u32_get_bits(val, ERR_TYPE_FMASK); + which = u32_get_bits(val, ERR_VIRT_IDX_FMASK); code = u32_get_bits(val, ERR_CODE_FMASK); if (type == GSI_ERR_TYPE_CHAN) @@ -1054,7 +1077,7 @@ static void gsi_isr_gp_int1(struct gsi *gsi) val = ioread32(gsi->virt + GSI_CNTXT_SCRATCH_0_OFFSET); result = u32_get_bits(val, GENERIC_EE_RESULT_FMASK); - if (result != GENERIC_EE_SUCCESS_FVAL) + if (result != GENERIC_EE_SUCCESS) dev_err(gsi->dev, "global INT1 generic result %u\n", result); complete(&gsi->completion); @@ -1067,15 +1090,15 @@ static void gsi_isr_glob_ee(struct gsi *gsi) val = ioread32(gsi->virt + GSI_CNTXT_GLOB_IRQ_STTS_OFFSET); - if (val & ERROR_INT_FMASK) + if (val & BIT(ERROR_INT)) gsi_isr_glob_err(gsi); iowrite32(val, gsi->virt + GSI_CNTXT_GLOB_IRQ_CLR_OFFSET); - val &= ~ERROR_INT_FMASK; + val &= ~BIT(ERROR_INT); - if (val & GP_INT1_FMASK) { - val ^= GP_INT1_FMASK; + if (val & BIT(GP_INT1)) { + val ^= BIT(GP_INT1); gsi_isr_gp_int1(gsi); } @@ -1110,8 +1133,7 @@ static void gsi_isr_general(struct gsi *gsi) val = ioread32(gsi->virt + GSI_CNTXT_GSI_IRQ_STTS_OFFSET); iowrite32(val, gsi->virt + GSI_CNTXT_GSI_IRQ_CLR_OFFSET); - if (val) - dev_err(dev, "unexpected general interrupt 0x%08x\n", val); + dev_err(dev, "unexpected general interrupt 0x%08x\n", val); } /** @@ -1128,6 +1150,7 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) u32 intr_mask; u32 cnt = 0; + /* enum gsi_irq_type_id defines GSI interrupt types */ while ((intr_mask = ioread32(gsi->virt + GSI_CNTXT_TYPE_IRQ_OFFSET))) { /* intr_mask contains bitmask of pending GSI interrupts */ do { @@ -1136,19 +1159,19 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) intr_mask ^= gsi_intr; switch (gsi_intr) { - case CH_CTRL_FMASK: + case BIT(GSI_CH_CTRL): gsi_isr_chan_ctrl(gsi); break; - case EV_CTRL_FMASK: + case BIT(GSI_EV_CTRL): gsi_isr_evt_ctrl(gsi); break; - case GLOB_EE_FMASK: + case BIT(GSI_GLOB_EE): gsi_isr_glob_ee(gsi); break; - case IEOB_FMASK: + case BIT(GSI_IEOB): gsi_isr_ieob(gsi); break; - case GENERAL_FMASK: + case BIT(GSI_GENERAL): gsi_isr_general(gsi); break; default: @@ -1168,6 +1191,34 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned int irq; + int ret; + 
+ ret = platform_get_irq_byname(pdev, "gsi"); + if (ret <= 0) { + dev_err(dev, "DT error %d getting \"gsi\" IRQ property\n", ret); + return ret ? : -EINVAL; + } + irq = ret; + + ret = request_irq(irq, gsi_isr, 0, "gsi", gsi); + if (ret) { + dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret); + return ret; + } + gsi->irq = irq; + + return 0; +} + +static void gsi_irq_exit(struct gsi *gsi) +{ + free_irq(gsi->irq, gsi); +} + /* Return the transaction associated with a transfer completion event */ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, struct gsi_event *event) @@ -1452,8 +1503,7 @@ static void gsi_evt_ring_teardown(struct gsi *gsi) } /* Setup function for a single channel */ -static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id, - bool legacy) +static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; u32 evt_ring_id = channel->evt_ring_id; @@ -1472,7 +1522,7 @@ static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id, if (ret) goto err_evt_ring_de_alloc; - gsi_channel_program(channel, legacy); + gsi_channel_program(channel, true); if (channel->toward_ipa) netif_tx_napi_add(&gsi->dummy_dev, &channel->napi, @@ -1511,8 +1561,19 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id, enum gsi_generic_cmd_opcode opcode) { struct completion *completion = &gsi->completion; + bool success; u32 val; + /* The error global interrupt type is always enabled (until we + * teardown), so we won't change that. A generic EE command + * completes with a GSI global interrupt of type GP_INT1. We + * only perform one generic command at a time (to allocate or + * halt a modem channel) and only from this function. So we + * enable the GP_INT1 IRQ type here while we're expecting it. + */ + val = BIT(ERROR_INT) | BIT(GP_INT1); + iowrite32(val, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + /* First zero the result code field */ val = ioread32(gsi->virt + GSI_CNTXT_SCRATCH_0_OFFSET); val &= ~GENERIC_EE_RESULT_FMASK; @@ -1523,8 +1584,13 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id, val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK); val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK); - if (gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion)) - return 0; /* Success! 
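The error path in gsi_irq_init() above relies on "ret ? : -EINVAL", the GNU C conditional with an omitted middle operand: it evaluates to ret when ret is nonzero, and to -EINVAL when platform_get_irq_byname() returns 0. A standalone sketch of that mapping (the operator is a GNU extension, so gcc or clang is assumed):

#include <errno.h>
#include <stdio.h>

/* Stand-in for platform_get_irq_byname(): a negative errno on
 * failure, and 0 is also treated as invalid by the caller.
 */
static int fake_get_irq(int scenario)
{
	return scenario;
}

static int init_irq(int scenario)
{
	int ret = fake_get_irq(scenario);

	/* "elvis" operator: ret if nonzero, else -EINVAL */
	if (ret <= 0)
		return ret ? : -EINVAL;

	return ret;	/* a usable IRQ number */
}

int main(void)
{
	printf("%d %d %d\n", init_irq(-ENODEV), init_irq(0), init_irq(42));
	/* on Linux this prints: -19 -22 42 */
	return 0;
}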
*/ + success = gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion); + + /* Disable the GP_INT1 IRQ type again */ + iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + + if (success) + return 0; dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n", opcode, channel_id); @@ -1540,16 +1606,11 @@ static int gsi_modem_channel_alloc(struct gsi *gsi, u32 channel_id) static void gsi_modem_channel_halt(struct gsi *gsi, u32 channel_id) { - int ret; - - ret = gsi_generic_command(gsi, channel_id, GSI_GENERIC_HALT_CHANNEL); - if (ret) - dev_err(gsi->dev, "error %d halting modem channel %u\n", - ret, channel_id); + (void)gsi_generic_command(gsi, channel_id, GSI_GENERIC_HALT_CHANNEL); } /* Setup function for channels */ -static int gsi_channel_setup(struct gsi *gsi, bool legacy) +static int gsi_channel_setup(struct gsi *gsi) { u32 channel_id = 0; u32 mask; @@ -1561,7 +1622,7 @@ static int gsi_channel_setup(struct gsi *gsi, bool legacy) mutex_lock(&gsi->mutex); do { - ret = gsi_channel_setup_one(gsi, channel_id, legacy); + ret = gsi_channel_setup_one(gsi, channel_id); if (ret) goto err_unwind; } while (++channel_id < gsi->channel_count); @@ -1647,10 +1708,11 @@ static void gsi_channel_teardown(struct gsi *gsi) } /* Setup function for GSI. GSI firmware must be loaded and initialized */ -int gsi_setup(struct gsi *gsi, bool legacy) +int gsi_setup(struct gsi *gsi) { struct device *dev = gsi->dev; u32 val; + int ret; /* Here is where we first touch the GSI hardware */ val = ioread32(gsi->virt + GSI_GSI_STATUS_OFFSET); @@ -1659,6 +1721,8 @@ int gsi_setup(struct gsi *gsi, bool legacy) return -EIO; } + gsi_irq_setup(gsi); + val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); gsi->channel_count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); @@ -1691,13 +1755,18 @@ int gsi_setup(struct gsi *gsi, bool legacy) /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); - return gsi_channel_setup(gsi, legacy); + ret = gsi_channel_setup(gsi); + if (ret) + gsi_irq_teardown(gsi); + + return ret; } /* Inverse of gsi_setup() */ void gsi_teardown(struct gsi *gsi) { gsi_channel_teardown(gsi); + gsi_irq_teardown(gsi); } /* Initialize a channel's event ring */ @@ -1745,7 +1814,7 @@ static void gsi_evt_ring_init(struct gsi *gsi) u32 evt_ring_id = 0; gsi->event_bitmap = gsi_event_bitmap_init(GSI_EVT_RING_COUNT_MAX); - gsi->event_enable_bitmap = 0; + gsi->ieob_enabled_bitmap = 0; do init_completion(&gsi->evt_ring[evt_ring_id].completion); while (++evt_ring_id < GSI_EVT_RING_COUNT_MAX); @@ -1814,7 +1883,7 @@ static bool gsi_channel_data_valid(struct gsi *gsi, /* Init function for a single channel */ static int gsi_channel_init_one(struct gsi *gsi, const struct ipa_gsi_endpoint_data *data, - bool command, bool prefetch) + bool command) { struct gsi_channel *channel; u32 tre_count; @@ -1838,7 +1907,6 @@ static int gsi_channel_init_one(struct gsi *gsi, channel->gsi = gsi; channel->toward_ipa = data->toward_ipa; channel->command = command; - channel->use_prefetch = command && prefetch; channel->tlv_count = data->channel.tlv_count; channel->tre_count = tre_count; channel->event_count = data->channel.event_count; @@ -1892,13 +1960,16 @@ static void gsi_channel_exit_one(struct gsi_channel *channel) } /* Init function for channels */ -static int gsi_channel_init(struct gsi *gsi, bool prefetch, u32 count, - const struct ipa_gsi_endpoint_data *data, - bool modem_alloc) +static int gsi_channel_init(struct gsi *gsi, u32 count, + const struct 
ipa_gsi_endpoint_data *data) { + bool modem_alloc; int ret = 0; u32 i; + /* IPA v4.2 requires the AP to allocate channels for the modem */ + modem_alloc = gsi->version == IPA_VERSION_4_2; + gsi_evt_ring_init(gsi); /* The endpoint data array is indexed by endpoint name */ @@ -1916,7 +1987,7 @@ static int gsi_channel_init(struct gsi *gsi, bool prefetch, u32 count, continue; } - ret = gsi_channel_init_one(gsi, &data[i], command, prefetch); + ret = gsi_channel_init_one(gsi, &data[i], command); if (ret) goto err_unwind; } @@ -1952,19 +2023,19 @@ static void gsi_channel_exit(struct gsi *gsi) } /* Init function for GSI. GSI hardware does not need to be "ready" */ -int gsi_init(struct gsi *gsi, struct platform_device *pdev, bool prefetch, - u32 count, const struct ipa_gsi_endpoint_data *data, - bool modem_alloc) +int gsi_init(struct gsi *gsi, struct platform_device *pdev, + enum ipa_version version, u32 count, + const struct ipa_gsi_endpoint_data *data) { struct device *dev = &pdev->dev; struct resource *res; resource_size_t size; - unsigned int irq; int ret; gsi_validate_build(); gsi->dev = dev; + gsi->version = version; /* The GSI layer performs NAPI on all endpoints. NAPI requires a * network device structure, but the GSI layer does not have one, @@ -1972,55 +2043,43 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev, bool prefetch, */ init_dummy_netdev(&gsi->dummy_dev); - ret = platform_get_irq_byname(pdev, "gsi"); - if (ret <= 0) { - dev_err(dev, "DT error %d getting \"gsi\" IRQ property\n", ret); - return ret ? : -EINVAL; - } - irq = ret; - - ret = request_irq(irq, gsi_isr, 0, "gsi", gsi); - if (ret) { - dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret); - return ret; - } - gsi->irq = irq; - /* Get GSI memory range and map it */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gsi"); if (!res) { dev_err(dev, "DT error getting \"gsi\" memory property\n"); - ret = -ENODEV; - goto err_free_irq; + return -ENODEV; } size = resource_size(res); if (res->start > U32_MAX || size > U32_MAX - res->start) { dev_err(dev, "DT memory resource \"gsi\" out of range\n"); - ret = -EINVAL; - goto err_free_irq; + return -EINVAL; } gsi->virt = ioremap(res->start, size); if (!gsi->virt) { dev_err(dev, "unable to remap \"gsi\" memory\n"); - ret = -ENOMEM; - goto err_free_irq; + return -ENOMEM; } - ret = gsi_channel_init(gsi, prefetch, count, data, modem_alloc); + init_completion(&gsi->completion); + + ret = gsi_irq_init(gsi, pdev); if (ret) goto err_iounmap; + ret = gsi_channel_init(gsi, count, data); + if (ret) + goto err_irq_exit; + mutex_init(&gsi->mutex); - init_completion(&gsi->completion); return 0; +err_irq_exit: + gsi_irq_exit(gsi); err_iounmap: iounmap(gsi->virt); -err_free_irq: - free_irq(gsi->irq, gsi); return ret; } @@ -2030,7 +2089,7 @@ void gsi_exit(struct gsi *gsi) { mutex_destroy(&gsi->mutex); gsi_channel_exit(gsi); - free_irq(gsi->irq, gsi); + gsi_irq_exit(gsi); iounmap(gsi->virt); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 3f9f29d531c4..ecc784e3a812 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -13,6 +13,8 @@ #include <linux/platform_device.h> #include <linux/netdevice.h> +#include "ipa_version.h" + /* Maximum number of channels and event rings supported by the driver */ #define GSI_CHANNEL_COUNT_MAX 17 #define GSI_EVT_RING_COUNT_MAX 13 @@ -31,10 +33,10 @@ struct ipa_gsi_endpoint_data; /* Execution environment IDs */ enum gsi_ee_id { - GSI_EE_AP = 0, - GSI_EE_MODEM = 1, - GSI_EE_UC = 2, - GSI_EE_TZ = 3, + GSI_EE_AP = 0x0, + 
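gsi_channel_init() above now derives modem_alloc from the IPA version rather than taking it as a parameter, matching the wider gsi_init() signature change: callers hand over the version once and each hardware quirk is computed right where it is used. A small sketch of that refactoring direction (version list abbreviated, helper names illustrative):

#include <stdbool.h>
#include <stdio.h>

enum ipa_version {	/* subset, for illustration */
	IPA_VERSION_3_5_1,
	IPA_VERSION_4_0,
	IPA_VERSION_4_1,
	IPA_VERSION_4_2,
};

/* Each quirk lives next to the code that needs it */
static bool modem_alloc(enum ipa_version v)
{
	return v == IPA_VERSION_4_2;	/* AP allocates modem channels */
}

static bool doorbell_engine(enum ipa_version v)
{
	return v == IPA_VERSION_3_5_1;	/* only v3.5.1 uses the doorbell */
}

int main(void)
{
	enum ipa_version v = IPA_VERSION_4_2;

	printf("modem_alloc=%d doorbell=%d\n",
	       modem_alloc(v), doorbell_engine(v));
	return 0;
}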
GSI_EE_MODEM = 0x1, + GSI_EE_UC = 0x2, + GSI_EE_TZ = 0x3, }; struct gsi_ring { @@ -94,12 +96,12 @@ struct gsi_trans_info { /* Hardware values signifying the state of a channel */ enum gsi_channel_state { - GSI_CHANNEL_STATE_NOT_ALLOCATED = 0x0, - GSI_CHANNEL_STATE_ALLOCATED = 0x1, - GSI_CHANNEL_STATE_STARTED = 0x2, - GSI_CHANNEL_STATE_STOPPED = 0x3, - GSI_CHANNEL_STATE_STOP_IN_PROC = 0x4, - GSI_CHANNEL_STATE_ERROR = 0xf, + GSI_CHANNEL_STATE_NOT_ALLOCATED = 0x0, + GSI_CHANNEL_STATE_ALLOCATED = 0x1, + GSI_CHANNEL_STATE_STARTED = 0x2, + GSI_CHANNEL_STATE_STOPPED = 0x3, + GSI_CHANNEL_STATE_STOP_IN_PROC = 0x4, + GSI_CHANNEL_STATE_ERROR = 0xf, }; /* We only care about channels between IPA and AP */ @@ -107,7 +109,6 @@ struct gsi_channel { struct gsi *gsi; bool toward_ipa; bool command; /* AP command TX channel or not */ - bool use_prefetch; /* use prefetch (else escape buf) */ u8 tlv_count; /* # entries in TLV FIFO */ u16 tre_count; @@ -147,6 +148,7 @@ struct gsi_evt_ring { struct gsi { struct device *dev; /* Same as IPA device */ + enum ipa_version version; struct net_device dummy_dev; /* needed for NAPI */ void __iomem *virt; u32 irq; @@ -154,9 +156,10 @@ struct gsi { u32 evt_ring_count; struct gsi_channel channel[GSI_CHANNEL_COUNT_MAX]; struct gsi_evt_ring evt_ring[GSI_EVT_RING_COUNT_MAX]; - u32 event_bitmap; - u32 event_enable_bitmap; - u32 modem_channel_bitmap; + u32 event_bitmap; /* allocated event rings */ + u32 modem_channel_bitmap; /* modem channels to allocate */ + u32 type_enabled_bitmap; /* GSI IRQ types enabled */ + u32 ieob_enabled_bitmap; /* IEOB IRQ enabled (event rings) */ struct completion completion; /* for global EE commands */ struct mutex mutex; /* protects commands, programming */ }; @@ -164,14 +167,13 @@ struct gsi { /** * gsi_setup() - Set up the GSI subsystem * @gsi: Address of GSI structure embedded in an IPA structure - * @legacy: Set up for legacy hardware * * Return: 0 if successful, or a negative error code * * Performs initialization that must wait until the GSI hardware is * ready (including firmware loaded). */ -int gsi_setup(struct gsi *gsi, bool legacy); +int gsi_setup(struct gsi *gsi); /** * gsi_teardown() - Tear down GSI subsystem @@ -219,15 +221,15 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id); * gsi_channel_reset() - Reset an allocated GSI channel * @gsi: GSI pointer * @channel_id: Channel to be reset - * @legacy: Legacy behavior + * @doorbell: Whether to (possibly) enable the doorbell engine * - * Reset a channel and reconfigure it. The @legacy flag indicates - * that some steps should be done differently for legacy hardware. + * Reset a channel and reconfigure it. The @doorbell flag indicates + * that the doorbell engine should be enabled if needed. * * GSI hardware relinquishes ownership of all pending receive buffer * transactions and they will complete with their cancelled flag set. 
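The renamed ieob_enabled_bitmap in struct gsi above backs a first-on/last-off scheme: gsi_irq_ieob_enable() switches the whole IEOB IRQ type on for the first event ring that needs it, and gsi_irq_ieob_disable() switches it off once the bitmap empties, a reference count expressed as a bitmap. A standalone sketch of that bookkeeping:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))

static uint32_t ieob_bitmap;	/* one bit per enabled event ring */
static bool type_enabled;	/* the IEOB IRQ *type* master switch */

static void ieob_enable(unsigned int ring_id)
{
	bool first = !ieob_bitmap;	/* no rings enabled yet? */

	ieob_bitmap |= BIT(ring_id);
	if (first)
		type_enabled = true;	/* gsi_irq_type_enable(GSI_IEOB) */
}

static void ieob_disable(unsigned int ring_id)
{
	ieob_bitmap &= ~BIT(ring_id);
	if (!ieob_bitmap)
		type_enabled = false;	/* gsi_irq_type_disable(GSI_IEOB) */
}

int main(void)
{
	ieob_enable(0);
	ieob_enable(5);
	ieob_disable(0);
	printf("type on: %d\n", type_enabled);	/* still 1 */
	ieob_disable(5);
	printf("type on: %d\n", type_enabled);	/* now 0 */
	return 0;
}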
*/ -void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy); +void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell); int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop); int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start); @@ -236,15 +238,18 @@ int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start); * gsi_init() - Initialize the GSI subsystem * @gsi: Address of GSI structure embedded in an IPA structure * @pdev: IPA platform device + * @version: IPA hardware version (implies GSI version) + * @count: Number of entries in the configuration data array + * @data: Endpoint and channel configuration data * * Return: 0 if successful, or a negative error code * * Early stage initialization of the GSI subsystem, performing tasks * that can be done before the GSI hardware is ready to use. */ -int gsi_init(struct gsi *gsi, struct platform_device *pdev, bool prefetch, - u32 count, const struct ipa_gsi_endpoint_data *data, - bool modem_alloc); +int gsi_init(struct gsi *gsi, struct platform_device *pdev, + enum ipa_version version, u32 count, + const struct ipa_gsi_endpoint_data *data); /** * gsi_exit() - Exit the GSI subsystem diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h index 8e0e9350c383..c1799d1e8a83 100644 --- a/drivers/net/ipa/gsi_reg.h +++ b/drivers/net/ipa/gsi_reg.h @@ -66,12 +66,20 @@ #define CHTYPE_DIR_FMASK GENMASK(3, 3) #define EE_FMASK GENMASK(7, 4) #define CHID_FMASK GENMASK(12, 8) -/* The next field is present for GSI v2.0 and above */ +/* The next field is present for IPA v4.5 and above */ #define CHTYPE_PROTOCOL_MSB_FMASK GENMASK(13, 13) #define ERINDEX_FMASK GENMASK(18, 14) #define CHSTATE_FMASK GENMASK(23, 20) #define ELEMENT_SIZE_FMASK GENMASK(31, 24) +/** enum gsi_channel_type - CHTYPE_PROTOCOL field values in CH_C_CNTXT_0 */ +enum gsi_channel_type { + GSI_CHANNEL_TYPE_MHI = 0x0, + GSI_CHANNEL_TYPE_XHCI = 0x1, + GSI_CHANNEL_TYPE_GPI = 0x2, + GSI_CHANNEL_TYPE_XDCI = 0x3, +}; + #define GSI_CH_C_CNTXT_1_OFFSET(ch) \ GSI_EE_N_CH_C_CNTXT_1_OFFSET((ch), GSI_EE_AP) #define GSI_EE_N_CH_C_CNTXT_1_OFFSET(ch, ee) \ @@ -95,7 +103,7 @@ #define WRR_WEIGHT_FMASK GENMASK(3, 0) #define MAX_PREFETCH_FMASK GENMASK(8, 8) #define USE_DB_ENG_FMASK GENMASK(9, 9) -/* The next field is present for GSI v2.0 and above */ +/* The next field is only present for IPA v4.0, v4.1, and v4.2 */ #define USE_ESCAPE_BUF_ONLY_FMASK GENMASK(10, 10) #define GSI_CH_C_SCRATCH_0_OFFSET(ch) \ @@ -128,6 +136,7 @@ #define EV_INTYPE_FMASK GENMASK(16, 16) #define EV_CHSTATE_FMASK GENMASK(23, 20) #define EV_ELEMENT_SIZE_FMASK GENMASK(31, 24) +/* enum gsi_channel_type defines EV_CHTYPE field values in EV_CH_E_CNTXT_0 */ #define GSI_EV_CH_E_CNTXT_1_OFFSET(ev) \ GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET((ev), GSI_EE_AP) @@ -216,6 +225,15 @@ #define CH_CHID_FMASK GENMASK(7, 0) #define CH_OPCODE_FMASK GENMASK(31, 24) +/** enum gsi_ch_cmd_opcode - CH_OPCODE field values in CH_CMD */ +enum gsi_ch_cmd_opcode { + GSI_CH_ALLOCATE = 0x0, + GSI_CH_START = 0x1, + GSI_CH_STOP = 0x2, + GSI_CH_RESET = 0x9, + GSI_CH_DE_ALLOC = 0xa, +}; + #define GSI_EV_CH_CMD_OFFSET \ GSI_EE_N_EV_CH_CMD_OFFSET(GSI_EE_AP) #define GSI_EE_N_EV_CH_CMD_OFFSET(ee) \ @@ -223,6 +241,13 @@ #define EV_CHID_FMASK GENMASK(7, 0) #define EV_OPCODE_FMASK GENMASK(31, 24) +/** enum gsi_evt_cmd_opcode - EV_OPCODE field values in EV_CH_CMD */ +enum gsi_evt_cmd_opcode { + GSI_EVT_ALLOCATE = 0x0, + GSI_EVT_RESET = 0x9, + GSI_EVT_DE_ALLOC = 0xa, +}; + #define GSI_GENERIC_CMD_OFFSET \ 
GSI_EE_N_GENERIC_CMD_OFFSET(GSI_EE_AP) #define GSI_EE_N_GENERIC_CMD_OFFSET(ee) \ @@ -231,29 +256,40 @@ #define GENERIC_CHID_FMASK GENMASK(9, 5) #define GENERIC_EE_FMASK GENMASK(13, 10) +/** enum gsi_generic_cmd_opcode - GENERIC_OPCODE field values in GENERIC_CMD */ +enum gsi_generic_cmd_opcode { + GSI_GENERIC_HALT_CHANNEL = 0x1, + GSI_GENERIC_ALLOCATE_CHANNEL = 0x2, +}; + #define GSI_GSI_HW_PARAM_2_OFFSET \ GSI_EE_N_GSI_HW_PARAM_2_OFFSET(GSI_EE_AP) #define GSI_EE_N_GSI_HW_PARAM_2_OFFSET(ee) \ (0x0001f040 + 0x4000 * (ee)) #define IRAM_SIZE_FMASK GENMASK(2, 0) -#define IRAM_SIZE_ONE_KB_FVAL 0 -#define IRAM_SIZE_TWO_KB_FVAL 1 -/* The next two values are available for GSI v2.0 and above */ -#define IRAM_SIZE_TWO_N_HALF_KB_FVAL 2 -#define IRAM_SIZE_THREE_KB_FVAL 3 #define NUM_CH_PER_EE_FMASK GENMASK(7, 3) #define NUM_EV_PER_EE_FMASK GENMASK(12, 8) #define GSI_CH_PEND_TRANSLATE_FMASK GENMASK(13, 13) #define GSI_CH_FULL_LOGIC_FMASK GENMASK(14, 14) -/* Fields below are present for GSI v2.0 and above */ +/* Fields below are present for IPA v4.0 and above */ #define GSI_USE_SDMA_FMASK GENMASK(15, 15) #define GSI_SDMA_N_INT_FMASK GENMASK(18, 16) #define GSI_SDMA_MAX_BURST_FMASK GENMASK(26, 19) #define GSI_SDMA_N_IOVEC_FMASK GENMASK(29, 27) -/* Fields below are present for GSI v2.2 and above */ +/* Fields below are present for IPA v4.2 and above */ #define GSI_USE_RD_WR_ENG_FMASK GENMASK(30, 30) #define GSI_USE_INTER_EE_FMASK GENMASK(31, 31) +/** enum gsi_iram_size - IRAM_SIZE field values in HW_PARAM_2 */ +enum gsi_iram_size { + IRAM_SIZE_ONE_KB = 0x0, + IRAM_SIZE_TWO_KB = 0x1, +/* The next two values are available for IPA v4.0 and above */ + IRAM_SIZE_TWO_N_HALF_KB = 0x2, + IRAM_SIZE_THREE_KB = 0x3, +}; + +/* IRQ condition for each type is cleared by writing type-specific register */ #define GSI_CNTXT_TYPE_IRQ_OFFSET \ GSI_EE_N_CNTXT_TYPE_IRQ_OFFSET(GSI_EE_AP) #define GSI_EE_N_CNTXT_TYPE_IRQ_OFFSET(ee) \ @@ -262,15 +298,17 @@ GSI_EE_N_CNTXT_TYPE_IRQ_MSK_OFFSET(GSI_EE_AP) #define GSI_EE_N_CNTXT_TYPE_IRQ_MSK_OFFSET(ee) \ (0x0001f088 + 0x4000 * (ee)) -/* The masks below are used for the TYPE_IRQ and TYPE_IRQ_MASK registers */ -#define CH_CTRL_FMASK GENMASK(0, 0) -#define EV_CTRL_FMASK GENMASK(1, 1) -#define GLOB_EE_FMASK GENMASK(2, 2) -#define IEOB_FMASK GENMASK(3, 3) -#define INTER_EE_CH_CTRL_FMASK GENMASK(4, 4) -#define INTER_EE_EV_CTRL_FMASK GENMASK(5, 5) -#define GENERAL_FMASK GENMASK(6, 6) -#define GSI_CNTXT_TYPE_IRQ_MSK_ALL GENMASK(6, 0) + +/* Values here are bit positions in the TYPE_IRQ and TYPE_IRQ_MSK registers */ +enum gsi_irq_type_id { + GSI_CH_CTRL = 0x0, /* channel allocation, etc. */ + GSI_EV_CTRL = 0x1, /* event ring allocation, etc. 
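The register header above replaces one-bit GENMASK() field masks with enums that name bit positions, used as BIT(position) at the call sites. For single-bit fields the two encodings are interchangeable, since GENMASK(n, n) == BIT(n). A minimal demonstration with simplified local equivalents of the kernel macros:

#include <assert.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))
#define GENMASK(h, l)	(((~0u) >> (31 - (h))) & ~((1u << (l)) - 1u))

enum glob_irq_id {	/* bit positions, as in enum gsi_global_irq_id */
	ERROR_INT = 0x0,
	GP_INT1   = 0x1,
};

int main(void)
{
	assert(GENMASK(0, 0) == BIT(ERROR_INT));
	assert(GENMASK(1, 1) == BIT(GP_INT1));
	assert(GENMASK(3, 0) == 0xf);	/* multi-bit fields still need GENMASK */
	printf("GENMASK(n, n) == BIT(n) holds\n");
	return 0;
}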
*/ + GSI_GLOB_EE = 0x2, /* global/general event */ + GSI_IEOB = 0x3, /* TRE completion */ + GSI_INTER_EE_CH_CTRL = 0x4, /* remote-issued stop/reset (unused) */ + GSI_INTER_EE_EV_CTRL = 0x5, /* remote-issued event reset (unused) */ + GSI_GENERAL = 0x6, /* general-purpose event */ +}; #define GSI_CNTXT_SRC_CH_IRQ_OFFSET \ GSI_EE_N_CNTXT_SRC_CH_IRQ_OFFSET(GSI_EE_AP) @@ -329,12 +367,13 @@ GSI_EE_N_CNTXT_GLOB_IRQ_CLR_OFFSET(GSI_EE_AP) #define GSI_EE_N_CNTXT_GLOB_IRQ_CLR_OFFSET(ee) \ (0x0001f110 + 0x4000 * (ee)) -/* The masks below are used for the general IRQ STTS, EN, and CLR registers */ -#define ERROR_INT_FMASK GENMASK(0, 0) -#define GP_INT1_FMASK GENMASK(1, 1) -#define GP_INT2_FMASK GENMASK(2, 2) -#define GP_INT3_FMASK GENMASK(3, 3) -#define GSI_CNTXT_GLOB_IRQ_ALL GENMASK(3, 0) +/* Values here are bit positions in the GLOB_IRQ_* registers */ +enum gsi_global_irq_id { + ERROR_INT = 0x0, + GP_INT1 = 0x1, + GP_INT2 = 0x2, + GP_INT3 = 0x3, +}; #define GSI_CNTXT_GSI_IRQ_STTS_OFFSET \ GSI_EE_N_CNTXT_GSI_IRQ_STTS_OFFSET(GSI_EE_AP) @@ -348,12 +387,13 @@ GSI_EE_N_CNTXT_GSI_IRQ_CLR_OFFSET(GSI_EE_AP) #define GSI_EE_N_CNTXT_GSI_IRQ_CLR_OFFSET(ee) \ (0x0001f128 + 0x4000 * (ee)) -/* The masks below are used for the general IRQ STTS, EN, and CLR registers */ -#define BREAK_POINT_FMASK GENMASK(0, 0) -#define BUS_ERROR_FMASK GENMASK(1, 1) -#define CMD_FIFO_OVRFLOW_FMASK GENMASK(2, 2) -#define MCS_STACK_OVRFLOW_FMASK GENMASK(3, 3) -#define GSI_CNTXT_GSI_IRQ_ALL GENMASK(3, 0) +/* Values here are bit positions in the (general) GSI_IRQ_* registers */ +enum gsi_general_id { + BREAK_POINT = 0x0, + BUS_ERROR = 0x1, + CMD_FIFO_OVRFLOW = 0x2, + MCS_STACK_OVRFLOW = 0x3, +}; #define GSI_CNTXT_INTSET_OFFSET \ GSI_EE_N_CNTXT_INTSET_OFFSET(GSI_EE_AP) @@ -373,6 +413,25 @@ #define ERR_TYPE_FMASK GENMASK(27, 24) #define ERR_EE_FMASK GENMASK(31, 28) +/** enum gsi_err_code - ERR_CODE field values in EE_ERR_LOG */ +enum gsi_err_code { + GSI_INVALID_TRE = 0x1, + GSI_OUT_OF_BUFFERS = 0x2, + GSI_OUT_OF_RESOURCES = 0x3, + GSI_UNSUPPORTED_INTER_EE_OP = 0x4, + GSI_EVT_RING_EMPTY = 0x5, + GSI_NON_ALLOCATED_EVT_ACCESS = 0x6, + /* 7 is not assigned */ + GSI_HWO_1 = 0x8, +}; + +/** enum gsi_err_type - ERR_TYPE field values in EE_ERR_LOG */ +enum gsi_err_type { + GSI_ERR_TYPE_GLOB = 0x1, + GSI_ERR_TYPE_CHAN = 0x2, + GSI_ERR_TYPE_EVT = 0x3, +}; + #define GSI_ERROR_LOG_CLR_OFFSET \ GSI_EE_N_ERROR_LOG_CLR_OFFSET(GSI_EE_AP) #define GSI_EE_N_ERROR_LOG_CLR_OFFSET(ee) \ @@ -384,10 +443,18 @@ (0x0001f400 + 0x4000 * (ee)) #define INTER_EE_RESULT_FMASK GENMASK(2, 0) #define GENERIC_EE_RESULT_FMASK GENMASK(7, 5) -#define GENERIC_EE_SUCCESS_FVAL 1 -#define GENERIC_EE_INCORRECT_DIRECTION_FVAL 3 -#define GENERIC_EE_INCORRECT_CHANNEL_FVAL 5 -#define GENERIC_EE_NO_RESOURCES_FVAL 7 + +/** enum gsi_generic_ee_result - GENERIC_EE_RESULT field values in SCRATCH_0 */ +enum gsi_generic_ee_result { + GENERIC_EE_SUCCESS = 0x1, + GENERIC_EE_CHANNEL_NOT_RUNNING = 0x2, + GENERIC_EE_INCORRECT_DIRECTION = 0x3, + GENERIC_EE_INCORRECT_CHANNEL_TYPE = 0x4, + GENERIC_EE_INCORRECT_CHANNEL = 0x5, + GENERIC_EE_RETRY = 0x6, + GENERIC_EE_NO_RESOURCES = 0x7, +}; + #define USB_MAX_PACKET_FMASK GENMASK(15, 15) /* 0: HS; 1: SS */ #define MHI_BASE_CHANNEL_FMASK GENMASK(31, 24) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index d92dd3f09b73..002e51448510 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -38,9 +38,9 @@ /* Some commands can wait until indicated pipeline stages are clear */ enum pipeline_clear_options { - 
pipeline_clear_hps = 0, - pipeline_clear_src_grp = 1, - pipeline_clear_full = 2, + pipeline_clear_hps = 0x0, + pipeline_clear_src_grp = 0x1, + pipeline_clear_full = 0x2, }; /* IPA_CMD_IP_V{4,6}_{FILTER,ROUTING}_INIT */ diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index f7e6f87facf7..4ed09c486abc 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -27,16 +27,16 @@ struct gsi_channel; * a request is *not* an immediate command. */ enum ipa_cmd_opcode { - IPA_CMD_NONE = 0, - IPA_CMD_IP_V4_FILTER_INIT = 3, - IPA_CMD_IP_V6_FILTER_INIT = 4, - IPA_CMD_IP_V4_ROUTING_INIT = 7, - IPA_CMD_IP_V6_ROUTING_INIT = 8, - IPA_CMD_HDR_INIT_LOCAL = 9, - IPA_CMD_REGISTER_WRITE = 12, - IPA_CMD_IP_PACKET_INIT = 16, - IPA_CMD_DMA_SHARED_MEM = 19, - IPA_CMD_IP_PACKET_TAG_STATUS = 20, + IPA_CMD_NONE = 0x0, + IPA_CMD_IP_V4_FILTER_INIT = 0x3, + IPA_CMD_IP_V6_FILTER_INIT = 0x4, + IPA_CMD_IP_V4_ROUTING_INIT = 0x7, + IPA_CMD_IP_V6_ROUTING_INIT = 0x8, + IPA_CMD_HDR_INIT_LOCAL = 0x9, + IPA_CMD_REGISTER_WRITE = 0xc, + IPA_CMD_IP_PACKET_INIT = 0x10, + IPA_CMD_DMA_SHARED_MEM = 0x13, + IPA_CMD_IP_PACKET_TAG_STATUS = 0x14, }; /** @@ -50,7 +50,6 @@ struct ipa_cmd_info { enum dma_data_direction direction; }; - #ifdef IPA_VALIDATE /** diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index d4c2bc7ad24b..37dada4da680 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -24,6 +24,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_DMA_ONLY, .config = { + .resource_group = 0, .dma_mode = true, .dma_endpoint = IPA_ENDPOINT_AP_LAN_RX, }, @@ -42,6 +43,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_INVALID, .config = { + .resource_group = 0, .aggregation = true, .status_enable = true, .rx = { @@ -65,6 +67,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .seq_type = IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP, .config = { + .resource_group = 0, .checksum = true, .qmap = true, .status_enable = true, @@ -88,6 +91,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_INVALID, .config = { + .resource_group = 0, .checksum = true, .qmap = true, .aggregation = true, diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index de2768d71ab5..bd92b619e7fe 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -26,6 +26,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_DMA_ONLY, .config = { + .resource_group = 1, .dma_mode = true, .dma_endpoint = IPA_ENDPOINT_AP_LAN_RX, }, @@ -44,6 +45,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_INVALID, .config = { + .resource_group = 1, .aggregation = true, .status_enable = true, .rx = { @@ -67,6 +69,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .seq_type = IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP, .config = { + .resource_group = 1, .checksum = true, .qmap = true, .status_enable = true, @@ -90,6 +93,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .seq_type = IPA_SEQ_INVALID, .config = { + .resource_group = 1, .checksum = true, .qmap = true, .aggregation = true, @@ -146,11 +150,11 @@ static const struct ipa_resource_src ipa_resource_src[] = { .type = 
IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS, .limits[0] = { .min = 1, - .max = 63, + .max = 255, }, .limits[1] = { .min = 1, - .max = 63, + .max = 255, }, }, { diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 7fc1058a5ca9..83c4b78373ef 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -45,10 +45,10 @@ * the IPA endpoint. */ -/* The maximum value returned by ipa_resource_group_count() */ -#define IPA_RESOURCE_GROUP_COUNT 4 +/* The maximum value returned by ipa_resource_group_{src,dst}_count() */ +#define IPA_RESOURCE_GROUP_SRC_MAX 5 +#define IPA_RESOURCE_GROUP_DST_MAX 5 -/** enum ipa_resource_type_src - source resource types */ /** * struct gsi_channel_data - GSI channel configuration data * @tre_count: number of TREs in the channel ring @@ -109,6 +109,7 @@ struct ipa_endpoint_rx_data { /** * struct ipa_endpoint_config_data - IPA endpoint hardware configuration + * @resource_group: resource group to assign endpoint to * @checksum: whether checksum offload is enabled * @qmap: whether endpoint uses QMAP protocol * @aggregation: whether endpoint supports aggregation @@ -119,6 +120,7 @@ struct ipa_endpoint_rx_data { * @rx: RX-specific endpoint information (see above) */ struct ipa_endpoint_config_data { + u32 resource_group; bool checksum; bool qmap; bool aggregation; @@ -206,7 +208,7 @@ struct ipa_resource_limits { */ struct ipa_resource_src { enum ipa_resource_type_src type; - struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_COUNT]; + struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_SRC_MAX]; }; /** @@ -216,7 +218,7 @@ struct ipa_resource_src { */ struct ipa_resource_dst { enum ipa_resource_type_dst type; - struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_COUNT]; + struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_DST_MAX]; }; /** diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index b40b711cf4bd..970730045751 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -665,8 +665,8 @@ static u32 ipa_reg_init_hol_block_timer_val(struct ipa *ipa, u32 microseconds) /* ...but we still need to fit into a 32-bit register */ WARN_ON(ticks > U32_MAX); - /* IPA v3.5.1 just records the tick count */ - if (ipa->version == IPA_VERSION_3_5_1) + /* IPA v3.5.1 through v4.1 just record the tick count */ + if (ipa->version < IPA_VERSION_4_2) return (u32)ticks; /* For IPA v4.2, the tick count is represented by base and @@ -751,6 +751,16 @@ static void ipa_endpoint_init_deaggr(struct ipa_endpoint *endpoint) iowrite32(val, endpoint->ipa->reg_virt + offset); } +static void ipa_endpoint_init_rsrc_grp(struct ipa_endpoint *endpoint) +{ + u32 offset = IPA_REG_ENDP_INIT_RSRC_GRP_N_OFFSET(endpoint->endpoint_id); + struct ipa *ipa = endpoint->ipa; + u32 val; + + val = rsrc_grp_encoded(ipa->version, endpoint->data->resource_group); + iowrite32(val, ipa->reg_virt + offset); +} + static void ipa_endpoint_init_seq(struct ipa_endpoint *endpoint) { u32 offset = IPA_REG_ENDP_INIT_SEQ_N_OFFSET(endpoint->endpoint_id); @@ -1207,7 +1217,6 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) struct gsi *gsi = &ipa->gsi; bool suspended = false; dma_addr_t addr; - bool legacy; u32 retries; u32 len = 1; void *virt; @@ -1269,8 +1278,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) * complete the channel reset sequence. Finish by suspending the * channel again (if necessary). 
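The new ipa_endpoint_init_rsrc_grp() above packs the endpoint's configured resource group into its register field via rsrc_grp_encoded(), a shift-under-mask encode in the style of the kernel's u32_encode_bits()/FIELD_PREP(). A sketch of that encode step (the field mask below is hypothetical, picked only for illustration, and __builtin_ctz() assumes gcc or clang):

#include <stdint.h>
#include <stdio.h>

/* Shift a value into position under a contiguous field mask */
static uint32_t encode_bits(uint32_t val, uint32_t mask)
{
	return (val << __builtin_ctz(mask)) & mask;
}

#define RSRC_GRP_FMASK	0x00000070u	/* hypothetical field, bits 6:4 */

int main(void)
{
	uint32_t val = encode_bits(5, RSRC_GRP_FMASK);

	printf("register value = 0x%08x\n", val);	/* 0x00000050 */
	return 0;
}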
*/ - legacy = ipa->version == IPA_VERSION_3_5_1; - gsi_channel_reset(gsi, endpoint->channel_id, legacy); + gsi_channel_reset(gsi, endpoint->channel_id, true); msleep(1); @@ -1293,21 +1301,19 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint) u32 channel_id = endpoint->channel_id; struct ipa *ipa = endpoint->ipa; bool special; - bool legacy; int ret = 0; /* On IPA v3.5.1, if an RX endpoint is reset while aggregation * is active, we need to handle things specially to recover. * All other cases just need to reset the underlying GSI channel. - * - * IPA v3.5.1 enables the doorbell engine. Newer versions do not. */ - legacy = ipa->version == IPA_VERSION_3_5_1; - special = !endpoint->toward_ipa && endpoint->data->aggregation; + special = ipa->version == IPA_VERSION_3_5_1 && + !endpoint->toward_ipa && + endpoint->data->aggregation; if (special && ipa_endpoint_aggr_active(endpoint)) ret = ipa_endpoint_reset_rx_aggr(endpoint); else - gsi_channel_reset(&ipa->gsi, channel_id, legacy); + gsi_channel_reset(&ipa->gsi, channel_id, true); if (ret) dev_err(&ipa->pdev->dev, @@ -1328,6 +1334,7 @@ static void ipa_endpoint_program(struct ipa_endpoint *endpoint) ipa_endpoint_init_mode(endpoint); ipa_endpoint_init_aggr(endpoint); ipa_endpoint_init_deaggr(endpoint); + ipa_endpoint_init_rsrc_grp(endpoint); ipa_endpoint_init_seq(endpoint); ipa_endpoint_status(endpoint); } @@ -1538,8 +1545,8 @@ int ipa_endpoint_config(struct ipa *ipa) val = ioread32(ipa->reg_virt + IPA_REG_FLAVOR_0_OFFSET); /* Our RX is an IPA producer */ - rx_base = u32_get_bits(val, BAM_PROD_LOWEST_FMASK); - max = rx_base + u32_get_bits(val, BAM_MAX_PROD_PIPES_FMASK); + rx_base = u32_get_bits(val, IPA_PROD_LOWEST_FMASK); + max = rx_base + u32_get_bits(val, IPA_MAX_PROD_PIPES_FMASK); if (max > IPA_ENDPOINT_MAX) { dev_err(dev, "too many endpoints (%u > %u)\n", max, IPA_ENDPOINT_MAX); @@ -1548,7 +1555,7 @@ int ipa_endpoint_config(struct ipa *ipa) rx_mask = GENMASK(max - 1, rx_base); /* Our TX is an IPA consumer */ - max = u32_get_bits(val, BAM_MAX_CONS_PIPES_FMASK); + max = u32_get_bits(val, IPA_MAX_CONS_PIPES_FMASK); tx_mask = GENMASK(max - 1, 0); ipa->available = rx_mask | tx_mask; diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 58a245de488e..881ecc27bd6e 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -25,7 +25,7 @@ struct ipa_gsi_endpoint_data; #define IPA_MTU ETH_DATA_LEN enum ipa_endpoint_name { - IPA_ENDPOINT_AP_MODEM_TX = 0, + IPA_ENDPOINT_AP_MODEM_TX, IPA_ENDPOINT_MODEM_LAN_TX, IPA_ENDPOINT_MODEM_COMMAND_TX, IPA_ENDPOINT_AP_COMMAND_TX, diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index cc1ea28f7bc2..61dd7605bcb6 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -139,12 +139,12 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 val; /* assert(mask & ipa->available); */ - val = ioread32(ipa->reg_virt + IPA_REG_SUSPEND_IRQ_EN_OFFSET); + val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET); if (enable) val |= mask; else val &= ~mask; - iowrite32(val, ipa->reg_virt + IPA_REG_SUSPEND_IRQ_EN_OFFSET); + iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET); } /* Enable TX_SUSPEND for an endpoint */ @@ -168,7 +168,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) u32 val; val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_INFO_OFFSET); - iowrite32(val, ipa->reg_virt + IPA_REG_SUSPEND_IRQ_CLR_OFFSET); + iowrite32(val, 
ipa->reg_virt + IPA_REG_IRQ_SUSPEND_CLR_OFFSET); } /* Simulate arrival of an IPA TX_SUSPEND interrupt */ diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index 727e9c5044d1..b5d63a0cd19e 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -13,22 +13,6 @@ struct ipa; struct ipa_interrupt; /** - * enum ipa_irq_id - IPA interrupt type - * @IPA_IRQ_UC_0: Microcontroller event interrupt - * @IPA_IRQ_UC_1: Microcontroller response interrupt - * @IPA_IRQ_TX_SUSPEND: Data ready interrupt - * - * The data ready interrupt is signaled if data has arrived that is destined - * for an AP RX endpoint whose underlying GSI channel is suspended/stopped. - */ -enum ipa_irq_id { - IPA_IRQ_UC_0 = 2, - IPA_IRQ_UC_1 = 3, - IPA_IRQ_TX_SUSPEND = 14, - IPA_IRQ_COUNT, /* Number of interrupt types (not an index) */ -}; - -/** * typedef ipa_irq_handler_t - IPA interrupt handler function type * @ipa: IPA pointer * @irq_id: interrupt type diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index cd4d993b0bbb..3fb9c5d90b70 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -111,8 +111,7 @@ int ipa_setup(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; int ret; - /* Setup for IPA v3.5.1 has some slight differences */ - ret = gsi_setup(&ipa->gsi, ipa->version == IPA_VERSION_3_5_1); + ret = gsi_setup(&ipa->gsi); if (ret) return ret; @@ -326,7 +325,7 @@ static void ipa_hardware_config(struct ipa *ipa) /* Disable PA mask to allow HOLB drop (hardware workaround) */ val = ioread32(ipa->reg_virt + IPA_REG_TX_CFG_OFFSET); - val &= ~PA_MASK_EN; + val &= ~PA_MASK_EN_FMASK; iowrite32(val, ipa->reg_virt + IPA_REG_TX_CFG_OFFSET); } @@ -336,14 +335,16 @@ static void ipa_hardware_config(struct ipa *ipa) ipa_hardware_config_qsb(ipa); /* Configure aggregation granularity */ - val = ioread32(ipa->reg_virt + IPA_REG_COUNTER_CFG_OFFSET); granularity = ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY); - val = u32_encode_bits(granularity, AGGR_GRANULARITY); + val = u32_encode_bits(granularity, AGGR_GRANULARITY_FMASK); iowrite32(val, ipa->reg_virt + IPA_REG_COUNTER_CFG_OFFSET); - /* Disable hashed IPv4 and IPv6 routing and filtering for IPA v4.2 */ - if (ipa->version == IPA_VERSION_4_2) - iowrite32(0, ipa->reg_virt + IPA_REG_FILT_ROUT_HASH_EN_OFFSET); + /* IPA v4.2 does not support hashed tables, so disable them */ + if (ipa->version == IPA_VERSION_4_2) { + u32 offset = ipa_reg_filt_rout_hash_en_offset(ipa->version); + + iowrite32(0, ipa->reg_virt + offset); + } /* Enable dynamic clock division */ ipa_hardware_dcd_config(ipa); @@ -363,52 +364,41 @@ static void ipa_hardware_deconfig(struct ipa *ipa) #ifdef IPA_VALIDATION -/* # IPA resources used based on version (see IPA_RESOURCE_GROUP_COUNT) */ -static int ipa_resource_group_count(struct ipa *ipa) -{ - switch (ipa->version) { - case IPA_VERSION_3_5_1: - return 3; - - case IPA_VERSION_4_0: - case IPA_VERSION_4_1: - return 4; - - case IPA_VERSION_4_2: - return 1; - - default: - return 0; - } -} - static bool ipa_resource_limits_valid(struct ipa *ipa, const struct ipa_resource_data *data) { - u32 group_count = ipa_resource_group_count(ipa); + u32 group_count; u32 i; u32 j; - if (!group_count) + /* We program at most 6 source or destination resource group limits */ + BUILD_BUG_ON(IPA_RESOURCE_GROUP_SRC_MAX > 6); + + group_count = ipa_resource_group_src_count(ipa->version); + if (!group_count || group_count > IPA_RESOURCE_GROUP_SRC_MAX) return false; - /* Return an error if a non-zero 
resource group limit is specified - * for a resource not supported by hardware. + /* Return an error if a non-zero resource limit is specified + * for a resource group not supported by hardware. */ for (i = 0; i < data->resource_src_count; i++) { const struct ipa_resource_src *resource; resource = &data->resource_src[i]; - for (j = group_count; j < IPA_RESOURCE_GROUP_COUNT; j++) + for (j = group_count; j < IPA_RESOURCE_GROUP_SRC_MAX; j++) if (resource->limits[j].min || resource->limits[j].max) return false; } + group_count = ipa_resource_group_dst_count(ipa->version); + if (!group_count || group_count > IPA_RESOURCE_GROUP_DST_MAX) + return false; + for (i = 0; i < data->resource_dst_count; i++) { const struct ipa_resource_dst *resource; resource = &data->resource_dst[i]; - for (j = group_count; j < IPA_RESOURCE_GROUP_COUNT; j++) + for (j = group_count; j < IPA_RESOURCE_GROUP_DST_MAX; j++) if (resource->limits[j].min || resource->limits[j].max) return false; } @@ -435,46 +425,64 @@ ipa_resource_config_common(struct ipa *ipa, u32 offset, val = u32_encode_bits(xlimits->min, X_MIN_LIM_FMASK); val |= u32_encode_bits(xlimits->max, X_MAX_LIM_FMASK); - val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK); - val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK); + if (ylimits) { + val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK); + val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK); + } iowrite32(val, ipa->reg_virt + offset); } -static void ipa_resource_config_src_01(struct ipa *ipa, - const struct ipa_resource_src *resource) +static void ipa_resource_config_src(struct ipa *ipa, + const struct ipa_resource_src *resource) { - u32 offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); + u32 group_count = ipa_resource_group_src_count(ipa->version); + const struct ipa_resource_limits *ylimits; + u32 offset; - ipa_resource_config_common(ipa, offset, - &resource->limits[0], &resource->limits[1]); -} + offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 1 ? NULL : &resource->limits[1]; + ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); -static void ipa_resource_config_src_23(struct ipa *ipa, - const struct ipa_resource_src *resource) -{ - u32 offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); + if (group_count < 2) + return; - ipa_resource_config_common(ipa, offset, - &resource->limits[2], &resource->limits[3]); -} + offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 3 ? NULL : &resource->limits[3]; + ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); -static void ipa_resource_config_dst_01(struct ipa *ipa, - const struct ipa_resource_dst *resource) -{ - u32 offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); + if (group_count < 4) + return; - ipa_resource_config_common(ipa, offset, - &resource->limits[0], &resource->limits[1]); + offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 5 ? 
NULL : &resource->limits[5]; + ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); } -static void ipa_resource_config_dst_23(struct ipa *ipa, - const struct ipa_resource_dst *resource) +static void ipa_resource_config_dst(struct ipa *ipa, + const struct ipa_resource_dst *resource) { - u32 offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); + u32 group_count = ipa_resource_group_dst_count(ipa->version); + const struct ipa_resource_limits *ylimits; + u32 offset; + + offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 1 ? NULL : &resource->limits[1]; + ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); + + if (group_count < 2) + return; + + offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 3 ? NULL : &resource->limits[3]; + ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); - ipa_resource_config_common(ipa, offset, - &resource->limits[2], &resource->limits[3]); + if (group_count < 4) + return; + + offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type); + ylimits = group_count == 5 ? NULL : &resource->limits[5]; + ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); } static int @@ -485,15 +493,11 @@ ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data) if (!ipa_resource_limits_valid(ipa, data)) return -EINVAL; - for (i = 0; i < data->resource_src_count; i++) { - ipa_resource_config_src_01(ipa, &data->resource_src[i]); - ipa_resource_config_src_23(ipa, &data->resource_src[i]); - } + for (i = 0; i < data->resource_src_count; i++) + ipa_resource_config_src(ipa, data->resource_src); - for (i = 0; i < data->resource_dst_count; i++) { - ipa_resource_config_dst_01(ipa, &data->resource_dst[i]); - ipa_resource_config_dst_23(ipa, &data->resource_dst[i]); - } + for (i = 0; i < data->resource_dst_count; i++) + ipa_resource_config_dst(ipa, data->resource_dst); return 0; } @@ -678,16 +682,13 @@ static void ipa_validate_build(void) */ BUILD_BUG_ON(GSI_TLV_MAX > U8_MAX); - /* Exceeding 128 bytes makes the transaction pool *much* larger */ - BUILD_BUG_ON(sizeof(struct gsi_trans) > 128); - /* This is used as a divisor */ BUILD_BUG_ON(!IPA_AGGR_GRANULARITY); /* Aggregation granularity value can't be 0, and must fit */ BUILD_BUG_ON(!ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY)); BUILD_BUG_ON(ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY) > - field_max(AGGR_GRANULARITY)); + field_max(AGGR_GRANULARITY_FMASK)); #endif /* IPA_VALIDATE */ } @@ -720,10 +721,8 @@ static int ipa_probe(struct platform_device *pdev) const struct ipa_data *data; struct ipa_clock *clock; struct rproc *rproc; - bool modem_alloc; bool modem_init; struct ipa *ipa; - bool prefetch; phandle ph; int ret; @@ -785,17 +784,12 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_reg_exit; - /* GSI v2.0+ (IPA v4.0+) uses prefetch for the command channel */ - prefetch = ipa->version != IPA_VERSION_3_5_1; - /* IPA v4.2 requires the AP to allocate channels for the modem */ - modem_alloc = ipa->version == IPA_VERSION_4_2; - - ret = gsi_init(&ipa->gsi, pdev, prefetch, data->endpoint_count, - data->endpoint_data, modem_alloc); + ret = gsi_init(&ipa->gsi, pdev, ipa->version, data->endpoint_count, + data->endpoint_data); if (ret) goto err_mem_exit; - /* Result is a non-zero mask endpoints that support filtering */ + /* Result is a non-zero mask of endpoints that support filtering */ ipa->filter_map = 
ipa_endpoint_init(ipa, data->endpoint_count, data->endpoint_data); if (!ipa->filter_map) { diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 2d45c444a67f..0cc3a3374caa 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -89,7 +89,7 @@ int ipa_mem_setup(struct ipa *ipa) gsi_trans_commit_wait(trans); /* Tell the hardware where the processing context area is located */ - iowrite32(ipa->mem_offset + offset, + iowrite32(ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset, ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET); return 0; @@ -160,13 +160,13 @@ int ipa_mem_config(struct ipa *ipa) mem_size = 8 * u32_get_bits(val, SHARED_MEM_SIZE_FMASK); /* If the sizes don't match, issue a warning */ - if (ipa->mem_offset + mem_size > ipa->mem_size) { - dev_warn(dev, "ignoring larger reported memory size: 0x%08x\n", - mem_size); - } else if (ipa->mem_offset + mem_size < ipa->mem_size) { + if (ipa->mem_offset + mem_size < ipa->mem_size) { dev_warn(dev, "limiting IPA memory size to 0x%08x\n", mem_size); ipa->mem_size = mem_size; + } else if (ipa->mem_offset + mem_size > ipa->mem_size) { + dev_dbg(dev, "ignoring larger reported memory size: 0x%08x\n", + mem_size); } /* Prealloc DMA memory for zeroing regions */ diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h index cfac456cea0c..12b6621f4b0e 100644 --- a/drivers/net/ipa/ipa_qmi_msg.h +++ b/drivers/net/ipa/ipa_qmi_msg.h @@ -74,12 +74,12 @@ struct ipa_init_complete_ind { /* The AP tells the modem its platform type. We assume Android. */ enum ipa_platform_type { - IPA_QMI_PLATFORM_TYPE_INVALID = 0, /* Invalid */ - IPA_QMI_PLATFORM_TYPE_TN = 1, /* Data card */ - IPA_QMI_PLATFORM_TYPE_LE = 2, /* Data router */ - IPA_QMI_PLATFORM_TYPE_MSM_ANDROID = 3, /* Android MSM */ - IPA_QMI_PLATFORM_TYPE_MSM_WINDOWS = 4, /* Windows MSM */ - IPA_QMI_PLATFORM_TYPE_MSM_QNX_V01 = 5, /* QNX MSM */ + IPA_QMI_PLATFORM_TYPE_INVALID = 0x0, /* Invalid */ + IPA_QMI_PLATFORM_TYPE_TN = 0x1, /* Data card */ + IPA_QMI_PLATFORM_TYPE_LE = 0x2, /* Data router */ + IPA_QMI_PLATFORM_TYPE_MSM_ANDROID = 0x3, /* Android MSM */ + IPA_QMI_PLATFORM_TYPE_MSM_WINDOWS = 0x4, /* Windows MSM */ + IPA_QMI_PLATFORM_TYPE_MSM_QNX_V01 = 0x5, /* QNX MSM */ }; /* This defines the start and end offset of a range of memory. Both diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index e542598fd775..d02e7ecc6fc0 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -65,14 +65,14 @@ struct ipa; * of valid bits for the register. 
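ipa_mem_config() above inverts the old size check: the driver's idea of the memory size is reduced, with a warning, only when the hardware reports less than expected, while a larger reported size is now merely logged at debug level. A standalone sketch of that decision:

#include <stdint.h>
#include <stdio.h>

static uint32_t clamp_mem_size(uint32_t offset, uint32_t expected,
			       uint32_t reported)
{
	if (offset + reported < expected) {
		printf("limiting IPA memory size to 0x%08x\n", reported);
		return reported;		/* shrink to what exists */
	}
	if (offset + reported > expected)
		printf("debug: ignoring larger reported size 0x%08x\n",
		       reported);
	return expected;			/* keep the configured size */
}

int main(void)
{
	printf("-> 0x%08x\n", clamp_mem_size(0, 0x2000, 0x1000));
	printf("-> 0x%08x\n", clamp_mem_size(0, 0x2000, 0x4000));
	return 0;
}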
*/ -#define IPA_REG_ENABLED_PIPES_OFFSET 0x00000038 - +/* The next field is not supported for IPA v4.1 */ #define IPA_REG_COMP_CFG_OFFSET 0x0000003c #define ENABLE_FMASK GENMASK(0, 0) #define GSI_SNOC_BYPASS_DIS_FMASK GENMASK(1, 1) #define GEN_QMB_0_SNOC_BYPASS_DIS_FMASK GENMASK(2, 2) #define GEN_QMB_1_SNOC_BYPASS_DIS_FMASK GENMASK(3, 3) #define IPA_DCMP_FAST_CLK_EN_FMASK GENMASK(4, 4) +/* The remaining fields are not present for IPA v3.5.1 */ #define IPA_QMB_SELECT_CONS_EN_FMASK GENMASK(5, 5) #define IPA_QMB_SELECT_PROD_EN_FMASK GENMASK(6, 6) #define GSI_MULTI_INORDER_RD_DIS_FMASK GENMASK(7, 7) @@ -110,6 +110,7 @@ struct ipa; #define TX_0_FMASK GENMASK(19, 19) #define TX_1_FMASK GENMASK(20, 20) #define FNR_FMASK GENMASK(21, 21) +/* The remaining fields are not present for IPA v3.5.1 */ #define QSB2AXI_CMDQ_L_FMASK GENMASK(22, 22) #define AGGR_WRAPPER_FMASK GENMASK(23, 23) #define RAM_SLAVEWAY_FMASK GENMASK(24, 24) @@ -138,25 +139,17 @@ struct ipa; #define IPA_REG_QSB_MAX_READS_OFFSET 0x00000078 #define GEN_QMB_0_MAX_READS_FMASK GENMASK(3, 0) #define GEN_QMB_1_MAX_READS_FMASK GENMASK(7, 4) -/* The next two fields are present for IPA v4.0 and above */ +/* The next two fields are not present for IPA v3.5.1 */ #define GEN_QMB_0_MAX_READS_BEATS_FMASK GENMASK(23, 16) #define GEN_QMB_1_MAX_READS_BEATS_FMASK GENMASK(31, 24) -static inline u32 ipa_reg_state_aggr_active_offset(enum ipa_version version) +static inline u32 ipa_reg_filt_rout_hash_en_offset(enum ipa_version version) { if (version == IPA_VERSION_3_5_1) - return 0x0000010c; + return 0x000008c; - return 0x000000b4; + return 0x0000148; } -/* ipa->available defines the valid bits in the STATE_AGGR_ACTIVE register */ - -/* The next register is present for IPA v4.2 and above */ -#define IPA_REG_FILT_ROUT_HASH_EN_OFFSET 0x00000148 -#define IPV6_ROUTER_HASH_EN GENMASK(0, 0) -#define IPV6_FILTER_HASH_EN GENMASK(4, 4) -#define IPV4_ROUTER_HASH_EN GENMASK(8, 8) -#define IPV4_FILTER_HASH_EN GENMASK(12, 12) static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version) { @@ -166,45 +159,70 @@ static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version) return 0x000014c; } -#define IPV6_ROUTER_HASH_FLUSH GENMASK(0, 0) -#define IPV6_FILTER_HASH_FLUSH GENMASK(4, 4) -#define IPV4_ROUTER_HASH_FLUSH GENMASK(8, 8) -#define IPV4_FILTER_HASH_FLUSH GENMASK(12, 12) +/* The next four fields are used for the hash enable and flush registers */ +#define IPV6_ROUTER_HASH_FMASK GENMASK(0, 0) +#define IPV6_FILTER_HASH_FMASK GENMASK(4, 4) +#define IPV4_ROUTER_HASH_FMASK GENMASK(8, 8) +#define IPV4_FILTER_HASH_FMASK GENMASK(12, 12) + +/* ipa->available defines the valid bits in the STATE_AGGR_ACTIVE register */ +static inline u32 ipa_reg_state_aggr_active_offset(enum ipa_version version) +{ + if (version == IPA_VERSION_3_5_1) + return 0x0000010c; + + return 0x000000b4; +} #define IPA_REG_BCR_OFFSET 0x000001d0 -#define BCR_CMDQ_L_LACK_ONE_ENTRY BIT(0) -#define BCR_TX_NOT_USING_BRESP BIT(1) -#define BCR_SUSPEND_L2_IRQ BIT(3) -#define BCR_HOLB_DROP_L2_IRQ BIT(4) -#define BCR_DUAL_TX BIT(5) +/* The next two fields are not present for IPA v4.2 */ +#define BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK GENMASK(0, 0) +#define BCR_TX_NOT_USING_BRESP_FMASK GENMASK(1, 1) +/* The next field is invalid for IPA v4.1 */ +#define BCR_TX_SUSPEND_IRQ_ASSERT_ONCE_FMASK GENMASK(2, 2) +/* The next two fields are not present for IPA v4.2 */ +#define BCR_SUSPEND_L2_IRQ_FMASK GENMASK(3, 3) +#define BCR_HOLB_DROP_L2_IRQ_FMASK GENMASK(4, 4) +#define BCR_DUAL_TX_FMASK 
GENMASK(5, 5)
+#define BCR_ENABLE_FILTER_DATA_CACHE_FMASK	GENMASK(6, 6)
+#define BCR_NOTIF_PRIORITY_OVER_ZLT_FMASK	GENMASK(7, 7)
+#define BCR_FILTER_PREFETCH_EN_FMASK		GENMASK(8, 8)
+#define BCR_ROUTER_PREFETCH_EN_FMASK		GENMASK(9, 9)
 
 /* Backward compatibility register value to use for each version */
 static inline u32 ipa_reg_bcr_val(enum ipa_version version)
 {
 	if (version == IPA_VERSION_3_5_1)
-		return BCR_CMDQ_L_LACK_ONE_ENTRY | BCR_TX_NOT_USING_BRESP |
-		       BCR_SUSPEND_L2_IRQ | BCR_HOLB_DROP_L2_IRQ | BCR_DUAL_TX;
+		return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
+		       BCR_TX_NOT_USING_BRESP_FMASK |
+		       BCR_SUSPEND_L2_IRQ_FMASK |
+		       BCR_HOLB_DROP_L2_IRQ_FMASK |
+		       BCR_DUAL_TX_FMASK;
 
 	if (version == IPA_VERSION_4_0 || version == IPA_VERSION_4_1)
-		return BCR_CMDQ_L_LACK_ONE_ENTRY | BCR_SUSPEND_L2_IRQ |
-		       BCR_HOLB_DROP_L2_IRQ | BCR_DUAL_TX;
+		return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
+		       BCR_SUSPEND_L2_IRQ_FMASK |
+		       BCR_HOLB_DROP_L2_IRQ_FMASK |
+		       BCR_DUAL_TX_FMASK;
 
 	return 0x00000000;
 }
 
+/* The value of the next register must be a multiple of 8 */
 #define IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET	0x000001e8
 
-#define IPA_REG_AGGR_FORCE_CLOSE_OFFSET			0x000001ec
 /* ipa->available defines the valid bits in the AGGR_FORCE_CLOSE register */
+#define IPA_REG_AGGR_FORCE_CLOSE_OFFSET			0x000001ec
+
+#define IPA_REG_COUNTER_CFG_OFFSET			0x000001f0
+#define AGGR_GRANULARITY_FMASK			GENMASK(8, 4)
 
 /* The internal inactivity timer clock is used for the aggregation timer */
-#define TIMER_FREQUENCY	32000	/* 32 KHz inactivity timer clock */
+#define TIMER_FREQUENCY	32000		/* 32 KHz inactivity timer clock */
 
-#define IPA_REG_COUNTER_CFG_OFFSET			0x000001f0
-#define AGGR_GRANULARITY			GENMASK(8, 4)
 /* Compute the value to use in the AGGR_GRANULARITY field representing the
  * given number of microseconds.  The value is one less than the number of
- * timer ticks in the requested period.  Zero not a valid granularity value.
+ * timer ticks in the requested period.  0 is not a valid granularity value.
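Editor's note: the helper this kernel-doc describes converts microseconds into an AGGR_GRANULARITY field value. Its body is elided by the surrounding hunk, but per the comment the result is one less than the number of 32 kHz ticks in the period. Assuming round-to-nearest division (a sketch, not necessarily the exact kernel expression):

#include <stdint.h>
#include <stdio.h>

#define TIMER_FREQUENCY	32000	/* 32 KHz inactivity timer clock */

static uint32_t aggr_granularity_val(uint32_t usec)
{
	/* ticks in the requested period, rounded to nearest, minus one */
	return (usec * TIMER_FREQUENCY + 500000) / 1000000 - 1;
}

int main(void)
{
	/* 500 us at 32 kHz is 16 ticks, so the field value is 15 */
	printf("%u\n", aggr_granularity_val(500));
	return 0;
}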
*/ static inline u32 ipa_aggr_granularity_val(u32 usec) { @@ -213,25 +231,25 @@ static inline u32 ipa_aggr_granularity_val(u32 usec) #define IPA_REG_TX_CFG_OFFSET 0x000001fc /* The first three fields are present for IPA v3.5.1 only */ -#define TX0_PREFETCH_DISABLE GENMASK(0, 0) -#define TX1_PREFETCH_DISABLE GENMASK(1, 1) -#define PREFETCH_ALMOST_EMPTY_SIZE GENMASK(4, 2) -/* The next fields are present for IPA v4.0 and above */ -#define PREFETCH_ALMOST_EMPTY_SIZE_TX0 GENMASK(5, 2) -#define DMAW_SCND_OUTSD_PRED_THRESHOLD GENMASK(9, 6) -#define DMAW_SCND_OUTSD_PRED_EN GENMASK(10, 10) -#define DMAW_MAX_BEATS_256_DIS GENMASK(11, 11) -#define PA_MASK_EN GENMASK(12, 12) -#define PREFETCH_ALMOST_EMPTY_SIZE_TX1 GENMASK(16, 13) -/* The last two fields are present for IPA v4.2 and above */ -#define SSPND_PA_NO_START_STATE GENMASK(18, 18) -#define SSPND_PA_NO_BQ_STATE GENMASK(19, 19) +#define TX0_PREFETCH_DISABLE_FMASK GENMASK(0, 0) +#define TX1_PREFETCH_DISABLE_FMASK GENMASK(1, 1) +#define PREFETCH_ALMOST_EMPTY_SIZE_FMASK GENMASK(4, 2) +/* The next six fields are present for IPA v4.0 and above */ +#define PREFETCH_ALMOST_EMPTY_SIZE_TX0_FMASK GENMASK(5, 2) +#define DMAW_SCND_OUTSD_PRED_THRESHOLD_FMASK GENMASK(9, 6) +#define DMAW_SCND_OUTSD_PRED_EN_FMASK GENMASK(10, 10) +#define DMAW_MAX_BEATS_256_DIS_FMASK GENMASK(11, 11) +#define PA_MASK_EN_FMASK GENMASK(12, 12) +#define PREFETCH_ALMOST_EMPTY_SIZE_TX1_FMASK GENMASK(16, 13) +/* The next two fields are present for IPA v4.2 only */ +#define SSPND_PA_NO_START_STATE_FMASK GENMASK(18, 18) +#define SSPND_PA_NO_BQ_STATE_FMASK GENMASK(19, 19) #define IPA_REG_FLAVOR_0_OFFSET 0x00000210 -#define BAM_MAX_PIPES_FMASK GENMASK(4, 0) -#define BAM_MAX_CONS_PIPES_FMASK GENMASK(12, 8) -#define BAM_MAX_PROD_PIPES_FMASK GENMASK(20, 16) -#define BAM_PROD_LOWEST_FMASK GENMASK(27, 24) +#define IPA_MAX_PIPES_FMASK GENMASK(3, 0) +#define IPA_MAX_CONS_PIPES_FMASK GENMASK(12, 8) +#define IPA_MAX_PROD_PIPES_FMASK GENMASK(20, 16) +#define IPA_PROD_LOWEST_FMASK GENMASK(27, 24) static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) { @@ -244,6 +262,43 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) #define ENTER_IDLE_DEBOUNCE_THRESH_FMASK GENMASK(15, 0) #define CONST_NON_IDLE_ENABLE_FMASK GENMASK(16, 16) +/* # IPA source resource groups available based on version */ +static inline u32 ipa_resource_group_src_count(enum ipa_version version) +{ + switch (version) { + case IPA_VERSION_3_5_1: + case IPA_VERSION_4_0: + case IPA_VERSION_4_1: + return 4; + + case IPA_VERSION_4_2: + return 1; + + default: + return 0; + } +} + +/* # IPA destination resource groups available based on version */ +static inline u32 ipa_resource_group_dst_count(enum ipa_version version) +{ + switch (version) { + case IPA_VERSION_3_5_1: + return 3; + + case IPA_VERSION_4_0: + case IPA_VERSION_4_1: + return 4; + + case IPA_VERSION_4_2: + return 1; + + default: + return 0; + } +} + +/* Not all of the following are valid (depends on the count, above) */ #define IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \ (0x00000400 + 0x0020 * (rt)) #define IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \ @@ -256,13 +311,16 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) (0x00000504 + 0x0020 * (rt)) #define IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \ (0x00000508 + 0x0020 * (rt)) +/* The next four fields are used for all resource group registers */ #define X_MIN_LIM_FMASK GENMASK(5, 0) #define X_MAX_LIM_FMASK GENMASK(13, 8) +/* The 
next two fields are not always present (if resource count is odd) */
 #define Y_MIN_LIM_FMASK				GENMASK(21, 16)
 #define Y_MAX_LIM_FMASK				GENMASK(29, 24)
 
 #define IPA_REG_ENDP_INIT_CTRL_N_OFFSET(ep) \
 					(0x00000800 + 0x0070 * (ep))
+/* The next field should only be used for IPA v3.5.1 */
 #define ENDP_SUSPEND_FMASK			GENMASK(0, 0)
 #define ENDP_DELAY_FMASK			GENMASK(1, 1)
 
@@ -273,6 +331,13 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 #define CS_METADATA_HDR_OFFSET_FMASK		GENMASK(6, 3)
 #define CS_GEN_QMB_MASTER_SEL_FMASK		GENMASK(8, 8)
 
+/** enum ipa_cs_offload_en - checksum offload field in ENDP_INIT_CFG_N */
+enum ipa_cs_offload_en {
+	IPA_CS_OFFLOAD_NONE	= 0x0,
+	IPA_CS_OFFLOAD_UL	= 0x1,
+	IPA_CS_OFFLOAD_DL	= 0x2,
+};
+
 #define IPA_REG_ENDP_INIT_HDR_N_OFFSET(ep) \
 					(0x00000810 + 0x0070 * (ep))
 #define HDR_LEN_FMASK				GENMASK(5, 0)
@@ -308,6 +373,14 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 #define PAD_EN_FMASK				GENMASK(29, 29)
 #define HDR_FTCH_DISABLE_FMASK			GENMASK(30, 30)
 
+/** enum ipa_mode - mode field in ENDP_INIT_MODE_N */
+enum ipa_mode {
+	IPA_BASIC			= 0x0,
+	IPA_ENABLE_FRAMING_HDLC		= 0x1,
+	IPA_ENABLE_DEFRAMING_HDLC	= 0x2,
+	IPA_DMA				= 0x3,
+};
+
 #define IPA_REG_ENDP_INIT_AGGR_N_OFFSET(ep) \
 					(0x00000824 + 0x0070 * (ep))
 #define AGGR_EN_FMASK				GENMASK(1, 0)
@@ -319,6 +392,24 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 #define AGGR_FORCE_CLOSE_FMASK			GENMASK(22, 22)
 #define AGGR_HARD_BYTE_LIMIT_ENABLE_FMASK	GENMASK(24, 24)
 
+/** enum ipa_aggr_en - aggregation enable field in ENDP_INIT_AGGR_N */
+enum ipa_aggr_en {
+	IPA_BYPASS_AGGR			= 0x0,
+	IPA_ENABLE_AGGR			= 0x1,
+	IPA_ENABLE_DEAGGR		= 0x2,
+};
+
+/** enum ipa_aggr_type - aggregation type field in ENDP_INIT_AGGR_N */
+enum ipa_aggr_type {
+	IPA_MBIM_16			= 0x0,
+	IPA_HDLC			= 0x1,
+	IPA_TLP				= 0x2,
+	IPA_RNDIS			= 0x3,
+	IPA_GENERIC			= 0x4,
+	IPA_COALESCE			= 0x5,
+	IPA_QCMAP			= 0x6,
+};
+
 /* Valid only for RX (IPA producer) endpoints */
 #define IPA_REG_ENDP_INIT_HOL_BLOCK_EN_N_OFFSET(rxep) \
 					(0x0000082c + 0x0070 * (rxep))
@@ -327,7 +418,7 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 /* Valid only for RX (IPA producer) endpoints */
 #define IPA_REG_ENDP_INIT_HOL_BLOCK_TIMER_N_OFFSET(rxep) \
 					(0x00000830 + 0x0070 * (rxep))
-/* The next fields are present for IPA v4.2 only */
+/* The next two fields are present for IPA v4.2 only */
 #define BASE_VALUE_FMASK			GENMASK(4, 0)
 #define SCALE_FMASK				GENMASK(12, 8)
 
@@ -335,13 +426,24 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 #define IPA_REG_ENDP_INIT_DEAGGR_N_OFFSET(txep) \
 					(0x00000834 + 0x0070 * (txep))
 #define DEAGGR_HDR_LEN_FMASK			GENMASK(5, 0)
+#define SYSPIPE_ERR_DETECTION_FMASK		GENMASK(6, 6)
 #define PACKET_OFFSET_VALID_FMASK		GENMASK(7, 7)
 #define PACKET_OFFSET_LOCATION_FMASK		GENMASK(13, 8)
+#define IGNORE_MIN_PKT_ERR_FMASK		GENMASK(14, 14)
 #define MAX_PACKET_LEN_FMASK			GENMASK(31, 16)
 
 #define IPA_REG_ENDP_INIT_RSRC_GRP_N_OFFSET(ep) \
 					(0x00000838 + 0x0070 * (ep))
-#define RSRC_GRP_FMASK				GENMASK(1, 0)
+/* Encoded value for RSRC_GRP endpoint register RSRC_GRP field */
+static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
+{
+	switch (version) {
+	case IPA_VERSION_4_2:
+		return u32_encode_bits(rsrc_grp, GENMASK(0, 0));
+	default:
+		return u32_encode_bits(rsrc_grp, GENMASK(1, 0));
+	}
+}
 
 /* Valid only for TX (IPA consumer) endpoints */
 #define IPA_REG_ENDP_INIT_SEQ_N_OFFSET(txep) \
@@ -351,15 +453,34 @@ static inline u32
ipa_reg_idle_indication_cfg_offset(enum ipa_version version) #define HPS_REP_SEQ_TYPE_FMASK GENMASK(11, 8) #define DPS_REP_SEQ_TYPE_FMASK GENMASK(15, 12) +/** + * enum ipa_seq_type - HPS and DPS sequencer type fields in ENDP_INIT_SEQ_N + * @IPA_SEQ_DMA_ONLY: only DMA is performed + * @IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP: + * second packet processing pass + no decipher + microcontroller + * @IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP: + * packet processing + no decipher + no uCP + HPS REP DMA parser + * @IPA_SEQ_INVALID: invalid sequencer type + * + * The values defined here are broken into 4-bit nibbles that are written + * into fields of the INIT_SEQ_N endpoint registers. + */ +enum ipa_seq_type { + IPA_SEQ_DMA_ONLY = 0x0000, + IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP = 0x0004, + IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP = 0x0806, + IPA_SEQ_INVALID = 0xffff, +}; + #define IPA_REG_ENDP_STATUS_N_OFFSET(ep) \ (0x00000840 + 0x0070 * (ep)) #define STATUS_EN_FMASK GENMASK(0, 0) #define STATUS_ENDP_FMASK GENMASK(5, 1) #define STATUS_LOCATION_FMASK GENMASK(8, 8) -/* The next field is present for IPA v4.0 and above */ +/* The next field is not present for IPA v3.5.1 */ #define STATUS_PKT_SUPPRESS_FMASK GENMASK(9, 9) -/* "er" is either an endpoint ID (for filters) or a route ID (for routes) */ +/* The next register is only present for IPA versions that support hashing */ #define IPA_REG_ENDP_FILTER_ROUTER_HSH_CFG_N_OFFSET(er) \ (0x0000085c + 0x0070 * (er)) #define FILTER_HASH_MSK_SRC_ID_FMASK GENMASK(0, 0) @@ -394,89 +515,67 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) IPA_REG_IRQ_CLR_EE_N_OFFSET(GSI_EE_AP) #define IPA_REG_IRQ_CLR_EE_N_OFFSET(ee) \ (0x00003010 + 0x1000 * (ee)) +/** + * enum ipa_irq_id - Bit positions representing type of IPA IRQ + * @IPA_IRQ_UC_0: Microcontroller event interrupt + * @IPA_IRQ_UC_1: Microcontroller response interrupt + * @IPA_IRQ_TX_SUSPEND: Data ready interrupt + * + * IRQ types not described above are not currently used. 
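Editor's note: per the kernel-doc above, each ipa_seq_type value is four 4-bit nibbles that land in separate fields of the INIT_SEQ_N register. The REP field masks are shown above (bits 11:8 and 15:12); the base HPS/DPS masks are elided by the hunk and are assumed here to be bits 3:0 and 7:4. A standalone decomposition under that assumption:

#include <stdint.h>
#include <stdio.h>

/* Split a 16-bit sequencer value into its four 4-bit register fields */
static void explode_seq_type(uint16_t seq)
{
	printf("HPS=%x DPS=%x HPS_REP=%x DPS_REP=%x\n",
	       seq & 0xf, (seq >> 4) & 0xf,
	       (seq >> 8) & 0xf, (seq >> 12) & 0xf);
}

int main(void)
{
	explode_seq_type(0x0806);	/* IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP */
	explode_seq_type(0x0004);	/* IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP */
	return 0;
}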
+ */ +enum ipa_irq_id { + IPA_IRQ_BAD_SNOC_ACCESS = 0x0, + /* Type (bit) 0x1 is not defined */ + IPA_IRQ_UC_0 = 0x2, + IPA_IRQ_UC_1 = 0x3, + IPA_IRQ_UC_2 = 0x4, + IPA_IRQ_UC_3 = 0x5, + IPA_IRQ_UC_IN_Q_NOT_EMPTY = 0x6, + IPA_IRQ_UC_RX_CMD_Q_NOT_FULL = 0x7, + IPA_IRQ_PROC_UC_ACK_Q_NOT_EMPTY = 0x8, + IPA_IRQ_RX_ERR = 0x9, + IPA_IRQ_DEAGGR_ERR = 0xa, + IPA_IRQ_TX_ERR = 0xb, + IPA_IRQ_STEP_MODE = 0xc, + IPA_IRQ_PROC_ERR = 0xd, + IPA_IRQ_TX_SUSPEND = 0xe, + IPA_IRQ_TX_HOLB_DROP = 0xf, + IPA_IRQ_BAM_GSI_IDLE = 0x10, + IPA_IRQ_PIPE_YELLOW_BELOW = 0x11, + IPA_IRQ_PIPE_RED_BELOW = 0x12, + IPA_IRQ_PIPE_YELLOW_ABOVE = 0x13, + IPA_IRQ_PIPE_RED_ABOVE = 0x14, + IPA_IRQ_UCP = 0x15, + IPA_IRQ_DCMP = 0x16, + IPA_IRQ_GSI_EE = 0x17, + IPA_IRQ_GSI_IPA_IF_TLV_RCVD = 0x18, + IPA_IRQ_GSI_UC = 0x19, + IPA_IRQ_COUNT, /* Last; not an id */ +}; #define IPA_REG_IRQ_UC_OFFSET \ IPA_REG_IRQ_UC_EE_N_OFFSET(GSI_EE_AP) #define IPA_REG_IRQ_UC_EE_N_OFFSET(ee) \ (0x0000301c + 0x1000 * (ee)) +#define UC_INTR_FMASK GENMASK(0, 0) +/* ipa->available defines the valid bits in the SUSPEND_INFO register */ #define IPA_REG_IRQ_SUSPEND_INFO_OFFSET \ IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(GSI_EE_AP) #define IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(ee) \ (0x00003030 + 0x1000 * (ee)) -/* ipa->available defines the valid bits in the SUSPEND_INFO register */ -#define IPA_REG_SUSPEND_IRQ_EN_OFFSET \ - IPA_REG_SUSPEND_IRQ_EN_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_SUSPEND_IRQ_EN_EE_N_OFFSET(ee) \ +/* ipa->available defines the valid bits in the IRQ_SUSPEND_EN register */ +#define IPA_REG_IRQ_SUSPEND_EN_OFFSET \ + IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(GSI_EE_AP) +#define IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(ee) \ (0x00003034 + 0x1000 * (ee)) -/* ipa->available defines the valid bits in the SUSPEND_IRQ_EN register */ -#define IPA_REG_SUSPEND_IRQ_CLR_OFFSET \ - IPA_REG_SUSPEND_IRQ_CLR_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_SUSPEND_IRQ_CLR_EE_N_OFFSET(ee) \ +/* ipa->available defines the valid bits in the IRQ_SUSPEND_CLR register */ +#define IPA_REG_IRQ_SUSPEND_CLR_OFFSET \ + IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(GSI_EE_AP) +#define IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(ee) \ (0x00003038 + 0x1000 * (ee)) -/* ipa->available defines the valid bits in the SUSPEND_IRQ_CLR register */ - -/** enum ipa_cs_offload_en - checksum offload field in ENDP_INIT_CFG_N */ -enum ipa_cs_offload_en { - IPA_CS_OFFLOAD_NONE = 0, - IPA_CS_OFFLOAD_UL = 1, - IPA_CS_OFFLOAD_DL = 2, - IPA_CS_RSVD -}; - -/** enum ipa_aggr_en - aggregation enable field in ENDP_INIT_AGGR_N */ -enum ipa_aggr_en { - IPA_BYPASS_AGGR = 0, - IPA_ENABLE_AGGR = 1, - IPA_ENABLE_DEAGGR = 2, -}; - -/** enum ipa_aggr_type - aggregation type field in in_ENDP_INIT_AGGR_N */ -enum ipa_aggr_type { - IPA_MBIM_16 = 0, - IPA_HDLC = 1, - IPA_TLP = 2, - IPA_RNDIS = 3, - IPA_GENERIC = 4, - IPA_COALESCE = 5, - IPA_QCMAP = 6, -}; - -/** enum ipa_mode - mode field in ENDP_INIT_MODE_N */ -enum ipa_mode { - IPA_BASIC = 0, - IPA_ENABLE_FRAMING_HDLC = 1, - IPA_ENABLE_DEFRAMING_HDLC = 2, - IPA_DMA = 3, -}; - -/** - * enum ipa_seq_type - HPS and DPS sequencer type fields in in ENDP_INIT_SEQ_N - * @IPA_SEQ_DMA_ONLY: only DMA is performed - * @IPA_SEQ_PKT_PROCESS_NO_DEC_UCP: - * packet processing + no decipher + microcontroller (Ethernet Bridging) - * @IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP: - * second packet processing pass + no decipher + microcontroller - * @IPA_SEQ_DMA_DEC: DMA + cipher/decipher - * @IPA_SEQ_DMA_COMP_DECOMP: DMA + compression/decompression - * @IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP: - * packet processing + no 
decipher + no uCP + HPS REP DMA parser - * @IPA_SEQ_INVALID: invalid sequencer type - * - * The values defined here are broken into 4-bit nibbles that are written - * into fields of the INIT_SEQ_N endpoint registers. - */ -enum ipa_seq_type { - IPA_SEQ_DMA_ONLY = 0x0000, - IPA_SEQ_PKT_PROCESS_NO_DEC_UCP = 0x0002, - IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP = 0x0004, - IPA_SEQ_DMA_DEC = 0x0011, - IPA_SEQ_DMA_COMP_DECOMP = 0x0020, - IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP = 0x0806, - IPA_SEQ_INVALID = 0xffff, -}; int ipa_reg_init(struct ipa *ipa); void ipa_reg_exit(struct ipa *ipa); diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index b3790aa952a1..32e2d3e052d5 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -422,8 +422,8 @@ int ipa_table_hash_flush(struct ipa *ipa) return -EBUSY; } - val = IPV4_FILTER_HASH_FLUSH | IPV6_FILTER_HASH_FLUSH; - val |= IPV6_ROUTER_HASH_FLUSH | IPV4_ROUTER_HASH_FLUSH; + val = IPV4_FILTER_HASH_FMASK | IPV6_FILTER_HASH_FMASK; + val |= IPV6_ROUTER_HASH_FMASK | IPV4_ROUTER_HASH_FMASK; ipa_cmd_register_write_add(trans, offset, val, val, false); diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index b382d47bc70d..dee58a6596d4 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -86,32 +86,32 @@ struct ipa_uc_mem_area { /** enum ipa_uc_command - commands from the AP to the microcontroller */ enum ipa_uc_command { - IPA_UC_COMMAND_NO_OP = 0, - IPA_UC_COMMAND_UPDATE_FLAGS = 1, - IPA_UC_COMMAND_DEBUG_RUN_TEST = 2, - IPA_UC_COMMAND_DEBUG_GET_INFO = 3, - IPA_UC_COMMAND_ERR_FATAL = 4, - IPA_UC_COMMAND_CLK_GATE = 5, - IPA_UC_COMMAND_CLK_UNGATE = 6, - IPA_UC_COMMAND_MEMCPY = 7, - IPA_UC_COMMAND_RESET_PIPE = 8, - IPA_UC_COMMAND_REG_WRITE = 9, - IPA_UC_COMMAND_GSI_CH_EMPTY = 10, + IPA_UC_COMMAND_NO_OP = 0x0, + IPA_UC_COMMAND_UPDATE_FLAGS = 0x1, + IPA_UC_COMMAND_DEBUG_RUN_TEST = 0x2, + IPA_UC_COMMAND_DEBUG_GET_INFO = 0x3, + IPA_UC_COMMAND_ERR_FATAL = 0x4, + IPA_UC_COMMAND_CLK_GATE = 0x5, + IPA_UC_COMMAND_CLK_UNGATE = 0x6, + IPA_UC_COMMAND_MEMCPY = 0x7, + IPA_UC_COMMAND_RESET_PIPE = 0x8, + IPA_UC_COMMAND_REG_WRITE = 0x9, + IPA_UC_COMMAND_GSI_CH_EMPTY = 0xa, }; /** enum ipa_uc_response - microcontroller response codes */ enum ipa_uc_response { - IPA_UC_RESPONSE_NO_OP = 0, - IPA_UC_RESPONSE_INIT_COMPLETED = 1, - IPA_UC_RESPONSE_CMD_COMPLETED = 2, - IPA_UC_RESPONSE_DEBUG_GET_INFO = 3, + IPA_UC_RESPONSE_NO_OP = 0x0, + IPA_UC_RESPONSE_INIT_COMPLETED = 0x1, + IPA_UC_RESPONSE_CMD_COMPLETED = 0x2, + IPA_UC_RESPONSE_DEBUG_GET_INFO = 0x3, }; /** enum ipa_uc_event - common cpu events reported by the microcontroller */ enum ipa_uc_event { - IPA_UC_EVENT_NO_OP = 0, - IPA_UC_EVENT_ERROR = 1, - IPA_UC_EVENT_LOG_INFO = 2, + IPA_UC_EVENT_NO_OP = 0x0, + IPA_UC_EVENT_ERROR = 0x1, + IPA_UC_EVENT_LOG_INFO = 0x2, }; static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa) @@ -129,9 +129,10 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) if (shared->event == IPA_UC_EVENT_ERROR) dev_err(dev, "microcontroller error event\n"); - else + else if (shared->event != IPA_UC_EVENT_LOG_INFO) dev_err(dev, "unsupported microcontroller event %hhu\n", shared->event); + /* The LOG_INFO event can be safely ignored */ } /* Microcontroller response IPA interrupt handler */ @@ -191,14 +192,19 @@ void ipa_uc_teardown(struct ipa *ipa) static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); + u32 val; + /* Fill in the command data */ 
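Editor's note: send_uc_command(), whose body continues below, now rings the microcontroller doorbell with u32_encode_bits(1, UC_INTR_FMASK) instead of a bare iowrite32(1, ...). Because UC_INTR_FMASK is GENMASK(0, 0), the written value is identical; the change only names the field being set. A quick standalone check of that claim, with the helpers reimplemented locally:

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)	(((~0u) >> (31 - (h))) & ((~0u) << (l)))
#define UC_INTR_FMASK	GENMASK(0, 0)

static uint32_t u32_encode_bits(uint32_t v, uint32_t mask)
{
	return (v << __builtin_ctz(mask)) & mask;
}

int main(void)
{
	printf("0x%x\n", u32_encode_bits(1, UC_INTR_FMASK));	/* prints 0x1 */
	return 0;
}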
shared->command = command; shared->command_param = cpu_to_le32(command_param); shared->command_param_hi = 0; shared->response = 0; shared->response_param = 0; - iowrite32(1, ipa->reg_virt + IPA_REG_IRQ_UC_OFFSET); + /* Use an interrupt to tell the microcontroller the command is ready */ + val = u32_encode_bits(1, UC_INTR_FMASK); + + iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_UC_OFFSET); } /* Tell the microcontroller the AP is shutting down */ diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 11ca5fa902a1..92425e1fd70c 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -101,6 +101,7 @@ struct pcpu_secy_stats { * @real_dev: pointer to underlying netdevice * @stats: MACsec device stats * @secys: linked list of SecY's on the underlying device + * @gro_cells: pointer to the Generic Receive Offload cell * @offload: status of offloading on the MACsec device */ struct macsec_dev { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c8d803d3616c..d9b6c44a5911 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1096,7 +1096,7 @@ static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *real_dev = vlan->lowerdev; struct netpoll *netpoll; - int err = 0; + int err; netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); err = -ENOMEM; @@ -1339,7 +1339,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } -/** +/* * reconfigure list of remote source mac address * (only for macvlan devices in source mode) * Note regarding alignment: all netlink data is aligned to 4 Byte, which diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c new file mode 100644 index 000000000000..d3f92781314e --- /dev/null +++ b/drivers/net/mhi_net.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* MHI Network driver - Network over MHI bus + * + * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org> + */ + +#include <linux/if_arp.h> +#include <linux/mhi.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/u64_stats_sync.h> + +#define MHI_NET_MIN_MTU ETH_MIN_MTU +#define MHI_NET_MAX_MTU 0xffff +#define MHI_NET_DEFAULT_MTU 0x4000 + +struct mhi_net_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t rx_dropped; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + atomic_t rx_queued; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; +}; + +struct mhi_net_dev { + struct mhi_device *mdev; + struct net_device *ndev; + struct delayed_work rx_refill; + struct mhi_net_stats stats; + u32 rx_queue_sz; +}; + +static int mhi_ndo_open(struct net_device *ndev) +{ + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + + /* Feed the rx buffer pool */ + schedule_delayed_work(&mhi_netdev->rx_refill, 0); + + /* Carrier is established via out-of-band channel (e.g. 
qmi) */ + netif_carrier_on(ndev); + + netif_start_queue(ndev); + + return 0; +} + +static int mhi_ndo_stop(struct net_device *ndev) +{ + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + + netif_stop_queue(ndev); + netif_carrier_off(ndev); + cancel_delayed_work_sync(&mhi_netdev->rx_refill); + + return 0; +} + +static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + struct mhi_device *mdev = mhi_netdev->mdev; + int err; + + err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); + if (unlikely(err)) { + net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", + ndev->name, err); + + u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); + u64_stats_inc(&mhi_netdev->stats.tx_dropped); + u64_stats_update_end(&mhi_netdev->stats.tx_syncp); + + /* drop the packet */ + dev_kfree_skb_any(skb); + } + + if (mhi_queue_is_full(mdev, DMA_TO_DEVICE)) + netif_stop_queue(ndev); + + return NETDEV_TX_OK; +} + +static void mhi_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp); + stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); + stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); + stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); + stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped); + } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); + + do { + start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp); + stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets); + stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes); + stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors); + stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped); + } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start)); +} + +static const struct net_device_ops mhi_netdev_ops = { + .ndo_open = mhi_ndo_open, + .ndo_stop = mhi_ndo_stop, + .ndo_start_xmit = mhi_ndo_xmit, + .ndo_get_stats64 = mhi_ndo_get_stats64, +}; + +static void mhi_net_setup(struct net_device *ndev) +{ + ndev->header_ops = NULL; /* No header */ + ndev->type = ARPHRD_NONE; /* QMAP... 
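Editor's note: mhi_ndo_get_stats64() above uses the u64_stats seqcount retry loop so 64-bit counters read consistently even on 32-bit machines. A single-threaded toy model of the protocol; real code must use the u64_stats_* helpers, which add the memory barriers this sketch omits:

#include <stdint.h>
#include <stdio.h>

struct toy_stats {
	unsigned int seq;		/* even: idle, odd: writer active */
	uint64_t packets, bytes;
};

static void writer_update(struct toy_stats *s, uint64_t len)
{
	s->seq++;			/* begin: sequence becomes odd */
	s->packets += 1;
	s->bytes += len;
	s->seq++;			/* end: sequence even again */
}

static void reader_snapshot(const struct toy_stats *s,
			    uint64_t *packets, uint64_t *bytes)
{
	unsigned int start;

	do {
		start = s->seq;		/* u64_stats_fetch_begin_irq() */
		*packets = s->packets;
		*bytes = s->bytes;
	} while ((start & 1) || start != s->seq);	/* ..._fetch_retry_irq() */
}

int main(void)
{
	struct toy_stats s = { 0 };
	uint64_t p, b;

	writer_update(&s, 1500);
	reader_snapshot(&s, &p, &b);
	printf("%llu packets, %llu bytes\n",
	       (unsigned long long)p, (unsigned long long)b);
	return 0;
}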
*/ + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->flags = IFF_POINTOPOINT | IFF_NOARP; + ndev->netdev_ops = &mhi_netdev_ops; + ndev->mtu = MHI_NET_DEFAULT_MTU; + ndev->min_mtu = MHI_NET_MIN_MTU; + ndev->max_mtu = MHI_NET_MAX_MTU; + ndev->tx_queue_len = 1000; +} + +static void mhi_net_dl_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + int remaining; + + remaining = atomic_dec_return(&mhi_netdev->stats.rx_queued); + + if (unlikely(mhi_res->transaction_status)) { + dev_kfree_skb_any(skb); + + /* MHI layer stopping/resetting the DL channel */ + if (mhi_res->transaction_status == -ENOTCONN) + return; + + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_errors); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + } else { + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, mhi_res->bytes_xferd); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + + skb->protocol = htons(ETH_P_MAP); + skb_put(skb, mhi_res->bytes_xferd); + netif_rx(skb); + } + + /* Refill if RX buffers queue becomes low */ + if (remaining <= mhi_netdev->rx_queue_sz / 2) + schedule_delayed_work(&mhi_netdev->rx_refill, 0); +} + +static void mhi_net_ul_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct net_device *ndev = mhi_netdev->ndev; + struct sk_buff *skb = mhi_res->buf_addr; + + /* Hardware has consumed the buffer, so free the skb (which is not + * freed by the MHI stack) and perform accounting. + */ + dev_consume_skb_any(skb); + + u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); + if (unlikely(mhi_res->transaction_status)) { + + /* MHI layer stopping/resetting the UL channel */ + if (mhi_res->transaction_status == -ENOTCONN) { + u64_stats_update_end(&mhi_netdev->stats.tx_syncp); + return; + } + + u64_stats_inc(&mhi_netdev->stats.tx_errors); + } else { + u64_stats_inc(&mhi_netdev->stats.tx_packets); + u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd); + } + u64_stats_update_end(&mhi_netdev->stats.tx_syncp); + + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); +} + +static void mhi_net_rx_refill_work(struct work_struct *work) +{ + struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev, + rx_refill.work); + struct net_device *ndev = mhi_netdev->ndev; + struct mhi_device *mdev = mhi_netdev->mdev; + int size = READ_ONCE(ndev->mtu); + struct sk_buff *skb; + int err; + + while (atomic_read(&mhi_netdev->stats.rx_queued) < mhi_netdev->rx_queue_sz) { + skb = netdev_alloc_skb(ndev, size); + if (unlikely(!skb)) + break; + + err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT); + if (unlikely(err)) { + net_err_ratelimited("%s: Failed to queue RX buf (%d)\n", + ndev->name, err); + kfree_skb(skb); + break; + } + + atomic_inc(&mhi_netdev->stats.rx_queued); + + /* Do not hog the CPU if rx buffers are consumed faster than + * queued (unlikely). 
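Editor's note: the DL path above is a classic producer watermark. Every completion decrements rx_queued, but the refill worker is only kicked once the pool drains to half of rx_queue_sz, so allocations happen in batches rather than one per packet. The trigger condition in isolation:

#include <stdio.h>

#define RX_QUEUE_SZ	128	/* all MHI net channels use 128 ring elements */

static int need_refill(int remaining)
{
	return remaining <= RX_QUEUE_SZ / 2;
}

int main(void)
{
	/* prints 0 0 1: refill starts only at the half-full watermark */
	printf("%d %d %d\n", need_refill(127), need_refill(65), need_refill(64));
	return 0;
}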
+ */ + cond_resched(); + } + + /* If we're still starved of rx buffers, reschedule later */ + if (unlikely(!atomic_read(&mhi_netdev->stats.rx_queued))) + schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); +} + +static int mhi_net_probe(struct mhi_device *mhi_dev, + const struct mhi_device_id *id) +{ + const char *netname = (char *)id->driver_data; + struct device *dev = &mhi_dev->dev; + struct mhi_net_dev *mhi_netdev; + struct net_device *ndev; + int err; + + ndev = alloc_netdev(sizeof(*mhi_netdev), netname, NET_NAME_PREDICTABLE, + mhi_net_setup); + if (!ndev) + return -ENOMEM; + + mhi_netdev = netdev_priv(ndev); + dev_set_drvdata(dev, mhi_netdev); + mhi_netdev->ndev = ndev; + mhi_netdev->mdev = mhi_dev; + SET_NETDEV_DEV(ndev, &mhi_dev->dev); + + /* All MHI net channels have 128 ring elements (at least for now) */ + mhi_netdev->rx_queue_sz = 128; + + INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); + u64_stats_init(&mhi_netdev->stats.rx_syncp); + u64_stats_init(&mhi_netdev->stats.tx_syncp); + + /* Start MHI channels */ + err = mhi_prepare_for_transfer(mhi_dev); + if (err) + goto out_err; + + err = register_netdev(ndev); + if (err) + goto out_err; + + return 0; + +out_err: + free_netdev(ndev); + return err; +} + +static void mhi_net_remove(struct mhi_device *mhi_dev) +{ + struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + + unregister_netdev(mhi_netdev->ndev); + + mhi_unprepare_from_transfer(mhi_netdev->mdev); + + free_netdev(mhi_netdev->ndev); +} + +static const struct mhi_device_id mhi_net_id_table[] = { + { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)"mhi_hwip%d" }, + { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)"mhi_swip%d" }, + {} +}; +MODULE_DEVICE_TABLE(mhi, mhi_net_id_table); + +static struct mhi_driver mhi_net_driver = { + .probe = mhi_net_probe, + .remove = mhi_net_remove, + .dl_xfer_cb = mhi_net_dl_callback, + .ul_xfer_cb = mhi_net_ul_callback, + .id_table = mhi_net_id_table, + .driver = { + .name = "mhi_net", + .owner = THIS_MODULE, + }, +}; + +module_mhi_driver(mhi_net_driver); + +MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>"); +MODULE_DESCRIPTION("Network over MHI"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/mii.c b/drivers/net/mii.c index f6a97c859f3a..e71ebb933266 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -84,15 +84,16 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000); stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000); } + + ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE); + if (mii->supports_gmii) + ecmd->advertising |= + mii_ctrl1000_to_ethtool_adv_t(ctrl1000); + if (bmcr & BMCR_ANENABLE) { ecmd->advertising |= ADVERTISED_Autoneg; ecmd->autoneg = AUTONEG_ENABLE; - ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE); - if (mii->supports_gmii) - ecmd->advertising |= - mii_ctrl1000_to_ethtool_adv_t(ctrl1000); - if (bmsr & BMSR_ANEGCOMPLETE) { ecmd->lp_advertising = mii_get_an(mii, MII_LPA); ecmd->lp_advertising |= @@ -171,14 +172,15 @@ void mii_ethtool_get_link_ksettings(struct mii_if_info *mii, ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000); stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000); } + + advertising |= mii_get_an(mii, MII_ADVERTISE); + if (mii->supports_gmii) + advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000); + if (bmcr & BMCR_ANENABLE) { advertising |= ADVERTISED_Autoneg; cmd->base.autoneg = AUTONEG_ENABLE; - advertising |= mii_get_an(mii, MII_ADVERTISE); - if 
(mii->supports_gmii) - advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000); - if (bmsr & BMSR_ANEGCOMPLETE) { lp_advertising = mii_get_an(mii, MII_LPA); lp_advertising |= diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index fb182bec8f06..2a4892402ed8 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -697,7 +697,7 @@ static struct failover_ops net_failover_ops = { /** * net_failover_create - Create and register a failover instance * - * @dev: standby netdev + * @standby_dev: standby netdev * * Creates a failover netdev and registers a failover instance for a standby * netdev. Used by paravirtual drivers that use 3-netdev model. diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 92001f7af380..ccecba908ded 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -83,6 +83,7 @@ static struct console netconsole_ext; * whether the corresponding netpoll is active or inactive. * Also, other parameters of a target may be modified at * runtime only when it is disabled (enabled == 0). + * @extended: Denotes whether console is extended or not. * @np: The netpoll structure for this target. * Contains the other userspace visible parameters: * dev_name (read-write) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e7972e88ffe0..fe48817b3985 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -326,6 +326,12 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } + /* Resources for nexthops */ + err = devlink_resource_register(devlink, "nexthops", (u64)-1, + NSIM_RESOURCE_NEXTHOPS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + ¶ms); + out: return err; } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index deea17a0e79c..45d8a7790bd5 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -25,6 +25,7 @@ #include <net/ip6_fib.h> #include <net/fib_rules.h> #include <net/net_namespace.h> +#include <net/nexthop.h> #include "netdevsim.h" @@ -42,9 +43,12 @@ struct nsim_fib_data { struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; + struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; spinlock_t fib_lock; /* Protects hashtable, list and accounting */ + struct notifier_block nexthop_nb; + struct rhashtable nexthop_ht; struct devlink *devlink; }; @@ -86,6 +90,19 @@ static const struct rhashtable_params nsim_fib_rt_ht_params = { .automatic_shrinking = true, }; +struct nsim_nexthop { + struct rhash_head ht_node; + u64 occ; + u32 id; +}; + +static const struct rhashtable_params nsim_nexthop_ht_params = { + .key_offset = offsetof(struct nsim_nexthop, id), + .head_offset = offsetof(struct nsim_nexthop, ht_node), + .key_len = sizeof(u32), + .automatic_shrinking = true, +}; + u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max) { @@ -104,6 +121,9 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: return 0; } @@ -129,6 +149,9 @@ static void nsim_fib_set_max(struct nsim_fib_data *fib_data, case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; + case NSIM_RESOURCE_NEXTHOPS: + entry = &fib_data->nexthops; + break; default: WARN_ON(1); return; @@ -389,11 +412,6 @@ static int nsim_fib4_event(struct nsim_fib_data *data, fen_info = 
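Editor's note: the two mii.c hunks above make the same fix twice. The contents of the ADVERTISE (and CTRL1000) registers are now always translated into the advertising mask, and only the Autoneg flag remains conditional on BMCR_ANENABLE, so ethtool shows what the PHY would advertise even while autoneg is off. A toy translation covering just two link modes (register bit values per the standard MII layout):

#include <stdint.h>
#include <stdio.h>

#define ADVERTISE_10HALF	0x0020
#define ADVERTISE_10FULL	0x0040
#define BMCR_ANENABLE		0x1000

#define ADVERTISED_10baseT_Half	(1u << 0)
#define ADVERTISED_10baseT_Full	(1u << 1)
#define ADVERTISED_Autoneg	(1u << 6)

static uint32_t build_advertising(uint16_t bmcr, uint16_t advertise)
{
	uint32_t adv = 0;

	/* Unconditional after the fix */
	if (advertise & ADVERTISE_10HALF)
		adv |= ADVERTISED_10baseT_Half;
	if (advertise & ADVERTISE_10FULL)
		adv |= ADVERTISED_10baseT_Full;

	/* Only this part still depends on autoneg being enabled */
	if (bmcr & BMCR_ANENABLE)
		adv |= ADVERTISED_Autoneg;

	return adv;
}

int main(void)
{
	printf("an off: 0x%x  an on: 0x%x\n",
	       build_advertising(0, 0x0060),
	       build_advertising(BMCR_ANENABLE, 0x0060));
	return 0;
}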
container_of(info, struct fib_entry_notifier_info, info); - if (fen_info->fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return 0; - } - switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); @@ -704,11 +722,6 @@ static int nsim_fib6_event(struct nsim_fib_data *data, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - if (fen6_info->rt->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); - return 0; - } - if (fen6_info->rt->fib6_src.plen) { NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported"); return 0; @@ -838,6 +851,196 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) data->ipv6.rules.num = 0ULL; } +static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + u64 occ = 0; + int i; + + nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); + if (!nexthop) + return NULL; + + nexthop->id = info->id; + + /* Determine the number of nexthop entries the new nexthop will + * occupy. + */ + + if (!info->is_grp) { + occ = 1; + goto out; + } + + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; + +out: + nexthop->occ = occ; + return nexthop; +} + +static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) +{ + kfree(nexthop); +} + +static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, + bool add, struct netlink_ext_ack *extack) +{ + int err = 0; + + if (add) { + if (data->nexthops.num + occ <= data->nexthops.max) { + data->nexthops.num += occ; + } else { + err = -ENOSPC; + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); + } + } else { + if (WARN_ON(occ > data->nexthops.num)) + return -EINVAL; + data->nexthops.num -= occ; + } + + return err; +} + +static int nsim_nexthop_add(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_replace(struct nsim_fib_data *data, + struct nsim_nexthop *nexthop, + struct nsim_nexthop *nexthop_old, + struct netlink_ext_ack *extack) +{ + struct net *net = devlink_net(data->devlink); + int err; + + err = nsim_nexthop_account(data, nexthop->occ, true, extack); + if (err) + return err; + + err = rhashtable_replace_fast(&data->nexthop_ht, + &nexthop_old->ht_node, &nexthop->ht_node, + nsim_nexthop_ht_params); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop"); + goto err_nexthop_dismiss; + } + + nexthop_set_hw_flags(net, nexthop->id, false, true); + nsim_nexthop_account(data, nexthop_old->occ, false, extack); + nsim_nexthop_destroy(nexthop_old); + + return 0; + +err_nexthop_dismiss: + nsim_nexthop_account(data, nexthop->occ, false, extack); + return err; +} + +static int nsim_nexthop_insert(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop, *nexthop_old; + int err; + + nexthop = 
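Editor's note: nsim_nexthop_create() above sizes a nexthop object by occupancy, 1 for a plain nexthop or the sum of the member weights for a group, and nsim_nexthop_account() then reserves that amount against the devlink "nexthops" resource or fails. The accounting in miniature (standalone sketch, fixed capacity):

#include <stdio.h>

static unsigned long long nh_used, nh_max = 16;

static int nexthop_account(unsigned long long occ, int add)
{
	if (add) {
		if (nh_used + occ > nh_max)
			return -1;		/* -ENOSPC in the driver */
		nh_used += occ;
	} else {
		nh_used -= occ;
	}
	return 0;
}

int main(void)
{
	unsigned int weights[] = { 2, 1, 1 };	/* one group, three entries */
	unsigned long long occ = 0;
	unsigned int i;

	for (i = 0; i < 3; i++)
		occ += weights[i];		/* group occupies 4 entries */

	printf("add: %d, used: %llu/%llu\n",
	       nexthop_account(occ, 1), nh_used, nh_max);
	return 0;
}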
nsim_nexthop_create(data, info); + if (!nexthop) + return -ENOMEM; + + nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop_old) + err = nsim_nexthop_add(data, nexthop, info->extack); + else + err = nsim_nexthop_replace(data, nexthop, nexthop_old, + info->extack); + + if (err) + nsim_nexthop_destroy(nexthop); + + return err; +} + +static void nsim_nexthop_remove(struct nsim_fib_data *data, + struct nh_notifier_info *info) +{ + struct nsim_nexthop *nexthop; + + nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, + nsim_nexthop_ht_params); + if (!nexthop) + return; + + rhashtable_remove_fast(&data->nexthop_ht, &nexthop->ht_node, + nsim_nexthop_ht_params); + nsim_nexthop_account(data, nexthop->occ, false, info->extack); + nsim_nexthop_destroy(nexthop); +} + +static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, + nexthop_nb); + struct nh_notifier_info *info = ptr; + int err = 0; + + ASSERT_RTNL(); + + switch (event) { + case NEXTHOP_EVENT_REPLACE: + err = nsim_nexthop_insert(data, info); + break; + case NEXTHOP_EVENT_DEL: + nsim_nexthop_remove(data, info); + break; + default: + break; + } + + return notifier_from_errno(err); +} + +static void nsim_nexthop_free(void *ptr, void *arg) +{ + struct nsim_nexthop *nexthop = ptr; + struct nsim_fib_data *data = arg; + struct net *net; + + net = devlink_net(data->devlink); + nexthop_set_hw_flags(net, nexthop->id, false, false); + nsim_nexthop_account(data, nexthop->occ, false, NULL); + nsim_nexthop_destroy(nexthop); +} + static u64 nsim_fib_ipv4_resource_occ_get(void *priv) { struct nsim_fib_data *data = priv; @@ -866,12 +1069,20 @@ static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); } +static u64 nsim_fib_nexthops_res_occ_get(void *priv) +{ + struct nsim_fib_data *data = priv; + + return nsim_fib_get_val(data, NSIM_RESOURCE_NEXTHOPS, false); +} + static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, - NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES + NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; int i; @@ -897,20 +1108,32 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, return ERR_PTR(-ENOMEM); data->devlink = devlink; + err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + if (err) + goto err_data_free; + spin_lock_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); if (err) - goto err_data_free; + goto err_rhashtable_nexthop_destroy; nsim_fib_set_max_all(data, devlink); + data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; + err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb, + extack); + if (err) { + pr_err("Failed to register nexthop notifier\n"); + goto err_rhashtable_fib_destroy; + } + data->fib_nb.notifier_call = nsim_fib_event_nb; err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, nsim_fib_dump_inconsistent, extack); if (err) { pr_err("Failed to register fib notifier\n"); - goto err_rhashtable_destroy; + goto err_nexthop_nb_unregister; } devlink_resource_occ_get_register(devlink, @@ -929,11 +1152,20 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, NSIM_RESOURCE_IPV6_FIB_RULES, 
nsim_fib_ipv6_rules_res_occ_get, data); + devlink_resource_occ_get_register(devlink, + NSIM_RESOURCE_NEXTHOPS, + nsim_fib_nexthops_res_occ_get, + data); return data; -err_rhashtable_destroy: +err_nexthop_nb_unregister: + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); +err_rhashtable_fib_destroy: rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); +err_rhashtable_nexthop_destroy: + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); err_data_free: kfree(data); return ERR_PTR(err); @@ -942,6 +1174,8 @@ err_data_free: void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) { devlink_resource_occ_get_unregister(devlink, + NSIM_RESOURCE_NEXTHOPS); + devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB_RULES); devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB); @@ -950,8 +1184,11 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) devlink_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); + unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); + rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, + data); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); kfree(data); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 827fc80f50a0..698be048041b 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -158,6 +158,7 @@ enum nsim_resource_id { NSIM_RESOURCE_IPV6, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, + NSIM_RESOURCE_NEXTHOPS, }; struct nsim_dev_health { diff --git a/drivers/net/phy/adin.c b/drivers/net/phy/adin.c index 307f0ac1287b..55a0b91816e2 100644 --- a/drivers/net/phy/adin.c +++ b/drivers/net/phy/adin.c @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/delay.h> #include <linux/errno.h> +#include <linux/ethtool_netlink.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mii.h> @@ -23,6 +24,7 @@ #define ADIN1300_PHY_CTRL1 0x0012 #define ADIN1300_AUTO_MDI_EN BIT(10) #define ADIN1300_MAN_MDIX_EN BIT(9) +#define ADIN1300_DIAG_CLK_EN BIT(2) #define ADIN1300_RX_ERR_CNT 0x0014 @@ -69,6 +71,31 @@ #define ADIN1300_CLOCK_STOP_REG 0x9400 #define ADIN1300_LPI_WAKE_ERR_CNT_REG 0xa000 +#define ADIN1300_CDIAG_RUN 0xba1b +#define ADIN1300_CDIAG_RUN_EN BIT(0) + +/* + * The XSIM3/2/1 and XSHRT3/2/1 are actually relative. 
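Editor's note: nsim_fib_create()/nsim_fib_destroy() above gain two more resources (the nexthop hashtable and notifier) and keep the usual inverse-order discipline: initialize A, then B, then C, unwind with a goto ladder on failure, and tear everything down in exactly the reverse order. The skeleton of that pattern, with placeholder names standing in for the driver's steps:

#include <stdio.h>

static int init_hashtable(void)    { puts("hashtable up");    return 0; }
static int init_nh_notifier(void)  { puts("nh notifier up");  return 0; }
static int init_fib_notifier(void) { puts("fib notifier up"); return -1; }
static void undo_nh_notifier(void) { puts("nh notifier down"); }
static void undo_hashtable(void)   { puts("hashtable down"); }

static int create(void)
{
	int err;

	err = init_hashtable();
	if (err)
		return err;
	err = init_nh_notifier();
	if (err)
		goto err_hashtable;
	err = init_fib_notifier();	/* fails here: unwind B, then A */
	if (err)
		goto err_nh_notifier;
	return 0;

err_nh_notifier:
	undo_nh_notifier();
err_hashtable:
	undo_hashtable();
	return err;
}

int main(void)
{
	return create() ? 1 : 0;
}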
+ * For CDIAG_DTLD_RSLTS(0) it's ADIN1300_CDIAG_RSLT_XSIM3/2/1 + * For CDIAG_DTLD_RSLTS(1) it's ADIN1300_CDIAG_RSLT_XSIM3/2/0 + * For CDIAG_DTLD_RSLTS(2) it's ADIN1300_CDIAG_RSLT_XSIM3/1/0 + * For CDIAG_DTLD_RSLTS(3) it's ADIN1300_CDIAG_RSLT_XSIM2/1/0 + */ +#define ADIN1300_CDIAG_DTLD_RSLTS(x) (0xba1d + (x)) +#define ADIN1300_CDIAG_RSLT_BUSY BIT(10) +#define ADIN1300_CDIAG_RSLT_XSIM3 BIT(9) +#define ADIN1300_CDIAG_RSLT_XSIM2 BIT(8) +#define ADIN1300_CDIAG_RSLT_XSIM1 BIT(7) +#define ADIN1300_CDIAG_RSLT_SIM BIT(6) +#define ADIN1300_CDIAG_RSLT_XSHRT3 BIT(5) +#define ADIN1300_CDIAG_RSLT_XSHRT2 BIT(4) +#define ADIN1300_CDIAG_RSLT_XSHRT1 BIT(3) +#define ADIN1300_CDIAG_RSLT_SHRT BIT(2) +#define ADIN1300_CDIAG_RSLT_OPEN BIT(1) +#define ADIN1300_CDIAG_RSLT_GOOD BIT(0) + +#define ADIN1300_CDIAG_FLT_DIST(x) (0xba21 + (x)) + #define ADIN1300_GE_SOFT_RESET_REG 0xff0c #define ADIN1300_GE_SOFT_RESET BIT(0) @@ -321,10 +348,9 @@ static int adin_set_downshift(struct phy_device *phydev, u8 cnt) return -E2BIG; val = FIELD_PREP(ADIN1300_DOWNSPEED_RETRIES_MSK, cnt); - val |= ADIN1300_LINKING_EN; rc = phy_modify(phydev, ADIN1300_PHY_CTRL3, - ADIN1300_LINKING_EN | ADIN1300_DOWNSPEED_RETRIES_MSK, + ADIN1300_DOWNSPEED_RETRIES_MSK, val); if (rc < 0) return rc; @@ -445,12 +471,43 @@ static int adin_phy_ack_intr(struct phy_device *phydev) static int adin_phy_config_intr(struct phy_device *phydev) { - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - return phy_set_bits(phydev, ADIN1300_INT_MASK_REG, - ADIN1300_INT_MASK_EN); + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = adin_phy_ack_intr(phydev); + if (err) + return err; + + err = phy_set_bits(phydev, ADIN1300_INT_MASK_REG, + ADIN1300_INT_MASK_EN); + } else { + err = phy_clear_bits(phydev, ADIN1300_INT_MASK_REG, + ADIN1300_INT_MASK_EN); + if (err) + return err; + + err = adin_phy_ack_intr(phydev); + } + + return err; +} + +static irqreturn_t adin_phy_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, ADIN1300_INT_STATUS_REG); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } - return phy_clear_bits(phydev, ADIN1300_INT_MASK_REG, - ADIN1300_INT_MASK_EN); + if (!(irq_status & ADIN1300_INT_LINK_STAT_CHNG_EN)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int adin_cl45_to_adin_reg(struct phy_device *phydev, int devad, @@ -555,6 +612,14 @@ static int adin_config_aneg(struct phy_device *phydev) { int ret; + ret = phy_clear_bits(phydev, ADIN1300_PHY_CTRL1, ADIN1300_DIAG_CLK_EN); + if (ret < 0) + return ret; + + ret = phy_set_bits(phydev, ADIN1300_PHY_CTRL3, ADIN1300_LINKING_EN); + if (ret < 0) + return ret; + ret = adin_config_mdix(phydev); if (ret) return ret; @@ -725,10 +790,117 @@ static int adin_probe(struct phy_device *phydev) return 0; } +static int adin_cable_test_start(struct phy_device *phydev) +{ + int ret; + + ret = phy_clear_bits(phydev, ADIN1300_PHY_CTRL3, ADIN1300_LINKING_EN); + if (ret < 0) + return ret; + + ret = phy_clear_bits(phydev, ADIN1300_PHY_CTRL1, ADIN1300_DIAG_CLK_EN); + if (ret < 0) + return ret; + + /* wait a bit for the clock to stabilize */ + msleep(50); + + return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, ADIN1300_CDIAG_RUN, + ADIN1300_CDIAG_RUN_EN); +} + +static int adin_cable_test_report_trans(int result) +{ + int mask; + + if (result & ADIN1300_CDIAG_RSLT_GOOD) + return ETHTOOL_A_CABLE_RESULT_CODE_OK; + if (result & ADIN1300_CDIAG_RSLT_OPEN) + return ETHTOOL_A_CABLE_RESULT_CODE_OPEN; + + /* short with other pairs 
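Editor's note: adin_cable_test_report_trans(), continued in the lines that follow, folds the per-pair result bits into one ethtool result code with a fixed precedence: a good result wins, then open, then any cross-pair short, and a same-pair short is reported last. A standalone version using the bit layout from the defines above; the result codes are defined locally and do not reproduce the ETHTOOL_A_CABLE_RESULT_CODE_* numbering:

#include <stdio.h>

#define RSLT_GOOD	(1 << 0)
#define RSLT_OPEN	(1 << 1)
#define RSLT_SHRT	(1 << 2)
#define RSLT_XSHRT_ANY	(7 << 3)	/* XSHRT1 | XSHRT2 | XSHRT3 */

enum { CODE_UNSPEC, CODE_OK, CODE_OPEN, CODE_SAME_SHORT, CODE_CROSS_SHORT };

static int translate_result(int result)
{
	if (result & RSLT_GOOD)
		return CODE_OK;
	if (result & RSLT_OPEN)
		return CODE_OPEN;
	if (result & RSLT_XSHRT_ANY)
		return CODE_CROSS_SHORT;
	if (result & RSLT_SHRT)
		return CODE_SAME_SHORT;
	return CODE_UNSPEC;
}

int main(void)
{
	printf("%d %d %d\n",
	       translate_result(RSLT_GOOD | RSLT_SHRT),	/* good wins */
	       translate_result(RSLT_SHRT | (1 << 4)),	/* cross beats same */
	       translate_result(0));
	return 0;
}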
*/ + mask = ADIN1300_CDIAG_RSLT_XSHRT3 | + ADIN1300_CDIAG_RSLT_XSHRT2 | + ADIN1300_CDIAG_RSLT_XSHRT1; + if (result & mask) + return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT; + + if (result & ADIN1300_CDIAG_RSLT_SHRT) + return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT; + + return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC; +} + +static int adin_cable_test_report_pair(struct phy_device *phydev, + unsigned int pair) +{ + int fault_rslt; + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, + ADIN1300_CDIAG_DTLD_RSLTS(pair)); + if (ret < 0) + return ret; + + fault_rslt = adin_cable_test_report_trans(ret); + + ret = ethnl_cable_test_result(phydev, pair, fault_rslt); + if (ret < 0) + return ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, + ADIN1300_CDIAG_FLT_DIST(pair)); + if (ret < 0) + return ret; + + switch (fault_rslt) { + case ETHTOOL_A_CABLE_RESULT_CODE_OPEN: + case ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT: + case ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT: + return ethnl_cable_test_fault_length(phydev, pair, ret * 100); + default: + return 0; + } +} + +static int adin_cable_test_report(struct phy_device *phydev) +{ + unsigned int pair; + int ret; + + for (pair = ETHTOOL_A_CABLE_PAIR_A; pair <= ETHTOOL_A_CABLE_PAIR_D; pair++) { + ret = adin_cable_test_report_pair(phydev, pair); + if (ret < 0) + return ret; + } + + return 0; +} + +static int adin_cable_test_get_status(struct phy_device *phydev, + bool *finished) +{ + int ret; + + *finished = false; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, ADIN1300_CDIAG_RUN); + if (ret < 0) + return ret; + + if (ret & ADIN1300_CDIAG_RUN_EN) + return 0; + + *finished = true; + + return adin_cable_test_report(phydev); +} + static struct phy_driver adin_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_ADIN1200), .name = "ADIN1200", + .flags = PHY_POLL_CABLE_TEST, .probe = adin_probe, .config_init = adin_config_init, .soft_reset = adin_soft_reset, @@ -736,8 +908,8 @@ static struct phy_driver adin_driver[] = { .read_status = adin_read_status, .get_tunable = adin_get_tunable, .set_tunable = adin_set_tunable, - .ack_interrupt = adin_phy_ack_intr, .config_intr = adin_phy_config_intr, + .handle_interrupt = adin_phy_handle_interrupt, .get_sset_count = adin_get_sset_count, .get_strings = adin_get_strings, .get_stats = adin_get_stats, @@ -745,10 +917,13 @@ static struct phy_driver adin_driver[] = { .suspend = genphy_suspend, .read_mmd = adin_read_mmd, .write_mmd = adin_write_mmd, + .cable_test_start = adin_cable_test_start, + .cable_test_get_status = adin_cable_test_get_status, }, { PHY_ID_MATCH_MODEL(PHY_ID_ADIN1300), .name = "ADIN1300", + .flags = PHY_POLL_CABLE_TEST, .probe = adin_probe, .config_init = adin_config_init, .soft_reset = adin_soft_reset, @@ -756,8 +931,8 @@ static struct phy_driver adin_driver[] = { .read_status = adin_read_status, .get_tunable = adin_get_tunable, .set_tunable = adin_set_tunable, - .ack_interrupt = adin_phy_ack_intr, .config_intr = adin_phy_config_intr, + .handle_interrupt = adin_phy_handle_interrupt, .get_sset_count = adin_get_sset_count, .get_strings = adin_get_strings, .get_stats = adin_get_stats, @@ -765,6 +940,8 @@ static struct phy_driver adin_driver[] = { .suspend = genphy_suspend, .read_mmd = adin_read_mmd, .write_mmd = adin_write_mmd, + .cable_test_start = adin_cable_test_start, + .cable_test_get_status = adin_cable_test_get_status, }, }; diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c index eef35f8c8d45..001bb6d8bfce 100644 --- a/drivers/net/phy/amd.c +++ b/drivers/net/phy/amd.c @@ -20,6 +20,10 @@ #define MII_AM79C_IR_EN_ANEG 
0x0100 /* IR enable Aneg Complete */ #define MII_AM79C_IR_IMASK_INIT (MII_AM79C_IR_EN_LINK | MII_AM79C_IR_EN_ANEG) +#define MII_AM79C_IR_LINK_DOWN BIT(2) +#define MII_AM79C_IR_ANEG_DONE BIT(0) +#define MII_AM79C_IR_IMASK_STAT (MII_AM79C_IR_LINK_DOWN | MII_AM79C_IR_ANEG_DONE) + MODULE_DESCRIPTION("AMD PHY driver"); MODULE_AUTHOR("Heiko Schocher <hs@denx.de>"); MODULE_LICENSE("GPL"); @@ -48,22 +52,49 @@ static int am79c_config_intr(struct phy_device *phydev) { int err; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = am79c_ack_interrupt(phydev); + if (err) + return err; + err = phy_write(phydev, MII_AM79C_IR, MII_AM79C_IR_IMASK_INIT); - else + } else { err = phy_write(phydev, MII_AM79C_IR, 0); + if (err) + return err; + + err = am79c_ack_interrupt(phydev); + } return err; } +static irqreturn_t am79c_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_AM79C_IR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_AM79C_IR_IMASK_STAT)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static struct phy_driver am79c_driver[] = { { .phy_id = PHY_ID_AM79C874, .name = "AM79C874", .phy_id_mask = 0xfffffff0, /* PHY_BASIC_FEATURES */ .config_init = am79c_config_init, - .ack_interrupt = am79c_ack_interrupt, .config_intr = am79c_config_intr, + .handle_interrupt = am79c_handle_interrupt, } }; module_phy_driver(am79c_driver); diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 41e7c1432497..968dd43a2b1e 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -52,6 +52,7 @@ #define MDIO_AN_TX_VEND_INT_STATUS1_DOWNSHIFT BIT(1) #define MDIO_AN_TX_VEND_INT_STATUS2 0xcc01 +#define MDIO_AN_TX_VEND_INT_STATUS2_MASK BIT(0) #define MDIO_AN_TX_VEND_INT_MASK2 0xd401 #define MDIO_AN_TX_VEND_INT_MASK2_LINK BIT(0) @@ -246,6 +247,13 @@ static int aqr_config_intr(struct phy_device *phydev) bool en = phydev->interrupts == PHY_INTERRUPT_ENABLED; int err; + if (en) { + /* Clear any pending interrupts before enabling them */ + err = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS2); + if (err < 0) + return err; + } + err = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_MASK2, en ? MDIO_AN_TX_VEND_INT_MASK2_LINK : 0); if (err < 0) @@ -256,18 +264,39 @@ static int aqr_config_intr(struct phy_device *phydev) if (err < 0) return err; - return phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_INT_VEND_MASK, - en ? VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 | - VEND1_GLOBAL_INT_VEND_MASK_AN : 0); + err = phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_INT_VEND_MASK, + en ? 
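Editor's note: aqr_config_intr() above brackets the mask writes with reads of the latched, read-to-clear status register: one read before enabling discards stale events, and (in the lines that follow) one read after disabling ensures nothing stays latched while interrupts are off. The ordering modeled on a fake latch:

#include <stdio.h>

static unsigned int latched = 1;	/* pretend an event is pending */
static int irq_enabled;

static unsigned int read_status(void)	/* reading clears the latch */
{
	unsigned int v = latched;

	latched = 0;
	return v;
}

static void config_intr(int enable)
{
	if (enable)
		read_status();		/* drop stale events first */
	irq_enabled = enable;
	if (!enable)
		read_status();		/* leave nothing latched behind */
}

int main(void)
{
	config_intr(1);
	printf("enabled with latch=%u\n", latched);	/* 0: no spurious IRQ */
	return 0;
}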
VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 | + VEND1_GLOBAL_INT_VEND_MASK_AN : 0); + if (err < 0) + return err; + + if (!en) { + /* Clear any pending interrupts after we have disabled them */ + err = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS2); + if (err < 0) + return err; + } + + return 0; } -static int aqr_ack_interrupt(struct phy_device *phydev) +static irqreturn_t aqr_handle_interrupt(struct phy_device *phydev) { - int reg; + int irq_status; + + irq_status = phy_read_mmd(phydev, MDIO_MMD_AN, + MDIO_AN_TX_VEND_INT_STATUS2); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MDIO_AN_TX_VEND_INT_STATUS2_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); - reg = phy_read_mmd(phydev, MDIO_MMD_AN, - MDIO_AN_TX_VEND_INT_STATUS2); - return (reg < 0) ? reg : 0; + return IRQ_HANDLED; } static int aqr_read_status(struct phy_device *phydev) @@ -584,7 +613,7 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQ1202", .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, }, { @@ -592,7 +621,7 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQ2104", .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, }, { @@ -600,7 +629,7 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQR105", .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, .suspend = aqr107_suspend, .resume = aqr107_resume, @@ -610,7 +639,7 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQR106", .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, }, { @@ -620,7 +649,7 @@ static struct phy_driver aqr_driver[] = { .config_init = aqr107_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr107_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, @@ -638,7 +667,7 @@ static struct phy_driver aqr_driver[] = { .config_init = aqcs109_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr107_read_status, .get_tunable = aqr107_get_tunable, .set_tunable = aqr107_set_tunable, @@ -654,7 +683,7 @@ static struct phy_driver aqr_driver[] = { .name = "Aquantia AQR405", .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, - .ack_interrupt = aqr_ack_interrupt, + .handle_interrupt = aqr_handle_interrupt, .read_status = aqr_read_status, }, }; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index ed601a7e46a0..d0b36fd6c265 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -614,6 +614,11 @@ static int at803x_config_intr(struct phy_device *phydev) value = phy_read(phydev, AT803X_INTR_ENABLE); if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + /* Clear any pending interrupts */ + err = at803x_ack_interrupt(phydev); + if (err) + return err; + value |= AT803X_INTR_ENABLE_AUTONEG_ERR; value |= AT803X_INTR_ENABLE_SPEED_CHANGED; value |= 
AT803X_INTR_ENABLE_DUPLEX_CHANGED; @@ -621,13 +626,44 @@ static int at803x_config_intr(struct phy_device *phydev) value |= AT803X_INTR_ENABLE_LINK_SUCCESS; err = phy_write(phydev, AT803X_INTR_ENABLE, value); - } - else + } else { err = phy_write(phydev, AT803X_INTR_ENABLE, 0); + if (err) + return err; + + /* Clear any pending interrupts */ + err = at803x_ack_interrupt(phydev); + } return err; } +static irqreturn_t at803x_handle_interrupt(struct phy_device *phydev) +{ + int irq_status, int_enabled; + + irq_status = phy_read(phydev, AT803X_INTR_STATUS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + /* Read the current enabled interrupts */ + int_enabled = phy_read(phydev, AT803X_INTR_ENABLE); + if (int_enabled < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + /* See if this was one of our enabled interrupts */ + if (!(irq_status & int_enabled)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static void at803x_link_change_notify(struct phy_device *phydev) { /* @@ -1062,8 +1098,8 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, /* PHY_GBIT_FEATURES */ .read_status = at803x_read_status, - .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, + .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, .cable_test_start = at803x_cable_test_start, @@ -1082,8 +1118,8 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_BASIC_FEATURES */ - .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, + .handle_interrupt = at803x_handle_interrupt, }, { /* Qualcomm Atheros AR8031/AR8033 */ PHY_ID_MATCH_EXACT(ATH8031_PHY_ID), @@ -1100,8 +1136,8 @@ static struct phy_driver at803x_driver[] = { /* PHY_GBIT_FEATURES */ .read_status = at803x_read_status, .aneg_done = at803x_aneg_done, - .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, + .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, .cable_test_start = at803x_cable_test_start, @@ -1120,8 +1156,8 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_BASIC_FEATURES */ - .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, + .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, }, { @@ -1132,8 +1168,8 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ - .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, + .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, .read_status = at803x_read_status, diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index 9ccf28b0a04d..da8f7cb41b44 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -256,8 +256,8 @@ static struct phy_driver bcm_cygnus_phy_driver[] = { .name = "Broadcom Cygnus PHY", /* PHY_GBIT_FEATURES */ .config_init = bcm_cygnus_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, .suspend = genphy_suspend, .resume = bcm_cygnus_resume, }, { diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c 
index ef6825b30323..53282a6d5928 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -181,21 +181,62 @@ EXPORT_SYMBOL_GPL(bcm_phy_ack_intr); int bcm_phy_config_intr(struct phy_device *phydev) { - int reg; + int reg, err; reg = phy_read(phydev, MII_BCM54XX_ECR); if (reg < 0) return reg; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = bcm_phy_ack_intr(phydev); + if (err) + return err; + reg &= ~MII_BCM54XX_ECR_IM; - else + err = phy_write(phydev, MII_BCM54XX_ECR, reg); + } else { reg |= MII_BCM54XX_ECR_IM; + err = phy_write(phydev, MII_BCM54XX_ECR, reg); + if (err) + return err; - return phy_write(phydev, MII_BCM54XX_ECR, reg); + err = bcm_phy_ack_intr(phydev); + } + return err; } EXPORT_SYMBOL_GPL(bcm_phy_config_intr); +irqreturn_t bcm_phy_handle_interrupt(struct phy_device *phydev) +{ + int irq_status, irq_mask; + + irq_status = phy_read(phydev, MII_BCM54XX_ISR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + /* If a bit from the Interrupt Mask register is set, the corresponding + * bit from the Interrupt Status register is masked. So read the IMR + * and then flip the bits to get the list of possible interrupt + * sources. + */ + irq_mask = phy_read(phydev, MII_BCM54XX_IMR); + if (irq_mask < 0) { + phy_error(phydev); + return IRQ_NONE; + } + irq_mask = ~irq_mask; + + if (!(irq_status & irq_mask)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(bcm_phy_handle_interrupt); + int bcm_phy_read_shadow(struct phy_device *phydev, u16 shadow) { phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 237a8503c9b4..c3842f87c33b 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -63,6 +63,7 @@ int bcm_phy_modify_rdb(struct phy_device *phydev, u16 rdb, u16 mask, int bcm_phy_ack_intr(struct phy_device *phydev); int bcm_phy_config_intr(struct phy_device *phydev); +irqreturn_t bcm_phy_handle_interrupt(struct phy_device *phydev); int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down); diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c index 8998e68bb26b..d8f3024860dc 100644 --- a/drivers/net/phy/bcm54140.c +++ b/drivers/net/phy/bcm54140.c @@ -637,13 +637,29 @@ static int bcm54140_config_init(struct phy_device *phydev) BCM54140_RDB_C_PWR_ISOLATE, 0); } -static int bcm54140_did_interrupt(struct phy_device *phydev) +static irqreturn_t bcm54140_handle_interrupt(struct phy_device *phydev) { - int ret; + int irq_status, irq_mask; + + irq_status = bcm_phy_read_rdb(phydev, BCM54140_RDB_ISR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + irq_mask = bcm_phy_read_rdb(phydev, BCM54140_RDB_IMR); + if (irq_mask < 0) { + phy_error(phydev); + return IRQ_NONE; + } + irq_mask = ~irq_mask; + + if (!(irq_status & irq_mask)) + return IRQ_NONE; - ret = bcm_phy_read_rdb(phydev, BCM54140_RDB_ISR); + phy_trigger_machine(phydev); - return (ret < 0) ? 
0 : ret; + return IRQ_HANDLED; } static int bcm54140_ack_intr(struct phy_device *phydev) @@ -665,7 +681,7 @@ static int bcm54140_config_intr(struct phy_device *phydev) BCM54140_RDB_TOP_IMR_PORT0, BCM54140_RDB_TOP_IMR_PORT1, BCM54140_RDB_TOP_IMR_PORT2, BCM54140_RDB_TOP_IMR_PORT3, }; - int reg; + int reg, err; if (priv->port >= ARRAY_SIZE(port_to_imr_bit)) return -EINVAL; @@ -674,12 +690,23 @@ static int bcm54140_config_intr(struct phy_device *phydev) if (reg < 0) return reg; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = bcm54140_ack_intr(phydev); + if (err) + return err; + reg &= ~port_to_imr_bit[priv->port]; - else + err = bcm54140_base_write_rdb(phydev, BCM54140_RDB_TOP_IMR, reg); + } else { reg |= port_to_imr_bit[priv->port]; + err = bcm54140_base_write_rdb(phydev, BCM54140_RDB_TOP_IMR, reg); + if (err) + return err; + + err = bcm54140_ack_intr(phydev); + } - return bcm54140_base_write_rdb(phydev, BCM54140_RDB_TOP_IMR, reg); + return err; } static int bcm54140_get_downshift(struct phy_device *phydev, u8 *data) @@ -834,8 +861,7 @@ static struct phy_driver bcm54140_drivers[] = { .flags = PHY_POLL_CABLE_TEST, .features = PHY_GBIT_FEATURES, .config_init = bcm54140_config_init, - .did_interrupt = bcm54140_did_interrupt, - .ack_interrupt = bcm54140_ack_intr, + .handle_interrupt = bcm54140_handle_interrupt, .config_intr = bcm54140_config_intr, .probe = bcm54140_probe, .suspend = genphy_suspend, diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 459fb2069c7e..0eb33be824f1 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -25,12 +25,22 @@ static int bcm63xx_config_intr(struct phy_device *phydev) if (reg < 0) return reg; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = bcm_phy_ack_intr(phydev); + if (err) + return err; + reg &= ~MII_BCM63XX_IR_GMASK; - else + err = phy_write(phydev, MII_BCM63XX_IR, reg); + } else { reg |= MII_BCM63XX_IR_GMASK; + err = phy_write(phydev, MII_BCM63XX_IR, reg); + if (err) + return err; + + err = bcm_phy_ack_intr(phydev); + } - err = phy_write(phydev, MII_BCM63XX_IR, reg); return err; } @@ -67,8 +77,8 @@ static struct phy_driver bcm63xx_driver[] = { /* PHY_BASIC_FEATURES */ .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, @@ -77,8 +87,8 @@ static struct phy_driver bcm63xx_driver[] = { /* PHY_BASIC_FEATURES */ .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm63xx_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, } }; module_phy_driver(bcm63xx_driver); diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index df360e1c5069..4ac8fd190e9d 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -144,35 +144,41 @@ static int bcm87xx_config_intr(struct phy_device *phydev) if (reg < 0) return reg; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = phy_read(phydev, BCM87XX_LASI_STATUS); + if (err) + return err; + reg |= 1; - else + err = phy_write(phydev, BCM87XX_LASI_CONTROL, reg); + } else { reg &= ~1; + err = phy_write(phydev, BCM87XX_LASI_CONTROL, reg); + if (err) + return err; + + err = phy_read(phydev, BCM87XX_LASI_STATUS); 
+ } - err = phy_write(phydev, BCM87XX_LASI_CONTROL, reg); return err; } -static int bcm87xx_did_interrupt(struct phy_device *phydev) +static irqreturn_t bcm87xx_handle_interrupt(struct phy_device *phydev) { - int reg; + int irq_status; - reg = phy_read(phydev, BCM87XX_LASI_STATUS); - - if (reg < 0) { - phydev_err(phydev, - "Error: Read of BCM87XX_LASI_STATUS failed: %d\n", - reg); - return 0; + irq_status = phy_read(phydev, BCM87XX_LASI_STATUS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; } - return (reg & 1) != 0; -} -static int bcm87xx_ack_interrupt(struct phy_device *phydev) -{ - /* Reading the LASI status clears it. */ - bcm87xx_did_interrupt(phydev); - return 0; + if (irq_status == 0) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int bcm8706_match_phy_device(struct phy_device *phydev) @@ -194,9 +200,8 @@ static struct phy_driver bcm87xx_driver[] = { .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, - .ack_interrupt = bcm87xx_ack_interrupt, .config_intr = bcm87xx_config_intr, - .did_interrupt = bcm87xx_did_interrupt, + .handle_interrupt = bcm87xx_handle_interrupt, .match_phy_device = bcm8706_match_phy_device, }, { .phy_id = PHY_ID_BCM8727, @@ -206,9 +211,8 @@ static struct phy_driver bcm87xx_driver[] = { .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, - .ack_interrupt = bcm87xx_ack_interrupt, .config_intr = bcm87xx_config_intr, - .did_interrupt = bcm87xx_did_interrupt, + .handle_interrupt = bcm87xx_handle_interrupt, .match_phy_device = bcm8727_match_phy_device, } }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index cd271de9609b..8a4ec3222168 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -634,15 +634,43 @@ static int brcm_fet_config_intr(struct phy_device *phydev) if (reg < 0) return reg; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = brcm_fet_ack_interrupt(phydev); + if (err) + return err; + reg &= ~MII_BRCM_FET_IR_MASK; - else + err = phy_write(phydev, MII_BRCM_FET_INTREG, reg); + } else { reg |= MII_BRCM_FET_IR_MASK; + err = phy_write(phydev, MII_BRCM_FET_INTREG, reg); + if (err) + return err; + + err = brcm_fet_ack_interrupt(phydev); + } - err = phy_write(phydev, MII_BRCM_FET_INTREG, reg); return err; } +static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_BRCM_FET_INTREG); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (irq_status == 0) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + struct bcm53xx_phy_priv { u64 *stats; }; @@ -681,40 +709,40 @@ static struct phy_driver broadcom_drivers[] = { .name = "Broadcom BCM5411", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM5421, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5421", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM54210E, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54210E", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, 
.config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM5461, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5461", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM54612E, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54612E", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, @@ -722,8 +750,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, .config_aneg = bcm54616s_config_aneg, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, .read_status = bcm54616s_read_status, .probe = bcm54616s_probe, }, { @@ -732,8 +760,8 @@ static struct phy_driver broadcom_drivers[] = { .name = "Broadcom BCM5464", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -743,8 +771,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM54810, .phy_id_mask = 0xfffffff0, @@ -752,8 +780,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, .suspend = genphy_suspend, .resume = bcm54xx_resume, }, { @@ -763,8 +791,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .config_init = bcm54811_config_init, .config_aneg = bcm5481_config_aneg, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, .suspend = genphy_suspend, .resume = bcm54xx_resume, }, { @@ -774,48 +802,48 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .config_init = bcm5482_config_init, .read_status = bcm5482_read_status, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM50610, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM50610M, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610M", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM57780, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM57780", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCMAC131, .phy_id_mask = 
0xfffffff0, .name = "Broadcom BCMAC131", /* PHY_BASIC_FEATURES */ .config_init = brcm_fet_config_init, - .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, + .handle_interrupt = brcm_fet_handle_interrupt, }, { .phy_id = PHY_ID_BCM5241, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5241", /* PHY_BASIC_FEATURES */ .config_init = brcm_fet_config_init, - .ack_interrupt = brcm_fet_ack_interrupt, .config_intr = brcm_fet_config_intr, + .handle_interrupt = brcm_fet_handle_interrupt, }, { .phy_id = PHY_ID_BCM5395, .phy_id_mask = 0xfffffff0, @@ -837,16 +865,16 @@ static struct phy_driver broadcom_drivers[] = { .get_stats = bcm53xx_phy_get_stats, .probe = bcm53xx_phy_probe, .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, }, { .phy_id = PHY_ID_BCM89610, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM89610", /* PHY_GBIT_FEATURES */ .config_init = bcm54xx_config_init, - .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, } }; module_phy_driver(broadcom_drivers); diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c index 9d1612a4d7e6..ef5f412e101f 100644 --- a/drivers/net/phy/cicada.c +++ b/drivers/net/phy/cicada.c @@ -87,15 +87,42 @@ static int cis820x_config_intr(struct phy_device *phydev) { int err; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = cis820x_ack_interrupt(phydev); + if (err) + return err; + err = phy_write(phydev, MII_CIS8201_IMASK, MII_CIS8201_IMASK_MASK); - else + } else { err = phy_write(phydev, MII_CIS8201_IMASK, 0); + if (err) + return err; + + err = cis820x_ack_interrupt(phydev); + } return err; } +static irqreturn_t cis820x_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_CIS8201_ISTAT); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_CIS8201_IMASK_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + /* Cicada 8201, a.k.a Vitesse VSC8201 */ static struct phy_driver cis820x_driver[] = { { @@ -104,16 +131,16 @@ static struct phy_driver cis820x_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = &cis820x_config_init, - .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, + .handle_interrupt = &cis820x_handle_interrupt, }, { .phy_id = 0x000fc440, .name = "Cicada Cis8204", .phy_id_mask = 0x000fffc0, /* PHY_GBIT_FEATURES */ .config_init = &cis820x_config_init, - .ack_interrupt = &cis820x_ack_interrupt, .config_intr = &cis820x_config_intr, + .handle_interrupt = &cis820x_handle_interrupt, } }; module_phy_driver(cis820x_driver); diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index 942f277463a4..a3b3842c67e5 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -47,6 +47,10 @@ #define MII_DM9161_INTR_STOP \ (MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \ | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK) +#define MII_DM9161_INTR_CHANGE \ + (MII_DM9161_INTR_DPLX_CHANGE | \ + MII_DM9161_INTR_SPD_CHANGE | \ + MII_DM9161_INTR_LINK_CHANGE) /* DM9161 10BT Configuration/Status */ #define MII_DM9161_10BTCSR 0x12 @@ -57,24 +61,58 @@ MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); +static int dm9161_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, 
MII_DM9161_INTR); + + return (err < 0) ? err : 0; +} + #define DM9161_DELAY 1 static int dm9161_config_intr(struct phy_device *phydev) { - int temp; + int temp, err; temp = phy_read(phydev, MII_DM9161_INTR); if (temp < 0) return temp; - if (PHY_INTERRUPT_ENABLED == phydev->interrupts) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = dm9161_ack_interrupt(phydev); + if (err) + return err; + temp &= ~(MII_DM9161_INTR_STOP); - else + err = phy_write(phydev, MII_DM9161_INTR, temp); + } else { temp |= MII_DM9161_INTR_STOP; + err = phy_write(phydev, MII_DM9161_INTR, temp); + if (err) + return err; + + err = dm9161_ack_interrupt(phydev); + } + + return err; +} - temp = phy_write(phydev, MII_DM9161_INTR, temp); +static irqreturn_t dm9161_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_DM9161_INTR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } - return temp; + if (!(irq_status & MII_DM9161_INTR_CHANGE)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int dm9161_config_aneg(struct phy_device *phydev) @@ -132,13 +170,6 @@ static int dm9161_config_init(struct phy_device *phydev) return phy_write(phydev, MII_BMCR, BMCR_ANENABLE); } -static int dm9161_ack_interrupt(struct phy_device *phydev) -{ - int err = phy_read(phydev, MII_DM9161_INTR); - - return (err < 0) ? err : 0; -} - static struct phy_driver dm91xx_driver[] = { { .phy_id = 0x0181b880, @@ -147,8 +178,8 @@ static struct phy_driver dm91xx_driver[] = { /* PHY_BASIC_FEATURES */ .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, + .handle_interrupt = dm9161_handle_interrupt, }, { .phy_id = 0x0181b8b0, .name = "Davicom DM9161B/C", @@ -156,8 +187,8 @@ static struct phy_driver dm91xx_driver[] = { /* PHY_BASIC_FEATURES */ .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, + .handle_interrupt = dm9161_handle_interrupt, }, { .phy_id = 0x0181b8a0, .name = "Davicom DM9161A", @@ -165,15 +196,15 @@ static struct phy_driver dm91xx_driver[] = { /* PHY_BASIC_FEATURES */ .config_init = dm9161_config_init, .config_aneg = dm9161_config_aneg, - .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, + .handle_interrupt = dm9161_handle_interrupt, }, { .phy_id = 0x00181b80, .name = "Davicom DM9131", .phy_id_mask = 0x0ffffff0, /* PHY_BASIC_FEATURES */ - .ack_interrupt = dm9161_ack_interrupt, .config_intr = dm9161_config_intr, + .handle_interrupt = dm9161_handle_interrupt, } }; module_phy_driver(dm91xx_driver); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index fec58ad69e02..0ee23d29c0d4 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -37,6 +37,8 @@ #define MII_LXT970_ISR 18 /* Interrupt Status Register */ +#define MII_LXT970_IRS_MINT BIT(15) + #define MII_LXT970_CONFIG 19 /* Configuration Register */ /* ------------------------------------------------------------------------- */ @@ -47,6 +49,7 @@ #define MII_LXT971_IER_IEN 0x00f2 #define MII_LXT971_ISR 19 /* Interrupt Status Register */ +#define MII_LXT971_ISR_MASK 0x00f0 /* register definitions for the 973 */ #define MII_LXT973_PCR 16 /* Port Configuration Register */ @@ -75,10 +78,50 @@ static int lxt970_ack_interrupt(struct phy_device *phydev) static int lxt970_config_intr(struct phy_device *phydev) { - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - 
return phy_write(phydev, MII_LXT970_IER, MII_LXT970_IER_IEN); - else - return phy_write(phydev, MII_LXT970_IER, 0); + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = lxt970_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, MII_LXT970_IER, MII_LXT970_IER_IEN); + } else { + err = phy_write(phydev, MII_LXT970_IER, 0); + if (err) + return err; + + err = lxt970_ack_interrupt(phydev); + } + + return err; +} + +static irqreturn_t lxt970_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + /* The interrupt status register is cleared by reading BMSR + * followed by MII_LXT970_ISR + */ + irq_status = phy_read(phydev, MII_BMSR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + irq_status = phy_read(phydev, MII_LXT970_ISR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_LXT970_IRS_MINT)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int lxt970_config_init(struct phy_device *phydev) @@ -99,10 +142,41 @@ static int lxt971_ack_interrupt(struct phy_device *phydev) static int lxt971_config_intr(struct phy_device *phydev) { - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - return phy_write(phydev, MII_LXT971_IER, MII_LXT971_IER_IEN); - else - return phy_write(phydev, MII_LXT971_IER, 0); + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = lxt971_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, MII_LXT971_IER, MII_LXT971_IER_IEN); + } else { + err = phy_write(phydev, MII_LXT971_IER, 0); + if (err) + return err; + + err = lxt971_ack_interrupt(phydev); + } + + return err; +} + +static irqreturn_t lxt971_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_LXT971_ISR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_LXT971_ISR_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } /* @@ -237,15 +311,15 @@ static struct phy_driver lxt97x_driver[] = { .phy_id_mask = 0xfffffff0, /* PHY_BASIC_FEATURES */ .config_init = lxt970_config_init, - .ack_interrupt = lxt970_ack_interrupt, .config_intr = lxt970_config_intr, + .handle_interrupt = lxt970_handle_interrupt, }, { .phy_id = 0x001378e0, .name = "LXT971", .phy_id_mask = 0xfffffff0, /* PHY_BASIC_FEATURES */ - .ack_interrupt = lxt971_ack_interrupt, .config_intr = lxt971_config_intr, + .handle_interrupt = lxt971_handle_interrupt, .suspend = genphy_suspend, .resume = genphy_resume, }, { diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 5aec673a0120..587930a7f48b 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -80,8 +80,11 @@ #define MII_M1111_HWCFG_MODE_FIBER_RGMII 0x3 #define MII_M1111_HWCFG_MODE_SGMII_NO_CLK 0x4 #define MII_M1111_HWCFG_MODE_RTBI 0x7 +#define MII_M1111_HWCFG_MODE_COPPER_1000X_AN 0x8 #define MII_M1111_HWCFG_MODE_COPPER_RTBI 0x9 #define MII_M1111_HWCFG_MODE_COPPER_RGMII 0xb +#define MII_M1111_HWCFG_MODE_COPPER_1000X_NOAN 0xc +#define MII_M1111_HWCFG_SERIAL_AN_BYPASS BIT(12) #define MII_M1111_HWCFG_FIBER_COPPER_RES BIT(13) #define MII_M1111_HWCFG_FIBER_COPPER_AUTO BIT(15) @@ -314,16 +317,43 @@ static int marvell_config_intr(struct phy_device *phydev) { int err; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = marvell_ack_interrupt(phydev); + if (err) + return err; + err = phy_write(phydev, 
MII_M1011_IMASK, MII_M1011_IMASK_INIT); - else + } else { err = phy_write(phydev, MII_M1011_IMASK, MII_M1011_IMASK_CLEAR); + if (err) + return err; + + err = marvell_ack_interrupt(phydev); + } return err; } +static irqreturn_t marvell_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_M1011_IEVENT); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_M1011_IMASK_INIT)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static int marvell_set_polarity(struct phy_device *phydev, int polarity) { int reg; @@ -629,6 +659,51 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) return genphy_check_and_restart_aneg(phydev, changed); } +static int m88e1111_config_aneg(struct phy_device *phydev) +{ + int extsr = phy_read(phydev, MII_M1111_PHY_EXT_SR); + int err; + + if (extsr < 0) + return extsr; + + /* If not using SGMII or copper 1000BaseX modes, use normal process. + * Steps below are only required for these modes. + */ + if (phydev->interface != PHY_INTERFACE_MODE_SGMII && + (extsr & MII_M1111_HWCFG_MODE_MASK) != + MII_M1111_HWCFG_MODE_COPPER_1000X_AN) + return marvell_config_aneg(phydev); + + err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); + if (err < 0) + goto error; + + /* Configure the copper link first */ + err = marvell_config_aneg(phydev); + if (err < 0) + goto error; + + /* Do not touch the fiber page if we're in copper->sgmii mode */ + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) + return 0; + + /* Then the fiber link */ + err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); + if (err < 0) + goto error; + + err = marvell_config_aneg_fiber(phydev); + if (err < 0) + goto error; + + return marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); + +error: + marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); + return err; +} + static int m88e1510_config_aneg(struct phy_device *phydev) { int err; @@ -696,7 +771,7 @@ static void marvell_config_led(struct phy_device *phydev) static int marvell_config_init(struct phy_device *phydev) { - /* Set defalut LED */ + /* Set default LED */ marvell_config_led(phydev); /* Set registers from marvell,reg-init DT property */ @@ -814,6 +889,28 @@ static int m88e1111_config_init_rtbi(struct phy_device *phydev) MII_M1111_HWCFG_FIBER_COPPER_AUTO); } +static int m88e1111_config_init_1000basex(struct phy_device *phydev) +{ + int extsr = phy_read(phydev, MII_M1111_PHY_EXT_SR); + int err, mode; + + if (extsr < 0) + return extsr; + + /* If using copper mode, ensure 1000BaseX auto-negotiation is enabled */ + mode = extsr & MII_M1111_HWCFG_MODE_MASK; + if (mode == MII_M1111_HWCFG_MODE_COPPER_1000X_NOAN) { + err = phy_modify(phydev, MII_M1111_PHY_EXT_SR, + MII_M1111_HWCFG_MODE_MASK | + MII_M1111_HWCFG_SERIAL_AN_BYPASS, + MII_M1111_HWCFG_MODE_COPPER_1000X_AN | + MII_M1111_HWCFG_SERIAL_AN_BYPASS); + if (err < 0) + return err; + } + return 0; +} + static int m88e1111_config_init(struct phy_device *phydev) { int err; @@ -836,6 +933,12 @@ static int m88e1111_config_init(struct phy_device *phydev) return err; } + if (phydev->interface == PHY_INTERFACE_MODE_1000BASEX) { + err = m88e1111_config_init_1000basex(phydev); + if (err < 0) + return err; + } + err = marvell_of_reg_init(phydev); if (err < 0) return err; @@ -1583,18 +1686,6 @@ static int marvell_aneg_done(struct phy_device *phydev) return (retval < 0) ? 
retval : (retval & MII_M1011_PHY_STATUS_RESOLVED); } -static int m88e1121_did_interrupt(struct phy_device *phydev) -{ - int imask; - - imask = phy_read(phydev, MII_M1011_IEVENT); - - if (imask & MII_M1011_IMASK_INIT) - return 1; - - return 0; -} - static void m88e1318_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { @@ -2621,8 +2712,8 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = marvell_config_init, .config_aneg = m88e1101_config_aneg, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2639,8 +2730,8 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = m88e1111_config_init, .config_aneg = marvell_config_aneg, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2658,10 +2749,31 @@ static struct phy_driver marvell_drivers[] = { /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = m88e1111_config_init, - .config_aneg = marvell_config_aneg, + .config_aneg = m88e1111_config_aneg, + .read_status = marvell_read_status, + .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, + .resume = genphy_resume, + .suspend = genphy_suspend, + .read_page = marvell_read_page, + .write_page = marvell_write_page, + .get_sset_count = marvell_get_sset_count, + .get_strings = marvell_get_strings, + .get_stats = marvell_get_stats, + .get_tunable = m88e1111_get_tunable, + .set_tunable = m88e1111_set_tunable, + }, + { + .phy_id = MARVELL_PHY_ID_88E1111_FINISAR, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .name = "Marvell 88E1111 (Finisar)", + /* PHY_GBIT_FEATURES */ + .probe = marvell_probe, + .config_init = m88e1111_config_init, + .config_aneg = m88e1111_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2680,8 +2792,8 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = m88e1118_config_init, .config_aneg = m88e1118_config_aneg, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2699,9 +2811,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e1121_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2721,9 +2832,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = m88e1318_config_init, .config_aneg = m88e1318_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .get_wol = m88e1318_get_wol, .set_wol = m88e1318_set_wol, .resume = genphy_resume, @@ -2743,8 +2853,8 @@ static struct 
phy_driver marvell_drivers[] = { .config_init = m88e1145_config_init, .config_aneg = m88e1101_config_aneg, .read_status = genphy_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2763,8 +2873,8 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = m88e1149_config_init, .config_aneg = m88e1118_config_aneg, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2781,8 +2891,8 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = m88e1111_config_init, .config_aneg = marvell_config_aneg, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2798,8 +2908,8 @@ static struct phy_driver marvell_drivers[] = { /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = m88e1116r_config_init, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2820,9 +2930,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = m88e1510_config_init, .config_aneg = m88e1510_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .get_wol = m88e1318_get_wol, .set_wol = m88e1318_set_wol, .resume = marvell_resume, @@ -2849,9 +2958,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e1510_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2875,9 +2983,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e1510_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2900,9 +3007,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = m88e3016_config_init, .aneg_done = marvell_aneg_done, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2921,9 +3027,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e6390_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = 
genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2946,9 +3051,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e1510_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2968,9 +3072,8 @@ static struct phy_driver marvell_drivers[] = { .config_init = marvell_config_init, .config_aneg = m88e1510_config_aneg, .read_status = marvell_read_status, - .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, - .did_interrupt = m88e1121_did_interrupt, + .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, .suspend = genphy_suspend, .read_page = marvell_read_page, @@ -2989,6 +3092,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = { { MARVELL_PHY_ID_88E1101, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1112, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1111, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E1111_FINISAR, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1118, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1121R, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1145, MARVELL_PHY_ID_MASK }, diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 757e950fb745..e59067c64e97 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -472,7 +472,7 @@ static inline void of_mdiobus_link_mdiodev(struct mii_bus *mdio, #endif /** - * mdiobus_create_device_from_board_info - create a full MDIO device given + * mdiobus_create_device - create a full MDIO device given * a mdio_board_info structure * @bus: MDIO bus to create the devices on * @bi: mdio_board_info structure describing the devices diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index a644e8e5071c..9f1f2b6c97d4 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -44,16 +44,32 @@ static int lan88xx_phy_config_intr(struct phy_device *phydev) LAN88XX_INT_MASK_LINK_CHANGE_); } else { rc = phy_write(phydev, LAN88XX_INT_MASK, 0); + if (rc) + return rc; + + /* Ack interrupts after they have been disabled */ + rc = phy_read(phydev, LAN88XX_INT_STS); } return rc < 0 ? rc : 0; } -static int lan88xx_phy_ack_interrupt(struct phy_device *phydev) +static irqreturn_t lan88xx_handle_interrupt(struct phy_device *phydev) { - int rc = phy_read(phydev, LAN88XX_INT_STS); + int irq_status; - return rc < 0 ? 
rc : 0; + irq_status = phy_read(phydev, LAN88XX_INT_STS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & LAN88XX_INT_STS_LINK_CHANGE_)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int lan88xx_suspend(struct phy_device *phydev) @@ -340,8 +356,8 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, - .ack_interrupt = lan88xx_phy_ack_interrupt, .config_intr = lan88xx_phy_config_intr, + .handle_interrupt = lan88xx_handle_interrupt, .suspend = lan88xx_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index fed3e395f18e..4dc00bd5a8d2 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -189,18 +189,34 @@ static int lan87xx_phy_config_intr(struct phy_device *phydev) rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, 0x7FFF); rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); val = LAN87XX_MASK_LINK_UP | LAN87XX_MASK_LINK_DOWN; - } + rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); + } else { + rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); + if (rc) + return rc; - rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); + rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); + } return rc < 0 ? rc : 0; } -static int lan87xx_phy_ack_interrupt(struct phy_device *phydev) +static irqreturn_t lan87xx_handle_interrupt(struct phy_device *phydev) { - int rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); + int irq_status; - return rc < 0 ? rc : 0; + irq_status = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (irq_status == 0) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; } static int lan87xx_config_init(struct phy_device *phydev) @@ -219,10 +235,9 @@ static struct phy_driver microchip_t1_phy_driver[] = { .features = PHY_BASIC_T1_FEATURES, .config_init = lan87xx_config_init, - .config_aneg = genphy_config_aneg, - .ack_interrupt = lan87xx_phy_ack_interrupt, .config_intr = lan87xx_phy_config_intr, + .handle_interrupt = lan87xx_handle_interrupt, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 6bc7406a1ce7..2f2157e3deab 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1498,7 +1498,7 @@ static irqreturn_t vsc8584_handle_interrupt(struct phy_device *phydev) vsc8584_handle_macsec_interrupt(phydev); if (irq_status & MII_VSC85XX_INT_MASK_LINK_CHG) - phy_mac_interrupt(phydev); + phy_trigger_machine(phydev); return IRQ_HANDLED; } @@ -1541,16 +1541,6 @@ static int vsc85xx_config_init(struct phy_device *phydev) return 0; } -static int vsc8584_did_interrupt(struct phy_device *phydev) -{ - int rc = 0; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) - rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); - - return (rc < 0) ? 
0 : rc & MII_VSC85XX_INT_MASK_MASK; -} - static int vsc8514_config_pre_init(struct phy_device *phydev) { /* These are the settings to override the silicon default @@ -1933,6 +1923,10 @@ static int vsc85xx_config_intr(struct phy_device *phydev) int rc; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + rc = vsc85xx_ack_interrupt(phydev); + if (rc) + return rc; + vsc8584_config_macsec_intr(phydev); vsc8584_config_ts_intr(phydev); @@ -1943,11 +1937,33 @@ static int vsc85xx_config_intr(struct phy_device *phydev) if (rc < 0) return rc; rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (rc < 0) + return rc; + + rc = vsc85xx_ack_interrupt(phydev); } return rc; } +static irqreturn_t vsc85xx_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_VSC85XX_INT_MASK_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static int vsc85xx_config_aneg(struct phy_device *phydev) { int rc; @@ -2114,7 +2130,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2139,9 +2155,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_aneg = &vsc85xx_config_aneg, .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8574_probe, @@ -2163,7 +2178,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc8514_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2187,7 +2202,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2211,7 +2226,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2235,7 +2250,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2259,7 +2274,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + 
.handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, @@ -2283,9 +2298,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8574_probe, @@ -2308,9 +2322,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8584_probe, @@ -2333,9 +2346,7 @@ static struct phy_driver vsc85xx_driver[] = { .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .handle_interrupt = &vsc8584_handle_interrupt, - .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8574_probe, @@ -2359,9 +2370,8 @@ static struct phy_driver vsc85xx_driver[] = { .config_aneg = &vsc85xx_config_aneg, .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, - .ack_interrupt = &vsc85xx_ack_interrupt, + .handle_interrupt = vsc85xx_handle_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8574_probe, @@ -2386,9 +2396,7 @@ static struct phy_driver vsc85xx_driver[] = { .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .handle_interrupt = &vsc8584_handle_interrupt, - .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8584_probe, @@ -2411,9 +2419,7 @@ static struct phy_driver vsc85xx_driver[] = { .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .handle_interrupt = &vsc8584_handle_interrupt, - .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8584_probe, @@ -2436,9 +2442,7 @@ static struct phy_driver vsc85xx_driver[] = { .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .handle_interrupt = &vsc8584_handle_interrupt, - .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, - .did_interrupt = &vsc8584_did_interrupt, .suspend = &genphy_suspend, .resume = &genphy_resume, .probe = &vsc8584_probe, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index b97ee79f3cdf..d8a61456d1ce 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -136,7 +136,7 @@ static void vsc85xx_ts_write_csr(struct phy_device *phydev, enum ts_blk blk, phy_ts_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_1588); - if (!cond || (cond && upper)) + if (!cond || upper) phy_ts_base_write(phydev, MSCC_PHY_TS_CSR_DATA_MSB, upper); phy_ts_base_write(phydev, MSCC_PHY_TS_CSR_DATA_LSB, lower); @@ -1510,6 +1510,8 @@ void vsc8584_config_ts_intr(struct 
phy_device *phydev) int vsc8584_ptp_init(struct phy_device *phydev) { switch (phydev->phy_id & phydev->drv->phy_id_mask) { + case PHY_ID_VSC8572: + case PHY_ID_VSC8574: case PHY_ID_VSC8575: case PHY_ID_VSC8582: case PHY_ID_VSC8584: diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index a72fa0d2e7c7..afd7afa1f498 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -44,6 +44,9 @@ #define MII_CFG2_SLEEP_REQUEST_TO_16MS 0x3 #define MII_INTSRC 21 +#define MII_INTSRC_LINK_FAIL BIT(10) +#define MII_INTSRC_LINK_UP BIT(9) +#define MII_INTSRC_MASK (MII_INTSRC_LINK_FAIL | MII_INTSRC_LINK_UP) #define MII_INTSRC_TEMP_ERR BIT(1) #define MII_INTSRC_UV_ERR BIT(3) @@ -597,11 +600,42 @@ static int tja11xx_ack_interrupt(struct phy_device *phydev) static int tja11xx_config_intr(struct phy_device *phydev) { int value = 0; + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = tja11xx_ack_interrupt(phydev); + if (err) + return err; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP; + err = phy_write(phydev, MII_INTEN, value); + } else { + err = phy_write(phydev, MII_INTEN, value); + if (err) + return err; + + err = tja11xx_ack_interrupt(phydev); + } + + return err; +} + +static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, MII_INTSRC); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & MII_INTSRC_MASK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); - return phy_write(phydev, MII_INTEN, value); + return IRQ_HANDLED; } static int tja11xx_cable_test_start(struct phy_device *phydev) @@ -747,8 +781,8 @@ static struct phy_driver tja11xx_driver[] = { .get_sset_count = tja11xx_get_sset_count, .get_strings = tja11xx_get_strings, .get_stats = tja11xx_get_stats, - .ack_interrupt = tja11xx_ack_interrupt, .config_intr = tja11xx_config_intr, + .handle_interrupt = tja11xx_handle_interrupt, .cable_test_start = tja11xx_cable_test_start, .cable_test_get_status = tja11xx_cable_test_get_status, }, { @@ -770,8 +804,8 @@ static struct phy_driver tja11xx_driver[] = { .get_sset_count = tja11xx_get_sset_count, .get_strings = tja11xx_get_strings, .get_stats = tja11xx_get_stats, - .ack_interrupt = tja11xx_ack_interrupt, .config_intr = tja11xx_config_intr, + .handle_interrupt = tja11xx_handle_interrupt, .cable_test_start = tja11xx_cable_test_start, .cable_test_get_status = tja11xx_cable_test_get_status, } diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index bd11e62bfdfe..077f2929c45e 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -9,7 +9,7 @@ #include <linux/phy.h> /** - * genphy_c45_setup_forced - configures a forced speed + * genphy_c45_pma_setup_forced - configures a forced speed * @phydev: target phy_device struct */ int genphy_c45_pma_setup_forced(struct phy_device *phydev) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 35525a671400..dce86bad8231 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -489,14 +489,15 @@ void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies) EXPORT_SYMBOL(phy_queue_state_machine); /** - * phy_queue_state_machine - Trigger the state machine to run now + * phy_trigger_machine - Trigger the state machine to run now * * @phydev: the phy_device struct */ -static void phy_trigger_machine(struct phy_device *phydev) +void phy_trigger_machine(struct phy_device *phydev) { 
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index bd11e62bfdfe..077f2929c45e 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -9,7 +9,7 @@
 #include <linux/phy.h>
 
 /**
- * genphy_c45_setup_forced - configures a forced speed
+ * genphy_c45_pma_setup_forced - configures a forced speed
  * @phydev: target phy_device struct
  */
 int genphy_c45_pma_setup_forced(struct phy_device *phydev)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 35525a671400..dce86bad8231 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -489,14 +489,15 @@ void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
 EXPORT_SYMBOL(phy_queue_state_machine);
 
 /**
- * phy_queue_state_machine - Trigger the state machine to run now
+ * phy_trigger_machine - Trigger the state machine to run now
  *
  * @phydev: the phy_device struct
  */
-static void phy_trigger_machine(struct phy_device *phydev)
+void phy_trigger_machine(struct phy_device *phydev)
 {
 	phy_queue_state_machine(phydev, 0);
 }
+EXPORT_SYMBOL(phy_trigger_machine);
 
 static void phy_abort_cable_test(struct phy_device *phydev)
 {
@@ -924,7 +925,7 @@ void phy_stop_machine(struct phy_device *phydev)
  * Must not be called from interrupt context, or while the
  * phydev->lock is held.
  */
-static void phy_error(struct phy_device *phydev)
+void phy_error(struct phy_device *phydev)
 {
 	WARN_ON(1);
 
@@ -934,6 +935,7 @@ static void phy_error(struct phy_device *phydev)
 
 	phy_trigger_machine(phydev);
 }
+EXPORT_SYMBOL(phy_error);
 
 /**
  * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 5dab6be6fc38..81f672911305 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1156,7 +1156,7 @@ void phy_attached_info(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_attached_info);
 
-#define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%s)"
+#define ATTACHED_FMT "attached PHY driver %s(mii_bus:phy_addr=%s, irq=%s)"
 char *phy_attached_info_irq(struct phy_device *phydev)
 {
 	char *irq_str;
@@ -1181,19 +1181,17 @@ EXPORT_SYMBOL(phy_attached_info_irq);
 
 void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 {
-	const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
+	const char *unbound = phydev->drv ? "" : "[unbound] ";
 	char *irq_str = phy_attached_info_irq(phydev);
 
 	if (!fmt) {
-		phydev_info(phydev, ATTACHED_FMT "\n",
-			    drv_name, phydev_name(phydev),
-			    irq_str);
+		phydev_info(phydev, ATTACHED_FMT "\n", unbound,
+			    phydev_name(phydev), irq_str);
 	} else {
 		va_list ap;
 
-		phydev_info(phydev, ATTACHED_FMT,
-			    drv_name, phydev_name(phydev),
-			    irq_str);
+		phydev_info(phydev, ATTACHED_FMT, unbound,
+			    phydev_name(phydev), irq_str);
 
 		va_start(ap, fmt);
 		vprintk(fmt, ap);
@@ -2463,6 +2461,19 @@ int genphy_soft_reset(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_soft_reset);
 
+irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev)
+{
+	/* It seems there are cases where the interrupts are handled by another
+	 * entity (i.e. an IRQ controller embedded inside the PHY) and do not
+	 * need any other interaction from phylib. In this case, just trigger
+	 * the state machine directly.
+	 */
+	phy_trigger_machine(phydev);
+
+	return 0;
+}
+EXPORT_SYMBOL(genphy_handle_interrupt_no_ack);
+
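Note: the new genphy_handle_interrupt_no_ack() helper is intended for PHYs whose interrupts are acknowledged by another entity, typically an irqchip embedded in the PHY itself. A hedged usage sketch with placeholder IDs (not a real device; the RTL8366RB entry later in this patch wires up exactly this pairing):

#include <linux/phy.h>

static struct phy_driver demo_driver[] = {
	{
		.phy_id		= 0x00000000,	/* placeholder, not a real ID */
		.phy_id_mask	= 0xffffffff,
		.name		= "demo-phy-with-irqchip",
		/* Nothing to enable or ack at the phylib level: the
		 * embedded IRQ controller handles both.
		 */
		.config_intr	= genphy_no_config_intr,
		.handle_interrupt = genphy_handle_interrupt_no_ack,
	},
};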
 /**
  * genphy_read_abilities - read PHY abilities from Clause 22 registers
  * @phydev: target phy_device struct
@@ -2735,7 +2746,7 @@ static int phy_get_int_delay_property(struct device *dev, const char *name)
 #endif
 
 /**
- * phy_get_delay_index - returns the index of the internal delay
+ * phy_get_internal_delay - returns the index of the internal delay
  * @phydev: phy_device struct
  * @dev: pointer to the devices device struct
  * @delay_values: array of delays the PHY supports
@@ -2815,7 +2826,7 @@ EXPORT_SYMBOL(phy_get_internal_delay);
 
 static bool phy_drv_supports_irq(struct phy_driver *phydrv)
 {
-	return phydrv->config_intr && phydrv->ack_interrupt;
+	return phydrv->config_intr && (phydrv->ack_interrupt || phydrv->handle_interrupt);
 }
 
 /**
@@ -2947,6 +2958,13 @@ static int phy_remove(struct device *dev)
 	return 0;
 }
 
+static void phy_shutdown(struct device *dev)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+
+	phy_disable_interrupts(phydev);
+}
+
 /**
  * phy_driver_register - register a phy_driver with the PHY layer
  * @new_driver: new phy_driver to register
@@ -2970,6 +2988,7 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
 	new_driver->mdiodrv.driver.bus = &mdio_bus_type;
 	new_driver->mdiodrv.driver.probe = phy_probe;
 	new_driver->mdiodrv.driver.remove = phy_remove;
+	new_driver->mdiodrv.driver.shutdown = phy_shutdown;
 	new_driver->mdiodrv.driver.owner = owner;
 	new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
 
diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c
index 59a94e07e7c5..f550576eb9da 100644
--- a/drivers/net/phy/phy_led_triggers.c
+++ b/drivers/net/phy/phy_led_triggers.c
@@ -66,11 +66,11 @@ static void phy_led_trigger_format_name(struct phy_device *phy, char *buf,
 
 static int phy_led_trigger_register(struct phy_device *phy,
 				    struct phy_led_trigger *plt,
-				    unsigned int speed)
+				    unsigned int speed,
+				    const char *suffix)
 {
 	plt->speed = speed;
-	phy_led_trigger_format_name(phy, plt->name, sizeof(plt->name),
-				    phy_speed_to_str(speed));
+	phy_led_trigger_format_name(phy, plt->name, sizeof(plt->name), suffix);
 	plt->trigger.name = plt->name;
 
 	return led_trigger_register(&plt->trigger);
@@ -99,12 +99,7 @@ int phy_led_triggers_register(struct phy_device *phy)
 		goto out_clear;
 	}
 
-	phy_led_trigger_format_name(phy, phy->led_link_trigger->name,
-				    sizeof(phy->led_link_trigger->name),
-				    "link");
-	phy->led_link_trigger->trigger.name = phy->led_link_trigger->name;
-
-	err = led_trigger_register(&phy->led_link_trigger->trigger);
+	err = phy_led_trigger_register(phy, phy->led_link_trigger, 0, "link");
 	if (err)
 		goto out_free_link;
 
@@ -119,7 +114,8 @@ int phy_led_triggers_register(struct phy_device *phy)
 
 	for (i = 0; i < phy->phy_num_led_triggers; i++) {
 		err = phy_led_trigger_register(phy, &phy->phy_led_triggers[i],
-					       speeds[i]);
+					       speeds[i],
+					       phy_speed_to_str(speeds[i]));
 		if (err)
 			goto out_unreg;
 	}
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index fe2296fdda19..84f6e197f965 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1649,7 +1649,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
 EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam);
 
 /**
- * phylink_ethtool_get_eee_err() - read the energy efficient ethernet error
+ * phylink_get_eee_err() - read the energy efficient ethernet error
  *	counter
  * @pl: a pointer to a &struct phylink returned from phylink_create().
  *
@@ -2515,9 +2515,10 @@ int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode,
 
 	changed = ret > 0;
 
+	/* Ensure ISOLATE bit is disabled */
 	bmcr = mode == MLO_AN_INBAND ? BMCR_ANENABLE : 0;
 	ret = mdiobus_modify(pcs->bus, pcs->addr, MII_BMCR,
-			     BMCR_ANENABLE, bmcr);
+			     BMCR_ANENABLE | BMCR_ISOLATE, bmcr);
 	if (ret < 0)
 		return ret;
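Note: the widened mask works because mdiobus_modify() is a read-modify-write helper: bits in the mask argument are cleared first, then the value bits are OR-ed back in. A rough model in plain C (not the mdio core code, just the bit arithmetic):

/* Sketch of what the widened mask buys: every bit in @mask is cleared
 * before @set is OR-ed back in, so adding BMCR_ISOLATE to the mask with
 * no matching bit in @set forces ISOLATE off.
 */
static int demo_mii_modify(int old, int mask, int set)
{
	return (old & ~mask) | set;
}

/* demo_mii_modify(bmcr, BMCR_ANENABLE | BMCR_ISOLATE, BMCR_ANENABLE)
 * -> ANENABLE set, ISOLATE cleared, all other BMCR bits preserved.
 */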
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 575580d3ffe0..f71eda945c6a 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -41,6 +41,12 @@
 #define RTL8211E_RX_DELAY			BIT(11)
 
 #define RTL8201F_ISR				0x1e
+#define RTL8201F_ISR_ANERR			BIT(15)
+#define RTL8201F_ISR_DUPLEX			BIT(13)
+#define RTL8201F_ISR_LINK			BIT(11)
+#define RTL8201F_ISR_MASK			(RTL8201F_ISR_ANERR | \
+						 RTL8201F_ISR_DUPLEX | \
+						 RTL8201F_ISR_LINK)
 #define RTL8201F_IER				0x13
 
 #define RTL8366RB_POWER_SAVE			0x15
@@ -102,24 +108,45 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev)
 static int rtl8201_config_intr(struct phy_device *phydev)
 {
 	u16 val;
+	int err;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		err = rtl8201_ack_interrupt(phydev);
+		if (err)
+			return err;
 
-	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
 		val = BIT(13) | BIT(12) | BIT(11);
-	else
+		err = phy_write_paged(phydev, 0x7, RTL8201F_IER, val);
+	} else {
 		val = 0;
+		err = phy_write_paged(phydev, 0x7, RTL8201F_IER, val);
+		if (err)
+			return err;
 
-	return phy_write_paged(phydev, 0x7, RTL8201F_IER, val);
+		err = rtl8201_ack_interrupt(phydev);
+	}
+
+	return err;
 }
 
 static int rtl8211b_config_intr(struct phy_device *phydev)
 {
 	int err;
 
-	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		err = rtl821x_ack_interrupt(phydev);
+		if (err)
+			return err;
+
 		err = phy_write(phydev, RTL821x_INER,
 				RTL8211B_INER_INIT);
-	else
+	} else {
 		err = phy_write(phydev, RTL821x_INER, 0);
+		if (err)
+			return err;
+
+		err = rtl821x_ack_interrupt(phydev);
+	}
 
 	return err;
 }
@@ -128,11 +155,20 @@ static int rtl8211e_config_intr(struct phy_device *phydev)
 {
 	int err;
 
-	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		err = rtl821x_ack_interrupt(phydev);
+		if (err)
+			return err;
+
 		err = phy_write(phydev, RTL821x_INER,
 				RTL8211E_INER_LINK_STATUS);
-	else
+	} else {
 		err = phy_write(phydev, RTL821x_INER, 0);
+		if (err)
+			return err;
+
+		err = rtl821x_ack_interrupt(phydev);
+	}
 
 	return err;
 }
@@ -140,13 +176,85 @@ static int rtl8211e_config_intr(struct phy_device *phydev)
 static int rtl8211f_config_intr(struct phy_device *phydev)
 {
 	u16 val;
+	int err;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		err = rtl8211f_ack_interrupt(phydev);
+		if (err)
+			return err;
 
-	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
 		val = RTL8211F_INER_LINK_STATUS;
-	else
+		err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
+	} else {
 		val = 0;
+		err = phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
+		if (err)
+			return err;
+
+		err = rtl8211f_ack_interrupt(phydev);
+	}
+
+	return err;
+}
+
+static irqreturn_t rtl8201_handle_interrupt(struct phy_device *phydev)
+{
+	int irq_status;
+
+	irq_status = phy_read(phydev, RTL8201F_ISR);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & RTL8201F_ISR_MASK))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rtl821x_handle_interrupt(struct phy_device *phydev)
+{
+	int irq_status, irq_enabled;
+
+	irq_status = phy_read(phydev, RTL821x_INSR);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	irq_enabled = phy_read(phydev, RTL821x_INER);
+	if (irq_enabled < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & irq_enabled))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
 
-	return phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev)
+{
+	int irq_status;
+
+	irq_status = phy_read_paged(phydev, 0xa43, RTL8211F_INSR);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & RTL8211F_INER_LINK_STATUS))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
+
+	return IRQ_HANDLED;
 }
 
 static int rtl8211_config_aneg(struct phy_device *phydev)
@@ -556,8 +664,8 @@ static struct phy_driver realtek_drvs[] = {
 	}, {
 		PHY_ID_MATCH_EXACT(0x001cc816),
 		.name		= "RTL8201F Fast Ethernet",
-		.ack_interrupt	= &rtl8201_ack_interrupt,
 		.config_intr	= &rtl8201_config_intr,
+		.handle_interrupt = rtl8201_handle_interrupt,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.read_page	= rtl821x_read_page,
@@ -582,8 +690,8 @@ static struct phy_driver realtek_drvs[] = {
 	}, {
 		PHY_ID_MATCH_EXACT(0x001cc912),
 		.name		= "RTL8211B Gigabit Ethernet",
-		.ack_interrupt	= &rtl821x_ack_interrupt,
 		.config_intr	= &rtl8211b_config_intr,
+		.handle_interrupt = rtl821x_handle_interrupt,
 		.read_mmd	= &genphy_read_mmd_unsupported,
 		.write_mmd	= &genphy_write_mmd_unsupported,
 		.suspend	= rtl8211b_suspend,
@@ -601,8 +709,8 @@ static struct phy_driver realtek_drvs[] = {
 	}, {
 		PHY_ID_MATCH_EXACT(0x001cc914),
 		.name		= "RTL8211DN Gigabit Ethernet",
-		.ack_interrupt	= rtl821x_ack_interrupt,
 		.config_intr	= rtl8211e_config_intr,
+		.handle_interrupt = rtl821x_handle_interrupt,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.read_page	= rtl821x_read_page,
@@ -611,8 +719,8 @@ static struct phy_driver realtek_drvs[] = {
 		PHY_ID_MATCH_EXACT(0x001cc915),
 		.name		= "RTL8211E Gigabit Ethernet",
 		.config_init	= &rtl8211e_config_init,
-		.ack_interrupt	= &rtl821x_ack_interrupt,
 		.config_intr	= &rtl8211e_config_intr,
+		.handle_interrupt = rtl821x_handle_interrupt,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.read_page	= rtl821x_read_page,
@@ -621,8 +729,8 @@ static struct phy_driver realtek_drvs[] = {
 		PHY_ID_MATCH_EXACT(0x001cc916),
 		.name		= "RTL8211F Gigabit Ethernet",
 		.config_init	= &rtl8211f_config_init,
-		.ack_interrupt	= &rtl8211f_ack_interrupt,
 		.config_intr	= &rtl8211f_config_intr,
+		.handle_interrupt = rtl8211f_handle_interrupt,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.read_page	= rtl821x_read_page,
@@ -662,6 +770,46 @@ static struct phy_driver realtek_drvs[] = {
 		.read_mmd	= rtl822x_read_mmd,
 		.write_mmd	= rtl822x_write_mmd,
 	}, {
+		PHY_ID_MATCH_EXACT(0x001cc838),
+		.name		= "RTL8226-CG 2.5Gbps PHY",
+		.get_features	= rtl822x_get_features,
+		.config_aneg	= rtl822x_config_aneg,
+		.read_status	= rtl822x_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= rtlgen_resume,
+		.read_page	= rtl821x_read_page,
+		.write_page	= rtl821x_write_page,
+	}, {
+		PHY_ID_MATCH_EXACT(0x001cc848),
+		.name		= "RTL8226B-CG_RTL8221B-CG 2.5Gbps PHY",
+		.get_features	= rtl822x_get_features,
+		.config_aneg	= rtl822x_config_aneg,
+		.read_status	= rtl822x_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= rtlgen_resume,
+		.read_page	= rtl821x_read_page,
+		.write_page	= rtl821x_write_page,
+	}, {
+		PHY_ID_MATCH_EXACT(0x001cc849),
+		.name		= "RTL8221B-VB-CG 2.5Gbps PHY",
+		.get_features	= rtl822x_get_features,
+		.config_aneg	= rtl822x_config_aneg,
+		.read_status	= rtl822x_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= rtlgen_resume,
+		.read_page	= rtl821x_read_page,
+		.write_page	= rtl821x_write_page,
+	}, {
+		PHY_ID_MATCH_EXACT(0x001cc84a),
+		.name		= "RTL8221B-VM-CG 2.5Gbps PHY",
+		.get_features	= rtl822x_get_features,
+		.config_aneg	= rtl822x_config_aneg,
+		.read_status	= rtl822x_read_status,
+		.suspend	= genphy_suspend,
+		.resume		= rtlgen_resume,
+		.read_page	= rtl821x_read_page,
+		.write_page	= rtl821x_write_page,
+	}, {
 		PHY_ID_MATCH_EXACT(0x001cc961),
 		.name		= "RTL8366RB Gigabit Ethernet",
 		.config_init	= &rtl8366rb_config_init,
@@ -670,8 +818,8 @@ static struct phy_driver realtek_drvs[] = {
 		 * irq is requested and ACKed by reading the status register,
 		 * which is done by the irqchip code.
 		 */
-		.ack_interrupt	= genphy_no_ack_interrupt,
 		.config_intr	= genphy_no_config_intr,
+		.handle_interrupt = genphy_handle_interrupt_no_ack,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 	},
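Note: each converted handler returns IRQ_NONE both on a read failure and when none of the expected status bits are set. On a shared interrupt line this lets the IRQ core attribute the event to another device, and it keeps the kernel's spurious-interrupt accounting truthful. A sketch of the contract the handlers follow, assuming a hypothetical PHY with a made-up status register at 0x1e:

#include <linux/interrupt.h>
#include <linux/phy.h>

static irqreturn_t demo_handle_interrupt(struct phy_device *phydev)
{
	/* 0x1e and BIT(11) are illustrative, not from this patch */
	int irq_status = phy_read(phydev, 0x1e);

	if (irq_status < 0) {
		phy_error(phydev);	/* exported earlier in this series */
		return IRQ_NONE;	/* could not even read: do not claim */
	}

	if (!(irq_status & BIT(11)))
		return IRQ_NONE;	/* latched event is not ours */

	phy_trigger_machine(phydev);	/* also exported in this series */

	return IRQ_HANDLED;		/* claim only events we caused */
}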
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 0fc39ac5ca88..33372756a451 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -48,6 +48,13 @@ struct smsc_phy_priv {
 	struct clk *refclk;
 };
 
+static int smsc_phy_ack_interrupt(struct phy_device *phydev)
+{
+	int rc = phy_read(phydev, MII_LAN83C185_ISF);
+
+	return rc < 0 ? rc : 0;
+}
+
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
 	struct smsc_phy_priv *priv = phydev->priv;
@@ -55,21 +62,47 @@ static int smsc_phy_config_intr(struct phy_device *phydev)
 	int rc;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		rc = smsc_phy_ack_interrupt(phydev);
+		if (rc)
+			return rc;
+
 		intmask = MII_LAN83C185_ISF_INT4 | MII_LAN83C185_ISF_INT6;
 		if (priv->energy_enable)
 			intmask |= MII_LAN83C185_ISF_INT7;
-	}
 
+		rc = phy_write(phydev, MII_LAN83C185_IM, intmask);
+	} else {
+		rc = phy_write(phydev, MII_LAN83C185_IM, intmask);
+		if (rc)
+			return rc;
 
-	rc = phy_write(phydev, MII_LAN83C185_IM, intmask);
+		rc = smsc_phy_ack_interrupt(phydev);
+	}
 
 	return rc < 0 ? rc : 0;
 }
 
-static int smsc_phy_ack_interrupt(struct phy_device *phydev)
+static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
 {
-	int rc = phy_read (phydev, MII_LAN83C185_ISF);
+	int irq_status, irq_enabled;
 
-	return rc < 0 ? rc : 0;
+	irq_enabled = phy_read(phydev, MII_LAN83C185_IM);
+	if (irq_enabled < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	irq_status = phy_read(phydev, MII_LAN83C185_ISF);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & irq_enabled))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
+
+	return IRQ_HANDLED;
 }
 
 static int smsc_phy_config_init(struct phy_device *phydev)
@@ -314,8 +347,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.soft_reset	= smsc_phy_reset,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
@@ -333,8 +366,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.soft_reset	= smsc_phy_reset,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	/* Statistics */
 	.get_sset_count = smsc_get_sset_count,
@@ -362,8 +395,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.config_aneg	= lan87xx_config_aneg,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	/* Statistics */
 	.get_sset_count = smsc_get_sset_count,
@@ -385,8 +418,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.config_init	= lan911x_config_init,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
@@ -410,8 +443,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.config_aneg	= lan87xx_config_aneg_ext,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	/* Statistics */
 	.get_sset_count = smsc_get_sset_count,
@@ -436,8 +469,8 @@ static struct phy_driver smsc_phy_driver[] = {
 	.soft_reset	= smsc_phy_reset,
 
 	/* IRQ related */
-	.ack_interrupt	= smsc_phy_ack_interrupt,
 	.config_intr	= smsc_phy_config_intr,
+	.handle_interrupt = smsc_phy_handle_interrupt,
 
 	/* Statistics */
 	.get_sset_count = smsc_get_sset_count,
diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c
index d735a01380ed..431fe5e0ce31 100644
--- a/drivers/net/phy/ste10Xp.c
+++ b/drivers/net/phy/ste10Xp.c
@@ -48,32 +48,55 @@ static int ste10Xp_config_init(struct phy_device *phydev)
 	return 0;
 }
 
+static int ste10Xp_ack_interrupt(struct phy_device *phydev)
+{
+	int err = phy_read(phydev, MII_XCIIS);
+
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
 static int ste10Xp_config_intr(struct phy_device *phydev)
 {
-	int err, value;
+	int err;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		/* clear any pending interrupts */
+		err = ste10Xp_ack_interrupt(phydev);
+		if (err)
+			return err;
+
 		/* Enable all STe101P interrupts (PR12) */
 		err = phy_write(phydev, MII_XIE, MII_XIE_DEFAULT_MASK);
-		/* clear any pending interrupts */
-		if (err == 0) {
-			value = phy_read(phydev, MII_XCIIS);
-			if (value < 0)
-				err = value;
-		}
-	} else
+	} else {
 		err = phy_write(phydev, MII_XIE, 0);
+		if (err)
+			return err;
+
+		err = ste10Xp_ack_interrupt(phydev);
+	}
 
 	return err;
 }
 
-static int ste10Xp_ack_interrupt(struct phy_device *phydev)
+static irqreturn_t ste10Xp_handle_interrupt(struct phy_device *phydev)
 {
-	int err = phy_read(phydev, MII_XCIIS);
-	if (err < 0)
-		return err;
+	int irq_status;
 
-	return 0;
+	irq_status = phy_read(phydev, MII_XCIIS);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & MII_XIE_DEFAULT_MASK))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
+
+	return IRQ_HANDLED;
 }
 
 static struct phy_driver ste10xp_pdriver[] = {
@@ -83,8 +106,8 @@ static struct phy_driver ste10xp_pdriver[] = {
 	.name = "STe101p",
 	/* PHY_BASIC_FEATURES */
 	.config_init = ste10Xp_config_init,
-	.ack_interrupt = ste10Xp_ack_interrupt,
 	.config_intr = ste10Xp_config_intr,
+	.handle_interrupt = ste10Xp_handle_interrupt,
 	.suspend = genphy_suspend,
 	.resume = genphy_resume,
 }, {
@@ -93,8 +116,8 @@ static struct phy_driver ste10xp_pdriver[] = {
 	.name = "STe100p",
 	/* PHY_BASIC_FEATURES */
 	.config_init = ste10Xp_config_init,
-	.ack_interrupt = ste10Xp_ack_interrupt,
 	.config_intr = ste10Xp_config_intr,
+	.handle_interrupt = ste10Xp_handle_interrupt,
 	.suspend = genphy_suspend,
 	.resume = genphy_resume,
 } };
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index bb680352708a..16704e243162 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -40,6 +40,11 @@
 #define MII_VSC8244_ISTAT_SPEED		0x4000
 #define MII_VSC8244_ISTAT_LINK		0x2000
 #define MII_VSC8244_ISTAT_DUPLEX	0x1000
+#define MII_VSC8244_ISTAT_MASK		(MII_VSC8244_ISTAT_SPEED | \
+					 MII_VSC8244_ISTAT_LINK | \
+					 MII_VSC8244_ISTAT_DUPLEX)
+
+#define MII_VSC8221_ISTAT_MASK		MII_VSC8244_ISTAT_LINK
 
 /* Vitesse Auxiliary Control/Status Register */
 #define MII_VSC8244_AUX_CONSTAT		0x1c
@@ -270,25 +275,14 @@ static int vsc8601_config_init(struct phy_device *phydev)
 	return 0;
 }
 
-static int vsc824x_ack_interrupt(struct phy_device *phydev)
-{
-	int err = 0;
-
-	/* Don't bother to ACK the interrupts if interrupts
-	 * are disabled. The 824x cannot clear the interrupts
-	 * if they are disabled.
-	 */
-	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
-		err = phy_read(phydev, MII_VSC8244_ISTAT);
-
-	return (err < 0) ? err : 0;
-}
-
 static int vsc82xx_config_intr(struct phy_device *phydev)
 {
 	int err;
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+		/* Don't bother to ACK the interrupts since the 824x cannot
+		 * clear the interrupts if they are disabled.
+		 */
 		err = phy_write(phydev, MII_VSC8244_IMASK,
 			(phydev->drv->phy_id == PHY_ID_VSC8234 ||
 			 phydev->drv->phy_id == PHY_ID_VSC8244 ||
@@ -311,6 +305,31 @@ static int vsc82xx_config_intr(struct phy_device *phydev)
 	return err;
 }
 
+static irqreturn_t vsc82xx_handle_interrupt(struct phy_device *phydev)
+{
+	int irq_status, irq_mask;
+
+	if (phydev->drv->phy_id == PHY_ID_VSC8244 ||
+	    phydev->drv->phy_id == PHY_ID_VSC8572 ||
+	    phydev->drv->phy_id == PHY_ID_VSC8601)
+		irq_mask = MII_VSC8244_ISTAT_MASK;
+	else
+		irq_mask = MII_VSC8221_ISTAT_MASK;
+
+	irq_status = phy_read(phydev, MII_VSC8244_ISTAT);
+	if (irq_status < 0) {
+		phy_error(phydev);
+		return IRQ_NONE;
+	}
+
+	if (!(irq_status & irq_mask))
+		return IRQ_NONE;
+
+	phy_trigger_machine(phydev);
+
+	return IRQ_HANDLED;
+}
+
 static int vsc8221_config_init(struct phy_device *phydev)
 {
 	int err;
@@ -390,8 +409,8 @@ static struct phy_driver vsc82xx_driver[] = {
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc824x_config_init,
 	.config_aneg	= &vsc82x4_config_aneg,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 }, {
 	.phy_id		= PHY_ID_VSC8244,
 	.name		= "Vitesse VSC8244",
@@ -399,8 +418,8 @@ static struct phy_driver vsc82xx_driver[] = {
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc824x_config_init,
 	.config_aneg	= &vsc82x4_config_aneg,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
}, {
 	.phy_id		= PHY_ID_VSC8572,
 	.name		= "Vitesse VSC8572",
@@ -408,16 +427,16 @@ static struct phy_driver vsc82xx_driver[] = {
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc824x_config_init,
 	.config_aneg	= &vsc82x4_config_aneg,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 }, {
 	.phy_id		= PHY_ID_VSC8601,
 	.name		= "Vitesse VSC8601",
 	.phy_id_mask	= 0x000ffff0,
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc8601_config_init,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 }, {
 	.phy_id		= PHY_ID_VSC7385,
 	.name		= "Vitesse VSC7385",
@@ -461,8 +480,8 @@ static struct phy_driver vsc82xx_driver[] = {
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc824x_config_init,
 	.config_aneg	= &vsc82x4_config_aneg,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 }, {
 	/* Vitesse 8221 */
 	.phy_id		= PHY_ID_VSC8221,
@@ -470,8 +489,8 @@ static struct phy_driver vsc82xx_driver[] = {
 	.name		= "Vitesse VSC8221",
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc8221_config_init,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 }, {
 	/* Vitesse 8211 */
 	.phy_id		= PHY_ID_VSC8211,
@@ -479,8 +498,8 @@ static struct phy_driver vsc82xx_driver[] = {
 	.name		= "Vitesse VSC8211",
 	/* PHY_GBIT_FEATURES */
 	.config_init	= &vsc8221_config_init,
-	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
+	.handle_interrupt = &vsc82xx_handle_interrupt,
 } };
 
 module_phy_driver(vsc82xx_driver);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 07f1f3933927..b4092127a92c 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -975,11 +975,11 @@ static void team_port_disable(struct team *team,
 }
 
 #define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
-			    NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+			    NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
 			    NETIF_F_HIGHDMA | NETIF_F_LRO)
 
 #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
-			   NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+			   NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE)
 
 static void __team_compute_features(struct team *team)
 {
@@ -1009,8 +1009,7 @@ static void __team_compute_features(struct team *team)
 	team->dev->vlan_features = vlan_features;
 	team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
 				     NETIF_F_HW_VLAN_CTAG_TX |
-				     NETIF_F_HW_VLAN_STAG_TX |
-				     NETIF_F_GSO_UDP_L4;
+				     NETIF_F_HW_VLAN_STAG_TX;
 	team->dev->hard_header_len = max_hard_header_len;
 
 	team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -2175,7 +2174,7 @@ static void team_setup(struct net_device *dev)
 			   NETIF_F_HW_VLAN_CTAG_RX |
 			   NETIF_F_HW_VLAN_CTAG_FILTER;
 
-	dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
+	dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
 	dev->features |= dev->hw_features;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 }
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index be69d272052f..3d45d56172cb 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -107,17 +107,6 @@ struct tap_filter {
 
 #define TUN_FLOW_EXPIRE (3 * HZ)
 
-struct tun_pcpu_stats {
-	u64_stats_t rx_packets;
-	u64_stats_t rx_bytes;
-	u64_stats_t tx_packets;
-	u64_stats_t tx_bytes;
-	struct u64_stats_sync syncp;
-	u32 rx_dropped;
-	u32 tx_dropped;
-	u32 rx_frame_errors;
-};
-
 /* A tun_file connects an open character device to a tuntap netdevice. It
  * also contains all socket related structures (except sock_fprog and tap_filter)
  * to serve as one transmit queue for tuntap device. The sock_fprog and
@@ -207,7 +196,7 @@ struct tun_struct {
 	void *security;
 	u32 flow_count;
 	u32 rx_batched;
-	struct tun_pcpu_stats __percpu *pcpu_stats;
+	atomic_long_t rx_frame_errors;
 	struct bpf_prog __rcu *xdp_prog;
 	struct tun_prog __rcu *steering_prog;
 	struct tun_prog __rcu *filter_prog;
@@ -1066,7 +1055,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 
 drop:
-	this_cpu_inc(tun->pcpu_stats->tx_dropped);
+	atomic_long_inc(&dev->tx_dropped);
 	skb_tx_error(skb);
 	kfree_skb(skb);
 	rcu_read_unlock();
@@ -1103,37 +1092,12 @@ static void tun_set_headroom(struct net_device *dev, int new_hr)
 static void
 tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0;
 	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_pcpu_stats *p;
-	int i;
-
-	for_each_possible_cpu(i) {
-		u64 rxpackets, rxbytes, txpackets, txbytes;
-		unsigned int start;
 
-		p = per_cpu_ptr(tun->pcpu_stats, i);
-		do {
-			start = u64_stats_fetch_begin(&p->syncp);
-			rxpackets	= u64_stats_read(&p->rx_packets);
-			rxbytes		= u64_stats_read(&p->rx_bytes);
-			txpackets	= u64_stats_read(&p->tx_packets);
-			txbytes		= u64_stats_read(&p->tx_bytes);
-		} while (u64_stats_fetch_retry(&p->syncp, start));
+	dev_get_tstats64(dev, stats);
 
-		stats->rx_packets	+= rxpackets;
-		stats->rx_bytes		+= rxbytes;
-		stats->tx_packets	+= txpackets;
-		stats->tx_bytes		+= txbytes;
-
-		/* u32 counters */
-		rx_dropped	+= p->rx_dropped;
-		rx_frame_errors	+= p->rx_frame_errors;
-		tx_dropped	+= p->tx_dropped;
-	}
-	stats->rx_dropped  = rx_dropped;
-	stats->rx_frame_errors = rx_frame_errors;
-	stats->tx_dropped = tx_dropped;
+	stats->rx_frame_errors +=
+		(unsigned long)atomic_long_read(&tun->rx_frame_errors);
 }
 
 static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
@@ -1247,7 +1211,7 @@ resample:
 		void *frame = tun_xdp_to_ptr(xdp);
 
 		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
-			this_cpu_inc(tun->pcpu_stats->tx_dropped);
+			atomic_long_inc(&dev->tx_dropped);
 			xdp_return_frame_rx_napi(xdp);
 			drops++;
 		}
@@ -1283,7 +1247,7 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_select_queue	= tun_select_queue,
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= tun_set_headroom,
-	.ndo_get_stats64	= tun_net_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_bpf		= tun_xdp,
 	.ndo_xdp_xmit		= tun_xdp_xmit,
 	.ndo_change_carrier	= tun_net_change_carrier,
@@ -1577,7 +1541,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
 		trace_xdp_exception(tun->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
-		this_cpu_inc(tun->pcpu_stats->rx_dropped);
+		atomic_long_inc(&tun->dev->rx_dropped);
 		break;
 	}
 
@@ -1683,7 +1647,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	size_t total_len = iov_iter_count(from);
 	size_t len = total_len, align = tun->align, linear;
 	struct virtio_net_hdr gso = { 0 };
-	struct tun_pcpu_stats *stats;
 	int good_linear;
 	int copylen;
 	bool zerocopy = false;
@@ -1752,7 +1715,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		 */
 		skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
 		if (IS_ERR(skb)) {
-			this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			atomic_long_inc(&tun->dev->rx_dropped);
 			return PTR_ERR(skb);
 		}
 		if (!skb)
@@ -1781,7 +1744,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 		if (IS_ERR(skb)) {
 			if (PTR_ERR(skb) != -EAGAIN)
-				this_cpu_inc(tun->pcpu_stats->rx_dropped);
+				atomic_long_inc(&tun->dev->rx_dropped);
 			if (frags)
 				mutex_unlock(&tfile->napi_mutex);
 			return PTR_ERR(skb);
@@ -1795,7 +1758,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		if (err) {
 			err = -EFAULT;
 drop:
-			this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			atomic_long_inc(&tun->dev->rx_dropped);
 			kfree_skb(skb);
 			if (frags) {
 				tfile->napi.skb = NULL;
@@ -1807,7 +1770,7 @@ drop:
 	}
 
 	if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
-		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);
 		if (frags) {
 			tfile->napi.skb = NULL;
@@ -1830,7 +1793,7 @@ drop:
 			pi.proto = htons(ETH_P_IPV6);
 			break;
 		default:
-			this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			atomic_long_inc(&tun->dev->rx_dropped);
 			kfree_skb(skb);
 			return -EINVAL;
 		}
@@ -1910,7 +1873,7 @@ drop:
 					  skb_headlen(skb));
 
 		if (unlikely(headlen > skb_headlen(skb))) {
-			this_cpu_inc(tun->pcpu_stats->rx_dropped);
+			atomic_long_inc(&tun->dev->rx_dropped);
 			napi_free_frags(&tfile->napi);
 			rcu_read_unlock();
 			mutex_unlock(&tfile->napi_mutex);
@@ -1942,12 +1905,9 @@ drop:
 	}
 	rcu_read_unlock();
 
-	stats = get_cpu_ptr(tun->pcpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	u64_stats_inc(&stats->rx_packets);
-	u64_stats_add(&stats->rx_bytes, len);
-	u64_stats_update_end(&stats->syncp);
-	put_cpu_ptr(stats);
+	preempt_disable();
+	dev_sw_netstats_rx_add(tun->dev, len);
+	preempt_enable();
 
 	if (rxhash)
 		tun_flow_update(tun, rxhash, tfile);
@@ -1979,7 +1939,6 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
 {
 	int vnet_hdr_sz = 0;
 	size_t size = xdp_frame->len;
-	struct tun_pcpu_stats *stats;
 	size_t ret;
 
 	if (tun->flags & IFF_VNET_HDR) {
@@ -1996,12 +1955,9 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
 
 	ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
 
-	stats = get_cpu_ptr(tun->pcpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	u64_stats_inc(&stats->tx_packets);
-	u64_stats_add(&stats->tx_bytes, ret);
-	u64_stats_update_end(&stats->syncp);
-	put_cpu_ptr(tun->pcpu_stats);
+	preempt_disable();
+	dev_sw_netstats_tx_add(tun->dev, 1, ret);
+	preempt_enable();
 
 	return ret;
 }
@@ -2013,7 +1969,6 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 			    struct iov_iter *iter)
 {
 	struct tun_pi pi = { 0, skb->protocol };
-	struct tun_pcpu_stats *stats;
 	ssize_t total;
 	int vlan_offset = 0;
 	int vlan_hlen = 0;
@@ -2091,12 +2046,9 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 
 done:
 	/* caller is in process context, */
-	stats = get_cpu_ptr(tun->pcpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	u64_stats_inc(&stats->tx_packets);
-	u64_stats_add(&stats->tx_bytes, skb->len + vlan_hlen);
-	u64_stats_update_end(&stats->syncp);
-	put_cpu_ptr(tun->pcpu_stats);
+	preempt_disable();
+	dev_sw_netstats_tx_add(tun->dev, 1, skb->len + vlan_hlen);
+	preempt_enable();
 
 	return total;
 }
@@ -2235,11 +2187,11 @@ static void tun_free_netdev(struct net_device *dev)
 
 	BUG_ON(!(list_empty(&tun->disabled)));
 
-	free_percpu(tun->pcpu_stats);
-	/* We clear pcpu_stats so that tun_set_iff() can tell if
+	free_percpu(dev->tstats);
+	/* We clear tstats so that tun_set_iff() can tell if
	 * tun_free_netdev() has been called from register_netdevice().
	 */
-	tun->pcpu_stats = NULL;
+	dev->tstats = NULL;
 
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
@@ -2370,7 +2322,6 @@ static int tun_xdp_one(struct tun_struct *tun,
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct tun_xdp_hdr *hdr = xdp->data_hard_start;
 	struct virtio_net_hdr *gso = &hdr->gso;
-	struct tun_pcpu_stats *stats;
 	struct bpf_prog *xdp_prog;
 	struct sk_buff *skb = NULL;
 	u32 rxhash = 0, act;
@@ -2428,7 +2379,7 @@ build:
 	skb_put(skb, xdp->data_end - xdp->data);
 
 	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
-		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);
 		err = -EINVAL;
 		goto out;
@@ -2451,14 +2402,10 @@ build:
 
 	netif_receive_skb(skb);
 
-	/* No need for get_cpu_ptr() here since this function is
+	/* No need to disable preemption here since this function is
	 * always called with bh disabled
	 */
-	stats = this_cpu_ptr(tun->pcpu_stats);
-	u64_stats_update_begin(&stats->syncp);
-	u64_stats_inc(&stats->rx_packets);
-	u64_stats_add(&stats->rx_bytes, datasize);
-	u64_stats_update_end(&stats->syncp);
+	dev_sw_netstats_rx_add(tun->dev, datasize);
 
 	if (rxhash)
 		tun_flow_update(tun, rxhash, tfile);
@@ -2751,8 +2698,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun->rx_batched = 0;
 		RCU_INIT_POINTER(tun->steering_prog, NULL);
 
-		tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
-		if (!tun->pcpu_stats) {
+		dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+		if (!dev->tstats) {
 			err = -ENOMEM;
 			goto err_free_dev;
 		}
@@ -2807,16 +2754,16 @@ err_detach:
 	tun_detach_all(dev);
 	/* We are here because register_netdevice() has failed.
	 * If register_netdevice() already called tun_free_netdev()
-	 * while dealing with the error, tun->pcpu_stats has been cleared.
+	 * while dealing with the error, dev->tstats has been cleared.
	 */
-	if (!tun->pcpu_stats)
+	if (!dev->tstats)
 		goto err_free_dev;
 
err_free_flow:
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
err_free_stat:
-	free_percpu(tun->pcpu_stats);
+	free_percpu(dev->tstats);
err_free_dev:
 	free_netdev(dev);
 	return err;
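Note: the tun conversion above is one instance of a generic pattern this series pushes across drivers: drop driver-private per-cpu counters in favor of the core-provided dev->tstats together with the dev_sw_netstats_*() and dev_get_tstats64() helpers. A minimal sketch of the pattern for a hypothetical driver (the demo_* names are illustrative, the helper names come from the core netdev API used in the hunks above):

#include <linux/netdevice.h>

static int demo_init(struct net_device *dev)
{
	/* one pcpu_sw_netstats per CPU, owned by the core from now on */
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	return dev->tstats ? 0 : -ENOMEM;
}

static void demo_rx(struct net_device *dev, unsigned int len)
{
	dev_sw_netstats_rx_add(dev, len);	/* per-cpu, lockless update */
}

static const struct net_device_ops demo_netdev_ops = {
	.ndo_get_stats64 = dev_get_tstats64,	/* aggregates dev->tstats */
};

static void demo_uninit(struct net_device *dev)
{
	free_percpu(dev->tstats);
}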
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index b46993d5f997..1e3719028780 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -628,4 +628,13 @@ config USB_NET_AQC111
 	  This driver should work with at least the following devices:
 		  * Aquantia AQtion USB to 5GbE
 
+config USB_RTL8153_ECM
+	tristate "RTL8153 ECM support"
+	depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n)
+	default y
+	help
+	  This option supports ECM mode for RTL8153 ethernet adapter, when
+	  CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not
+	  supported by r8152 driver.
+
 endif # USB_NET_DRIVERS
diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile
index 99fd12be2111..4964f7b326fb 100644
--- a/drivers/net/usb/Makefile
+++ b/drivers/net/usb/Makefile
@@ -41,3 +41,4 @@ obj-$(CONFIG_USB_NET_QMI_WWAN)	+= qmi_wwan.o
 obj-$(CONFIG_USB_NET_CDC_MBIM)	+= cdc_mbim.o
 obj-$(CONFIG_USB_NET_CH9200)	+= ch9200.o
 obj-$(CONFIG_USB_NET_AQC111)	+= aqc111.o
+obj-$(CONFIG_USB_RTL8153_ECM)	+= r8153_ecm.o
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 0717c18015c9..73b97f4cc1ec 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -641,7 +641,7 @@ static const struct net_device_ops aqc111_netdev_ops = {
 	.ndo_stop		= usbnet_stop,
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_change_mtu		= aqc111_change_mtu,
 	.ndo_set_mac_address	= aqc111_set_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index ef548beba684..6e13d8165852 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -194,7 +194,7 @@ static const struct net_device_ops ax88172_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= asix_ioctl,
@@ -580,7 +580,7 @@ static const struct net_device_ops ax88772_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address 	= asix_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= asix_ioctl,
@@ -1050,7 +1050,7 @@ static const struct net_device_ops ax88178_netdev_ops = {
 	.ndo_stop		= usbnet_stop,
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address	= asix_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= asix_set_multicast,
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index fd3a04d98dc1..b404c9462dce 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -120,7 +120,7 @@ static const struct net_device_ops ax88172a_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address	= asix_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= phy_do_ioctl_running,
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 5541f3faedbc..d650b39b6e5d 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1031,7 +1031,7 @@ static const struct net_device_ops ax88179_netdev_ops = {
 	.ndo_stop		= usbnet_stop,
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_change_mtu		= ax88179_change_mtu,
 	.ndo_set_mac_address	= ax88179_set_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index eb100eb33de3..5db66272fc82 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -98,7 +98,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = {
 	.ndo_stop             = usbnet_stop,
 	.ndo_start_xmit       = usbnet_start_xmit,
 	.ndo_tx_timeout       = usbnet_tx_timeout,
-	.ndo_get_stats64      = usbnet_get_stats64,
+	.ndo_get_stats64      = dev_get_tstats64,
 	.ndo_change_mtu       = cdc_ncm_change_mtu,
 	.ndo_set_mac_address  = eth_mac_addr,
 	.ndo_validate_addr    = eth_validate_addr,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index e04f588538cc..2bac57d5e8d5 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -793,7 +793,7 @@ static const struct net_device_ops cdc_ncm_netdev_ops = {
 	.ndo_start_xmit	     = usbnet_start_xmit,
 	.ndo_tx_timeout	     = usbnet_tx_timeout,
 	.ndo_set_rx_mode     = usbnet_set_rx_mode,
-	.ndo_get_stats64     = usbnet_get_stats64,
+	.ndo_get_stats64     = dev_get_tstats64,
 	.ndo_change_mtu	     = cdc_ncm_change_mtu,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr   = eth_validate_addr,
@@ -1317,7 +1317,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 			break;
 	}
 
-	/* calculate frame number withing this NDP */
+	/* calculate frame number within this NDP */
 	if (ctx->is_ndp16) {
 		ndplen = le16_to_cpu(ndp.ndp16->wLength);
 		index = (ndplen - sizeof(struct usb_cdc_ncm_ndp16)) / sizeof(struct usb_cdc_ncm_dpe16) - 1;
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 915ac75b55fc..b5d2ac55a874 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -343,7 +343,7 @@ static const struct net_device_ops dm9601_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= dm9601_ioctl,
 	.ndo_set_rx_mode	= dm9601_set_multicast,
diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c
index cb5bc1a7fa5a..ed05f992c612 100644
--- a/drivers/net/usb/int51x1.c
+++ b/drivers/net/usb/int51x1.c
@@ -133,7 +133,7 @@ static const struct net_device_ops int51x1_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= int51x1_set_multicast,
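Note: for all the usbnet drivers converted here, dev_get_tstats64() is a drop-in replacement for usbnet_get_stats64() because the usbnet core already allocates dev->tstats. Roughly what the generic helper does (paraphrased as a sketch, not the exact core implementation):

#include <linux/netdevice.h>

static void demo_get_tstats64(struct net_device *dev,
			      struct rtnl_link_stats64 *s)
{
	/* copy the classic counters (errors, drops, ...) first */
	netdev_stats_to_stats64(s, &dev->stats);
	/* then fold in the per-cpu rx/tx packet and byte counters */
	dev_fetch_sw_netstats(s, dev->tstats);
}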
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 65b315bc60ab..bf243edeb064 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -822,20 +822,19 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
 				u32 length, u8 *data)
 {
 	int i;
-	int ret;
 	u32 buf;
 	unsigned long timeout;
 
-	ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+	lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
 
 	if (buf & OTP_PWR_DN_PWRDN_N_) {
 		/* clear it and wait to be cleared */
-		ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+		lan78xx_write_reg(dev, OTP_PWR_DN, 0);
 
 		timeout = jiffies + HZ;
 		do {
 			usleep_range(1, 10);
-			ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+			lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
 			if (time_after(jiffies, timeout)) {
 				netdev_warn(dev->net,
 					    "timeout on OTP_PWR_DN");
@@ -845,18 +844,18 @@
 	}
 
 	for (i = 0; i < length; i++) {
-		ret = lan78xx_write_reg(dev, OTP_ADDR1,
+		lan78xx_write_reg(dev, OTP_ADDR1,
 					((offset + i) >> 8) & OTP_ADDR1_15_11);
-		ret = lan78xx_write_reg(dev, OTP_ADDR2,
+		lan78xx_write_reg(dev, OTP_ADDR2,
 					((offset + i) & OTP_ADDR2_10_3));
 
-		ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
-		ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+		lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
+		lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
 
 		timeout = jiffies + HZ;
 		do {
 			udelay(1);
-			ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
+			lan78xx_read_reg(dev, OTP_STATUS, &buf);
 			if (time_after(jiffies, timeout)) {
 				netdev_warn(dev->net,
 					    "timeout on OTP_STATUS");
@@ -864,7 +863,7 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
 			}
 		} while (buf & OTP_STATUS_BUSY_);
 
-		ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
+		lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
 
 		data[i] = (u8)(buf & 0xFF);
 	}
@@ -876,20 +875,19 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
 				 u32 length, u8 *data)
 {
 	int i;
-	int ret;
 	u32 buf;
 	unsigned long timeout;
 
-	ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+	lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
 
 	if (buf & OTP_PWR_DN_PWRDN_N_) {
 		/* clear it and wait to be cleared */
-		ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+		lan78xx_write_reg(dev, OTP_PWR_DN, 0);
 
 		timeout = jiffies + HZ;
 		do {
 			udelay(1);
-			ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+			lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
 			if (time_after(jiffies, timeout)) {
 				netdev_warn(dev->net,
 					    "timeout on OTP_PWR_DN completion");
@@ -899,21 +897,21 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
 	}
 
 	/* set to BYTE program mode */
-	ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+	lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
 
 	for (i = 0; i < length; i++) {
-		ret = lan78xx_write_reg(dev, OTP_ADDR1,
+		lan78xx_write_reg(dev, OTP_ADDR1,
 					((offset + i) >> 8) & OTP_ADDR1_15_11);
-		ret = lan78xx_write_reg(dev, OTP_ADDR2,
+		lan78xx_write_reg(dev, OTP_ADDR2,
 					((offset + i) & OTP_ADDR2_10_3));
-		ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
-		ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
-		ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+		lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
+		lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
+		lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
 
 		timeout = jiffies + HZ;
 		do {
 			udelay(1);
-			ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
+			lan78xx_read_reg(dev, OTP_STATUS, &buf);
 			if (time_after(jiffies, timeout)) {
 				netdev_warn(dev->net,
 					    "Timeout on OTP_STATUS completion");
@@ -1038,7 +1036,6 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 			container_of(param, struct lan78xx_priv, set_multicast);
 	struct lan78xx_net *dev = pdata->dev;
 	int i;
-	int ret;
 
 	netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
 		  pdata->rfe_ctl);
@@ -1047,14 +1044,14 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
 			       DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
 
 	for (i = 1; i < NUM_OF_MAF; i++) {
-		ret = lan78xx_write_reg(dev, MAF_HI(i), 0);
-		ret = lan78xx_write_reg(dev, MAF_LO(i),
+		lan78xx_write_reg(dev, MAF_HI(i), 0);
+		lan78xx_write_reg(dev, MAF_LO(i),
 					pdata->pfilter_table[i][1]);
-		ret = lan78xx_write_reg(dev, MAF_HI(i),
+		lan78xx_write_reg(dev, MAF_HI(i),
 					pdata->pfilter_table[i][0]);
 	}
 
-	ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+	lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 }
 
 static void lan78xx_set_multicast(struct net_device *netdev)
@@ -1124,7 +1121,6 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
 				      u16 lcladv, u16 rmtadv)
 {
 	u32 flow = 0, fct_flow = 0;
-	int ret;
 	u8 cap;
 
 	if (dev->fc_autoneg)
@@ -1147,10 +1143,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
 		  (cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
 		  (cap & FLOW_CTRL_TX ? "enabled" : "disabled"));
 
-	ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
+	lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
 
 	/* threshold value should be set before enabling flow */
-	ret = lan78xx_write_reg(dev, FLOW, flow);
+	lan78xx_write_reg(dev, FLOW, flow);
 
 	return 0;
 }
@@ -1663,11 +1659,10 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
 static void lan78xx_init_mac_address(struct lan78xx_net *dev)
 {
 	u32 addr_lo, addr_hi;
-	int ret;
 	u8 addr[6];
 
-	ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
-	ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
+	lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
+	lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
 
 	addr[0] = addr_lo & 0xFF;
 	addr[1] = (addr_lo >> 8) & 0xFF;
@@ -1700,12 +1695,12 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
 			  (addr[2] << 16) | (addr[3] << 24);
 		addr_hi = addr[4] | (addr[5] << 8);
 
-		ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
-		ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+		lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+		lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
 	}
 
-	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
-	ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+	lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+	lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
 
 	ether_addr_copy(dev->net->dev_addr, addr);
 }
@@ -1838,7 +1833,7 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
 static void lan78xx_link_status_change(struct net_device *net)
 {
 	struct phy_device *phydev = net->phydev;
-	int ret, temp;
+	int temp;
 
 	/* At forced 100 F/H mode, chip may fail to set mode correctly
	 * when cable is switched between long(~50+m) and short one.
@@ -1849,7 +1844,7 @@ static void lan78xx_link_status_change(struct net_device *net)
 		/* disable phy interrupt */
 		temp = phy_read(phydev, LAN88XX_INT_MASK);
 		temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
-		ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
+		phy_write(phydev, LAN88XX_INT_MASK, temp);
 
 		temp = phy_read(phydev, MII_BMCR);
 		temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
@@ -1863,7 +1858,7 @@ static void lan78xx_link_status_change(struct net_device *net)
 		/* enable phy interrupt back */
 		temp = phy_read(phydev, LAN88XX_INT_MASK);
 		temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
-		ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
+		phy_write(phydev, LAN88XX_INT_MASK, temp);
 	}
 }
 
@@ -1917,14 +1912,13 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd)
 	struct lan78xx_net *dev =
 			container_of(data, struct lan78xx_net, domain_data);
 	u32 buf;
-	int ret;
 
 	/* call register access here because irq_bus_lock & irq_bus_sync_unlock
	 * are only two callbacks executed in non-atomic contex.
	 */
-	ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+	lan78xx_read_reg(dev, INT_EP_CTL, &buf);
 	if (buf != data->irqenable)
-		ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+		lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
 
 	mutex_unlock(&data->irq_lock);
 }
@@ -1991,7 +1985,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
 static int lan8835_fixup(struct phy_device *phydev)
 {
 	int buf;
-	int ret;
 	struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
 
 	/* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
@@ -2001,11 +1994,11 @@ static int lan8835_fixup(struct phy_device *phydev)
 	phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf);
 
 	/* RGMII MAC TXC Delay Enable */
-	ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
+	lan78xx_write_reg(dev, MAC_RGMII_ID,
 				MAC_RGMII_ID_TXC_DELAY_EN_);
 
 	/* RGMII TX DLL Tune Adjust */
-	ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+	lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
 
 	dev->interface = PHY_INTERFACE_MODE_RGMII_TXID;
 
@@ -2189,28 +2182,27 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 
 static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
 {
-	int ret = 0;
 	u32 buf;
 	bool rxenabled;
 
-	ret = lan78xx_read_reg(dev, MAC_RX, &buf);
+	lan78xx_read_reg(dev, MAC_RX, &buf);
 
 	rxenabled = ((buf & MAC_RX_RXEN_) != 0);
 
 	if (rxenabled) {
 		buf &= ~MAC_RX_RXEN_;
-		ret = lan78xx_write_reg(dev, MAC_RX, buf);
+		lan78xx_write_reg(dev, MAC_RX, buf);
 	}
 
 	/* add 4 to size for FCS */
 	buf &= ~MAC_RX_MAX_SIZE_MASK_;
 	buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
 
-	ret = lan78xx_write_reg(dev, MAC_RX, buf);
+	lan78xx_write_reg(dev, MAC_RX, buf);
 
 	if (rxenabled) {
 		buf |= MAC_RX_RXEN_;
-		ret = lan78xx_write_reg(dev, MAC_RX, buf);
+		lan78xx_write_reg(dev, MAC_RX, buf);
 	}
 
 	return 0;
@@ -2267,13 +2259,12 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
 	int ll_mtu = new_mtu + netdev->hard_header_len;
 	int old_hard_mtu = dev->hard_mtu;
 	int old_rx_urb_size = dev->rx_urb_size;
-	int ret;
 
 	/* no second zero-length packet read wanted after mtu-sized packets */
 	if ((ll_mtu % dev->maxpacket) == 0)
 		return -EDOM;
 
-	ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
+	lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
 
 	netdev->mtu = new_mtu;
 
@@ -2296,7 +2287,6 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
 	struct lan78xx_net *dev = netdev_priv(netdev);
 	struct sockaddr *addr = p;
 	u32 addr_lo, addr_hi;
-	int ret;
 
 	if (netif_running(netdev))
 		return -EBUSY;
@@ -2313,12 +2303,12 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
 	addr_hi = netdev->dev_addr[4] |
 		  netdev->dev_addr[5] << 8;
 
-	ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
-	ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+	lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+	lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
 
 	/* Added to support MAC address changes */
-	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
-	ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+	lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+	lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
 
 	return 0;
 }
@@ -2330,7 +2320,6 @@ static int lan78xx_set_features(struct net_device *netdev,
 	struct lan78xx_net *dev = netdev_priv(netdev);
 	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
 	unsigned long flags;
-	int ret;
 
 	spin_lock_irqsave(&pdata->rfe_ctl_lock, flags);
 
@@ -2354,7 +2343,7 @@ static int lan78xx_set_features(struct net_device *netdev,
 
 	spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags);
 
-	ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+	lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
 
 	return 0;
 }
@@ -3804,7 +3793,6 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len)
 static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 {
 	u32 buf;
-	int ret;
 	int mask_index;
 	u16 crc;
 	u32 temp_wucsr;
@@ -3813,26 +3801,26 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 	const u8 ipv6_multicast[3] = { 0x33, 0x33 };
 	const u8 arp_type[2] = { 0x08, 0x06 };
 
-	ret = lan78xx_read_reg(dev, MAC_TX, &buf);
+	lan78xx_read_reg(dev, MAC_TX, &buf);
 	buf &= ~MAC_TX_TXEN_;
-	ret = lan78xx_write_reg(dev, MAC_TX, buf);
-	ret = lan78xx_read_reg(dev, MAC_RX, &buf);
+	lan78xx_write_reg(dev, MAC_TX, buf);
+	lan78xx_read_reg(dev, MAC_RX, &buf);
 	buf &= ~MAC_RX_RXEN_;
-	ret = lan78xx_write_reg(dev, MAC_RX, buf);
+	lan78xx_write_reg(dev, MAC_RX, buf);
 
-	ret = lan78xx_write_reg(dev, WUCSR, 0);
-	ret = lan78xx_write_reg(dev, WUCSR2, 0);
-	ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+	lan78xx_write_reg(dev, WUCSR, 0);
+	lan78xx_write_reg(dev, WUCSR2, 0);
+	lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
 
 	temp_wucsr = 0;
 
 	temp_pmt_ctl = 0;
-	ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+	lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
 	temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_;
 	temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_;
 
 	for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++)
-		ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
 
 	mask_index = 0;
 	if (wol & WAKE_PHY) {
@@ -3861,30 +3849,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 
 		/* set WUF_CFG & WUF_MASK for IPv4 Multicast */
 		crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3);
-		ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
+		lan78xx_write_reg(dev, WUF_CFG(mask_index),
 					WUF_CFGX_EN_ |
 					WUF_CFGX_TYPE_MCAST_ |
 					(0 << WUF_CFGX_OFFSET_SHIFT_) |
 					(crc & WUF_CFGX_CRC16_MASK_));
 
-		ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
-		ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
+		lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
 		mask_index++;
 
 		/* for IPv6 Multicast */
 		crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2);
-		ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
+		lan78xx_write_reg(dev, WUF_CFG(mask_index),
 					WUF_CFGX_EN_ |
 					WUF_CFGX_TYPE_MCAST_ |
 					(0 << WUF_CFGX_OFFSET_SHIFT_) |
 					(crc & WUF_CFGX_CRC16_MASK_));
 
-		ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
-		ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
+		lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
 		mask_index++;
 
 		temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3905,16 +3893,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
		 * for packettype (offset 12,13) = ARP (0x0806)
		 */
 		crc = lan78xx_wakeframe_crc16(arp_type, 2);
-		ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
+		lan78xx_write_reg(dev, WUF_CFG(mask_index),
 					WUF_CFGX_EN_ |
 					WUF_CFGX_TYPE_ALL_ |
 					(0 << WUF_CFGX_OFFSET_SHIFT_) |
 					(crc & WUF_CFGX_CRC16_MASK_));
 
-		ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
-		ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-		ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
+		lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+		lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
 		mask_index++;
 
 		temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3922,7 +3910,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 		temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
 	}
 
-	ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+	lan78xx_write_reg(dev, WUCSR, temp_wucsr);
 
 	/* when multiple WOL bits are set */
 	if (hweight_long((unsigned long)wol) > 1) {
@@ -3930,16 +3918,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 		temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_;
 		temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
 	}
-	ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+	lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
 
 	/* clear WUPS */
-	ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+	lan78xx_read_reg(dev, PMT_CTL, &buf);
 	buf |= PMT_CTL_WUPS_MASK_;
-	ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+	lan78xx_write_reg(dev, PMT_CTL, buf);
 
-	ret = lan78xx_read_reg(dev, MAC_RX, &buf);
+	lan78xx_read_reg(dev, MAC_RX, &buf);
 	buf |= MAC_RX_RXEN_;
-	ret = lan78xx_write_reg(dev, MAC_RX, buf);
+	lan78xx_write_reg(dev, MAC_RX, buf);
 
 	return 0;
 }
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index 09bfa6a4dfbc..fc512b780d15 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -462,7 +462,7 @@ static const struct net_device_ops mcs7830_netdev_ops = {
 	.ndo_start_xmit		= usbnet_start_xmit,
 	.ndo_tx_timeout		= usbnet_tx_timeout,
 	.ndo_change_mtu		= usbnet_change_mtu,
-	.ndo_get_stats64	= usbnet_get_stats64,
+	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= mcs7830_ioctl,
 	.ndo_set_rx_mode	= mcs7830_set_multicast,
ret = dev_queue_xmit(skb); - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - struct pcpu_sw_netstats *stats64 = this_cpu_ptr(priv->stats64); - - u64_stats_update_begin(&stats64->syncp); - stats64->tx_packets++; - stats64->tx_bytes += len; - u64_stats_update_end(&stats64->syncp); - } else { + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) + dev_sw_netstats_tx_add(dev, 1, len); + else dev->stats.tx_dropped++; - } return ret; } -static void qmimux_get_stats64(struct net_device *net, - struct rtnl_link_stats64 *stats) -{ - struct qmimux_priv *priv = netdev_priv(net); - - netdev_stats_to_stats64(stats, &net->stats); - dev_fetch_sw_netstats(stats, priv->stats64); -} - static const struct net_device_ops qmimux_netdev_ops = { .ndo_open = qmimux_open, .ndo_stop = qmimux_stop, .ndo_start_xmit = qmimux_start_xmit, - .ndo_get_stats64 = qmimux_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, }; static void qmimux_setup(struct net_device *dev) @@ -224,14 +208,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) net->stats.rx_errors++; return 0; } else { - struct pcpu_sw_netstats *stats64; - struct qmimux_priv *priv = netdev_priv(net); - - stats64 = this_cpu_ptr(priv->stats64); - u64_stats_update_begin(&stats64->syncp); - stats64->rx_packets++; - stats64->rx_bytes += pkt_len; - u64_stats_update_end(&stats64->syncp); + dev_sw_netstats_rx_add(net, pkt_len); } skip: @@ -256,8 +233,8 @@ static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) priv->mux_id = mux_id; priv->real_dev = real_dev; - priv->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!priv->stats64) { + new_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!new_dev->tstats) { err = -ENOBUFS; goto out_free_newdev; } @@ -292,7 +269,7 @@ static void qmimux_unregister_device(struct net_device *dev, struct qmimux_priv *priv = netdev_priv(dev); struct net_device *real_dev = priv->real_dev; - free_percpu(priv->stats64); + free_percpu(dev->tstats); netdev_upper_dev_unlink(real_dev, dev); unregister_netdevice_queue(dev, head); @@ -598,7 +575,7 @@ static const struct net_device_ops qmi_wwan_netdev_ops = { .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = qmi_wwan_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b1770489aca5..c448d6089821 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -26,6 +26,7 @@ #include <linux/acpi.h> #include <linux/firmware.h> #include <crypto/hash.h> +#include <linux/usb/r8152.h> /* Information for net-next */ #define NETNEXT_VERSION "11" @@ -653,18 +654,6 @@ enum rtl_register_content { #define INTR_LINK 0x0004 -#define RTL8152_REQT_READ 0xc0 -#define RTL8152_REQT_WRITE 0x40 -#define RTL8152_REQ_GET_REGS 0x05 -#define RTL8152_REQ_SET_REGS 0x05 - -#define BYTE_EN_DWORD 0xff -#define BYTE_EN_WORD 0x33 -#define BYTE_EN_BYTE 0x11 -#define BYTE_EN_SIX_BYTES 0x3f -#define BYTE_EN_START_MASK 0x0f -#define BYTE_EN_END_MASK 0xf0 - #define RTL8153_MAX_PACKET 9216 /* 9K */ #define RTL8153_MAX_MTU (RTL8153_MAX_PACKET - VLAN_ETH_HLEN - \ ETH_FCS_LEN) @@ -689,21 +678,9 @@ enum rtl8152_flags { LENOVO_MACPASSTHRU, }; -/* Define these values to match your device */ -#define VENDOR_ID_REALTEK 0x0bda -#define VENDOR_ID_MICROSOFT 0x045e -#define VENDOR_ID_SAMSUNG 0x04e8 -#define 
VENDOR_ID_LENOVO 0x17ef
-#define VENDOR_ID_LINKSYS 0x13b1
-#define VENDOR_ID_NVIDIA 0x0955
-#define VENDOR_ID_TPLINK 0x2357
-
 #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082
 #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387
-#define MCU_TYPE_PLA 0x0100
-#define MCU_TYPE_USB 0x0000
-
 struct tally_counter {
 	__le64 tx_packets;
 	__le64 rx_packets;
@@ -898,6 +875,7 @@ struct fw_header {
  * struct fw_mac - a firmware block used by RTL_FW_PLA and RTL_FW_USB.
  * The layout of the firmware block is:
  * <struct fw_mac> + <info> + <firmware data>.
+ * @blk_hdr: firmware descriptor (type, length)
  * @fw_offset: offset of the firmware binary data. The start address of
  * the data would be the address of struct fw_mac + @fw_offset.
  * @fw_reg: the register to load the firmware. Depends on chip.
@@ -911,6 +889,7 @@ struct fw_header {
  * @bp_num: the break point number which needs to be set for this firmware.
  * Depends on the firmware.
  * @bp: break points. Depends on firmware.
+ * @reserved: reserved space (unused)
  * @fw_ver_reg: the register to store the fw version.
  * @fw_ver_data: the firmware version of the current type.
  * @info: additional information for debugging, and is followed by the
@@ -936,8 +915,10 @@ struct fw_mac {
 /**
  * struct fw_phy_patch_key - a firmware block used by RTL_FW_PHY_START.
  * This is used to set patch key when loading the firmware of PHY.
+ * @blk_hdr: firmware descriptor (type, length)
  * @key_reg: the register to write the patch key.
  * @key_data: patch key.
+ * @reserved: reserved space (unused)
  */
 struct fw_phy_patch_key {
 	struct fw_block blk_hdr;
@@ -950,6 +931,7 @@ struct fw_phy_patch_key {
  * struct fw_phy_nc - a firmware block used by RTL_FW_PHY_NC.
  * The layout of the firmware block is:
  * <struct fw_phy_nc> + <info> + <firmware data>.
+ * @blk_hdr: firmware descriptor (type, length)
  * @fw_offset: offset of the firmware binary data. The start address of
  * the data would be the address of struct fw_phy_nc + @fw_offset.
  * @fw_reg: the register to load the firmware. Depends on chip.
@@ -958,8 +940,9 @@ struct fw_phy_patch_key {
  * @patch_en_addr: the register of enabling patch mode. Depends on chip.
  * @patch_en_value: patch mode enabled mask. Depends on the firmware.
  * @mode_reg: the register of switching the mode.
- * @mod_pre: the mode needing to be set before loading the firmware.
- * @mod_post: the mode to be set when finishing to load the firmware.
+ * @mode_pre: the mode needing to be set before loading the firmware.
+ * @mode_post: the mode to be set when finishing to load the firmware.
+ * @reserved: reserved space (unused)
  * @bp_start: the start register of break points. Depends on chip.
  * @bp_num: the break point number which needs to be set for this firmware.
  * Depends on the firmware.
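
The kernel-doc above pins down the firmware block layout as <struct fw_mac> + <info> + <firmware data>, with @fw_offset counted from the start of the block. A minimal sketch of the implied pointer arithmetic, assuming the __le16 fw_offset field and a trailing flexible info[] member as defined in drivers/net/usb/r8152.c (the helper names are illustrative, not part of the patch):

/* Sketch only: locate the <info> string and <firmware data> of a fw_mac
 * block, per the layout documented in the kernel-doc above.
 */
static const char *fw_mac_info_sketch(const struct fw_mac *mac)
{
	return mac->info;	/* <info> directly follows the struct */
}

static const u8 *fw_mac_data_sketch(const struct fw_mac *mac)
{
	/* <firmware data> starts @fw_offset bytes from the block header */
	return (const u8 *)mac + le16_to_cpu(mac->fw_offset);
}

Separately, a recurring conversion throughout this series (see the qmi_wwan, usbnet, and vxlan hunks among others) drops per-driver pcpu_sw_netstats allocations in favour of the core dev->tstats pointer, dev_sw_netstats_tx_add()/dev_sw_netstats_rx_add() on the hot paths, and dev_get_tstats64() for .ndo_get_stats64. The tx helper amounts to the same u64_stats-protected per-CPU update the drivers used to open-code (compare the removed qmimux_start_xmit lines above); roughly, as a simplified sketch of the in-tree inline helper:

#include <linux/netdevice.h>

/* Approximation of dev_sw_netstats_tx_add(); shown for reference only. */
static inline void sw_netstats_tx_add_sketch(struct net_device *dev,
					     unsigned int packets,
					     unsigned int len)
{
	struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);

	u64_stats_update_begin(&tstats->syncp);
	tstats->tx_bytes += len;
	tstats->tx_packets += packets;
	u64_stats_update_end(&tstats->syncp);
}

dev_get_tstats64() in turn combines netdev_stats_to_stats64() with dev_fetch_sw_netstats(), which is why per-driver implementations such as usbnet_get_stats64() and qmimux_get_stats64() can be deleted wholesale once the driver allocates net->tstats with netdev_alloc_pcpu_stats().
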
@@ -6615,7 +6598,7 @@ static int rtl_fw_init(struct r8152 *tp) return 0; } -static u8 rtl_get_version(struct usb_interface *intf) +u8 rtl8152_get_version(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); u32 ocp_data = 0; @@ -6673,12 +6656,13 @@ static u8 rtl_get_version(struct usb_interface *intf) return version; } +EXPORT_SYMBOL_GPL(rtl8152_get_version); static int rtl8152_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); - u8 version = rtl_get_version(intf); + u8 version = rtl8152_get_version(intf); struct r8152 *tp; struct net_device *netdev; int ret; diff --git a/drivers/net/usb/r8153_ecm.c b/drivers/net/usb/r8153_ecm.c new file mode 100644 index 000000000000..2c3fabd38b16 --- /dev/null +++ b/drivers/net/usb/r8153_ecm.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/r8152.h> + +#define OCP_BASE 0xe86c + +static int pla_read_word(struct usbnet *dev, u16 index) +{ + u16 byen = BYTE_EN_WORD; + u8 shift = index & 2; + __le32 tmp; + int ret; + + if (shift) + byen <<= shift; + + index &= ~3; + + ret = usbnet_read_cmd(dev, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, index, + MCU_TYPE_PLA | byen, &tmp, sizeof(tmp)); + if (ret < 0) + goto out; + + ret = __le32_to_cpu(tmp); + ret >>= (shift * 8); + ret &= 0xffff; + +out: + return ret; +} + +static int pla_write_word(struct usbnet *dev, u16 index, u32 data) +{ + u32 mask = 0xffff; + u16 byen = BYTE_EN_WORD; + u8 shift = index & 2; + __le32 tmp; + int ret; + + data &= mask; + + if (shift) { + byen <<= shift; + mask <<= (shift * 8); + data <<= (shift * 8); + } + + index &= ~3; + + ret = usbnet_read_cmd(dev, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, index, + MCU_TYPE_PLA | byen, &tmp, sizeof(tmp)); + + if (ret < 0) + goto out; + + data |= __le32_to_cpu(tmp) & ~mask; + tmp = __cpu_to_le32(data); + + ret = usbnet_write_cmd(dev, RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, index, + MCU_TYPE_PLA | byen, &tmp, sizeof(tmp)); + +out: + return ret; +} + +static int r8153_ecm_mdio_read(struct net_device *netdev, int phy_id, int reg) +{ + struct usbnet *dev = netdev_priv(netdev); + int ret; + + ret = pla_write_word(dev, OCP_BASE, 0xa000); + if (ret < 0) + goto out; + + ret = pla_read_word(dev, 0xb400 + reg * 2); + +out: + return ret; +} + +static void r8153_ecm_mdio_write(struct net_device *netdev, int phy_id, int reg, int val) +{ + struct usbnet *dev = netdev_priv(netdev); + int ret; + + ret = pla_write_word(dev, OCP_BASE, 0xa000); + if (ret < 0) + return; + + ret = pla_write_word(dev, 0xb400 + reg * 2, val); +} + +static int r8153_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int status; + + status = usbnet_cdc_bind(dev, intf); + if (status < 0) + return status; + + dev->mii.dev = dev->net; + dev->mii.mdio_read = r8153_ecm_mdio_read; + dev->mii.mdio_write = r8153_ecm_mdio_write; + dev->mii.reg_num_mask = 0x1f; + dev->mii.supports_gmii = 1; + + return status; +} + +static const struct driver_info r8153_info = { + .description = "RTL8153 ECM Device", + .flags = FLAG_ETHER, + .bind = r8153_bind, + .unbind = usbnet_cdc_unbind, + .status = usbnet_cdc_status, + .manage_power = usbnet_manage_power, +}; + +static const struct usb_device_id products[] = { +{ + USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_REALTEK, 0x8153, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, 
USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&r8153_info, +}, + + { }, /* END */ +}; +MODULE_DEVICE_TABLE(usb, products); + +static int rtl8153_ecm_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ +#if IS_REACHABLE(CONFIG_USB_RTL8152) + if (rtl8152_get_version(intf)) + return -ENODEV; +#endif + + return usbnet_probe(intf, id); +} + +static struct usb_driver r8153_ecm_driver = { + .name = "r8153_ecm", + .id_table = products, + .probe = rtl8153_ecm_probe, + .disconnect = usbnet_disconnect, + .suspend = usbnet_suspend, + .resume = usbnet_resume, + .reset_resume = usbnet_resume, + .supports_autosuspend = 1, + .disable_hub_initiated_lpm = 1, +}; + +module_usb_driver(r8153_ecm_driver); + +MODULE_AUTHOR("Hayes Wang"); +MODULE_DESCRIPTION("Realtek USB ECM device"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 6fa7a009a24a..6609d21ef894 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -279,7 +279,7 @@ static const struct net_device_ops rndis_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index 0abd257b634c..55a244eca5ca 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -184,7 +184,7 @@ static const struct net_device_ops sierra_net_device_ops = { .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 8689835a5214..4353b370249f 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1435,7 +1435,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = smsc75xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index ea0d5f04dc3a..4c8ee1cff4d4 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1041,7 +1041,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = { .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = smsc95xx_ioctl, diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index e04c8054c2cf..878557ad03ad 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -308,7 +308,7 @@ static const struct net_device_ops sr9700_netdev_ops = { .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = sr9700_ioctl, .ndo_set_rx_mode = sr9700_set_multicast, diff --git 
a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 681e0def6356..da56735d7755 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -681,7 +681,7 @@ static const struct net_device_ops sr9800_netdev_ops = { .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = sr_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = sr_ioctl, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 6062dc27870e..1447da1d5729 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -304,7 +304,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev) */ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { - struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->net->tstats); unsigned long flags; int status; @@ -980,15 +980,6 @@ int usbnet_set_link_ksettings(struct net_device *net, } EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings); -void usbnet_get_stats64(struct net_device *net, struct rtnl_link_stats64 *stats) -{ - struct usbnet *dev = netdev_priv(net); - - netdev_stats_to_stats64(stats, &net->stats); - dev_fetch_sw_netstats(stats, dev->stats64); -} -EXPORT_SYMBOL_GPL(usbnet_get_stats64); - u32 usbnet_get_link (struct net_device *net) { struct usbnet *dev = netdev_priv(net); @@ -1220,7 +1211,7 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64); + struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->net->tstats); unsigned long flags; flags = u64_stats_update_begin_irqsave(&stats64->syncp); @@ -1596,7 +1587,7 @@ void usbnet_disconnect (struct usb_interface *intf) usb_free_urb(dev->interrupt); kfree(dev->padding_pkt); - free_percpu(dev->stats64); + free_percpu(net->tstats); free_netdev(net); } EXPORT_SYMBOL_GPL(usbnet_disconnect); @@ -1608,7 +1599,7 @@ static const struct net_device_ops usbnet_netdev_ops = { .ndo_tx_timeout = usbnet_tx_timeout, .ndo_set_rx_mode = usbnet_set_rx_mode, .ndo_change_mtu = usbnet_change_mtu, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -1671,8 +1662,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->driver_info = info; dev->driver_name = name; - dev->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->stats64) + net->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!net->tstats) goto out0; dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV @@ -1812,7 +1803,7 @@ out1: */ cancel_work_sync(&dev->kevent); del_timer_sync(&dev->delay); - free_percpu(dev->stats64); + free_percpu(net->tstats); out0: free_netdev(net); out: diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 8c737668008a..359d3ab33c4d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1329,7 +1329,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, } if (ifmp && tbp[IFLA_IFNAME]) { - nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); @@ -1379,7 +1379,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, 
eth_hw_addr_random(dev); if (tb[IFLA_IFNAME]) - nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1a557aeba32b..236fcc55a5fd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -66,6 +66,7 @@ struct vxlan_net { struct list_head vxlan_list; struct hlist_head sock_list[PORT_HASH_SIZE]; spinlock_t sock_lock; + struct notifier_block nexthop_notifier_block; }; /* Forwarding table entry */ @@ -3210,7 +3211,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_open = vxlan_open, .ndo_stop = vxlan_stop, .ndo_start_xmit = vxlan_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_rx_mode = vxlan_set_multicast_list, .ndo_change_mtu = vxlan_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -3229,7 +3230,7 @@ static const struct net_device_ops vxlan_netdev_raw_ops = { .ndo_open = vxlan_open, .ndo_stop = vxlan_stop, .ndo_start_xmit = vxlan_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = vxlan_change_mtu, .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; @@ -4683,9 +4684,14 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh) static int vxlan_nexthop_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct nexthop *nh = ptr; + struct nh_notifier_info *info = ptr; + struct nexthop *nh; + + if (event != NEXTHOP_EVENT_DEL) + return NOTIFY_DONE; - if (!nh || event != NEXTHOP_EVENT_DEL) + nh = nexthop_find_by_id(info->net, info->id); + if (!nh) return NOTIFY_DONE; vxlan_fdb_nh_flush(nh); @@ -4693,10 +4699,6 @@ static int vxlan_nexthop_event(struct notifier_block *nb, return NOTIFY_DONE; } -static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = { - .notifier_call = vxlan_nexthop_event, -}; - static __net_init int vxlan_init_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); @@ -4704,11 +4706,13 @@ static __net_init int vxlan_init_net(struct net *net) INIT_LIST_HEAD(&vn->vxlan_list); spin_lock_init(&vn->sock_lock); + vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event; for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vn->sock_list[h]); - return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block); + return register_nexthop_notifier(net, &vn->nexthop_notifier_block, + NULL); } static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) @@ -4740,8 +4744,11 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) LIST_HEAD(list); rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) - unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block); + list_for_each_entry(net, net_list, exit_list) { + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + + unregister_nexthop_notifier(net, &vn->nexthop_notifier_block); + } list_for_each_entry(net, net_list, exit_list) vxlan_destroy_tunnels(net, &list); diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 39e5ab261d7c..4029fde71a9e 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -321,51 +321,6 @@ config IXP4XX_HSS Say Y here if you want to use built-in HSS ports on IXP4xx processor. -config DLCI - tristate "Frame Relay DLCI support" - help - Support for the Frame Relay protocol. 
- - Frame Relay is a fast low-cost way to connect to a remote Internet - access provider or to form a private wide area network. The one - physical line from your box to the local "switch" (i.e. the entry - point to the Frame Relay network, usually at the phone company) can - carry several logical point-to-point connections to other computers - connected to the Frame Relay network. For a general explanation of - the protocol, check out <http://www.mplsforum.org/>. - - To use frame relay, you need supporting hardware (called FRAD) and - certain programs from the net-tools package as explained in - <file:Documentation/networking/framerelay.rst>. - - To compile this driver as a module, choose M here: the - module will be called dlci. - -config DLCI_MAX - int "Max DLCI per device" - depends on DLCI - default "8" - help - How many logical point-to-point frame relay connections (the - identifiers of which are called DCLIs) should be handled by each - of your hardware frame relay access devices. - - Go with the default. - -config SDLA - tristate "SDLA (Sangoma S502/S508) support" - depends on DLCI && ISA - help - Driver for the Sangoma S502A, S502E, and S508 Frame Relay Access - Devices. - - These are multi-protocol cards, but only Frame Relay is supported - by the driver at this time. Please read - <file:Documentation/networking/framerelay.rst>. - - To compile this driver as a module, choose M here: the - module will be called sdla. - # X.25 network drivers config LAPBETHER tristate "LAPB over Ethernet driver" @@ -383,21 +338,6 @@ config LAPBETHER If unsure, say N. -config X25_ASY - tristate "X.25 async driver" - depends on LAPB && X25 && TTY - help - Send and receive X.25 frames over regular asynchronous serial - lines such as telephone lines equipped with ordinary modems. - - Experts should note that this driver doesn't currently comply with - the asynchronous HDLS framing protocols in CCITT recommendation X.25. - - To compile this driver as a module, choose M here: the - module will be called x25_asy. - - If unsure, say N. - config SBNI tristate "Granch SBNI12 Leased Line adapter support" depends on X86 diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 380271a011e4..081666c36ca2 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -18,12 +18,9 @@ obj-$(CONFIG_HOSTESS_SV11) += z85230.o hostess_sv11.o obj-$(CONFIG_SEALEVEL_4021) += z85230.o sealevel.o obj-$(CONFIG_COSA) += cosa.o obj-$(CONFIG_FARSYNC) += farsync.o -obj-$(CONFIG_X25_ASY) += x25_asy.o obj-$(CONFIG_LANMEDIA) += lmc/ -obj-$(CONFIG_DLCI) += dlci.o -obj-$(CONFIG_SDLA) += sdla.o obj-$(CONFIG_LAPBETHER) += lapbether.o obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_N2) += n2.o diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c deleted file mode 100644 index 3ca4daf63389..000000000000 --- a/drivers/net/wan/dlci.c +++ /dev/null @@ -1,541 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * DLCI Implementation of Frame Relay protocol for Linux, according to - * RFC 1490. This generic device provides en/decapsulation for an - * underlying hardware driver. Routes & IPs are assigned to these - * interfaces. Requires 'dlcicfg' program to create usable - * interfaces, the initial one, 'dlci' is for IOCTL use only. 
- * - * Version: @(#)dlci.c 0.35 4 Jan 1997 - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * - * 0.15 Mike Mclagan Packet freeing, bug in kmalloc call - * DLCI_RET handling - * 0.20 Mike McLagan More conservative on which packets - * are returned for retry and which are - * are dropped. If DLCI_RET_DROP is - * returned from the FRAD, the packet is - * sent back to Linux for re-transmission - * 0.25 Mike McLagan Converted to use SIOC IOCTL calls - * 0.30 Jim Freeman Fixed to allow IPX traffic - * 0.35 Michael Elizabeth Fixed incorrect memcpy_fromfs - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/fcntl.h> -#include <linux/interrupt.h> -#include <linux/ptrace.h> -#include <linux/ioport.h> -#include <linux/in.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/if_frad.h> -#include <linux/bitops.h> - -#include <net/sock.h> - -#include <asm/io.h> -#include <asm/dma.h> -#include <linux/uaccess.h> - -static const char version[] = "DLCI driver v0.35, 4 Jan 1997, mike.mclagan@linux.org"; - -static LIST_HEAD(dlci_devs); - -static void dlci_setup(struct net_device *); - -/* - * these encapsulate the RFC 1490 requirements as well as - * deal with packet transmission and reception, working with - * the upper network layers - */ - -static int dlci_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned len) -{ - struct frhdr hdr; - unsigned int hlen; - char *dest; - - hdr.control = FRAD_I_UI; - switch (type) - { - case ETH_P_IP: - hdr.IP_NLPID = FRAD_P_IP; - hlen = sizeof(hdr.control) + sizeof(hdr.IP_NLPID); - break; - - /* feel free to add other types, if necessary */ - - default: - hdr.pad = FRAD_P_PADDING; - hdr.NLPID = FRAD_P_SNAP; - memset(hdr.OUI, 0, sizeof(hdr.OUI)); - hdr.PID = htons(type); - hlen = sizeof(hdr); - break; - } - - dest = skb_push(skb, hlen); - if (!dest) - return 0; - - memcpy(dest, &hdr, hlen); - - return hlen; -} - -static void dlci_receive(struct sk_buff *skb, struct net_device *dev) -{ - struct frhdr *hdr; - int process, header; - - if (!pskb_may_pull(skb, sizeof(*hdr))) { - netdev_notice(dev, "invalid data no header\n"); - dev->stats.rx_errors++; - kfree_skb(skb); - return; - } - - hdr = (struct frhdr *) skb->data; - process = 0; - header = 0; - skb->dev = dev; - - if (hdr->control != FRAD_I_UI) - { - netdev_notice(dev, "Invalid header flag 0x%02X\n", - hdr->control); - dev->stats.rx_errors++; - } - else - switch (hdr->IP_NLPID) - { - case FRAD_P_PADDING: - if (hdr->NLPID != FRAD_P_SNAP) - { - netdev_notice(dev, "Unsupported NLPID 0x%02X\n", - hdr->NLPID); - dev->stats.rx_errors++; - break; - } - - if (hdr->OUI[0] + hdr->OUI[1] + hdr->OUI[2] != 0) - { - netdev_notice(dev, "Unsupported organizationally unique identifier 0x%02X-%02X-%02X\n", - hdr->OUI[0], - hdr->OUI[1], - hdr->OUI[2]); - dev->stats.rx_errors++; - break; - } - - /* at this point, it's an EtherType frame */ - header = sizeof(struct frhdr); - /* Already in network order ! 
*/ - skb->protocol = hdr->PID; - process = 1; - break; - - case FRAD_P_IP: - header = sizeof(hdr->control) + sizeof(hdr->IP_NLPID); - skb->protocol = htons(ETH_P_IP); - process = 1; - break; - - case FRAD_P_SNAP: - case FRAD_P_Q933: - case FRAD_P_CLNP: - netdev_notice(dev, "Unsupported NLPID 0x%02X\n", - hdr->pad); - dev->stats.rx_errors++; - break; - - default: - netdev_notice(dev, "Invalid pad byte 0x%02X\n", - hdr->pad); - dev->stats.rx_errors++; - break; - } - - if (process) - { - /* we've set up the protocol, so discard the header */ - skb_reset_mac_header(skb); - skb_pull(skb, header); - dev->stats.rx_bytes += skb->len; - netif_rx(skb); - dev->stats.rx_packets++; - } - else - dev_kfree_skb(skb); -} - -static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) -{ - struct dlci_local *dlp = netdev_priv(dev); - - if (skb) { - struct netdev_queue *txq = skb_get_tx_queue(dev, skb); - netdev_start_xmit(skb, dlp->slave, txq, false); - } - return NETDEV_TX_OK; -} - -static int dlci_config(struct net_device *dev, struct dlci_conf __user *conf, int get) -{ - struct dlci_conf config; - struct dlci_local *dlp; - struct frad_local *flp; - int err; - - dlp = netdev_priv(dev); - - flp = netdev_priv(dlp->slave); - - if (!get) - { - if (copy_from_user(&config, conf, sizeof(struct dlci_conf))) - return -EFAULT; - if (config.flags & ~DLCI_VALID_FLAGS) - return -EINVAL; - memcpy(&dlp->config, &config, sizeof(struct dlci_conf)); - dlp->configured = 1; - } - - err = (*flp->dlci_conf)(dlp->slave, dev, get); - if (err) - return err; - - if (get) - { - if (copy_to_user(conf, &dlp->config, sizeof(struct dlci_conf))) - return -EFAULT; - } - - return 0; -} - -static int dlci_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct dlci_local *dlp; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - dlp = netdev_priv(dev); - - switch (cmd) - { - case DLCI_GET_SLAVE: - if (!*(short *)(dev->dev_addr)) - return -EINVAL; - - strncpy(ifr->ifr_slave, dlp->slave->name, sizeof(ifr->ifr_slave)); - break; - - case DLCI_GET_CONF: - case DLCI_SET_CONF: - if (!*(short *)(dev->dev_addr)) - return -EINVAL; - - return dlci_config(dev, ifr->ifr_data, cmd == DLCI_GET_CONF); - - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int dlci_change_mtu(struct net_device *dev, int new_mtu) -{ - struct dlci_local *dlp = netdev_priv(dev); - - return dev_set_mtu(dlp->slave, new_mtu); -} - -static int dlci_open(struct net_device *dev) -{ - struct dlci_local *dlp; - struct frad_local *flp; - int err; - - dlp = netdev_priv(dev); - - if (!*(short *)(dev->dev_addr)) - return -EINVAL; - - if (!netif_running(dlp->slave)) - return -ENOTCONN; - - flp = netdev_priv(dlp->slave); - err = (*flp->activate)(dlp->slave, dev); - if (err) - return err; - - netif_start_queue(dev); - - return 0; -} - -static int dlci_close(struct net_device *dev) -{ - struct dlci_local *dlp; - struct frad_local *flp; - - netif_stop_queue(dev); - - dlp = netdev_priv(dev); - - flp = netdev_priv(dlp->slave); - (*flp->deactivate)(dlp->slave, dev); - - return 0; -} - -static int dlci_add(struct dlci_add *dlci) -{ - struct net_device *master, *slave; - struct dlci_local *dlp; - struct frad_local *flp; - int err = -EINVAL; - - - /* validate slave device */ - slave = dev_get_by_name(&init_net, dlci->devname); - if (!slave) - return -ENODEV; - - if (slave->type != ARPHRD_FRAD || netdev_priv(slave) == NULL) - goto err1; - - /* create device name */ - master = alloc_netdev(sizeof(struct dlci_local), "dlci%d", - NET_NAME_UNKNOWN, 
dlci_setup); - if (!master) { - err = -ENOMEM; - goto err1; - } - - /* make sure same slave not already registered */ - rtnl_lock(); - list_for_each_entry(dlp, &dlci_devs, list) { - if (dlp->slave == slave) { - err = -EBUSY; - goto err2; - } - } - - *(short *)(master->dev_addr) = dlci->dlci; - - dlp = netdev_priv(master); - dlp->slave = slave; - dlp->master = master; - - flp = netdev_priv(slave); - err = (*flp->assoc)(slave, master); - if (err < 0) - goto err2; - - err = register_netdevice(master); - if (err < 0) - goto err2; - - strcpy(dlci->devname, master->name); - - list_add(&dlp->list, &dlci_devs); - rtnl_unlock(); - - return 0; - - err2: - rtnl_unlock(); - free_netdev(master); - err1: - dev_put(slave); - return err; -} - -static int dlci_del(struct dlci_add *dlci) -{ - struct dlci_local *dlp; - struct frad_local *flp; - struct net_device *master, *slave; - int err; - bool found = false; - - rtnl_lock(); - - /* validate slave device */ - master = __dev_get_by_name(&init_net, dlci->devname); - if (!master) { - err = -ENODEV; - goto out; - } - - list_for_each_entry(dlp, &dlci_devs, list) { - if (dlp->master == master) { - found = true; - break; - } - } - if (!found) { - err = -ENODEV; - goto out; - } - - if (netif_running(master)) { - err = -EBUSY; - goto out; - } - - dlp = netdev_priv(master); - slave = dlp->slave; - flp = netdev_priv(slave); - - err = (*flp->deassoc)(slave, master); - if (!err) { - list_del(&dlp->list); - - unregister_netdevice(master); - - dev_put(slave); - } -out: - rtnl_unlock(); - return err; -} - -static int dlci_ioctl(unsigned int cmd, void __user *arg) -{ - struct dlci_add add; - int err; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&add, arg, sizeof(struct dlci_add))) - return -EFAULT; - - switch (cmd) - { - case SIOCADDDLCI: - err = dlci_add(&add); - - if (!err) - if (copy_to_user(arg, &add, sizeof(struct dlci_add))) - return -EFAULT; - break; - - case SIOCDELDLCI: - err = dlci_del(&add); - break; - - default: - err = -EINVAL; - } - - return err; -} - -static const struct header_ops dlci_header_ops = { - .create = dlci_header, -}; - -static const struct net_device_ops dlci_netdev_ops = { - .ndo_open = dlci_open, - .ndo_stop = dlci_close, - .ndo_do_ioctl = dlci_dev_ioctl, - .ndo_start_xmit = dlci_transmit, - .ndo_change_mtu = dlci_change_mtu, -}; - -static void dlci_setup(struct net_device *dev) -{ - struct dlci_local *dlp = netdev_priv(dev); - - dev->flags = 0; - dev->header_ops = &dlci_header_ops; - dev->netdev_ops = &dlci_netdev_ops; - dev->needs_free_netdev = true; - - dlp->receive = dlci_receive; - - dev->type = ARPHRD_DLCI; - dev->hard_header_len = sizeof(struct frhdr); - dev->addr_len = sizeof(short); - -} - -/* if slave is unregistering, then cleanup master */ -static int dlci_dev_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (dev_net(dev) != &init_net) - return NOTIFY_DONE; - - if (event == NETDEV_UNREGISTER) { - struct dlci_local *dlp; - - list_for_each_entry(dlp, &dlci_devs, list) { - if (dlp->slave == dev) { - list_del(&dlp->list); - unregister_netdevice(dlp->master); - dev_put(dlp->slave); - break; - } - } - } - return NOTIFY_DONE; -} - -static struct notifier_block dlci_notifier = { - .notifier_call = dlci_dev_event, -}; - -static int __init init_dlci(void) -{ - dlci_ioctl_set(dlci_ioctl); - register_netdevice_notifier(&dlci_notifier); - - printk("%s.\n", version); - - return 0; -} - -static void __exit dlci_exit(void) -{ - 
struct dlci_local *dlp, *nxt; - - dlci_ioctl_set(NULL); - unregister_netdevice_notifier(&dlci_notifier); - - rtnl_lock(); - list_for_each_entry_safe(dlp, nxt, &dlci_devs, list) { - unregister_netdevice(dlp->master); - dev_put(dlp->slave); - } - rtnl_unlock(); -} - -module_init(init_dlci); -module_exit(dlci_exit); - -MODULE_AUTHOR("Mike McLagan"); -MODULE_DESCRIPTION("Frame Relay DLCI layer"); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 409e5a7ad8e2..0720f5f92caa 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -871,6 +871,45 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) return 0; } +static int fr_snap_parse(struct sk_buff *skb, struct pvc_device *pvc) +{ + /* OUI 00-00-00 indicates an Ethertype follows */ + if (skb->data[0] == 0x00 && + skb->data[1] == 0x00 && + skb->data[2] == 0x00) { + if (!pvc->main) + return -1; + skb->dev = pvc->main; + skb->protocol = *(__be16 *)(skb->data + 3); /* Ethertype */ + skb_pull(skb, 5); + skb_reset_mac_header(skb); + return 0; + + /* OUI 00-80-C2 stands for the 802.1 organization */ + } else if (skb->data[0] == 0x00 && + skb->data[1] == 0x80 && + skb->data[2] == 0xC2) { + /* PID 00-07 stands for Ethernet frames without FCS */ + if (skb->data[3] == 0x00 && + skb->data[4] == 0x07) { + if (!pvc->ether) + return -1; + skb_pull(skb, 5); + if (skb->len < ETH_HLEN) + return -1; + skb->protocol = eth_type_trans(skb, pvc->ether); + return 0; + + /* PID unsupported */ + } else { + return -1; + } + + /* OUI unsupported */ + } else { + return -1; + } +} static int fr_rx(struct sk_buff *skb) { @@ -880,9 +919,9 @@ static int fr_rx(struct sk_buff *skb) u8 *data = skb->data; u16 dlci; struct pvc_device *pvc; - struct net_device *dev = NULL; + struct net_device *dev; - if (skb->len <= 4 || fh->ea1 || data[2] != FR_UI) + if (skb->len < 4 || fh->ea1 || !fh->ea2 || data[2] != FR_UI) goto rx_error; dlci = q922_to_dlci(skb->data); @@ -904,8 +943,7 @@ static int fr_rx(struct sk_buff *skb) netdev_info(frad, "No PVC for received frame's DLCI %d\n", dlci); #endif - dev_kfree_skb_any(skb); - return NET_RX_DROP; + goto rx_drop; } if (pvc->state.fecn != fh->fecn) { @@ -931,63 +969,51 @@ static int fr_rx(struct sk_buff *skb) } if (data[3] == NLPID_IP) { + if (!pvc->main) + goto rx_drop; skb_pull(skb, 4); /* Remove 4-byte header (hdr, UI, NLPID) */ - dev = pvc->main; + skb->dev = pvc->main; skb->protocol = htons(ETH_P_IP); + skb_reset_mac_header(skb); } else if (data[3] == NLPID_IPV6) { + if (!pvc->main) + goto rx_drop; skb_pull(skb, 4); /* Remove 4-byte header (hdr, UI, NLPID) */ - dev = pvc->main; + skb->dev = pvc->main; skb->protocol = htons(ETH_P_IPV6); + skb_reset_mac_header(skb); - } else if (skb->len > 10 && data[3] == FR_PAD && - data[4] == NLPID_SNAP && data[5] == FR_PAD) { - u16 oui = ntohs(*(__be16*)(data + 6)); - u16 pid = ntohs(*(__be16*)(data + 8)); - skb_pull(skb, 10); - - switch ((((u32)oui) << 16) | pid) { - case ETH_P_ARP: /* routed frame with SNAP */ - case ETH_P_IPX: - case ETH_P_IP: /* a long variant */ - case ETH_P_IPV6: - dev = pvc->main; - skb->protocol = htons(pid); - break; - - case 0x80C20007: /* bridged Ethernet frame */ - if ((dev = pvc->ether) != NULL) - skb->protocol = eth_type_trans(skb, dev); - break; - - default: - netdev_info(frad, "Unsupported protocol, OUI=%x PID=%x\n", - oui, pid); - dev_kfree_skb_any(skb); - return NET_RX_DROP; + } else if (data[3] == FR_PAD) { + if (skb->len < 5) + goto rx_error; + if (data[4] == NLPID_SNAP) { /* A SNAP header 
follows */ + skb_pull(skb, 5); + if (skb->len < 5) /* Incomplete SNAP header */ + goto rx_error; + if (fr_snap_parse(skb, pvc)) + goto rx_drop; + } else { + goto rx_drop; } + } else { netdev_info(frad, "Unsupported protocol, NLPID=%x length=%i\n", data[3], skb->len); - dev_kfree_skb_any(skb); - return NET_RX_DROP; + goto rx_drop; } - if (dev) { - dev->stats.rx_packets++; /* PVC traffic */ - dev->stats.rx_bytes += skb->len; - if (pvc->state.becn) - dev->stats.rx_compressed++; - skb->dev = dev; - netif_rx(skb); - return NET_RX_SUCCESS; - } else { - dev_kfree_skb_any(skb); - return NET_RX_DROP; - } + dev = skb->dev; + dev->stats.rx_packets++; /* PVC traffic */ + dev->stats.rx_bytes += skb->len; + if (pvc->state.becn) + dev->stats.rx_compressed++; + netif_rx(skb); + return NET_RX_SUCCESS; - rx_error: +rx_error: frad->stats.rx_errors++; /* Mark error */ +rx_drop: dev_kfree_skb_any(skb); return NET_RX_DROP; } diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 36600b0a0ab0..93c7e8502845 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -353,9 +353,8 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ switch(xc.command){ case lmc_xilinx_reset: /*fold02*/ { - u16 mii; spin_lock_irqsave(&sc->lmc_lock, flags); - mii = lmc_mii_readreg (sc, 0, 16); + lmc_mii_readreg (sc, 0, 16); /* * Make all of them 0 and make input @@ -424,10 +423,9 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/ break; case lmc_xilinx_load_prom: /*fold02*/ { - u16 mii; int timeout = 500000; spin_lock_irqsave(&sc->lmc_lock, flags); - mii = lmc_mii_readreg (sc, 0, 16); + lmc_mii_readreg (sc, 0, 16); /* * Make all of them 0 and make input @@ -1185,7 +1183,6 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ int i; s32 stat; unsigned int badtx; - u32 firstcsr; int max_work = LMC_RXDESCS; int handled = 0; @@ -1203,8 +1200,6 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ goto lmc_int_fail_out; } - firstcsr = csr; - /* always go through this loop at least once */ while (csr & sc->lmc_intrmask) { handled = 1; diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c deleted file mode 100644 index bc2c1c7fb1a4..000000000000 --- a/drivers/net/wan/sdla.c +++ /dev/null @@ -1,1655 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SDLA An implementation of a driver for the Sangoma S502/S508 series - * multi-protocol PC interface card. Initial offering is with - * the DLCI driver, providing Frame Relay support for linux. - * - * Global definitions for the Frame relay interface. - * - * Version: @(#)sdla.c 0.30 12 Sep 1996 - * - * Credits: Sangoma Technologies, for the use of 2 cards for an extended - * period of time. - * David Mandelstam <dm@sangoma.com> for getting me started on - * this project, and incentive to complete it. - * Gene Kozen <74604.152@compuserve.com> for providing me with - * important information about the cards. - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * 0.15 Mike McLagan Improved error handling, packet dropping - * 0.20 Mike McLagan New transmit/receive flags for config - * If in FR mode, don't accept packets from - * non DLCI devices. - * 0.25 Mike McLagan Fixed problem with rejecting packets - * from non DLCI devices. 
- * 0.30 Mike McLagan Fixed kernel panic when used with modified - * ifconfig - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/fcntl.h> -#include <linux/interrupt.h> -#include <linux/ptrace.h> -#include <linux/ioport.h> -#include <linux/in.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/timer.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/if_frad.h> -#include <linux/sdla.h> -#include <linux/bitops.h> - -#include <asm/io.h> -#include <asm/dma.h> -#include <linux/uaccess.h> - -static const char* version = "SDLA driver v0.30, 12 Sep 1996, mike.mclagan@linux.org"; - -static unsigned int valid_port[] = { 0x250, 0x270, 0x280, 0x300, 0x350, 0x360, 0x380, 0x390}; - -static unsigned int valid_mem[] = { - 0xA0000, 0xA2000, 0xA4000, 0xA6000, 0xA8000, 0xAA000, 0xAC000, 0xAE000, - 0xB0000, 0xB2000, 0xB4000, 0xB6000, 0xB8000, 0xBA000, 0xBC000, 0xBE000, - 0xC0000, 0xC2000, 0xC4000, 0xC6000, 0xC8000, 0xCA000, 0xCC000, 0xCE000, - 0xD0000, 0xD2000, 0xD4000, 0xD6000, 0xD8000, 0xDA000, 0xDC000, 0xDE000, - 0xE0000, 0xE2000, 0xE4000, 0xE6000, 0xE8000, 0xEA000, 0xEC000, 0xEE000}; - -static DEFINE_SPINLOCK(sdla_lock); - -/********************************************************* - * - * these are the core routines that access the card itself - * - *********************************************************/ - -#define SDLA_WINDOW(dev,addr) outb((((addr) >> 13) & 0x1F), (dev)->base_addr + SDLA_REG_Z80_WINDOW) - -static void __sdla_read(struct net_device *dev, int addr, void *buf, short len) -{ - char *temp; - const void *base; - int offset, bytes; - - temp = buf; - while(len) - { - offset = addr & SDLA_ADDR_MASK; - bytes = offset + len > SDLA_WINDOW_SIZE ? SDLA_WINDOW_SIZE - offset : len; - base = (const void *) (dev->mem_start + offset); - - SDLA_WINDOW(dev, addr); - memcpy(temp, base, bytes); - - addr += bytes; - temp += bytes; - len -= bytes; - } -} - -static void sdla_read(struct net_device *dev, int addr, void *buf, short len) -{ - unsigned long flags; - spin_lock_irqsave(&sdla_lock, flags); - __sdla_read(dev, addr, buf, len); - spin_unlock_irqrestore(&sdla_lock, flags); -} - -static void __sdla_write(struct net_device *dev, int addr, - const void *buf, short len) -{ - const char *temp; - void *base; - int offset, bytes; - - temp = buf; - while(len) - { - offset = addr & SDLA_ADDR_MASK; - bytes = offset + len > SDLA_WINDOW_SIZE ? 
SDLA_WINDOW_SIZE - offset : len; - base = (void *) (dev->mem_start + offset); - - SDLA_WINDOW(dev, addr); - memcpy(base, temp, bytes); - - addr += bytes; - temp += bytes; - len -= bytes; - } -} - -static void sdla_write(struct net_device *dev, int addr, - const void *buf, short len) -{ - unsigned long flags; - - spin_lock_irqsave(&sdla_lock, flags); - __sdla_write(dev, addr, buf, len); - spin_unlock_irqrestore(&sdla_lock, flags); -} - - -static void sdla_clear(struct net_device *dev) -{ - unsigned long flags; - char *base; - int len, addr, bytes; - - len = 65536; - addr = 0; - bytes = SDLA_WINDOW_SIZE; - base = (void *) dev->mem_start; - - spin_lock_irqsave(&sdla_lock, flags); - while(len) - { - SDLA_WINDOW(dev, addr); - memset(base, 0, bytes); - - addr += bytes; - len -= bytes; - } - spin_unlock_irqrestore(&sdla_lock, flags); - -} - -static char sdla_byte(struct net_device *dev, int addr) -{ - unsigned long flags; - char byte, *temp; - - temp = (void *) (dev->mem_start + (addr & SDLA_ADDR_MASK)); - - spin_lock_irqsave(&sdla_lock, flags); - SDLA_WINDOW(dev, addr); - byte = *temp; - spin_unlock_irqrestore(&sdla_lock, flags); - - return byte; -} - -static void sdla_stop(struct net_device *dev) -{ - struct frad_local *flp; - - flp = netdev_priv(dev); - switch(flp->type) - { - case SDLA_S502A: - outb(SDLA_S502A_HALT, dev->base_addr + SDLA_REG_CONTROL); - flp->state = SDLA_HALT; - break; - case SDLA_S502E: - outb(SDLA_HALT, dev->base_addr + SDLA_REG_Z80_CONTROL); - outb(SDLA_S502E_ENABLE, dev->base_addr + SDLA_REG_CONTROL); - flp->state = SDLA_S502E_ENABLE; - break; - case SDLA_S507: - flp->state &= ~SDLA_CPUEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - case SDLA_S508: - flp->state &= ~SDLA_CPUEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - } -} - -static void sdla_start(struct net_device *dev) -{ - struct frad_local *flp; - - flp = netdev_priv(dev); - switch(flp->type) - { - case SDLA_S502A: - outb(SDLA_S502A_NMI, dev->base_addr + SDLA_REG_CONTROL); - outb(SDLA_S502A_START, dev->base_addr + SDLA_REG_CONTROL); - flp->state = SDLA_S502A_START; - break; - case SDLA_S502E: - outb(SDLA_S502E_CPUEN, dev->base_addr + SDLA_REG_Z80_CONTROL); - outb(0x00, dev->base_addr + SDLA_REG_CONTROL); - flp->state = 0; - break; - case SDLA_S507: - flp->state |= SDLA_CPUEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - case SDLA_S508: - flp->state |= SDLA_CPUEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - } -} - -/**************************************************** - * - * this is used for the S502A/E cards to determine - * the speed of the onboard CPU. Calibration is - * necessary for the Frame Relay code uploaded - * later. Incorrect results cause timing problems - * with link checks & status messages - * - ***************************************************/ - -static int sdla_z80_poll(struct net_device *dev, int z80_addr, int jiffs, char resp1, char resp2) -{ - unsigned long start, done, now; - char resp, *temp; - - start = now = jiffies; - done = jiffies + jiffs; - - temp = (void *)dev->mem_start; - temp += z80_addr & SDLA_ADDR_MASK; - - resp = ~resp1; - while (time_before(jiffies, done) && (resp != resp1) && (!resp2 || (resp != resp2))) - { - if (jiffies != now) - { - SDLA_WINDOW(dev, z80_addr); - now = jiffies; - resp = *temp; - } - } - return time_before(jiffies, done) ? 
jiffies - start : -1; -} - -/* constants for Z80 CPU speed */ -#define Z80_READY '1' /* Z80 is ready to begin */ -#define LOADER_READY '2' /* driver is ready to begin */ -#define Z80_SCC_OK '3' /* SCC is on board */ -#define Z80_SCC_BAD '4' /* SCC was not found */ - -static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr) -{ - int jiffs; - char data; - - sdla_start(dev); - if (sdla_z80_poll(dev, 0, 3*HZ, Z80_READY, 0) < 0) - return -EIO; - - data = LOADER_READY; - sdla_write(dev, 0, &data, 1); - - if ((jiffs = sdla_z80_poll(dev, 0, 8*HZ, Z80_SCC_OK, Z80_SCC_BAD)) < 0) - return -EIO; - - sdla_stop(dev); - sdla_read(dev, 0, &data, 1); - - if (data == Z80_SCC_BAD) - { - printk("%s: SCC bad\n", dev->name); - return -EIO; - } - - if (data != Z80_SCC_OK) - return -EINVAL; - - if (jiffs < 165) - ifr->ifr_mtu = SDLA_CPU_16M; - else if (jiffs < 220) - ifr->ifr_mtu = SDLA_CPU_10M; - else if (jiffs < 258) - ifr->ifr_mtu = SDLA_CPU_8M; - else if (jiffs < 357) - ifr->ifr_mtu = SDLA_CPU_7M; - else if (jiffs < 467) - ifr->ifr_mtu = SDLA_CPU_5M; - else - ifr->ifr_mtu = SDLA_CPU_3M; - - return 0; -} - -/************************************************ - * - * Direct interaction with the Frame Relay code - * starts here. - * - ************************************************/ - -struct _dlci_stat -{ - short dlci; - char flags; -} __packed; - -struct _frad_stat -{ - char flags; - struct _dlci_stat dlcis[SDLA_MAX_DLCI]; -}; - -static void sdla_errors(struct net_device *dev, int cmd, int dlci, int ret, int len, void *data) -{ - struct _dlci_stat *pstatus; - short *pdlci; - int i; - char *state, line[30]; - - switch (ret) - { - case SDLA_RET_MODEM: - state = data; - if (*state & SDLA_MODEM_DCD_LOW) - netdev_info(dev, "Modem DCD unexpectedly low!\n"); - if (*state & SDLA_MODEM_CTS_LOW) - netdev_info(dev, "Modem CTS unexpectedly low!\n"); - /* I should probably do something about this! 
*/ - break; - - case SDLA_RET_CHANNEL_OFF: - netdev_info(dev, "Channel became inoperative!\n"); - /* same here */ - break; - - case SDLA_RET_CHANNEL_ON: - netdev_info(dev, "Channel became operative!\n"); - /* same here */ - break; - - case SDLA_RET_DLCI_STATUS: - netdev_info(dev, "Status change reported by Access Node\n"); - len /= sizeof(struct _dlci_stat); - for(pstatus = data, i=0;i < len;i++,pstatus++) - { - if (pstatus->flags & SDLA_DLCI_NEW) - state = "new"; - else if (pstatus->flags & SDLA_DLCI_DELETED) - state = "deleted"; - else if (pstatus->flags & SDLA_DLCI_ACTIVE) - state = "active"; - else - { - sprintf(line, "unknown status: %02X", pstatus->flags); - state = line; - } - netdev_info(dev, "DLCI %i: %s\n", - pstatus->dlci, state); - /* same here */ - } - break; - - case SDLA_RET_DLCI_UNKNOWN: - netdev_info(dev, "Received unknown DLCIs:"); - len /= sizeof(short); - for(pdlci = data,i=0;i < len;i++,pdlci++) - pr_cont(" %i", *pdlci); - pr_cont("\n"); - break; - - case SDLA_RET_TIMEOUT: - netdev_err(dev, "Command timed out!\n"); - break; - - case SDLA_RET_BUF_OVERSIZE: - netdev_info(dev, "Bc/CIR overflow, acceptable size is %i\n", - len); - break; - - case SDLA_RET_BUF_TOO_BIG: - netdev_info(dev, "Buffer size over specified max of %i\n", - len); - break; - - case SDLA_RET_CHANNEL_INACTIVE: - case SDLA_RET_DLCI_INACTIVE: - case SDLA_RET_CIR_OVERFLOW: - case SDLA_RET_NO_BUFS: - if (cmd == SDLA_INFORMATION_WRITE) - break; - fallthrough; - - default: - netdev_dbg(dev, "Cmd 0x%02X generated return code 0x%02X\n", - cmd, ret); - /* Further processing could be done here */ - break; - } -} - -static int sdla_cmd(struct net_device *dev, int cmd, short dlci, short flags, - void *inbuf, short inlen, void *outbuf, short *outlen) -{ - static struct _frad_stat status; - struct frad_local *flp; - struct sdla_cmd *cmd_buf; - unsigned long pflags; - unsigned long jiffs; - int ret, waiting, len; - long window; - - flp = netdev_priv(dev); - window = flp->type == SDLA_S508 ? SDLA_508_CMD_BUF : SDLA_502_CMD_BUF; - cmd_buf = (struct sdla_cmd *)(dev->mem_start + (window & SDLA_ADDR_MASK)); - ret = 0; - len = 0; - jiffs = jiffies + HZ; /* 1 second is plenty */ - - spin_lock_irqsave(&sdla_lock, pflags); - SDLA_WINDOW(dev, window); - cmd_buf->cmd = cmd; - cmd_buf->dlci = dlci; - cmd_buf->flags = flags; - - if (inbuf) - memcpy(cmd_buf->data, inbuf, inlen); - - cmd_buf->length = inlen; - - cmd_buf->opp_flag = 1; - spin_unlock_irqrestore(&sdla_lock, pflags); - - waiting = 1; - len = 0; - while (waiting && time_before_eq(jiffies, jiffs)) - { - if (waiting++ % 3) - { - spin_lock_irqsave(&sdla_lock, pflags); - SDLA_WINDOW(dev, window); - waiting = ((volatile int)(cmd_buf->opp_flag)); - spin_unlock_irqrestore(&sdla_lock, pflags); - } - } - - if (!waiting) - { - - spin_lock_irqsave(&sdla_lock, pflags); - SDLA_WINDOW(dev, window); - ret = cmd_buf->retval; - len = cmd_buf->length; - if (outbuf && outlen) - { - *outlen = *outlen >= len ? len : *outlen; - - if (*outlen) - memcpy(outbuf, cmd_buf->data, *outlen); - } - - /* This is a local copy that's used for error handling */ - if (ret) - memcpy(&status, cmd_buf->data, len > sizeof(status) ? 
sizeof(status) : len); - - spin_unlock_irqrestore(&sdla_lock, pflags); - } - else - ret = SDLA_RET_TIMEOUT; - - if (ret != SDLA_RET_OK) - sdla_errors(dev, cmd, dlci, ret, len, &status); - - return ret; -} - -/*********************************************** - * - * these functions are called by the DLCI driver - * - ***********************************************/ - -static int sdla_reconfig(struct net_device *dev); - -static int sdla_activate(struct net_device *slave, struct net_device *master) -{ - struct frad_local *flp; - int i; - - flp = netdev_priv(slave); - - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->master[i] == master) - break; - - if (i == CONFIG_DLCI_MAX) - return -ENODEV; - - flp->dlci[i] = abs(flp->dlci[i]); - - if (netif_running(slave) && (flp->config.station == FRAD_STATION_NODE)) - sdla_cmd(slave, SDLA_ACTIVATE_DLCI, 0, 0, &flp->dlci[i], sizeof(short), NULL, NULL); - - return 0; -} - -static int sdla_deactivate(struct net_device *slave, struct net_device *master) -{ - struct frad_local *flp; - int i; - - flp = netdev_priv(slave); - - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->master[i] == master) - break; - - if (i == CONFIG_DLCI_MAX) - return -ENODEV; - - flp->dlci[i] = -abs(flp->dlci[i]); - - if (netif_running(slave) && (flp->config.station == FRAD_STATION_NODE)) - sdla_cmd(slave, SDLA_DEACTIVATE_DLCI, 0, 0, &flp->dlci[i], sizeof(short), NULL, NULL); - - return 0; -} - -static int sdla_assoc(struct net_device *slave, struct net_device *master) -{ - struct frad_local *flp; - int i; - - if (master->type != ARPHRD_DLCI) - return -EINVAL; - - flp = netdev_priv(slave); - - for(i=0;i<CONFIG_DLCI_MAX;i++) - { - if (!flp->master[i]) - break; - if (abs(flp->dlci[i]) == *(short *)(master->dev_addr)) - return -EADDRINUSE; - } - - if (i == CONFIG_DLCI_MAX) - return -EMLINK; /* #### Alan: Comments on this ?? */ - - - flp->master[i] = master; - flp->dlci[i] = -*(short *)(master->dev_addr); - master->mtu = slave->mtu; - - if (netif_running(slave)) { - if (flp->config.station == FRAD_STATION_CPE) - sdla_reconfig(slave); - else - sdla_cmd(slave, SDLA_ADD_DLCI, 0, 0, master->dev_addr, sizeof(short), NULL, NULL); - } - - return 0; -} - -static int sdla_deassoc(struct net_device *slave, struct net_device *master) -{ - struct frad_local *flp; - int i; - - flp = netdev_priv(slave); - - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->master[i] == master) - break; - - if (i == CONFIG_DLCI_MAX) - return -ENODEV; - - flp->master[i] = NULL; - flp->dlci[i] = 0; - - - if (netif_running(slave)) { - if (flp->config.station == FRAD_STATION_CPE) - sdla_reconfig(slave); - else - sdla_cmd(slave, SDLA_DELETE_DLCI, 0, 0, master->dev_addr, sizeof(short), NULL, NULL); - } - - return 0; -} - -static int sdla_dlci_conf(struct net_device *slave, struct net_device *master, int get) -{ - struct frad_local *flp; - struct dlci_local *dlp; - int i; - short len, ret; - - flp = netdev_priv(slave); - - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->master[i] == master) - break; - - if (i == CONFIG_DLCI_MAX) - return -ENODEV; - - dlp = netdev_priv(master); - - ret = SDLA_RET_OK; - len = sizeof(struct dlci_conf); - if (netif_running(slave)) { - if (get) - ret = sdla_cmd(slave, SDLA_READ_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0, - NULL, 0, &dlp->config, &len); - else - ret = sdla_cmd(slave, SDLA_SET_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0, - &dlp->config, sizeof(struct dlci_conf) - 4 * sizeof(short), NULL, NULL); - } - - return ret == SDLA_RET_OK ? 
0 : -EIO; -} - -/************************** - * - * now for the Linux driver - * - **************************/ - -/* NOTE: the DLCI driver deals with freeing the SKB!! */ -static netdev_tx_t sdla_transmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct frad_local *flp; - int ret, addr, accept, i; - short size; - unsigned long flags; - struct buf_entry *pbuf; - - flp = netdev_priv(dev); - ret = 0; - accept = 1; - - netif_stop_queue(dev); - - /* - * stupid GateD insists on setting up the multicast router thru us - * and we're ill equipped to handle a non Frame Relay packet at this - * time! - */ - - accept = 1; - switch (dev->type) - { - case ARPHRD_FRAD: - if (skb->dev->type != ARPHRD_DLCI) - { - netdev_warn(dev, "Non DLCI device, type %i, tried to send on FRAD module\n", - skb->dev->type); - accept = 0; - } - break; - default: - netdev_warn(dev, "unknown firmware type 0x%04X\n", - dev->type); - accept = 0; - break; - } - if (accept) - { - /* this is frame specific, but till there's a PPP module, it's the default */ - switch (flp->type) - { - case SDLA_S502A: - case SDLA_S502E: - ret = sdla_cmd(dev, SDLA_INFORMATION_WRITE, *(short *)(skb->dev->dev_addr), 0, skb->data, skb->len, NULL, NULL); - break; - case SDLA_S508: - size = sizeof(addr); - ret = sdla_cmd(dev, SDLA_INFORMATION_WRITE, *(short *)(skb->dev->dev_addr), 0, NULL, skb->len, &addr, &size); - if (ret == SDLA_RET_OK) - { - - spin_lock_irqsave(&sdla_lock, flags); - SDLA_WINDOW(dev, addr); - pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK)); - __sdla_write(dev, pbuf->buf_addr, skb->data, skb->len); - SDLA_WINDOW(dev, addr); - pbuf->opp_flag = 1; - spin_unlock_irqrestore(&sdla_lock, flags); - } - break; - } - - switch (ret) - { - case SDLA_RET_OK: - dev->stats.tx_packets++; - break; - - case SDLA_RET_CIR_OVERFLOW: - case SDLA_RET_BUF_OVERSIZE: - case SDLA_RET_NO_BUFS: - dev->stats.tx_dropped++; - break; - - default: - dev->stats.tx_errors++; - break; - } - } - netif_wake_queue(dev); - for(i=0;i<CONFIG_DLCI_MAX;i++) - { - if(flp->master[i]!=NULL) - netif_wake_queue(flp->master[i]); - } - - dev_kfree_skb(skb); - return NETDEV_TX_OK; -} - -static void sdla_receive(struct net_device *dev) -{ - struct net_device *master; - struct frad_local *flp; - struct dlci_local *dlp; - struct sk_buff *skb; - - struct sdla_cmd *cmd; - struct buf_info *pbufi; - struct buf_entry *pbuf; - - unsigned long flags; - int i=0, received, success, addr, buf_base, buf_top; - short dlci, len, len2, split; - - flp = netdev_priv(dev); - success = 1; - received = addr = buf_top = buf_base = 0; - len = dlci = 0; - skb = NULL; - master = NULL; - cmd = NULL; - pbufi = NULL; - pbuf = NULL; - - spin_lock_irqsave(&sdla_lock, flags); - - switch (flp->type) - { - case SDLA_S502A: - case SDLA_S502E: - cmd = (void *) (dev->mem_start + (SDLA_502_RCV_BUF & SDLA_ADDR_MASK)); - SDLA_WINDOW(dev, SDLA_502_RCV_BUF); - success = cmd->opp_flag; - if (!success) - break; - - dlci = cmd->dlci; - len = cmd->length; - break; - - case SDLA_S508: - pbufi = (void *) (dev->mem_start + (SDLA_508_RXBUF_INFO & SDLA_ADDR_MASK)); - SDLA_WINDOW(dev, SDLA_508_RXBUF_INFO); - pbuf = (void *) (dev->mem_start + ((pbufi->rse_base + flp->buffer * sizeof(struct buf_entry)) & SDLA_ADDR_MASK)); - success = pbuf->opp_flag; - if (!success) - break; - - buf_top = pbufi->buf_top; - buf_base = pbufi->buf_base; - dlci = pbuf->dlci; - len = pbuf->length; - addr = pbuf->buf_addr; - break; - } - - /* common code, find the DLCI and get the SKB */ - if (success) - { - for (i=0;i<CONFIG_DLCI_MAX;i++) - 
if (flp->dlci[i] == dlci) - break; - - if (i == CONFIG_DLCI_MAX) - { - netdev_notice(dev, "Received packet from invalid DLCI %i, ignoring\n", - dlci); - dev->stats.rx_errors++; - success = 0; - } - } - - if (success) - { - master = flp->master[i]; - skb = dev_alloc_skb(len + sizeof(struct frhdr)); - if (skb == NULL) - { - netdev_notice(dev, "Memory squeeze, dropping packet\n"); - dev->stats.rx_dropped++; - success = 0; - } - else - skb_reserve(skb, sizeof(struct frhdr)); - } - - /* pick up the data */ - switch (flp->type) - { - case SDLA_S502A: - case SDLA_S502E: - if (success) - __sdla_read(dev, SDLA_502_RCV_BUF + SDLA_502_DATA_OFS, skb_put(skb,len), len); - - SDLA_WINDOW(dev, SDLA_502_RCV_BUF); - cmd->opp_flag = 0; - break; - - case SDLA_S508: - if (success) - { - /* is this buffer split off the end of the internal ring buffer */ - split = addr + len > buf_top + 1 ? len - (buf_top - addr + 1) : 0; - len2 = len - split; - - __sdla_read(dev, addr, skb_put(skb, len2), len2); - if (split) - __sdla_read(dev, buf_base, skb_put(skb, split), split); - } - - /* increment the buffer we're looking at */ - SDLA_WINDOW(dev, SDLA_508_RXBUF_INFO); - flp->buffer = (flp->buffer + 1) % pbufi->rse_num; - pbuf->opp_flag = 0; - break; - } - - if (success) - { - dev->stats.rx_packets++; - dlp = netdev_priv(master); - (*dlp->receive)(skb, master); - } - - spin_unlock_irqrestore(&sdla_lock, flags); -} - -static irqreturn_t sdla_isr(int dummy, void *dev_id) -{ - struct net_device *dev; - struct frad_local *flp; - char byte; - - dev = dev_id; - - flp = netdev_priv(dev); - - if (!flp->initialized) - { - netdev_warn(dev, "irq %d for uninitialized device\n", dev->irq); - return IRQ_NONE; - } - - byte = sdla_byte(dev, flp->type == SDLA_S508 ? SDLA_508_IRQ_INTERFACE : SDLA_502_IRQ_INTERFACE); - switch (byte) - { - case SDLA_INTR_RX: - sdla_receive(dev); - break; - - /* the command will get an error return, which is processed above */ - case SDLA_INTR_MODEM: - case SDLA_INTR_STATUS: - sdla_cmd(dev, SDLA_READ_DLC_STATUS, 0, 0, NULL, 0, NULL, NULL); - break; - - case SDLA_INTR_TX: - case SDLA_INTR_COMPLETE: - case SDLA_INTR_TIMER: - netdev_warn(dev, "invalid irq flag 0x%02X\n", byte); - break; - } - - /* the S502E requires a manual acknowledgement of the interrupt */ - if (flp->type == SDLA_S502E) - { - flp->state &= ~SDLA_S502E_INTACK; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - flp->state |= SDLA_S502E_INTACK; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - } - - /* this clears the byte, informing the Z80 we're done */ - byte = 0; - sdla_write(dev, flp->type == SDLA_S508 ? 
SDLA_508_IRQ_INTERFACE : SDLA_502_IRQ_INTERFACE, &byte, sizeof(byte)); - return IRQ_HANDLED; -} - -static void sdla_poll(struct timer_list *t) -{ - struct frad_local *flp = from_timer(flp, t, timer); - struct net_device *dev = flp->dev; - - if (sdla_byte(dev, SDLA_502_RCV_BUF)) - sdla_receive(dev); - - flp->timer.expires = 1; - add_timer(&flp->timer); -} - -static int sdla_close(struct net_device *dev) -{ - struct frad_local *flp; - struct intr_info intr; - int len, i; - short dlcis[CONFIG_DLCI_MAX]; - - flp = netdev_priv(dev); - - len = 0; - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i]) - dlcis[len++] = abs(flp->dlci[i]); - len *= 2; - - if (flp->config.station == FRAD_STATION_NODE) - { - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i] > 0) - sdla_cmd(dev, SDLA_DEACTIVATE_DLCI, 0, 0, dlcis, len, NULL, NULL); - sdla_cmd(dev, SDLA_DELETE_DLCI, 0, 0, &flp->dlci[i], sizeof(flp->dlci[i]), NULL, NULL); - } - - memset(&intr, 0, sizeof(intr)); - /* let's start up the reception */ - switch(flp->type) - { - case SDLA_S502A: - del_timer(&flp->timer); - break; - - case SDLA_S502E: - sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(char) + sizeof(short), NULL, NULL); - flp->state &= ~SDLA_S502E_INTACK; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - - case SDLA_S507: - break; - - case SDLA_S508: - sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(struct intr_info), NULL, NULL); - flp->state &= ~SDLA_S508_INTEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - break; - } - - sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL); - - netif_stop_queue(dev); - - return 0; -} - -struct conf_data { - struct frad_conf config; - short dlci[CONFIG_DLCI_MAX]; -}; - -static int sdla_open(struct net_device *dev) -{ - struct frad_local *flp; - struct dlci_local *dlp; - struct conf_data data; - struct intr_info intr; - int len, i; - char byte; - - flp = netdev_priv(dev); - - if (!flp->initialized) - return -EPERM; - - if (!flp->configured) - return -EPERM; - - /* time to send in the configuration */ - len = 0; - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i]) - data.dlci[len++] = abs(flp->dlci[i]); - len *= 2; - - memcpy(&data.config, &flp->config, sizeof(struct frad_conf)); - len += sizeof(struct frad_conf); - - sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL); - sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, 0, 0, &data, len, NULL, NULL); - - if (flp->type == SDLA_S508) - flp->buffer = 0; - - sdla_cmd(dev, SDLA_ENABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL); - - /* let's start up the reception */ - memset(&intr, 0, sizeof(intr)); - switch(flp->type) - { - case SDLA_S502A: - flp->timer.expires = 1; - add_timer(&flp->timer); - break; - - case SDLA_S502E: - flp->state |= SDLA_S502E_ENABLE; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - flp->state |= SDLA_S502E_INTACK; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - byte = 0; - sdla_write(dev, SDLA_502_IRQ_INTERFACE, &byte, sizeof(byte)); - intr.flags = SDLA_INTR_RX | SDLA_INTR_STATUS | SDLA_INTR_MODEM; - sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(char) + sizeof(short), NULL, NULL); - break; - - case SDLA_S507: - break; - - case SDLA_S508: - flp->state |= SDLA_S508_INTEN; - outb(flp->state, dev->base_addr + SDLA_REG_CONTROL); - byte = 0; - sdla_write(dev, SDLA_508_IRQ_INTERFACE, &byte, sizeof(byte)); - intr.flags = SDLA_INTR_RX | SDLA_INTR_STATUS | SDLA_INTR_MODEM; - intr.irq = dev->irq; - sdla_cmd(dev, SDLA_SET_IRQ_TRIGGER, 0, 0, &intr, sizeof(struct 
intr_info), NULL, NULL); - break; - } - - if (flp->config.station == FRAD_STATION_CPE) - { - byte = SDLA_ICS_STATUS_ENQ; - sdla_cmd(dev, SDLA_ISSUE_IN_CHANNEL_SIGNAL, 0, 0, &byte, sizeof(byte), NULL, NULL); - } - else - { - sdla_cmd(dev, SDLA_ADD_DLCI, 0, 0, data.dlci, len - sizeof(struct frad_conf), NULL, NULL); - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i] > 0) - sdla_cmd(dev, SDLA_ACTIVATE_DLCI, 0, 0, &flp->dlci[i], 2*sizeof(flp->dlci[i]), NULL, NULL); - } - - /* configure any specific DLCI settings */ - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i]) - { - dlp = netdev_priv(flp->master[i]); - if (dlp->configured) - sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, abs(flp->dlci[i]), 0, &dlp->config, sizeof(struct dlci_conf), NULL, NULL); - } - - netif_start_queue(dev); - - return 0; -} - -static int sdla_config(struct net_device *dev, struct frad_conf __user *conf, int get) -{ - struct frad_local *flp; - struct conf_data data; - int i; - short size; - - if (dev->type == 0xFFFF) - return -EUNATCH; - - flp = netdev_priv(dev); - - if (!get) - { - if (netif_running(dev)) - return -EBUSY; - - if(copy_from_user(&data.config, conf, sizeof(struct frad_conf))) - return -EFAULT; - - if (data.config.station & ~FRAD_STATION_NODE) - return -EINVAL; - - if (data.config.flags & ~FRAD_VALID_FLAGS) - return -EINVAL; - - if ((data.config.kbaud < 0) || - ((data.config.kbaud > 128) && (flp->type != SDLA_S508))) - return -EINVAL; - - if (data.config.clocking & ~(FRAD_CLOCK_INT | SDLA_S508_PORT_RS232)) - return -EINVAL; - - if ((data.config.mtu < 0) || (data.config.mtu > SDLA_MAX_MTU)) - return -EINVAL; - - if ((data.config.T391 < 5) || (data.config.T391 > 30)) - return -EINVAL; - - if ((data.config.T392 < 5) || (data.config.T392 > 30)) - return -EINVAL; - - if ((data.config.N391 < 1) || (data.config.N391 > 255)) - return -EINVAL; - - if ((data.config.N392 < 1) || (data.config.N392 > 10)) - return -EINVAL; - - if ((data.config.N393 < 1) || (data.config.N393 > 10)) - return -EINVAL; - - memcpy(&flp->config, &data.config, sizeof(struct frad_conf)); - flp->config.flags |= SDLA_DIRECT_RECV; - - if (flp->type == SDLA_S508) - flp->config.flags |= SDLA_TX70_RX30; - - if (dev->mtu != flp->config.mtu) - { - /* this is required to change the MTU */ - dev->mtu = flp->config.mtu; - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->master[i]) - flp->master[i]->mtu = flp->config.mtu; - } - - flp->config.mtu += sizeof(struct frhdr); - - /* off to the races! */ - if (!flp->configured) - sdla_start(dev); - - flp->configured = 1; - } - else - { - /* no sense reading if the CPU isn't started */ - if (netif_running(dev)) - { - size = sizeof(data); - if (sdla_cmd(dev, SDLA_READ_DLCI_CONFIGURATION, 0, 0, NULL, 0, &data, &size) != SDLA_RET_OK) - return -EIO; - } - else - if (flp->configured) - memcpy(&data.config, &flp->config, sizeof(struct frad_conf)); - else - memset(&data.config, 0, sizeof(struct frad_conf)); - - memcpy(&flp->config, &data.config, sizeof(struct frad_conf)); - data.config.flags &= FRAD_VALID_FLAGS; - data.config.mtu -= data.config.mtu > sizeof(struct frhdr) ? 
sizeof(struct frhdr) : data.config.mtu; - return copy_to_user(conf, &data.config, sizeof(struct frad_conf))?-EFAULT:0; - } - - return 0; -} - -static int sdla_xfer(struct net_device *dev, struct sdla_mem __user *info, int read) -{ - struct sdla_mem mem; - char *temp; - - if(copy_from_user(&mem, info, sizeof(mem))) - return -EFAULT; - - if (read) - { - temp = kzalloc(mem.len, GFP_KERNEL); - if (!temp) - return -ENOMEM; - sdla_read(dev, mem.addr, temp, mem.len); - if(copy_to_user(mem.data, temp, mem.len)) - { - kfree(temp); - return -EFAULT; - } - kfree(temp); - } - else - { - temp = memdup_user(mem.data, mem.len); - if (IS_ERR(temp)) - return PTR_ERR(temp); - sdla_write(dev, mem.addr, temp, mem.len); - kfree(temp); - } - return 0; -} - -static int sdla_reconfig(struct net_device *dev) -{ - struct frad_local *flp; - struct conf_data data; - int i, len; - - flp = netdev_priv(dev); - - len = 0; - for(i=0;i<CONFIG_DLCI_MAX;i++) - if (flp->dlci[i]) - data.dlci[len++] = flp->dlci[i]; - len *= 2; - - memcpy(&data, &flp->config, sizeof(struct frad_conf)); - len += sizeof(struct frad_conf); - - sdla_cmd(dev, SDLA_DISABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL); - sdla_cmd(dev, SDLA_SET_DLCI_CONFIGURATION, 0, 0, &data, len, NULL, NULL); - sdla_cmd(dev, SDLA_ENABLE_COMMUNICATIONS, 0, 0, NULL, 0, NULL, NULL); - - return 0; -} - -static int sdla_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct frad_local *flp; - - if(!capable(CAP_NET_ADMIN)) - return -EPERM; - - flp = netdev_priv(dev); - - if (!flp->initialized) - return -EINVAL; - - switch (cmd) - { - case FRAD_GET_CONF: - case FRAD_SET_CONF: - return sdla_config(dev, ifr->ifr_data, cmd == FRAD_GET_CONF); - - case SDLA_IDENTIFY: - ifr->ifr_flags = flp->type; - break; - - case SDLA_CPUSPEED: - return sdla_cpuspeed(dev, ifr); - -/* ========================================================== -NOTE: This is rather a useless action right now, as the - current driver does not support protocols other than - FR. However, Sangoma has modules for a number of - other protocols in the works. -============================================================*/ - case SDLA_PROTOCOL: - if (flp->configured) - return -EALREADY; - - switch (ifr->ifr_flags) - { - case ARPHRD_FRAD: - dev->type = ifr->ifr_flags; - break; - default: - return -ENOPROTOOPT; - } - break; - - case SDLA_CLEARMEM: - sdla_clear(dev); - break; - - case SDLA_WRITEMEM: - case SDLA_READMEM: - if(!capable(CAP_SYS_RAWIO)) - return -EPERM; - return sdla_xfer(dev, ifr->ifr_data, cmd == SDLA_READMEM); - - case SDLA_START: - sdla_start(dev); - break; - - case SDLA_STOP: - sdla_stop(dev); - break; - - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int sdla_change_mtu(struct net_device *dev, int new_mtu) -{ - if (netif_running(dev)) - return -EBUSY; - - /* for now, you can't change the MTU! 
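sdla_xfer() above uses the two stock idioms for moving an ioctl payload across the user/kernel boundary: memdup_user() inbound (allocate and copy in one call, ERR_PTR on failure) and kzalloc() plus copy_to_user() outbound. A minimal sketch of the inbound half; demo_store() and demo_consume() are illustrative names, not driver functions:

	static int demo_store(void __user *udata, size_t len)
	{
		void *buf = memdup_user(udata, len);	/* kmalloc() + copy_from_user() in one step */

		if (IS_ERR(buf))
			return PTR_ERR(buf);		/* already an errno: -ENOMEM or -EFAULT */
		demo_consume(buf, len);			/* hypothetical consumer of the copy */
		kfree(buf);
		return 0;
	}

There is no error-code ambiguity to untangle here: memdup_user() never returns NULL, so the IS_ERR()/PTR_ERR() pair is the complete failure check.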
*/ - return -EOPNOTSUPP; -} - -static int sdla_set_config(struct net_device *dev, struct ifmap *map) -{ - struct frad_local *flp; - int i; - char byte; - unsigned base; - int err = -EINVAL; - - flp = netdev_priv(dev); - - if (flp->initialized) - return -EINVAL; - - for(i=0; i < ARRAY_SIZE(valid_port); i++) - if (valid_port[i] == map->base_addr) - break; - - if (i == ARRAY_SIZE(valid_port)) - return -EINVAL; - - if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){ - pr_warn("io-port 0x%04lx in use\n", dev->base_addr); - return -EINVAL; - } - base = map->base_addr; - - /* test for card types, S502A, S502E, S507, S508 */ - /* these tests shut down the card completely, so clear the state */ - flp->type = SDLA_UNKNOWN; - flp->state = 0; - - for(i=1;i<SDLA_IO_EXTENTS;i++) - if (inb(base + i) != 0xFF) - break; - - if (i == SDLA_IO_EXTENTS) { - outb(SDLA_HALT, base + SDLA_REG_Z80_CONTROL); - if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x08) { - outb(SDLA_S502E_INTACK, base + SDLA_REG_CONTROL); - if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x0C) { - outb(SDLA_HALT, base + SDLA_REG_CONTROL); - flp->type = SDLA_S502E; - goto got_type; - } - } - } - - for(byte=inb(base),i=0;i<SDLA_IO_EXTENTS;i++) - if (inb(base + i) != byte) - break; - - if (i == SDLA_IO_EXTENTS) { - outb(SDLA_HALT, base + SDLA_REG_CONTROL); - if ((inb(base + SDLA_S502_STS) & 0x7E) == 0x30) { - outb(SDLA_S507_ENABLE, base + SDLA_REG_CONTROL); - if ((inb(base + SDLA_S502_STS) & 0x7E) == 0x32) { - outb(SDLA_HALT, base + SDLA_REG_CONTROL); - flp->type = SDLA_S507; - goto got_type; - } - } - } - - outb(SDLA_HALT, base + SDLA_REG_CONTROL); - if ((inb(base + SDLA_S508_STS) & 0x3F) == 0x00) { - outb(SDLA_S508_INTEN, base + SDLA_REG_CONTROL); - if ((inb(base + SDLA_S508_STS) & 0x3F) == 0x10) { - outb(SDLA_HALT, base + SDLA_REG_CONTROL); - flp->type = SDLA_S508; - goto got_type; - } - } - - outb(SDLA_S502A_HALT, base + SDLA_REG_CONTROL); - if (inb(base + SDLA_S502_STS) == 0x40) { - outb(SDLA_S502A_START, base + SDLA_REG_CONTROL); - if (inb(base + SDLA_S502_STS) == 0x40) { - outb(SDLA_S502A_INTEN, base + SDLA_REG_CONTROL); - if (inb(base + SDLA_S502_STS) == 0x44) { - outb(SDLA_S502A_START, base + SDLA_REG_CONTROL); - flp->type = SDLA_S502A; - goto got_type; - } - } - } - - netdev_notice(dev, "Unknown card type\n"); - err = -ENODEV; - goto fail; - -got_type: - switch(base) { - case 0x270: - case 0x280: - case 0x380: - case 0x390: - if (flp->type != SDLA_S508 && flp->type != SDLA_S507) - goto fail; - } - - switch (map->irq) { - case 2: - if (flp->type != SDLA_S502E) - goto fail; - break; - - case 10: - case 11: - case 12: - case 15: - case 4: - if (flp->type != SDLA_S508 && flp->type != SDLA_S507) - goto fail; - break; - case 3: - case 5: - case 7: - if (flp->type == SDLA_S502A) - goto fail; - break; - - default: - goto fail; - } - - err = -EAGAIN; - if (request_irq(dev->irq, sdla_isr, 0, dev->name, dev)) - goto fail; - - if (flp->type == SDLA_S507) { - switch(dev->irq) { - case 3: - flp->state = SDLA_S507_IRQ3; - break; - case 4: - flp->state = SDLA_S507_IRQ4; - break; - case 5: - flp->state = SDLA_S507_IRQ5; - break; - case 7: - flp->state = SDLA_S507_IRQ7; - break; - case 10: - flp->state = SDLA_S507_IRQ10; - break; - case 11: - flp->state = SDLA_S507_IRQ11; - break; - case 12: - flp->state = SDLA_S507_IRQ12; - break; - case 15: - flp->state = SDLA_S507_IRQ15; - break; - } - } - - for(i=0; i < ARRAY_SIZE(valid_mem); i++) - if (valid_mem[i] == map->mem_start) - break; - - err = -EINVAL; - if (i == ARRAY_SIZE(valid_mem)) - goto fail2; - 
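The card probe above never trusts a single register read: each candidate type is tried by writing a control value, checking that the status bits land on the expected pattern, then forcing a second transition and checking again before the type is accepted. One probe step lifted out as a sketch; demo_probe_s502e() is an illustrative wrapper, while the register names and the 0x08/0x0C status patterns come from the code above:

	static int demo_probe_s502e(unsigned int base)
	{
		outb(SDLA_HALT, base + SDLA_REG_Z80_CONTROL);
		if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x08) {	/* first expected reaction */
			outb(SDLA_S502E_INTACK, base + SDLA_REG_CONTROL);
			if ((inb(base + SDLA_S502_STS) & 0x0F) == 0x0C) { /* second one confirms */
				outb(SDLA_HALT, base + SDLA_REG_CONTROL);
				return SDLA_S502E;
			}
		}
		return SDLA_UNKNOWN;
	}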
- if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E) - goto fail2; - - if (flp->type != SDLA_S507 && map->mem_start >> 16 == 0x0B) - goto fail2; - - if (flp->type == SDLA_S507 && map->mem_start >> 16 == 0x0D) - goto fail2; - - byte = flp->type != SDLA_S508 ? SDLA_8K_WINDOW : 0; - byte |= (map->mem_start & 0xF000) >> (12 + (flp->type == SDLA_S508 ? 1 : 0)); - switch(flp->type) { - case SDLA_S502A: - case SDLA_S502E: - switch (map->mem_start >> 16) { - case 0x0A: - byte |= SDLA_S502_SEG_A; - break; - case 0x0C: - byte |= SDLA_S502_SEG_C; - break; - case 0x0D: - byte |= SDLA_S502_SEG_D; - break; - case 0x0E: - byte |= SDLA_S502_SEG_E; - break; - } - break; - case SDLA_S507: - switch (map->mem_start >> 16) { - case 0x0A: - byte |= SDLA_S507_SEG_A; - break; - case 0x0B: - byte |= SDLA_S507_SEG_B; - break; - case 0x0C: - byte |= SDLA_S507_SEG_C; - break; - case 0x0E: - byte |= SDLA_S507_SEG_E; - break; - } - break; - case SDLA_S508: - switch (map->mem_start >> 16) { - case 0x0A: - byte |= SDLA_S508_SEG_A; - break; - case 0x0C: - byte |= SDLA_S508_SEG_C; - break; - case 0x0D: - byte |= SDLA_S508_SEG_D; - break; - case 0x0E: - byte |= SDLA_S508_SEG_E; - break; - } - break; - } - - /* set the memory bits, and enable access */ - outb(byte, base + SDLA_REG_PC_WINDOW); - - switch(flp->type) - { - case SDLA_S502E: - flp->state = SDLA_S502E_ENABLE; - break; - case SDLA_S507: - flp->state |= SDLA_MEMEN; - break; - case SDLA_S508: - flp->state = SDLA_MEMEN; - break; - } - outb(flp->state, base + SDLA_REG_CONTROL); - - dev->irq = map->irq; - dev->base_addr = base; - dev->mem_start = map->mem_start; - dev->mem_end = dev->mem_start + 0x2000; - flp->initialized = 1; - return 0; - -fail2: - free_irq(map->irq, dev); -fail: - release_region(base, SDLA_IO_EXTENTS); - return err; -} - -static const struct net_device_ops sdla_netdev_ops = { - .ndo_open = sdla_open, - .ndo_stop = sdla_close, - .ndo_do_ioctl = sdla_ioctl, - .ndo_set_config = sdla_set_config, - .ndo_start_xmit = sdla_transmit, - .ndo_change_mtu = sdla_change_mtu, -}; - -static void setup_sdla(struct net_device *dev) -{ - struct frad_local *flp = netdev_priv(dev); - - netdev_boot_setup_check(dev); - - dev->netdev_ops = &sdla_netdev_ops; - dev->flags = 0; - dev->type = 0xFFFF; - dev->hard_header_len = 0; - dev->addr_len = 0; - dev->mtu = SDLA_MAX_MTU; - - flp->activate = sdla_activate; - flp->deactivate = sdla_deactivate; - flp->assoc = sdla_assoc; - flp->deassoc = sdla_deassoc; - flp->dlci_conf = sdla_dlci_conf; - flp->dev = dev; - - timer_setup(&flp->timer, sdla_poll, 0); - flp->timer.expires = 1; -} - -static struct net_device *sdla; - -static int __init init_sdla(void) -{ - int err; - - printk("%s.\n", version); - - sdla = alloc_netdev(sizeof(struct frad_local), "sdla0", - NET_NAME_UNKNOWN, setup_sdla); - if (!sdla) - return -ENOMEM; - - err = register_netdev(sdla); - if (err) - free_netdev(sdla); - - return err; -} - -static void __exit exit_sdla(void) -{ - struct frad_local *flp = netdev_priv(sdla); - - unregister_netdev(sdla); - if (flp->initialized) { - free_irq(sdla->irq, sdla); - release_region(sdla->base_addr, SDLA_IO_EXTENTS); - } - del_timer_sync(&flp->timer); - free_netdev(sdla); -} - -MODULE_LICENSE("GPL"); - -module_init(init_sdla); -module_exit(exit_sdla); diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c deleted file mode 100644 index 54b1a5aee82d..000000000000 --- a/drivers/net/wan/x25_asy.c +++ /dev/null @@ -1,836 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Things to sort out: - * - 
* o tbusy handling - * o allow users to set the parameters - * o sync/async switching ? - * - * Note: This does _not_ implement CCITT X.25 asynchronous framing - * recommendations. It's primarily for testing purposes. If you wanted - * to do CCITT then in theory all you need is to nick the HDLC async - * checksum routines from ppp.c - * Changes: - * - * 2000-10-29 Henner Eisen lapb_data_indication() return status. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> - -#include <linux/uaccess.h> -#include <linux/bitops.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/tty.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/if_arp.h> -#include <linux/lapb.h> -#include <linux/init.h> -#include <linux/rtnetlink.h> -#include <linux/slab.h> -#include <net/x25device.h> -#include "x25_asy.h" - -static struct net_device **x25_asy_devs; -static int x25_asy_maxdev = SL_NRUNIT; - -module_param(x25_asy_maxdev, int, 0); -MODULE_LICENSE("GPL"); - -static int x25_asy_esc(unsigned char *p, unsigned char *d, int len); -static void x25_asy_unesc(struct x25_asy *sl, unsigned char c); -static void x25_asy_setup(struct net_device *dev); - -/* Find a free X.25 channel, and link in this `tty' line. */ -static struct x25_asy *x25_asy_alloc(void) -{ - struct net_device *dev = NULL; - struct x25_asy *sl; - int i; - - if (x25_asy_devs == NULL) - return NULL; /* Master array missing ! */ - - for (i = 0; i < x25_asy_maxdev; i++) { - dev = x25_asy_devs[i]; - - /* Not allocated ? */ - if (dev == NULL) - break; - - sl = netdev_priv(dev); - /* Not in use ? */ - if (!test_and_set_bit(SLF_INUSE, &sl->flags)) - return sl; - } - - - /* Sorry, too many, all slots in use */ - if (i >= x25_asy_maxdev) - return NULL; - - /* If no channels are available, allocate one */ - if (!dev) { - char name[IFNAMSIZ]; - sprintf(name, "x25asy%d", i); - - dev = alloc_netdev(sizeof(struct x25_asy), name, - NET_NAME_UNKNOWN, x25_asy_setup); - if (!dev) - return NULL; - - /* Initialize channel control data */ - sl = netdev_priv(dev); - dev->base_addr = i; - - /* register device so that it can be ifconfig'ed */ - if (register_netdev(dev) == 0) { - /* (Re-)Set the INUSE bit. Very Important! */ - set_bit(SLF_INUSE, &sl->flags); - x25_asy_devs[i] = dev; - return sl; - } else { - pr_warn("%s(): register_netdev() failure\n", __func__); - free_netdev(dev); - } - } - return NULL; -} - - -/* Free an X.25 channel. */ -static void x25_asy_free(struct x25_asy *sl) -{ - /* Free all X.25 frame buffers. 
*/ - kfree(sl->rbuff); - sl->rbuff = NULL; - kfree(sl->xbuff); - sl->xbuff = NULL; - - if (!test_and_clear_bit(SLF_INUSE, &sl->flags)) - netdev_err(sl->dev, "x25_asy_free for already free unit\n"); -} - -static int x25_asy_change_mtu(struct net_device *dev, int newmtu) -{ - struct x25_asy *sl = netdev_priv(dev); - unsigned char *xbuff, *rbuff; - int len; - - len = 2 * newmtu; - xbuff = kmalloc(len + 4, GFP_ATOMIC); - rbuff = kmalloc(len + 4, GFP_ATOMIC); - - if (xbuff == NULL || rbuff == NULL) { - kfree(xbuff); - kfree(rbuff); - return -ENOMEM; - } - - spin_lock_bh(&sl->lock); - xbuff = xchg(&sl->xbuff, xbuff); - if (sl->xleft) { - if (sl->xleft <= len) { - memcpy(sl->xbuff, sl->xhead, sl->xleft); - } else { - sl->xleft = 0; - dev->stats.tx_dropped++; - } - } - sl->xhead = sl->xbuff; - - rbuff = xchg(&sl->rbuff, rbuff); - if (sl->rcount) { - if (sl->rcount <= len) { - memcpy(sl->rbuff, rbuff, sl->rcount); - } else { - sl->rcount = 0; - dev->stats.rx_over_errors++; - set_bit(SLF_ERROR, &sl->flags); - } - } - - dev->mtu = newmtu; - sl->buffsize = len; - - spin_unlock_bh(&sl->lock); - - kfree(xbuff); - kfree(rbuff); - return 0; -} - - -/* Set the "sending" flag. This must be atomic, hence the ASM. */ - -static inline void x25_asy_lock(struct x25_asy *sl) -{ - netif_stop_queue(sl->dev); -} - - -/* Clear the "sending" flag. This must be atomic, hence the ASM. */ - -static inline void x25_asy_unlock(struct x25_asy *sl) -{ - netif_wake_queue(sl->dev); -} - -/* Send an LAPB frame to the LAPB module to process. */ - -static void x25_asy_bump(struct x25_asy *sl) -{ - struct net_device *dev = sl->dev; - struct sk_buff *skb; - int count; - int err; - - count = sl->rcount; - dev->stats.rx_bytes += count; - - skb = dev_alloc_skb(count); - if (skb == NULL) { - netdev_warn(sl->dev, "memory squeeze, dropping packet\n"); - dev->stats.rx_dropped++; - return; - } - skb_put_data(skb, sl->rbuff, count); - err = lapb_data_received(sl->dev, skb); - if (err != LAPB_OK) { - kfree_skb(skb); - printk(KERN_DEBUG "x25_asy: data received err - %d\n", err); - } else { - dev->stats.rx_packets++; - } -} - -/* Encapsulate one IP datagram and stuff into a TTY queue. */ -static void x25_asy_encaps(struct x25_asy *sl, unsigned char *icp, int len) -{ - unsigned char *p; - int actual, count, mtu = sl->dev->mtu; - - if (len > mtu) { - /* Sigh, shouldn't occur BUT ... */ - len = mtu; - printk(KERN_DEBUG "%s: truncating oversized transmit packet!\n", - sl->dev->name); - sl->dev->stats.tx_dropped++; - x25_asy_unlock(sl); - return; - } - - p = icp; - count = x25_asy_esc(p, sl->xbuff, len); - - /* Order of next two lines is *very* important. - * When we are sending a little amount of data, - * the transfer may be completed inside driver.write() - * routine, because it's running with interrupts enabled. - * In this case we *never* got WRITE_WAKEUP event, - * if we did not request it before write operation. - * 14 Oct 1994 Dmitry Gorodchanin. - */ - set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - actual = sl->tty->ops->write(sl->tty, sl->xbuff, count); - sl->xleft = count - actual; - sl->xhead = sl->xbuff + actual; -} - -/* - * Called by the driver when there's room for more data. If we have - * more packets to send, we send them here. - */ -static void x25_asy_write_wakeup(struct tty_struct *tty) -{ - int actual; - struct x25_asy *sl = tty->disc_data; - - /* First make sure we're connected. 
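x25_asy_change_mtu() above is a compact example of resizing buffers that another context may still be filling: allocate the replacements first, swap the pointers with xchg() while holding the lock (salvaging any queued bytes that still fit), and free the old buffers only after the lock is dropped. The skeleton of that pattern, assuming a hypothetical struct demo with a single buffer:

	static int demo_resize(struct demo *d, size_t newlen)
	{
		unsigned char *nbuf = kmalloc(newlen, GFP_ATOMIC);

		if (!nbuf)
			return -ENOMEM;
		spin_lock_bh(&d->lock);
		nbuf = xchg(&d->buf, nbuf);	/* nbuf now points at the old buffer */
		d->len = newlen;
		spin_unlock_bh(&d->lock);
		kfree(nbuf);			/* old buffer freed outside the lock */
		return 0;
	}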
*/ - if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) - return; - - if (sl->xleft <= 0) { - /* Now serial buffer is almost free & we can start - * transmission of another packet */ - sl->dev->stats.tx_packets++; - clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - x25_asy_unlock(sl); - return; - } - - actual = tty->ops->write(tty, sl->xhead, sl->xleft); - sl->xleft -= actual; - sl->xhead += actual; -} - -static void x25_asy_timeout(struct net_device *dev, unsigned int txqueue) -{ - struct x25_asy *sl = netdev_priv(dev); - - spin_lock(&sl->lock); - if (netif_queue_stopped(dev)) { - /* Maybe we must check transmitter timeout here ? - * 14 Oct 1994 Dmitry Gorodchanin. - */ - netdev_warn(dev, "transmit timed out, %s?\n", - (tty_chars_in_buffer(sl->tty) || sl->xleft) ? - "bad line quality" : "driver error"); - sl->xleft = 0; - clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - x25_asy_unlock(sl); - } - spin_unlock(&sl->lock); -} - -/* Encapsulate an IP datagram and kick it into a TTY queue. */ - -static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct x25_asy *sl = netdev_priv(dev); - int err; - - if (!netif_running(sl->dev)) { - netdev_err(dev, "xmit call when iface is down\n"); - kfree_skb(skb); - return NETDEV_TX_OK; - } - - /* There should be a pseudo header of 1 byte added by upper layers. - * Check to make sure it is there before reading it. - */ - if (skb->len < 1) { - kfree_skb(skb); - return NETDEV_TX_OK; - } - - switch (skb->data[0]) { - case X25_IFACE_DATA: - break; - case X25_IFACE_CONNECT: /* Connection request .. do nothing */ - err = lapb_connect_request(dev); - if (err != LAPB_OK) - netdev_err(dev, "lapb_connect_request error: %d\n", - err); - kfree_skb(skb); - return NETDEV_TX_OK; - case X25_IFACE_DISCONNECT: /* do nothing - hang up ?? */ - err = lapb_disconnect_request(dev); - if (err != LAPB_OK) - netdev_err(dev, "lapb_disconnect_request error: %d\n", - err); - fallthrough; - default: - kfree_skb(skb); - return NETDEV_TX_OK; - } - skb_pull(skb, 1); /* Remove control byte */ - /* - * If we are busy already, too bad. We ought to be able - * to queue things at this point, to allow for a little - * frame buffer. Oh well... - * ----------------------------------------------------- - * I hate queues in X.25 driver. Maybe it's efficient, - * but for me latency is more important. ;) - * So, no queues ! - * 14 Oct 1994 Dmitry Gorodchanin. - */ - - err = lapb_data_request(dev, skb); - if (err != LAPB_OK) { - netdev_err(dev, "lapb_data_request error: %d\n", err); - kfree_skb(skb); - return NETDEV_TX_OK; - } - return NETDEV_TX_OK; -} - - -/* - * LAPB interface boilerplate - */ - -/* - * Called when I frame data arrive. We add a pseudo header for upper - * layers and pass it to upper layers. - */ - -static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb) -{ - if (skb_cow(skb, 1)) { - kfree_skb(skb); - return NET_RX_DROP; - } - skb_push(skb, 1); - skb->data[0] = X25_IFACE_DATA; - - skb->protocol = x25_type_trans(skb, dev); - - return netif_rx(skb); -} - -/* - * Data has emerged from the LAPB protocol machine. We don't handle - * busy cases too well. It's tricky to see how to do this nicely - - * perhaps lapb should allow us to bounce this ? 
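The transmit path above is built around partial writes: TTY_DO_WRITE_WAKEUP has to be requested before tty->ops->write() (the 1994 comment in x25_asy_encaps() explains the lost-event race), and xleft/xhead carry the unwritten remainder over to x25_asy_write_wakeup(). The two cooperating halves side by side, condensed from the code above:

	/* encaps: the first write may complete only partially */
	set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags);	/* request the event before writing */
	actual = sl->tty->ops->write(sl->tty, sl->xbuff, count);
	sl->xleft = count - actual;			/* bytes still owed */
	sl->xhead = sl->xbuff + actual;

	/* write_wakeup: drain the remainder until xleft reaches zero */
	actual = tty->ops->write(tty, sl->xhead, sl->xleft);
	sl->xleft -= actual;
	sl->xhead += actual;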
- */ - -static void x25_asy_data_transmit(struct net_device *dev, struct sk_buff *skb) -{ - struct x25_asy *sl = netdev_priv(dev); - - spin_lock(&sl->lock); - if (netif_queue_stopped(sl->dev) || sl->tty == NULL) { - spin_unlock(&sl->lock); - netdev_err(dev, "tbusy drop\n"); - kfree_skb(skb); - return; - } - /* We were not busy, so we are now... :-) */ - if (skb != NULL) { - x25_asy_lock(sl); - dev->stats.tx_bytes += skb->len; - x25_asy_encaps(sl, skb->data, skb->len); - dev_kfree_skb(skb); - } - spin_unlock(&sl->lock); -} - -/* - * LAPB connection establish/down information. - */ - -static void x25_asy_connected(struct net_device *dev, int reason) -{ - struct x25_asy *sl = netdev_priv(dev); - struct sk_buff *skb; - unsigned char *ptr; - - skb = dev_alloc_skb(1); - if (skb == NULL) { - netdev_err(dev, "out of memory\n"); - return; - } - - ptr = skb_put(skb, 1); - *ptr = X25_IFACE_CONNECT; - - skb->protocol = x25_type_trans(skb, sl->dev); - netif_rx(skb); -} - -static void x25_asy_disconnected(struct net_device *dev, int reason) -{ - struct x25_asy *sl = netdev_priv(dev); - struct sk_buff *skb; - unsigned char *ptr; - - skb = dev_alloc_skb(1); - if (skb == NULL) { - netdev_err(dev, "out of memory\n"); - return; - } - - ptr = skb_put(skb, 1); - *ptr = X25_IFACE_DISCONNECT; - - skb->protocol = x25_type_trans(skb, sl->dev); - netif_rx(skb); -} - -static const struct lapb_register_struct x25_asy_callbacks = { - .connect_confirmation = x25_asy_connected, - .connect_indication = x25_asy_connected, - .disconnect_confirmation = x25_asy_disconnected, - .disconnect_indication = x25_asy_disconnected, - .data_indication = x25_asy_data_indication, - .data_transmit = x25_asy_data_transmit, -}; - - -/* Open the low-level part of the X.25 channel. Easy! */ -static int x25_asy_open(struct net_device *dev) -{ - struct x25_asy *sl = netdev_priv(dev); - unsigned long len; - - if (sl->tty == NULL) - return -ENODEV; - - /* - * Allocate the X.25 frame buffers: - * - * rbuff Receive buffer. - * xbuff Transmit buffer. - */ - - len = dev->mtu * 2; - - sl->rbuff = kmalloc(len + 4, GFP_KERNEL); - if (sl->rbuff == NULL) - goto norbuff; - sl->xbuff = kmalloc(len + 4, GFP_KERNEL); - if (sl->xbuff == NULL) - goto noxbuff; - - sl->buffsize = len; - sl->rcount = 0; - sl->xleft = 0; - sl->flags &= (1 << SLF_INUSE); /* Clear ESCAPE & ERROR flags */ - - return 0; - - /* Cleanup */ - kfree(sl->xbuff); - sl->xbuff = NULL; -noxbuff: - kfree(sl->rbuff); - sl->rbuff = NULL; -norbuff: - return -ENOMEM; -} - - -/* Close the low-level part of the X.25 channel. Easy! */ -static int x25_asy_close(struct net_device *dev) -{ - struct x25_asy *sl = netdev_priv(dev); - - spin_lock(&sl->lock); - if (sl->tty) - clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - - sl->rcount = 0; - sl->xleft = 0; - spin_unlock(&sl->lock); - return 0; -} - -/* - * Handle the 'receiver data ready' interrupt. - * This function is called by the 'tty_io' module in the kernel when - * a block of X.25 data has been received, which can now be decapsulated - * and sent on to some IP layer for further processing. 
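The connect and disconnect notifications above are the same three steps with a different tag byte; everything this glue hands upward carries a one-byte pseudo header (X25_IFACE_DATA in front of payload, a bare X25_IFACE_CONNECT or X25_IFACE_DISCONNECT for state changes). Folded into one helper as a sketch; demo_notify() is an illustrative name, and skb_put_u8() is assumed available as in current kernels:

	static void demo_notify(struct net_device *dev, u8 code)
	{
		struct sk_buff *skb = dev_alloc_skb(1);

		if (!skb)
			return;
		skb_put_u8(skb, code);		/* X25_IFACE_CONNECT or X25_IFACE_DISCONNECT */
		skb->protocol = x25_type_trans(skb, dev);
		netif_rx(skb);
	}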
- */ - -static void x25_asy_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) -{ - struct x25_asy *sl = tty->disc_data; - - if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) - return; - - - /* Read the characters out of the buffer */ - while (count--) { - if (fp && *fp++) { - if (!test_and_set_bit(SLF_ERROR, &sl->flags)) - sl->dev->stats.rx_errors++; - cp++; - continue; - } - x25_asy_unesc(sl, *cp++); - } -} - -/* - * Open the high-level part of the X.25 channel. - * This function is called by the TTY module when the - * X.25 line discipline is called for. Because we are - * sure the tty line exists, we only have to link it to - * a free X.25 channel... - */ - -static int x25_asy_open_tty(struct tty_struct *tty) -{ - struct x25_asy *sl; - int err; - - if (tty->ops->write == NULL) - return -EOPNOTSUPP; - - /* OK. Find a free X.25 channel to use. */ - sl = x25_asy_alloc(); - if (sl == NULL) - return -ENFILE; - - sl->tty = tty; - tty->disc_data = sl; - tty->receive_room = 65536; - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - - /* Restore default settings */ - sl->dev->type = ARPHRD_X25; - - /* Perform the low-level X.25 async init */ - err = x25_asy_open(sl->dev); - if (err) { - x25_asy_free(sl); - return err; - } - /* Done. We have linked the TTY line to a channel. */ - return 0; -} - - -/* - * Close down an X.25 channel. - * This means flushing out any pending queues, and then restoring the - * TTY line discipline to what it was before it got hooked to X.25 - * (which usually is TTY again). - */ -static void x25_asy_close_tty(struct tty_struct *tty) -{ - struct x25_asy *sl = tty->disc_data; - - /* First make sure we're connected. */ - if (!sl || sl->magic != X25_ASY_MAGIC) - return; - - rtnl_lock(); - if (sl->dev->flags & IFF_UP) - dev_close(sl->dev); - rtnl_unlock(); - - tty->disc_data = NULL; - sl->tty = NULL; - x25_asy_free(sl); -} - - /************************************************************************ - * STANDARD X.25 ENCAPSULATION * - ************************************************************************/ - -static int x25_asy_esc(unsigned char *s, unsigned char *d, int len) -{ - unsigned char *ptr = d; - unsigned char c; - - /* - * Send an initial END character to flush out any - * data that may have accumulated in the receiver - * due to line noise. - */ - - *ptr++ = X25_END; /* Send 10111110 bit seq */ - - /* - * For each byte in the packet, send the appropriate - * character sequence, according to the X.25 protocol. - */ - - while (len-- > 0) { - switch (c = *s++) { - case X25_END: - *ptr++ = X25_ESC; - *ptr++ = X25_ESCAPE(X25_END); - break; - case X25_ESC: - *ptr++ = X25_ESC; - *ptr++ = X25_ESCAPE(X25_ESC); - break; - default: - *ptr++ = c; - break; - } - } - *ptr++ = X25_END; - return ptr - d; -} - -static void x25_asy_unesc(struct x25_asy *sl, unsigned char s) -{ - - switch (s) { - case X25_END: - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && - sl->rcount >= 2) - x25_asy_bump(sl); - clear_bit(SLF_ESCAPE, &sl->flags); - sl->rcount = 0; - return; - case X25_ESC: - set_bit(SLF_ESCAPE, &sl->flags); - return; - case X25_ESCAPE(X25_ESC): - case X25_ESCAPE(X25_END): - if (test_and_clear_bit(SLF_ESCAPE, &sl->flags)) - s = X25_UNESCAPE(s); - break; - } - if (!test_bit(SLF_ERROR, &sl->flags)) { - if (sl->rcount < sl->buffsize) { - sl->rbuff[sl->rcount++] = s; - return; - } - sl->dev->stats.rx_over_errors++; - set_bit(SLF_ERROR, &sl->flags); - } -} - - -/* Perform I/O control on an active X.25 channel. 
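Given X25_END = 0x7E, X25_ESC = 0x7D and X25_ESCAPE(x) = (x) ^ 0x20 (all defined in x25_asy.h below), the stuffing above rewrites only the two reserved bytes and passes everything else through between the framing END bytes. A worked example of the encoding, independent of the driver:

	/* input payload: 0x41 0x7E 0x7D */
	u8 wire[] = { 0x7E,		/* leading END flushes line noise */
		      0x41,		/* ordinary byte, unchanged */
		      0x7D, 0x5E,	/* 0x7E -> ESC, then 0x7E ^ 0x20 */
		      0x7D, 0x5D,	/* 0x7D -> ESC, then 0x7D ^ 0x20 */
		      0x7E };		/* closing END */

x25_asy_unesc() inverts this with the same XOR (X25_UNESCAPE), so the escape transform is its own inverse apart from the framing bytes.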
*/ -static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct x25_asy *sl = tty->disc_data; - - /* First make sure we're connected. */ - if (!sl || sl->magic != X25_ASY_MAGIC) - return -EINVAL; - - switch (cmd) { - case SIOCGIFNAME: - if (copy_to_user((void __user *)arg, sl->dev->name, - strlen(sl->dev->name) + 1)) - return -EFAULT; - return 0; - case SIOCSIFHWADDR: - return -EINVAL; - default: - return tty_mode_ioctl(tty, file, cmd, arg); - } -} - -static int x25_asy_open_dev(struct net_device *dev) -{ - int err; - struct x25_asy *sl = netdev_priv(dev); - if (sl->tty == NULL) - return -ENODEV; - - err = lapb_register(dev, &x25_asy_callbacks); - if (err != LAPB_OK) - return -ENOMEM; - - netif_start_queue(dev); - - return 0; -} - -static int x25_asy_close_dev(struct net_device *dev) -{ - int err; - - netif_stop_queue(dev); - - err = lapb_unregister(dev); - if (err != LAPB_OK) - pr_err("%s: lapb_unregister error: %d\n", - __func__, err); - - x25_asy_close(dev); - - return 0; -} - -static const struct net_device_ops x25_asy_netdev_ops = { - .ndo_open = x25_asy_open_dev, - .ndo_stop = x25_asy_close_dev, - .ndo_start_xmit = x25_asy_xmit, - .ndo_tx_timeout = x25_asy_timeout, - .ndo_change_mtu = x25_asy_change_mtu, -}; - -/* Initialise the X.25 driver. Called by the device init code */ -static void x25_asy_setup(struct net_device *dev) -{ - struct x25_asy *sl = netdev_priv(dev); - - sl->magic = X25_ASY_MAGIC; - sl->dev = dev; - spin_lock_init(&sl->lock); - set_bit(SLF_INUSE, &sl->flags); - - /* - * Finish setting up the DEVICE info. - */ - - dev->mtu = SL_MTU; - dev->min_mtu = 0; - dev->max_mtu = 65534; - dev->netdev_ops = &x25_asy_netdev_ops; - dev->watchdog_timeo = HZ*20; - dev->hard_header_len = 0; - dev->addr_len = 0; - dev->type = ARPHRD_X25; - dev->tx_queue_len = 10; - - /* When transmitting data: - * first this driver removes a pseudo header of 1 byte, - * then the lapb module prepends an LAPB header of at most 3 bytes. - */ - dev->needed_headroom = 3 - 1; - - /* New-style flags. */ - dev->flags = IFF_NOARP; -} - -static struct tty_ldisc_ops x25_ldisc = { - .owner = THIS_MODULE, - .magic = TTY_LDISC_MAGIC, - .name = "X.25", - .open = x25_asy_open_tty, - .close = x25_asy_close_tty, - .ioctl = x25_asy_ioctl, - .receive_buf = x25_asy_receive_buf, - .write_wakeup = x25_asy_write_wakeup, -}; - -static int __init init_x25_asy(void) -{ - if (x25_asy_maxdev < 4) - x25_asy_maxdev = 4; /* Sanity */ - - pr_info("X.25 async: version 0.00 ALPHA (dynamic channels, max=%d)\n", - x25_asy_maxdev); - - x25_asy_devs = kcalloc(x25_asy_maxdev, sizeof(struct net_device *), - GFP_KERNEL); - if (!x25_asy_devs) - return -ENOMEM; - - return tty_register_ldisc(N_X25, &x25_ldisc); -} - - -static void __exit exit_x25_asy(void) -{ - struct net_device *dev; - int i; - - for (i = 0; i < x25_asy_maxdev; i++) { - dev = x25_asy_devs[i]; - if (dev) { - struct x25_asy *sl = netdev_priv(dev); - - spin_lock_bh(&sl->lock); - if (sl->tty) - tty_hangup(sl->tty); - - spin_unlock_bh(&sl->lock); - /* - * VSV = if dev->start==0, then device - * unregistered while close proc. 
- */ - unregister_netdev(dev); - free_netdev(dev); - } - } - - kfree(x25_asy_devs); - tty_unregister_ldisc(N_X25); -} - -module_init(init_x25_asy); -module_exit(exit_x25_asy); diff --git a/drivers/net/wan/x25_asy.h b/drivers/net/wan/x25_asy.h deleted file mode 100644 index 87798287c9ca..000000000000 --- a/drivers/net/wan/x25_asy.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_X25_ASY_H -#define _LINUX_X25_ASY_H - -/* X.25 asy configuration. */ -#define SL_NRUNIT 256 /* MAX number of X.25 channels; - This can be overridden with - insmod -ox25_asy_maxdev=nnn */ -#define SL_MTU 256 - -/* X25 async protocol characters. */ -#define X25_END 0x7E /* indicates end of frame */ -#define X25_ESC 0x7D /* indicates byte stuffing */ -#define X25_ESCAPE(x) ((x)^0x20) -#define X25_UNESCAPE(x) ((x)^0x20) - - -struct x25_asy { - int magic; - - /* Various fields. */ - spinlock_t lock; - struct tty_struct *tty; /* ptr to TTY structure */ - struct net_device *dev; /* easy for intr handling */ - - /* These are pointers to the malloc()ed frame buffers. */ - unsigned char *rbuff; /* receiver buffer */ - int rcount; /* received chars counter */ - unsigned char *xbuff; /* transmitter buffer */ - unsigned char *xhead; /* pointer to next byte to XMIT */ - int xleft; /* bytes left in XMIT queue */ - int buffsize; /* Max buffers sizes */ - - unsigned long flags; /* Flag values/ mode etc */ -#define SLF_INUSE 0 /* Channel in use */ -#define SLF_ESCAPE 1 /* ESC received */ -#define SLF_ERROR 2 /* Parity, etc. error */ -}; - - - -#define X25_ASY_MAGIC 0x5303 - -int x25_asy_init(struct net_device *dev); - -#endif /* _LINUX_X25_ASY.H */ diff --git a/drivers/net/wimax/Kconfig b/drivers/net/wimax/Kconfig deleted file mode 100644 index 2249e3d77a76..000000000000 --- a/drivers/net/wimax/Kconfig +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# WiMAX LAN device drivers configuration -# - - -comment "Enable WiMAX (Networking options) to see the WiMAX drivers" - depends on WIMAX = n - -if WIMAX - -menu "WiMAX Wireless Broadband devices" - -source "drivers/net/wimax/i2400m/Kconfig" - -endmenu - -endif diff --git a/drivers/net/wimax/Makefile b/drivers/net/wimax/Makefile deleted file mode 100644 index b4575bacf994..000000000000 --- a/drivers/net/wimax/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_WIMAX_I2400M) += i2400m/ diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index c9f65e96ccb0..a3ed49cd95c3 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -215,7 +215,7 @@ static const struct net_device_ops netdev_ops = { .ndo_open = wg_open, .ndo_stop = wg_stop, .ndo_start_xmit = wg_xmit, - .ndo_get_stats64 = ip_tunnel_get_stats64 + .ndo_get_stats64 = dev_get_tstats64 }; static void wg_destruct(struct net_device *dev) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 170a64e67709..7add2002ff4c 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -18,19 +18,6 @@ menuconfig WLAN if WLAN -config WIRELESS_WDS - bool "mac80211-based legacy WDS support" if EXPERT - help - This option enables the deprecated WDS support, the newer - mac80211-based 4-addr AP/client support supersedes it with - a much better feature set (HT, VHT, ...) 
- - We plan to remove this option and code, so if you find - that you have to enable it, please let us know on the - linux-wireless@vger.kernel.org mailing list, so we can - help you migrate to 4-addr AP/client (or, if it's really - necessary, give up on our plan of removing it). - source "drivers/net/wireless/admtek/Kconfig" source "drivers/net/wireless/ath/Kconfig" source "drivers/net/wireless/atmel/Kconfig" diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index e06b74a54a69..13b4f5f50f8a 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -661,7 +661,6 @@ struct ath9k_vif_iter_data { int naps; /* number of AP vifs */ int nmeshes; /* number of mesh vifs */ int nstations; /* number of station vifs */ - int nwds; /* number of WDS vifs */ int nadhocs; /* number of adhoc vifs */ int nocbs; /* number of OCB vifs */ int nbcnvifs; /* number of beaconing vifs */ diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 26ea51a72156..017a43bc400c 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -735,10 +735,10 @@ static int read_file_misc(struct seq_file *file, void *data) ath9k_calculate_iter_data(sc, ctx, &iter_data); seq_printf(file, - "VIFS: CTX %i(%i) AP: %i STA: %i MESH: %i WDS: %i", + "VIFS: CTX %i(%i) AP: %i STA: %i MESH: %i", i++, (int)(ctx->assigned), iter_data.naps, iter_data.nstations, - iter_data.nmeshes, iter_data.nwds); + iter_data.nmeshes); seq_printf(file, " ADHOC: %i OCB: %i TOTAL: %hi BEACON-VIF: %hi\n", iter_data.nadhocs, iter_data.nocbs, sc->cur_chan->nvifs, sc->nbcnvifs); diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 690fe3a1b516..42a208787f5a 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -832,12 +832,6 @@ static const struct ieee80211_iface_limit if_limits[] = { BIT(NL80211_IFTYPE_P2P_GO) }, }; -#ifdef CONFIG_WIRELESS_WDS -static const struct ieee80211_iface_limit wds_limits[] = { - { .max = 2048, .types = BIT(NL80211_IFTYPE_WDS) }, -}; -#endif - #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT static const struct ieee80211_iface_limit if_limits_multi[] = { @@ -874,15 +868,6 @@ static const struct ieee80211_iface_combination if_comb[] = { BIT(NL80211_CHAN_WIDTH_40), #endif }, -#ifdef CONFIG_WIRELESS_WDS - { - .limits = wds_limits, - .n_limits = ARRAY_SIZE(wds_limits), - .max_interfaces = 2048, - .num_different_channels = 1, - .beacon_int_infra_match = true, - }, -#endif }; #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT @@ -897,7 +882,6 @@ static void ath9k_set_mcc_capab(struct ath_softc *sc, struct ieee80211_hw *hw) ieee80211_hw_set(hw, QUEUE_CONTROL); hw->queues = ATH9K_NUM_TX_QUEUES; hw->offchannel_tx_hw_queue = hw->queues - 1; - hw->wiphy->interface_modes &= ~ BIT(NL80211_IFTYPE_WDS); hw->wiphy->iface_combinations = if_comb_multi; hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb_multi); hw->wiphy->max_scan_ssids = 255; @@ -953,9 +937,6 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_MESH_POINT) | -#ifdef CONFIG_WIRELESS_WDS - BIT(NL80211_IFTYPE_WDS) | -#endif BIT(NL80211_IFTYPE_OCB); if (ath9k_is_chanctx_enabled()) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 8dbf68b94228..caebe3fd6869 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ 
b/drivers/net/wireless/ath/ath9k/main.c @@ -973,9 +973,6 @@ static void ath9k_vif_iter(struct ath9k_vif_iter_data *iter_data, if (vif->bss_conf.enable_beacon) ath9k_vif_iter_set_beacon(iter_data, vif); break; - case NL80211_IFTYPE_WDS: - iter_data->nwds++; - break; default: break; } @@ -1136,8 +1133,6 @@ void ath9k_calculate_summary_state(struct ath_softc *sc, ah->opmode = NL80211_IFTYPE_MESH_POINT; else if (iter_data.nocbs) ah->opmode = NL80211_IFTYPE_OCB; - else if (iter_data.nwds) - ah->opmode = NL80211_IFTYPE_AP; else if (iter_data.nadhocs) ah->opmode = NL80211_IFTYPE_ADHOC; else diff --git a/drivers/net/wireless/ath/carl9170/mac.c b/drivers/net/wireless/ath/carl9170/mac.c index b2eeb9fd68d2..6cdbee5beb07 100644 --- a/drivers/net/wireless/ath/carl9170/mac.c +++ b/drivers/net/wireless/ath/carl9170/mac.c @@ -329,10 +329,6 @@ int carl9170_set_operating_mode(struct ar9170 *ar) /* iwlagn 802.11n STA Workaround */ rx_ctrl |= AR9170_MAC_RX_CTRL_PASS_TO_HOST; break; - case NL80211_IFTYPE_WDS: - cam_mode |= AR9170_MAC_CAM_AP_WDS; - rx_ctrl |= AR9170_MAC_RX_CTRL_PASS_TO_HOST; - break; case NL80211_IFTYPE_STATION: cam_mode |= AR9170_MAC_CAM_STA; rx_ctrl |= AR9170_MAC_RX_CTRL_PASS_TO_HOST; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index dbef9d8fc893..cca3b086aa70 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -646,7 +646,6 @@ static int carl9170_op_add_interface(struct ieee80211_hw *hw, case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: if ((vif->type == NL80211_IFTYPE_STATION) || - (vif->type == NL80211_IFTYPE_WDS) || (vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_MESH_POINT)) break; diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index f175dbaffc30..150a366e8f62 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -4961,12 +4961,11 @@ static int b43_op_add_interface(struct ieee80211_hw *hw, struct b43_wldev *dev; int err = -EOPNOTSUPP; - /* TODO: allow WDS/AP devices to coexist */ + /* TODO: allow AP devices to coexist */ if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_MESH_POINT && vif->type != NL80211_IFTYPE_STATION && - vif->type != NL80211_IFTYPE_WDS && vif->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; @@ -5576,9 +5575,6 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev) BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_STATION) | -#ifdef CONFIG_WIRELESS_WDS - BIT(NL80211_IFTYPE_WDS) | -#endif BIT(NL80211_IFTYPE_ADHOC); hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index a27125b7922c..7692a2618c97 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -3381,11 +3381,10 @@ static int b43legacy_op_add_interface(struct ieee80211_hw *hw, unsigned long flags; int err = -EOPNOTSUPP; - /* TODO: allow WDS/AP devices to coexist */ + /* TODO: allow AP devices to coexist */ if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_STATION && - vif->type != NL80211_IFTYPE_WDS && vif->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; @@ -3805,9 +3804,6 @@ static int b43legacy_wireless_init(struct ssb_device *dev) hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | -#ifdef CONFIG_WIRELESS_WDS - 
BIT(NL80211_IFTYPE_WDS) | -#endif BIT(NL80211_IFTYPE_ADHOC); hw->queues = 1; /* FIXME: hardware has more queues */ hw->max_rates = 2; diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index bf6dbeb61842..ad726bd100ec 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -126,28 +126,13 @@ qtnf_netdev_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (unlikely(skb->protocol == htons(ETH_P_PAE))) { qtnf_packet_send_hi_pri(skb); - qtnf_update_tx_stats(ndev, skb); + dev_sw_netstats_tx_add(ndev, 1, skb->len); return NETDEV_TX_OK; } return qtnf_bus_data_tx(mac->bus, skb, mac->macid, vif->vifid); } -/* Netdev handler for getting stats. - */ -static void qtnf_netdev_get_stats64(struct net_device *ndev, - struct rtnl_link_stats64 *stats) -{ - struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); - - netdev_stats_to_stats64(stats, &ndev->stats); - - if (!vif->stats64) - return; - - dev_fetch_sw_netstats(stats, vif->stats64); -} - /* Netdev handler for transmission timeout. */ static void qtnf_netdev_tx_timeout(struct net_device *ndev, unsigned int txqueue) @@ -211,13 +196,27 @@ static int qtnf_netdev_port_parent_id(struct net_device *ndev, return 0; } +static int qtnf_netdev_alloc_pcpu_stats(struct net_device *dev) +{ + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + + return dev->tstats ? 0 : -ENOMEM; +} + +static void qtnf_netdev_free_pcpu_stats(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + /* Network device ops handlers */ const struct net_device_ops qtnf_netdev_ops = { + .ndo_init = qtnf_netdev_alloc_pcpu_stats, + .ndo_uninit = qtnf_netdev_free_pcpu_stats, .ndo_open = qtnf_netdev_open, .ndo_stop = qtnf_netdev_close, .ndo_start_xmit = qtnf_netdev_hard_start_xmit, .ndo_tx_timeout = qtnf_netdev_tx_timeout, - .ndo_get_stats64 = qtnf_netdev_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = qtnf_netdev_set_mac_address, .ndo_get_port_parent_id = qtnf_netdev_port_parent_id, }; @@ -448,10 +447,6 @@ static struct qtnf_wmac *qtnf_core_mac_alloc(struct qtnf_bus *bus, qtnf_sta_list_init(&vif->sta_list); INIT_WORK(&vif->high_pri_tx_work, qtnf_vif_send_data_high_pri); skb_queue_head_init(&vif->high_pri_tx_queue); - vif->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!vif->stats64) - pr_warn("VIF%u.%u: per cpu stats allocation failed\n", - macid, i); } qtnf_mac_init_primary_intf(mac); @@ -531,7 +526,6 @@ static void qtnf_core_mac_detach(struct qtnf_bus *bus, unsigned int macid) } rtnl_unlock(); qtnf_sta_list_free(&vif->sta_list); - free_percpu(vif->stats64); } if (mac->wiphy_registered) @@ -924,46 +918,6 @@ void qtnf_wake_all_queues(struct net_device *ndev) } EXPORT_SYMBOL_GPL(qtnf_wake_all_queues); -void qtnf_update_rx_stats(struct net_device *ndev, const struct sk_buff *skb) -{ - struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); - struct pcpu_sw_netstats *stats64; - - if (unlikely(!vif || !vif->stats64)) { - ndev->stats.rx_packets++; - ndev->stats.rx_bytes += skb->len; - return; - } - - stats64 = this_cpu_ptr(vif->stats64); - - u64_stats_update_begin(&stats64->syncp); - stats64->rx_packets++; - stats64->rx_bytes += skb->len; - u64_stats_update_end(&stats64->syncp); -} -EXPORT_SYMBOL_GPL(qtnf_update_rx_stats); - -void qtnf_update_tx_stats(struct net_device *ndev, const struct sk_buff *skb) -{ - struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); - struct pcpu_sw_netstats *stats64; - - if (unlikely(!vif || 
!vif->stats64)) { - ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; - return; - } - - stats64 = this_cpu_ptr(vif->stats64); - - u64_stats_update_begin(&stats64->syncp); - stats64->tx_packets++; - stats64->tx_bytes += skb->len; - u64_stats_update_end(&stats64->syncp); -} -EXPORT_SYMBOL_GPL(qtnf_update_tx_stats); - struct dentry *qtnf_get_debugfs_dir(void) { return qtnf_debugfs_dir; diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 269ce12cf8bf..b204a24074ab 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -70,8 +70,6 @@ struct qtnf_vif { struct qtnf_sta_list sta_list; unsigned long cons_tx_timeout_cnt; int generation; - - struct pcpu_sw_netstats __percpu *stats64; }; struct qtnf_mac_info { @@ -139,8 +137,6 @@ int qtnf_cmd_send_update_phy_params(struct qtnf_wmac *mac, u32 changed); struct qtnf_wmac *qtnf_core_get_mac(const struct qtnf_bus *bus, u8 macid); struct net_device *qtnf_classify_skb(struct qtnf_bus *bus, struct sk_buff *skb); void qtnf_wake_all_queues(struct net_device *ndev); -void qtnf_update_rx_stats(struct net_device *ndev, const struct sk_buff *skb); -void qtnf_update_tx_stats(struct net_device *ndev, const struct sk_buff *skb); void qtnf_virtual_intf_cleanup(struct net_device *ndev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c index 9a20c0f29078..0003df577cb3 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c @@ -489,7 +489,7 @@ static void qtnf_pearl_data_tx_reclaim(struct qtnf_pcie_pearl_state *ps) PCI_DMA_TODEVICE); if (skb->dev) { - qtnf_update_tx_stats(skb->dev, skb); + dev_sw_netstats_tx_add(skb->dev, 1, skb->len); if (unlikely(priv->tx_stopped)) { qtnf_wake_all_queues(skb->dev); priv->tx_stopped = 0; @@ -756,7 +756,7 @@ static int qtnf_pcie_pearl_rx_poll(struct napi_struct *napi, int budget) skb_put(skb, psize); ndev = qtnf_classify_skb(bus, skb); if (likely(ndev)) { - qtnf_update_rx_stats(ndev, skb); + dev_sw_netstats_rx_add(ndev, skb->len); skb->protocol = eth_type_trans(skb, ndev); napi_gro_receive(napi, skb); } else { diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c index 4b87d3151017..24f1be8ddcef 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c @@ -418,7 +418,7 @@ static void qtnf_topaz_data_tx_reclaim(struct qtnf_pcie_topaz_state *ts) PCI_DMA_TODEVICE); if (skb->dev) { - qtnf_update_tx_stats(skb->dev, skb); + dev_sw_netstats_tx_add(skb->dev, 1, skb->len); if (unlikely(priv->tx_stopped)) { qtnf_wake_all_queues(skb->dev); priv->tx_stopped = 0; @@ -662,7 +662,7 @@ static int qtnf_topaz_rx_poll(struct napi_struct *napi, int budget) skb_put(skb, psize); ndev = qtnf_classify_skb(bus, skb); if (likely(ndev)) { - qtnf_update_rx_stats(ndev, skb); + dev_sw_netstats_rx_add(ndev, skb->len); skb->protocol = eth_type_trans(skb, ndev); netif_receive_skb(skb); } else { diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00config.c b/drivers/net/wireless/ralink/rt2x00/rt2x00config.c index 0ee1813e8453..6bafdd991171 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00config.c @@ -32,7 +32,6 @@ void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev, break; 
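The qtnfmac conversion in the hunks above is the complete recipe for the consolidated software stats: allocate pcpu_sw_netstats in ndo_init, count with dev_sw_netstats_rx_add()/dev_sw_netstats_tx_add() on the data paths, and let the generic dev_get_tstats64 fold the per-cpu counters into rtnl_link_stats64 (the same helper the wireguard and rndis_wlan hunks switch to). A condensed sketch with illustrative demo_* names:

	static int demo_init(struct net_device *dev)
	{
		dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
		return dev->tstats ? 0 : -ENOMEM;
	}

	static netdev_tx_t demo_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		dev_sw_netstats_tx_add(dev, 1, skb->len);	/* one packet, skb->len bytes */
		dev_kfree_skb(skb);				/* demo only: discard the frame */
		return NETDEV_TX_OK;
	}

	static const struct net_device_ops demo_ops = {
		.ndo_init	 = demo_init,
		.ndo_start_xmit	 = demo_xmit,
		.ndo_get_stats64 = dev_get_tstats64,	/* sums the per-cpu tstats */
	};

The matching ndo_uninit must release dev->tstats with free_percpu(), exactly as qtnf_netdev_free_pcpu_stats() above does.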
case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - case NL80211_IFTYPE_WDS: conf.sync = TSF_SYNC_AP_NONE; break; case NL80211_IFTYPE_STATION: diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index b04f76551ca4..61a4f1ad31e2 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -194,8 +194,7 @@ static void rt2x00lib_beaconupdate_iter(void *data, u8 *mac, if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_ADHOC && - vif->type != NL80211_IFTYPE_MESH_POINT && - vif->type != NL80211_IFTYPE_WDS) + vif->type != NL80211_IFTYPE_MESH_POINT) return; /* @@ -1437,9 +1436,6 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif -#ifdef CONFIG_WIRELESS_WDS - BIT(NL80211_IFTYPE_WDS) | -#endif BIT(NL80211_IFTYPE_AP); rt2x00dev->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 2f68a31072ae..dea5babd30fe 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -408,8 +408,7 @@ static void rt2x00mac_set_tim_iter(void *data, u8 *mac, if (vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_ADHOC && - vif->type != NL80211_IFTYPE_MESH_POINT && - vif->type != NL80211_IFTYPE_WDS) + vif->type != NL80211_IFTYPE_MESH_POINT) return; set_bit(DELAYED_UPDATE_BEACON, &intf->delayed_flags); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 75b5d545b49e..9fe77556858e 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -3379,7 +3379,7 @@ static const struct net_device_ops rndis_wlan_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_get_stats64 = usbnet_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = rndis_wlan_set_multicast_list, diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3e9895bec15f..920cac4385bf 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2413,12 +2413,11 @@ static ssize_t store_rxbuf(struct device *dev, const char *buf, size_t len) { char *endp; - unsigned long target; if (!capable(CAP_NET_ADMIN)) return -EPERM; - target = simple_strtoul(buf, &endp, 0); + simple_strtoul(buf, &endp, 0); if (endp == buf) return -EBADMSG; diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index ba6c486d6465..f8e5d78d9078 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -136,7 +136,7 @@ static struct nci_ops s3fwrn5_nci_ops = { }; int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, - const struct s3fwrn5_phy_ops *phy_ops, unsigned int max_payload) + const struct s3fwrn5_phy_ops *phy_ops) { struct s3fwrn5_info *info; int ret; @@ -148,7 +148,6 @@ int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, info->phy_id = phy_id; info->pdev = pdev; info->phy_ops = phy_ops; - info->max_payload = max_payload; mutex_init(&info->mutex); s3fwrn5_set_mode(info, S3FWRN5_MODE_COLD); diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index ec930ee2c847..4cde6dd5c019 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -266,7 
+266,7 @@ static int s3fwrn5_fw_complete_update_mode(struct s3fwrn5_fw_info *fw_info) } /* - * Firmware header stucture: + * Firmware header structure: * * 0x00 - 0x0B : Date and time string (w/o NUL termination) * 0x10 - 0x13 : Firmware version diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index dc995286be84..0ffa389066a0 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -19,7 +19,6 @@ #define S3FWRN5_I2C_DRIVER_NAME "s3fwrn5_i2c" -#define S3FWRN5_I2C_MAX_PAYLOAD 32 #define S3FWRN5_EN_WAIT_TIME 150 struct s3fwrn5_i2c_phy { @@ -248,8 +247,7 @@ static int s3fwrn5_i2c_probe(struct i2c_client *client, if (ret < 0) return ret; - ret = s3fwrn5_probe(&phy->ndev, phy, &phy->i2c_dev->dev, &i2c_phy_ops, - S3FWRN5_I2C_MAX_PAYLOAD); + ret = s3fwrn5_probe(&phy->ndev, phy, &phy->i2c_dev->dev, &i2c_phy_ops); if (ret < 0) return ret; diff --git a/drivers/nfc/s3fwrn5/s3fwrn5.h b/drivers/nfc/s3fwrn5/s3fwrn5.h index ede68bb5eeae..bb8f936d13a2 100644 --- a/drivers/nfc/s3fwrn5/s3fwrn5.h +++ b/drivers/nfc/s3fwrn5/s3fwrn5.h @@ -34,7 +34,6 @@ struct s3fwrn5_info { struct device *pdev; const struct s3fwrn5_phy_ops *phy_ops; - unsigned int max_payload; struct s3fwrn5_fw_info fw_info; @@ -45,7 +44,7 @@ static inline int s3fwrn5_set_mode(struct s3fwrn5_info *info, enum s3fwrn5_mode mode) { if (!info->phy_ops->set_mode) - return -ENOTSUPP; + return -EOPNOTSUPP; info->phy_ops->set_mode(info->phy_id, mode); @@ -55,7 +54,7 @@ static inline int s3fwrn5_set_mode(struct s3fwrn5_info *info, static inline enum s3fwrn5_mode s3fwrn5_get_mode(struct s3fwrn5_info *info) { if (!info->phy_ops->get_mode) - return -ENOTSUPP; + return -EOPNOTSUPP; return info->phy_ops->get_mode(info->phy_id); } @@ -63,7 +62,7 @@ static inline enum s3fwrn5_mode s3fwrn5_get_mode(struct s3fwrn5_info *info) static inline int s3fwrn5_set_wake(struct s3fwrn5_info *info, bool wake) { if (!info->phy_ops->set_wake) - return -ENOTSUPP; + return -EOPNOTSUPP; info->phy_ops->set_wake(info->phy_id, wake); @@ -73,13 +72,13 @@ static inline int s3fwrn5_set_wake(struct s3fwrn5_info *info, bool wake) static inline int s3fwrn5_write(struct s3fwrn5_info *info, struct sk_buff *skb) { if (!info->phy_ops->write) - return -ENOTSUPP; + return -EOPNOTSUPP; return info->phy_ops->write(info->phy_id, skb); } int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev, - const struct s3fwrn5_phy_ops *phy_ops, unsigned int max_payload); + const struct s3fwrn5_phy_ops *phy_ops); void s3fwrn5_remove(struct nci_dev *ndev); int s3fwrn5_recv_frame(struct nci_dev *ndev, struct sk_buff *skb, diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c index 179f6c472e50..c1c959f7e52b 100644 --- a/drivers/ptp/ptp_idt82p33.c +++ b/drivers/ptp/ptp_idt82p33.c @@ -21,6 +21,7 @@ MODULE_DESCRIPTION("Driver for IDT 82p33xxx clock devices"); MODULE_AUTHOR("IDT support-1588 <IDT-support-1588@lm.renesas.com>"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); +MODULE_FIRMWARE(FW_FILENAME); /* Module Parameters */ static u32 sync_tod_timeout = SYNC_TOD_TIMEOUT_SEC; @@ -77,11 +78,10 @@ static void idt82p33_timespec_to_byte_array(struct timespec64 const *ts, } } -static int idt82p33_xfer(struct idt82p33 *idt82p33, - unsigned char regaddr, - unsigned char *buf, - unsigned int count, - int write) +static int idt82p33_xfer_read(struct idt82p33 *idt82p33, + unsigned char regaddr, + unsigned char *buf, + unsigned int count) { struct i2c_client *client = idt82p33->client; struct i2c_msg msg[2]; @@ -93,7 +93,7 @@ static int idt82p33_xfer(struct 
idt82p33 *idt82p33, msg[0].buf = &regaddr; msg[1].addr = client->addr; - msg[1].flags = write ? 0 : I2C_M_RD; + msg[1].flags = I2C_M_RD; msg[1].len = count; msg[1].buf = buf; @@ -109,6 +109,31 @@ static int idt82p33_xfer(struct idt82p33 *idt82p33, return 0; } +static int idt82p33_xfer_write(struct idt82p33 *idt82p33, + u8 regaddr, + u8 *buf, + u16 count) +{ + struct i2c_client *client = idt82p33->client; + /* we add 1 byte for device register */ + u8 msg[IDT82P33_MAX_WRITE_COUNT + 1]; + int err; + + if (count > IDT82P33_MAX_WRITE_COUNT) + return -EINVAL; + + msg[0] = regaddr; + memcpy(&msg[1], buf, count); + + err = i2c_master_send(client, msg, count + 1); + if (err < 0) { + dev_err(&client->dev, "i2c_master_send returned %d\n", err); + return err; + } + + return 0; +} + static int idt82p33_page_offset(struct idt82p33 *idt82p33, unsigned char val) { int err; @@ -116,7 +141,7 @@ static int idt82p33_page_offset(struct idt82p33 *idt82p33, unsigned char val) if (idt82p33->page_offset == val) return 0; - err = idt82p33_xfer(idt82p33, PAGE_ADDR, &val, sizeof(val), 1); + err = idt82p33_xfer_write(idt82p33, PAGE_ADDR, &val, sizeof(val)); if (err) dev_err(&idt82p33->client->dev, "failed to set page offset %d\n", val); @@ -137,11 +162,12 @@ static int idt82p33_rdwr(struct idt82p33 *idt82p33, unsigned int regaddr, err = idt82p33_page_offset(idt82p33, page); if (err) - goto out; + return err; - err = idt82p33_xfer(idt82p33, offset, buf, count, write); -out: - return err; + if (write) + return idt82p33_xfer_write(idt82p33, offset, buf, count); + + return idt82p33_xfer_read(idt82p33, offset, buf, count); } static int idt82p33_read(struct idt82p33 *idt82p33, unsigned int regaddr, @@ -294,7 +320,6 @@ static int _idt82p33_adjfine(struct idt82p33_channel *channel, long scaled_ppm) { struct idt82p33 *idt82p33 = channel->idt82p33; unsigned char buf[5] = {0}; - int neg_adj = 0; int err, i; s64 fcw; @@ -314,16 +339,9 @@ static int _idt82p33_adjfine(struct idt82p33_channel *channel, long scaled_ppm) * FCW = ------------- * 168 * 2^4 */ - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } fcw = scaled_ppm * 244140625ULL; - fcw = div_u64(fcw, 2688); - - if (neg_adj) - fcw = -fcw; + fcw = div_s64(fcw, 2688); for (i = 0; i < 5; i++) { buf[i] = fcw & 0xff; @@ -448,8 +466,11 @@ static int idt82p33_measure_tod_write_overhead(struct idt82p33_channel *channel) err = idt82p33_measure_settime_gettime_gap_overhead(channel, &gap_ns); - if (err) + if (err) { + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); return err; + } err = idt82p33_measure_one_byte_write_overhead(channel, &one_byte_write_ns); @@ -518,13 +539,10 @@ static int idt82p33_sync_tod(struct idt82p33_channel *channel, bool enable) u8 sync_cnfg; int err; - if (enable == channel->sync_tod_on) { - if (enable && sync_tod_timeout) { - mod_delayed_work(system_wq, &channel->sync_tod_work, - sync_tod_timeout * HZ); - } - return 0; - } + /* Turn it off after sync_tod_timeout seconds */ + if (enable && sync_tod_timeout) + ptp_schedule_worker(channel->ptp_clock, + sync_tod_timeout * HZ); err = idt82p33_read(idt82p33, channel->dpll_sync_cnfg, &sync_cnfg, sizeof(sync_cnfg)); @@ -532,29 +550,17 @@ static int idt82p33_sync_tod(struct idt82p33_channel *channel, bool enable) return err; sync_cnfg &= ~SYNC_TOD; - if (enable) sync_cnfg |= SYNC_TOD; - err = idt82p33_write(idt82p33, channel->dpll_sync_cnfg, - &sync_cnfg, sizeof(sync_cnfg)); - if (err) - return err; - - channel->sync_tod_on = enable; - - if (enable && sync_tod_timeout)
{ - mod_delayed_work(system_wq, &channel->sync_tod_work, - sync_tod_timeout * HZ); - } - - return 0; + return idt82p33_write(idt82p33, channel->dpll_sync_cnfg, + &sync_cnfg, sizeof(sync_cnfg)); } -static void idt82p33_sync_tod_work_handler(struct work_struct *work) +static long idt82p33_sync_tod_work_handler(struct ptp_clock_info *ptp) { struct idt82p33_channel *channel = - container_of(work, struct idt82p33_channel, sync_tod_work.work); + container_of(ptp, struct idt82p33_channel, caps); struct idt82p33 *idt82p33 = channel->idt82p33; mutex_lock(&idt82p33->reg_lock); @@ -562,35 +568,46 @@ static void idt82p33_sync_tod_work_handler(struct work_struct *work) (void)idt82p33_sync_tod(channel, false); mutex_unlock(&idt82p33->reg_lock); + + /* Return a negative value here to not reschedule */ + return -1; } -static int idt82p33_pps_enable(struct idt82p33_channel *channel, bool enable) +static int idt82p33_output_enable(struct idt82p33_channel *channel, + bool enable, unsigned int outn) { struct idt82p33 *idt82p33 = channel->idt82p33; - u8 mask, outn, val; int err; + u8 val; + + err = idt82p33_read(idt82p33, OUT_MUX_CNFG(outn), &val, sizeof(val)); + if (err) + return err; + if (enable) + val &= ~SQUELCH_ENABLE; + else + val |= SQUELCH_ENABLE; + + return idt82p33_write(idt82p33, OUT_MUX_CNFG(outn), &val, sizeof(val)); +} + +static int idt82p33_output_mask_enable(struct idt82p33_channel *channel, + bool enable) +{ + u16 mask; + int err; + u8 outn; mask = channel->output_mask; outn = 0; while (mask) { if (mask & 0x1) { - err = idt82p33_read(idt82p33, OUT_MUX_CNFG(outn), - &val, sizeof(val)); - if (err) - return err; - - if (enable) - val &= ~SQUELCH_ENABLE; - else - val |= SQUELCH_ENABLE; - - err = idt82p33_write(idt82p33, OUT_MUX_CNFG(outn), - &val, sizeof(val)); - + err = idt82p33_output_enable(channel, enable, outn); if (err) return err; } + mask >>= 0x1; outn++; } @@ -598,6 +615,20 @@ static int idt82p33_pps_enable(struct idt82p33_channel *channel, bool enable) return 0; } +static int idt82p33_perout_enable(struct idt82p33_channel *channel, + bool enable, + struct ptp_perout_request *perout) +{ + unsigned int flags = perout->flags; + + /* Enable/disable output based on output_mask */ + if (flags == PEROUT_ENABLE_OUTPUT_MASK) + return idt82p33_output_mask_enable(channel, enable); + + /* Enable/disable individual output instead */ + return idt82p33_output_enable(channel, enable, perout->index); +} + static int idt82p33_enable_tod(struct idt82p33_channel *channel) { struct idt82p33 *idt82p33 = channel->idt82p33; @@ -611,15 +642,13 @@ static int idt82p33_enable_tod(struct idt82p33_channel *channel) if (err) return err; - err = idt82p33_pps_enable(channel, false); - - if (err) - return err; - err = idt82p33_measure_tod_write_overhead(channel); - if (err) + if (err) { + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); return err; + } err = _idt82p33_settime(channel, &ts); @@ -638,10 +667,8 @@ static void idt82p33_ptp_clock_unregister_all(struct idt82p33 *idt82p33) channel = &idt82p33->channel[i]; - if (channel->ptp_clock) { + if (channel->ptp_clock) ptp_clock_unregister(channel->ptp_clock); - cancel_delayed_work_sync(&channel->sync_tod_work); - } } } @@ -659,14 +686,15 @@ static int idt82p33_enable(struct ptp_clock_info *ptp, if (rq->type == PTP_CLK_REQ_PEROUT) { if (!on) - err = idt82p33_pps_enable(channel, false); - + err = idt82p33_perout_enable(channel, false, + &rq->perout); /* Only accept a 1-PPS aligned to the second. 
*/ else if (rq->perout.start.nsec || rq->perout.period.sec != 1 || rq->perout.period.nsec) { err = -ERANGE; } else - err = idt82p33_pps_enable(channel, true); + err = idt82p33_perout_enable(channel, true, + &rq->perout); } mutex_unlock(&idt82p33->reg_lock); @@ -674,6 +702,48 @@ static int idt82p33_enable(struct ptp_clock_info *ptp, return err; } +static int idt82p33_adjwritephase(struct ptp_clock_info *ptp, s32 offset_ns) +{ + struct idt82p33_channel *channel = + container_of(ptp, struct idt82p33_channel, caps); + struct idt82p33 *idt82p33 = channel->idt82p33; + s64 offset_regval, offset_fs; + u8 val[4] = {0}; + int err; + + offset_fs = (s64)(-offset_ns) * 1000000; + + if (offset_fs > WRITE_PHASE_OFFSET_LIMIT) + offset_fs = WRITE_PHASE_OFFSET_LIMIT; + else if (offset_fs < -WRITE_PHASE_OFFSET_LIMIT) + offset_fs = -WRITE_PHASE_OFFSET_LIMIT; + + /* Convert from phaseoffset_fs to register value */ + offset_regval = div_s64(offset_fs * 1000, IDT_T0DPLL_PHASE_RESOL); + + val[0] = offset_regval & 0xFF; + val[1] = (offset_regval >> 8) & 0xFF; + val[2] = (offset_regval >> 16) & 0xFF; + val[3] = (offset_regval >> 24) & 0x1F; + val[3] |= PH_OFFSET_EN; + + mutex_lock(&idt82p33->reg_lock); + + err = idt82p33_dpll_set_mode(channel, PLL_MODE_WPH); + if (err) { + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); + goto out; + } + + err = idt82p33_write(idt82p33, channel->dpll_phase_cnfg, val, + sizeof(val)); + +out: + mutex_unlock(&idt82p33->reg_lock); + return err; +} + static int idt82p33_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct idt82p33_channel *channel = @@ -683,6 +753,9 @@ static int idt82p33_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) mutex_lock(&idt82p33->reg_lock); err = _idt82p33_adjfine(channel, scaled_ppm); + if (err) + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); mutex_unlock(&idt82p33->reg_lock); return err; @@ -706,10 +779,15 @@ static int idt82p33_adjtime(struct ptp_clock_info *ptp, s64 delta_ns) if (err) { mutex_unlock(&idt82p33->reg_lock); + dev_err(&idt82p33->client->dev, + "Adjtime failed in %s with err %d!\n", __func__, err); return err; } err = idt82p33_sync_tod(channel, true); + if (err) + dev_err(&idt82p33->client->dev, + "Sync_tod failed in %s with err %d!\n", __func__, err); mutex_unlock(&idt82p33->reg_lock); @@ -725,6 +803,9 @@ static int idt82p33_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) mutex_lock(&idt82p33->reg_lock); err = _idt82p33_gettime(channel, ts); + if (err) + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); mutex_unlock(&idt82p33->reg_lock); return err; @@ -740,6 +821,9 @@ static int idt82p33_settime(struct ptp_clock_info *ptp, mutex_lock(&idt82p33->reg_lock); err = _idt82p33_settime(channel, ts); + if (err) + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); mutex_unlock(&idt82p33->reg_lock); return err; @@ -772,9 +856,6 @@ static int idt82p33_channel_init(struct idt82p33_channel *channel, int index) return -EINVAL; } - INIT_DELAYED_WORK(&channel->sync_tod_work, - idt82p33_sync_tod_work_handler); - channel->sync_tod_on = false; channel->current_freq_ppb = 0; return 0; @@ -784,11 +865,14 @@ static void idt82p33_caps_init(struct ptp_clock_info *caps) { caps->owner = THIS_MODULE; caps->max_adj = 92000; + caps->n_per_out = 11; + caps->adjphase = idt82p33_adjwritephase; caps->adjfine = idt82p33_adjfine; caps->adjtime = idt82p33_adjtime; caps->gettime64 = idt82p33_gettime; caps->settime64 = 
idt82p33_settime; caps->enable = idt82p33_enable; + caps->do_aux_work = idt82p33_sync_tod_work_handler; } static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) @@ -802,23 +886,18 @@ static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) channel = &idt82p33->channel[index]; err = idt82p33_channel_init(channel, index); - if (err) + if (err) { + dev_err(&idt82p33->client->dev, + "Channel_init failed in %s with err %d!\n", + __func__, err); return err; + } channel->idt82p33 = idt82p33; idt82p33_caps_init(&channel->caps); snprintf(channel->caps.name, sizeof(channel->caps.name), "IDT 82P33 PLL%u", index); - channel->caps.n_per_out = hweight8(channel->output_mask); - - err = idt82p33_dpll_set_mode(channel, PLL_MODE_DCO); - if (err) - return err; - - err = idt82p33_enable_tod(channel); - if (err) - return err; channel->ptp_clock = ptp_clock_register(&channel->caps, NULL); @@ -831,6 +910,22 @@ static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) if (!channel->ptp_clock) return -ENOTSUPP; + err = idt82p33_dpll_set_mode(channel, PLL_MODE_DCO); + if (err) { + dev_err(&idt82p33->client->dev, + "Dpll_set_mode failed in %s with err %d!\n", + __func__, err); + return err; + } + + err = idt82p33_enable_tod(channel); + if (err) { + dev_err(&idt82p33->client->dev, + "Enable_tod failed in %s with err %d!\n", + __func__, err); + return err; + } + dev_info(&idt82p33->client->dev, "PLL%d registered as ptp%d\n", index, channel->ptp_clock->index); @@ -850,8 +945,11 @@ static int idt82p33_load_firmware(struct idt82p33 *idt82p33) err = request_firmware(&fw, FW_FILENAME, &idt82p33->client->dev); - if (err) + if (err) { + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", __func__, err); return err; + } dev_dbg(&idt82p33->client->dev, "firmware size %zu bytes\n", fw->size); @@ -935,8 +1033,12 @@ static int idt82p33_probe(struct i2c_client *client, for (i = 0; i < MAX_PHC_PLL; i++) { if (idt82p33->pll_mask & (1 << i)) { err = idt82p33_enable_channel(idt82p33, i); - if (err) + if (err) { + dev_err(&idt82p33->client->dev, + "Failed in %s with err %d!\n", + __func__, err); break; + } } } } else { diff --git a/drivers/ptp/ptp_idt82p33.h b/drivers/ptp/ptp_idt82p33.h index 9d46966d25f1..1c7a0f0872e8 100644 --- a/drivers/ptp/ptp_idt82p33.h +++ b/drivers/ptp/ptp_idt82p33.h @@ -56,6 +56,8 @@ #define PLL_MODE_SHIFT (0) #define PLL_MODE_MASK (0x1F) +#define PEROUT_ENABLE_OUTPUT_MASK (0xdeadbeef) + enum pll_mode { PLL_MODE_MIN = 0, PLL_MODE_AUTOMATIC = PLL_MODE_MIN, @@ -93,6 +95,7 @@ enum hw_tod_trig_sel { #define MAX_MEASURMENT_COUNT (5) #define SNAP_THRESHOLD_NS (150000) #define SYNC_TOD_TIMEOUT_SEC (5) +#define IDT82P33_MAX_WRITE_COUNT (512) #define PLLMASK_ADDR_HI 0xFF #define PLLMASK_ADDR_LO 0xA5 diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f73b4756ed5e..32ea41b4356b 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -701,6 +701,19 @@ enum qeth_pnso_mode { QETH_PNSO_ADDR_INFO, }; +enum qeth_link_mode { + QETH_LINK_MODE_UNKNOWN, + QETH_LINK_MODE_FIBRE_SHORT, + QETH_LINK_MODE_FIBRE_LONG, +}; + +struct qeth_link_info { + u32 speed; + u8 duplex; + u8 port; + enum qeth_link_mode link_mode; +}; + #define QETH_BROADCAST_WITH_ECHO 0x01 #define QETH_BROADCAST_WITHOUT_ECHO 0x02 struct qeth_card_info { @@ -732,6 +745,7 @@ struct qeth_card_info { struct qeth_card_blkt blkt; __u32 diagass_support; __u32 hwtrap; + struct qeth_link_info link_info; }; enum qeth_discipline_id { @@ -796,12 +810,6 @@ struct 
qeth_rx { u8 bufs_refill; }; -struct carrier_info { - __u8 card_type; - __u16 port_mode; - __u32 port_speed; -}; - struct qeth_switch_info { __u32 capabilities; __u32 settings; @@ -1108,7 +1116,7 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, int qeth_query_switch_attributes(struct qeth_card *card, struct qeth_switch_info *sw_info); int qeth_query_card_info(struct qeth_card *card, - struct carrier_info *carrier_info); + struct qeth_link_info *link_info); int qeth_setadpparms_set_access_ctrl(struct qeth_card *card, enum qeth_ipa_isolation_modes mode); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 93c9b30ab17a..d9ba0586373b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4868,8 +4868,8 @@ out_free: static int qeth_query_card_info_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct carrier_info *carrier_info = (struct carrier_info *)reply->param; struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; + struct qeth_link_info *link_info = reply->param; struct qeth_query_card_info *card_info; QETH_CARD_TEXT(card, 2, "qcrdincb"); @@ -4877,14 +4877,67 @@ static int qeth_query_card_info_cb(struct qeth_card *card, return -EIO; card_info = &cmd->data.setadapterparms.data.card_info; - carrier_info->card_type = card_info->card_type; - carrier_info->port_mode = card_info->port_mode; - carrier_info->port_speed = card_info->port_speed; + netdev_dbg(card->dev, + "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", + card_info->card_type, card_info->port_mode, + card_info->port_speed); + + switch (card_info->port_mode) { + case CARD_INFO_PORTM_FULLDUPLEX: + link_info->duplex = DUPLEX_FULL; + break; + case CARD_INFO_PORTM_HALFDUPLEX: + link_info->duplex = DUPLEX_HALF; + break; + default: + link_info->duplex = DUPLEX_UNKNOWN; + } + + switch (card_info->card_type) { + case CARD_INFO_TYPE_1G_COPPER_A: + case CARD_INFO_TYPE_1G_COPPER_B: + link_info->speed = SPEED_1000; + link_info->port = PORT_TP; + break; + case CARD_INFO_TYPE_1G_FIBRE_A: + case CARD_INFO_TYPE_1G_FIBRE_B: + link_info->speed = SPEED_1000; + link_info->port = PORT_FIBRE; + break; + case CARD_INFO_TYPE_10G_FIBRE_A: + case CARD_INFO_TYPE_10G_FIBRE_B: + link_info->speed = SPEED_10000; + link_info->port = PORT_FIBRE; + break; + default: + switch (card_info->port_speed) { + case CARD_INFO_PORTS_10M: + link_info->speed = SPEED_10; + break; + case CARD_INFO_PORTS_100M: + link_info->speed = SPEED_100; + break; + case CARD_INFO_PORTS_1G: + link_info->speed = SPEED_1000; + break; + case CARD_INFO_PORTS_10G: + link_info->speed = SPEED_10000; + break; + case CARD_INFO_PORTS_25G: + link_info->speed = SPEED_25000; + break; + default: + link_info->speed = SPEED_UNKNOWN; + } + + link_info->port = PORT_OTHER; + } + return 0; } int qeth_query_card_info(struct qeth_card *card, - struct carrier_info *carrier_info) + struct qeth_link_info *link_info) { struct qeth_cmd_buffer *iob; @@ -4894,8 +4947,162 @@ int qeth_query_card_info(struct qeth_card *card, iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_CARD_INFO, 0); if (!iob) return -ENOMEM; - return qeth_send_ipa_cmd(card, iob, qeth_query_card_info_cb, - (void *)carrier_info); + + return qeth_send_ipa_cmd(card, iob, qeth_query_card_info_cb, link_info); +} + +static int qeth_init_link_info_oat_cb(struct qeth_card *card, + struct qeth_reply *reply_priv, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; + struct 
qeth_link_info *link_info = reply_priv->param; + struct qeth_query_oat_physical_if *phys_if; + struct qeth_query_oat_reply *reply; + + if (qeth_setadpparms_inspect_rc(cmd)) + return -EIO; + + /* Multi-part reply is unexpected, don't bother: */ + if (cmd->data.setadapterparms.hdr.used_total > 1) + return -EINVAL; + + /* Expect the reply to start with phys_if data: */ + reply = &cmd->data.setadapterparms.data.query_oat.reply[0]; + if (reply->type != QETH_QOAT_REPLY_TYPE_PHYS_IF || + reply->length < sizeof(*reply)) + return -EINVAL; + + phys_if = &reply->phys_if; + + switch (phys_if->speed_duplex) { + case QETH_QOAT_PHYS_SPEED_10M_HALF: + link_info->speed = SPEED_10; + link_info->duplex = DUPLEX_HALF; + break; + case QETH_QOAT_PHYS_SPEED_10M_FULL: + link_info->speed = SPEED_10; + link_info->duplex = DUPLEX_FULL; + break; + case QETH_QOAT_PHYS_SPEED_100M_HALF: + link_info->speed = SPEED_100; + link_info->duplex = DUPLEX_HALF; + break; + case QETH_QOAT_PHYS_SPEED_100M_FULL: + link_info->speed = SPEED_100; + link_info->duplex = DUPLEX_FULL; + break; + case QETH_QOAT_PHYS_SPEED_1000M_HALF: + link_info->speed = SPEED_1000; + link_info->duplex = DUPLEX_HALF; + break; + case QETH_QOAT_PHYS_SPEED_1000M_FULL: + link_info->speed = SPEED_1000; + link_info->duplex = DUPLEX_FULL; + break; + case QETH_QOAT_PHYS_SPEED_10G_FULL: + link_info->speed = SPEED_10000; + link_info->duplex = DUPLEX_FULL; + break; + case QETH_QOAT_PHYS_SPEED_25G_FULL: + link_info->speed = SPEED_25000; + link_info->duplex = DUPLEX_FULL; + break; + case QETH_QOAT_PHYS_SPEED_UNKNOWN: + default: + link_info->speed = SPEED_UNKNOWN; + link_info->duplex = DUPLEX_UNKNOWN; + break; + } + + switch (phys_if->media_type) { + case QETH_QOAT_PHYS_MEDIA_COPPER: + link_info->port = PORT_TP; + link_info->link_mode = QETH_LINK_MODE_UNKNOWN; + break; + case QETH_QOAT_PHYS_MEDIA_FIBRE_SHORT: + link_info->port = PORT_FIBRE; + link_info->link_mode = QETH_LINK_MODE_FIBRE_SHORT; + break; + case QETH_QOAT_PHYS_MEDIA_FIBRE_LONG: + link_info->port = PORT_FIBRE; + link_info->link_mode = QETH_LINK_MODE_FIBRE_LONG; + break; + default: + link_info->port = PORT_OTHER; + link_info->link_mode = QETH_LINK_MODE_UNKNOWN; + break; + } + + return 0; +} + +static void qeth_init_link_info(struct qeth_card *card) +{ + card->info.link_info.duplex = DUPLEX_FULL; + + if (IS_IQD(card) || IS_VM_NIC(card)) { + card->info.link_info.speed = SPEED_10000; + card->info.link_info.port = PORT_FIBRE; + card->info.link_info.link_mode = QETH_LINK_MODE_FIBRE_SHORT; + } else { + switch (card->info.link_type) { + case QETH_LINK_TYPE_FAST_ETH: + case QETH_LINK_TYPE_LANE_ETH100: + card->info.link_info.speed = SPEED_100; + card->info.link_info.port = PORT_TP; + break; + case QETH_LINK_TYPE_GBIT_ETH: + case QETH_LINK_TYPE_LANE_ETH1000: + card->info.link_info.speed = SPEED_1000; + card->info.link_info.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_10GBIT_ETH: + card->info.link_info.speed = SPEED_10000; + card->info.link_info.port = PORT_FIBRE; + break; + case QETH_LINK_TYPE_25GBIT_ETH: + card->info.link_info.speed = SPEED_25000; + card->info.link_info.port = PORT_FIBRE; + break; + default: + dev_info(&card->gdev->dev, "Unknown link type %x\n", + card->info.link_type); + card->info.link_info.speed = SPEED_UNKNOWN; + card->info.link_info.port = PORT_OTHER; + } + + card->info.link_info.link_mode = QETH_LINK_MODE_UNKNOWN; + } + + /* Get more accurate data via QUERY OAT: */ + if (qeth_adp_supported(card, IPA_SETADP_QUERY_OAT)) { + struct qeth_link_info link_info; + struct qeth_cmd_buffer *iob; + + 
iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_OAT, + SETADP_DATA_SIZEOF(query_oat)); + if (iob) { + struct qeth_ipa_cmd *cmd = __ipa_cmd(iob); + struct qeth_query_oat *oat_req; + + oat_req = &cmd->data.setadapterparms.data.query_oat; + oat_req->subcmd_code = QETH_QOAT_SCOPE_INTERFACE; + + if (!qeth_send_ipa_cmd(card, iob, + qeth_init_link_info_oat_cb, + &link_info)) { + if (link_info.speed != SPEED_UNKNOWN) + card->info.link_info.speed = link_info.speed; + if (link_info.duplex != DUPLEX_UNKNOWN) + card->info.link_info.duplex = link_info.duplex; + if (link_info.port != PORT_OTHER) + card->info.link_info.port = link_info.port; + if (link_info.link_mode != QETH_LINK_MODE_UNKNOWN) + card->info.link_info.link_mode = link_info.link_mode; + } + } + } } /** @@ -5282,6 +5489,8 @@ retriable: goto out; } + qeth_init_link_info(card); + rc = qeth_init_qdio_queues(card); if (rc) { QETH_CARD_TEXT_(card, 2, "9err%d", rc); diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 6541bab96822..e4bde7daf083 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -489,9 +489,45 @@ struct qeth_set_access_ctrl { __u8 reserved[8]; } __attribute__((packed)); +#define QETH_QOAT_PHYS_SPEED_UNKNOWN 0x00 +#define QETH_QOAT_PHYS_SPEED_10M_HALF 0x01 +#define QETH_QOAT_PHYS_SPEED_10M_FULL 0x02 +#define QETH_QOAT_PHYS_SPEED_100M_HALF 0x03 +#define QETH_QOAT_PHYS_SPEED_100M_FULL 0x04 +#define QETH_QOAT_PHYS_SPEED_1000M_HALF 0x05 +#define QETH_QOAT_PHYS_SPEED_1000M_FULL 0x06 +// n/a 0x07 +#define QETH_QOAT_PHYS_SPEED_10G_FULL 0x08 +// n/a 0x09 +#define QETH_QOAT_PHYS_SPEED_25G_FULL 0x0A + +#define QETH_QOAT_PHYS_MEDIA_COPPER 0x01 +#define QETH_QOAT_PHYS_MEDIA_FIBRE_SHORT 0x02 +#define QETH_QOAT_PHYS_MEDIA_FIBRE_LONG 0x04 + +struct qeth_query_oat_physical_if { + u8 res_head[33]; + u8 speed_duplex; + u8 media_type; + u8 res_tail[29]; +}; + +#define QETH_QOAT_REPLY_TYPE_PHYS_IF 0x0004 + +struct qeth_query_oat_reply { + u16 type; + u16 length; + u16 version; + u8 res[10]; + struct qeth_query_oat_physical_if phys_if; +}; + +#define QETH_QOAT_SCOPE_INTERFACE 0x00000001 + struct qeth_query_oat { - __u32 subcmd_code; - __u8 reserved[12]; + u32 subcmd_code; + u8 reserved[12]; + struct qeth_query_oat_reply reply[]; } __packed; struct qeth_qoat_priv { diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index b5caa723326e..3a51bbff0ffe 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -324,8 +324,8 @@ static int qeth_set_per_queue_coalesce(struct net_device *dev, u32 queue, /* Autoneg and full-duplex are supported and advertised unconditionally. */ /* Always advertise and support all speeds up to specified, and only one */ /* specified port type. 
*/ -static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, - int maxspeed, int porttype) +static void qeth_set_ethtool_link_modes(struct ethtool_link_ksettings *cmd, + enum qeth_link_mode link_mode) { ethtool_link_ksettings_zero_link_mode(cmd, supported); ethtool_link_ksettings_zero_link_mode(cmd, advertising); @@ -334,186 +334,119 @@ static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - switch (porttype) { + switch (cmd->base.port) { case PORT_TP: ethtool_link_ksettings_add_link_mode(cmd, supported, TP); ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + + switch (cmd->base.speed) { + case SPEED_10000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseT_Full); + fallthrough; + case SPEED_1000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Half); + fallthrough; + case SPEED_100: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Half); + fallthrough; + case SPEED_10: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10baseT_Half); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10baseT_Half); + break; + default: + break; + } + break; case PORT_FIBRE: ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - break; - default: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - WARN_ON_ONCE(1); - } - /* partially does fall through, to also select lower speeds */ - switch (maxspeed) { - case SPEED_25000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 25000baseSR_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 25000baseSR_Full); - break; - case SPEED_10000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10000baseT_Full); - fallthrough; - case SPEED_1000: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 1000baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 1000baseT_Half); - fallthrough; - case SPEED_100: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 100baseT_Half); - fallthrough; - case SPEED_10: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - 
ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); + switch (cmd->base.speed) { + case SPEED_25000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 25000baseSR_Full); + break; + case SPEED_10000: + if (link_mode == QETH_LINK_MODE_FIBRE_LONG) { + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseLR_Full); + } else if (link_mode == QETH_LINK_MODE_FIBRE_SHORT) { + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseSR_Full); + } + break; + case SPEED_1000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseX_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseX_Full); + break; + default: + break; + } + break; default: - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, - 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - 10baseT_Half); - WARN_ON_ONCE(1); + break; } } - static int qeth_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct qeth_card *card = netdev->ml_priv; - enum qeth_link_types link_type; - struct carrier_info carrier_info; - int rc; + struct qeth_link_info link_info; - if (IS_IQD(card) || IS_VM_NIC(card)) - link_type = QETH_LINK_TYPE_10GBIT_ETH; - else - link_type = card->info.link_type; - - cmd->base.duplex = DUPLEX_FULL; + cmd->base.speed = card->info.link_info.speed; + cmd->base.duplex = card->info.link_info.duplex; + cmd->base.port = card->info.link_info.port; cmd->base.autoneg = AUTONEG_ENABLE; cmd->base.phy_address = 0; cmd->base.mdio_support = 0; cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - switch (link_type) { - case QETH_LINK_TYPE_FAST_ETH: - case QETH_LINK_TYPE_LANE_ETH100: - cmd->base.speed = SPEED_100; - cmd->base.port = PORT_TP; - break; - case QETH_LINK_TYPE_GBIT_ETH: - case QETH_LINK_TYPE_LANE_ETH1000: - cmd->base.speed = SPEED_1000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_10GBIT_ETH: - cmd->base.speed = SPEED_10000; - cmd->base.port = PORT_FIBRE; - break; - case QETH_LINK_TYPE_25GBIT_ETH: - cmd->base.speed = SPEED_25000; - cmd->base.port = PORT_FIBRE; - break; - default: - cmd->base.speed = SPEED_10; - cmd->base.port = PORT_TP; - } - qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port); - /* Check if we can obtain more accurate information. */ - /* If QUERY_CARD_INFO command is not supported or fails, */ - /* just return the heuristics that was filled above. */ - rc = qeth_query_card_info(card, &carrier_info); - if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ - return 0; - if (rc) /* report error from the hardware operation */ - return rc; - /* on success, fill in the information got from the hardware */ - - netdev_dbg(netdev, - "card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n", - carrier_info.card_type, - carrier_info.port_mode, - carrier_info.port_speed); - - /* Update attributes for which we've obtained more authoritative */ - /* information, leave the rest the way they where filled above. 
*/ - switch (carrier_info.card_type) { - case CARD_INFO_TYPE_1G_COPPER_A: - case CARD_INFO_TYPE_1G_COPPER_B: - cmd->base.port = PORT_TP; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_1G_FIBRE_A: - case CARD_INFO_TYPE_1G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port); - break; - case CARD_INFO_TYPE_10G_FIBRE_A: - case CARD_INFO_TYPE_10G_FIBRE_B: - cmd->base.port = PORT_FIBRE; - qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port); - break; - } - - switch (carrier_info.port_mode) { - case CARD_INFO_PORTM_FULLDUPLEX: - cmd->base.duplex = DUPLEX_FULL; - break; - case CARD_INFO_PORTM_HALFDUPLEX: - cmd->base.duplex = DUPLEX_HALF; - break; + if (!qeth_query_card_info(card, &link_info)) { + if (link_info.speed != SPEED_UNKNOWN) + cmd->base.speed = link_info.speed; + if (link_info.duplex != DUPLEX_UNKNOWN) + cmd->base.duplex = link_info.duplex; + if (link_info.port != PORT_OTHER) + cmd->base.port = link_info.port; } - switch (carrier_info.port_speed) { - case CARD_INFO_PORTS_10M: - cmd->base.speed = SPEED_10; - break; - case CARD_INFO_PORTS_100M: - cmd->base.speed = SPEED_100; - break; - case CARD_INFO_PORTS_1G: - cmd->base.speed = SPEED_1000; - break; - case CARD_INFO_PORTS_10G: - cmd->base.speed = SPEED_10000; - break; - case CARD_INFO_PORTS_25G: - cmd->base.speed = SPEED_25000; - break; - } + qeth_set_ethtool_link_modes(cmd, card->info.link_info.link_mode); return 0; } diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 28f6dda95736..daeb837abec3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -737,8 +737,6 @@ static void qeth_l2_dev2br_an_set_cb(void *priv, * * On enable, emits a series of address notifications for all * currently registered hosts. 
- * - * Must be called under rtnl_lock */ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable) { @@ -1289,16 +1287,19 @@ static void qeth_l2_dev2br_worker(struct work_struct *work) if (READ_ONCE(card->info.pnso_mode) == QETH_PNSO_NONE) goto free; - /* Potential re-config in progress, try again later: */ - if (!rtnl_trylock()) { - queue_delayed_work(card->event_wq, dwork, - msecs_to_jiffies(100)); - return; - } - if (!netif_device_present(card->dev)) - goto out_unlock; - if (data->ac_event.lost_event_mask) { + /* Potential re-config in progress, try again later: */ + if (!rtnl_trylock()) { + queue_delayed_work(card->event_wq, dwork, + msecs_to_jiffies(100)); + return; + } + + if (!netif_device_present(card->dev)) { + rtnl_unlock(); + goto free; + } + QETH_DBF_MESSAGE(3, "Address change notification overflow on device %x\n", CARD_DEVID(card)); @@ -1328,6 +1329,8 @@ static void qeth_l2_dev2br_worker(struct work_struct *work) "Address Notification resynced on device %x\n", CARD_DEVID(card)); } + + rtnl_unlock(); } else { for (i = 0; i < data->ac_event.num_entries; i++) { struct qeth_ipacmd_addr_change_entry *entry = @@ -1339,9 +1342,6 @@ static void qeth_l2_dev2br_worker(struct work_struct *work) } } -out_unlock: - rtnl_unlock(); - free: kfree(data); } @@ -2310,11 +2310,8 @@ static void qeth_l2_set_offline(struct qeth_card *card) card->state = CARD_STATE_DOWN; qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); - if (priv->brport_features & BR_LEARNING_SYNC) { - rtnl_lock(); + if (priv->brport_features & BR_LEARNING_SYNC) qeth_l2_dev2br_fdb_flush(card); - rtnl_unlock(); - } } /* Returns zero if the command is successfully "consumed" */ diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b1c1d2510d55..264b6c782382 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -104,10 +104,7 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, qeth_l3_convert_addr_to_bits(ipatoe->addr, ipatoe_bits, (ipatoe->proto == QETH_PROT_IPV4) ? 4 : 16); - if (addr->proto == QETH_PROT_IPV4) - rc = !memcmp(addr_bits, ipatoe_bits, ipatoe->mask_bits); - else - rc = !memcmp(addr_bits, ipatoe_bits, ipatoe->mask_bits); + rc = !memcmp(addr_bits, ipatoe_bits, ipatoe->mask_bits); if (rc) break; } diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 9888a7061873..101def7dc73d 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1159,7 +1159,7 @@ static u32 fq_to_tag(struct qman_fq *fq) static u32 __poll_portal_slow(struct qman_portal *p, u32 is); static inline unsigned int __poll_portal_fast(struct qman_portal *p, - unsigned int poll_limit); + unsigned int poll_limit, bool sched_napi); static void qm_congestion_task(struct work_struct *work); static void qm_mr_process_task(struct work_struct *work); @@ -1174,7 +1174,7 @@ static irqreturn_t portal_isr(int irq, void *ptr) /* DQRR-handling if it's interrupt-driven */ if (is & QM_PIRQ_DQRI) { - __poll_portal_fast(p, QMAN_POLL_LIMIT); + __poll_portal_fast(p, QMAN_POLL_LIMIT, true); clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; } /* Handling of anything else that's interrupt-driven */ @@ -1602,7 +1602,7 @@ static noinline void clear_vdqcr(struct qman_portal *p, struct qman_fq *fq) * user callbacks to call into any QMan API. 
*/ static inline unsigned int __poll_portal_fast(struct qman_portal *p, - unsigned int poll_limit) + unsigned int poll_limit, bool sched_napi) { const struct qm_dqrr_entry *dq; struct qman_fq *fq; @@ -1636,7 +1636,7 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p, * and we don't want multiple if()s in the critical * path (SDQCR). */ - res = fq->cb.dqrr(p, fq, dq); + res = fq->cb.dqrr(p, fq, dq, sched_napi); if (res == qman_cb_dqrr_stop) break; /* Check for VDQCR completion */ @@ -1646,7 +1646,7 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p, /* SDQCR: context_b points to the FQ */ fq = tag_to_fq(be32_to_cpu(dq->context_b)); /* Now let the callback do its stuff */ - res = fq->cb.dqrr(p, fq, dq); + res = fq->cb.dqrr(p, fq, dq, sched_napi); /* * The callback can request that we exit without * consuming this entry nor advancing; @@ -1753,7 +1753,7 @@ EXPORT_SYMBOL(qman_start_using_portal); int qman_p_poll_dqrr(struct qman_portal *p, unsigned int limit) { - return __poll_portal_fast(p, limit); + return __poll_portal_fast(p, limit, false); } EXPORT_SYMBOL(qman_p_poll_dqrr); diff --git a/drivers/soc/fsl/qbman/qman_test_api.c b/drivers/soc/fsl/qbman/qman_test_api.c index 7066b2f1467c..28fbddc3c204 100644 --- a/drivers/soc/fsl/qbman/qman_test_api.c +++ b/drivers/soc/fsl/qbman/qman_test_api.c @@ -45,7 +45,8 @@ static enum qman_cb_dqrr_result cb_dqrr(struct qman_portal *, struct qman_fq *, - const struct qm_dqrr_entry *); + const struct qm_dqrr_entry *, + bool sched_napi); static void cb_ern(struct qman_portal *, struct qman_fq *, const union qm_mr_entry *); static void cb_fqs(struct qman_portal *, struct qman_fq *, @@ -208,7 +209,8 @@ failed: static enum qman_cb_dqrr_result cb_dqrr(struct qman_portal *p, struct qman_fq *fq, - const struct qm_dqrr_entry *dq) + const struct qm_dqrr_entry *dq, + bool sched_napi) { if (WARN_ON(fd_neq(&fd_dq, &dq->fd))) { pr_err("BADNESS: dequeued frame doesn't match;\n"); diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c index e87b65403b67..b7e8e5ec884c 100644 --- a/drivers/soc/fsl/qbman/qman_test_stash.c +++ b/drivers/soc/fsl/qbman/qman_test_stash.c @@ -275,7 +275,8 @@ static inline int process_frame_data(struct hp_handler *handler, static enum qman_cb_dqrr_result normal_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dqrr) + const struct qm_dqrr_entry *dqrr, + bool sched_napi) { struct hp_handler *handler = (struct hp_handler *)fq; @@ -293,7 +294,8 @@ skip: static enum qman_cb_dqrr_result special_dqrr(struct qman_portal *portal, struct qman_fq *fq, - const struct qm_dqrr_entry *dqrr) + const struct qm_dqrr_entry *dqrr, + bool sched_napi) { struct hp_handler *handler = (struct hp_handler *)fq; diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 2d0310448eba..443ca3f3cdf0 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -114,6 +114,8 @@ source "drivers/staging/kpc2000/Kconfig" source "drivers/staging/qlge/Kconfig" +source "drivers/staging/wimax/Kconfig" + source "drivers/staging/wfx/Kconfig" source "drivers/staging/hikey9xx/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 757a892ab5b9..dc45128ef525 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -47,5 +47,6 @@ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/ obj-$(CONFIG_KPC2000) += kpc2000/ obj-$(CONFIG_QLGE) += qlge/ +obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_WFX) += 
wfx/ obj-y += hikey9xx/ diff --git a/Documentation/admin-guide/wimax/i2400m.rst b/drivers/staging/wimax/Documentation/i2400m.rst index 194388c0c351..194388c0c351 100644 --- a/Documentation/admin-guide/wimax/i2400m.rst +++ b/drivers/staging/wimax/Documentation/i2400m.rst diff --git a/Documentation/admin-guide/wimax/index.rst b/drivers/staging/wimax/Documentation/index.rst index fdf7c1f99ff5..fdf7c1f99ff5 100644 --- a/Documentation/admin-guide/wimax/index.rst +++ b/drivers/staging/wimax/Documentation/index.rst diff --git a/Documentation/admin-guide/wimax/wimax.rst b/drivers/staging/wimax/Documentation/wimax.rst index 817ee8ba2732..817ee8ba2732 100644 --- a/Documentation/admin-guide/wimax/wimax.rst +++ b/drivers/staging/wimax/Documentation/wimax.rst diff --git a/net/wimax/Kconfig b/drivers/staging/wimax/Kconfig index d13762bc4abc..ded8b70b25ee 100644 --- a/net/wimax/Kconfig +++ b/drivers/staging/wimax/Kconfig @@ -22,6 +22,8 @@ menuconfig WIMAX If unsure, it is safe to select M (module). +if WIMAX + config WIMAX_DEBUG_LEVEL int "WiMAX debug level" depends on WIMAX @@ -38,3 +40,7 @@ config WIMAX_DEBUG_LEVEL If set at zero, this will compile out all the debug code. It is recommended that it is left at 8. + +source "drivers/staging/wimax/i2400m/Kconfig" + +endif diff --git a/net/wimax/Makefile b/drivers/staging/wimax/Makefile index c2a71ae487ac..0e3f988656aa 100644 --- a/net/wimax/Makefile +++ b/drivers/staging/wimax/Makefile @@ -11,3 +11,5 @@ wimax-y := \ stack.o wimax-$(CONFIG_DEBUG_FS) += debugfs.o + +obj-$(CONFIG_WIMAX_I2400M) += i2400m/ diff --git a/drivers/staging/wimax/TODO b/drivers/staging/wimax/TODO new file mode 100644 index 000000000000..26e4cb9e9599 --- /dev/null +++ b/drivers/staging/wimax/TODO @@ -0,0 +1,18 @@ +There are no known users of this driver as of October 2020, and it will +be removed unless someone turns out to still need it in future releases. + +According to https://en.wikipedia.org/wiki/List_of_WiMAX_networks, there +have been many public wimax networks, but it appears that many of these +have migrated to LTE or discontinued their service altogether. As most +PCs and phones lack WiMAX hardware support, the remaining networks tend +to use standalone routers. These almost certainly run Linux, but not a +modern kernel or the mainline wimax driver stack. + +NetworkManager appears to have dropped userspace support in 2015 +https://bugzilla.gnome.org/show_bug.cgi?id=747846, the www.linuxwimax.org +site had already shut down earlier. + +WiMax is apparently still being deployed on airport campus networks +("AeroMACS"), but in a frequency band that was not supported by the old +Intel 2400m (used in Sandy Bridge laptops and earlier), which is the +only driver using the kernel's wimax stack. 
diff --git a/net/wimax/debug-levels.h b/drivers/staging/wimax/debug-levels.h index ebc287cde336..b854802d1d00 100644 --- a/net/wimax/debug-levels.h +++ b/drivers/staging/wimax/debug-levels.h @@ -13,7 +13,7 @@ #define D_MODULENAME wimax #define D_MASTER CONFIG_WIMAX_DEBUG_LEVEL -#include <linux/wimax/debug.h> +#include "linux-wimax-debug.h" /* List of all the enabled modules */ enum d_module { diff --git a/net/wimax/debugfs.c b/drivers/staging/wimax/debugfs.c index 3c54bb6b925a..e11bff61ffcf 100644 --- a/net/wimax/debugfs.c +++ b/drivers/staging/wimax/debugfs.c @@ -7,7 +7,7 @@ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> */ #include <linux/debugfs.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include "wimax-internal.h" #define D_SUBMODULE debugfs diff --git a/drivers/net/wimax/i2400m/Kconfig b/drivers/staging/wimax/i2400m/Kconfig index 843b905a26a3..843b905a26a3 100644 --- a/drivers/net/wimax/i2400m/Kconfig +++ b/drivers/staging/wimax/i2400m/Kconfig diff --git a/drivers/net/wimax/i2400m/Makefile b/drivers/staging/wimax/i2400m/Makefile index b1db1eff0648..b1db1eff0648 100644 --- a/drivers/net/wimax/i2400m/Makefile +++ b/drivers/staging/wimax/i2400m/Makefile diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/staging/wimax/i2400m/control.c index 8df98757d901..fe885aa56cf3 100644 --- a/drivers/net/wimax/i2400m/control.c +++ b/drivers/staging/wimax/i2400m/control.c @@ -77,7 +77,7 @@ #include "i2400m.h" #include <linux/kernel.h> #include <linux/slab.h> -#include <linux/wimax/i2400m.h> +#include "linux-wimax-i2400m.h" #include <linux/export.h> #include <linux/moduleparam.h> diff --git a/drivers/net/wimax/i2400m/debug-levels.h b/drivers/staging/wimax/i2400m/debug-levels.h index 00942bb1489b..a317e9fbb734 100644 --- a/drivers/net/wimax/i2400m/debug-levels.h +++ b/drivers/staging/wimax/i2400m/debug-levels.h @@ -13,7 +13,7 @@ #define D_MODULENAME i2400m #define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL -#include <linux/wimax/debug.h> +#include "../linux-wimax-debug.h" /* List of all the enabled modules */ enum d_module { diff --git a/drivers/net/wimax/i2400m/debugfs.c b/drivers/staging/wimax/i2400m/debugfs.c index 1c640b41ea4c..1c640b41ea4c 100644 --- a/drivers/net/wimax/i2400m/debugfs.c +++ b/drivers/staging/wimax/i2400m/debugfs.c diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/staging/wimax/i2400m/driver.c index ecb3fccca603..dc8939ff78c0 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/staging/wimax/i2400m/driver.c @@ -50,7 +50,7 @@ */ #include "i2400m.h" #include <linux/etherdevice.h> -#include <linux/wimax/i2400m.h> +#include "linux-wimax-i2400m.h" #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/suspend.h> diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/staging/wimax/i2400m/fw.c index 6c9a41bff2e0..6c9a41bff2e0 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/staging/wimax/i2400m/fw.c diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/staging/wimax/i2400m/i2400m-usb.h index eff4f464a23e..eff4f464a23e 100644 --- a/drivers/net/wimax/i2400m/i2400m-usb.h +++ b/drivers/staging/wimax/i2400m/i2400m-usb.h diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/staging/wimax/i2400m/i2400m.h index a3733a6d14f5..de22cc6f2c5c 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/staging/wimax/i2400m/i2400m.h @@ -156,8 +156,8 @@ #include <linux/completion.h> #include <linux/rwsem.h> #include <linux/atomic.h> -#include <net/wimax.h> -#include <linux/wimax/i2400m.h> +#include "../net-wimax.h" +#include 
"linux-wimax-i2400m.h" #include <asm/byteorder.h> enum { diff --git a/include/uapi/linux/wimax/i2400m.h b/drivers/staging/wimax/i2400m/linux-wimax-i2400m.h index fd198bc24a3c..fd198bc24a3c 100644 --- a/include/uapi/linux/wimax/i2400m.h +++ b/drivers/staging/wimax/i2400m/linux-wimax-i2400m.h diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/staging/wimax/i2400m/netdev.c index a7fcbceb6e6b..a7fcbceb6e6b 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/staging/wimax/i2400m/netdev.c diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/staging/wimax/i2400m/op-rfkill.c index 5c79f052cad2..fbddf2e18c14 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/staging/wimax/i2400m/op-rfkill.c @@ -18,7 +18,7 @@ * switch (coming from sysfs, the wimax stack or user space). */ #include "i2400m.h" -#include <linux/wimax/i2400m.h> +#include "linux-wimax-i2400m.h" #include <linux/slab.h> diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/staging/wimax/i2400m/rx.c index c9fb619a9e01..c9fb619a9e01 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/staging/wimax/i2400m/rx.c diff --git a/drivers/net/wimax/i2400m/sysfs.c b/drivers/staging/wimax/i2400m/sysfs.c index 895ee265909b..895ee265909b 100644 --- a/drivers/net/wimax/i2400m/sysfs.c +++ b/drivers/staging/wimax/i2400m/sysfs.c diff --git a/drivers/net/wimax/i2400m/tx.c b/drivers/staging/wimax/i2400m/tx.c index 1255302e251e..1255302e251e 100644 --- a/drivers/net/wimax/i2400m/tx.c +++ b/drivers/staging/wimax/i2400m/tx.c diff --git a/drivers/net/wimax/i2400m/usb-debug-levels.h b/drivers/staging/wimax/i2400m/usb-debug-levels.h index b6f7335de765..8fd0111560f6 100644 --- a/drivers/net/wimax/i2400m/usb-debug-levels.h +++ b/drivers/staging/wimax/i2400m/usb-debug-levels.h @@ -13,7 +13,7 @@ #define D_MODULENAME i2400m_usb #define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL -#include <linux/wimax/debug.h> +#include "../linux-wimax-debug.h" /* List of all the enabled modules */ enum d_module { diff --git a/drivers/net/wimax/i2400m/usb-fw.c b/drivers/staging/wimax/i2400m/usb-fw.c index 27ab233650d5..27ab233650d5 100644 --- a/drivers/net/wimax/i2400m/usb-fw.c +++ b/drivers/staging/wimax/i2400m/usb-fw.c diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/staging/wimax/i2400m/usb-notif.c index 5d429f816125..5d429f816125 100644 --- a/drivers/net/wimax/i2400m/usb-notif.c +++ b/drivers/staging/wimax/i2400m/usb-notif.c diff --git a/drivers/net/wimax/i2400m/usb-rx.c b/drivers/staging/wimax/i2400m/usb-rx.c index 5b64bda7d9e7..5b64bda7d9e7 100644 --- a/drivers/net/wimax/i2400m/usb-rx.c +++ b/drivers/staging/wimax/i2400m/usb-rx.c diff --git a/drivers/net/wimax/i2400m/usb-tx.c b/drivers/staging/wimax/i2400m/usb-tx.c index 3ba9d70cca1b..3ba9d70cca1b 100644 --- a/drivers/net/wimax/i2400m/usb-tx.c +++ b/drivers/staging/wimax/i2400m/usb-tx.c diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/staging/wimax/i2400m/usb.c index b684e97ac976..3b84dd7b5567 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/staging/wimax/i2400m/usb.c @@ -49,7 +49,7 @@ * usb_reset_device() */ #include "i2400m-usb.h" -#include <linux/wimax/i2400m.h> +#include "linux-wimax-i2400m.h" #include <linux/debugfs.h> #include <linux/slab.h> #include <linux/module.h> diff --git a/net/wimax/id-table.c b/drivers/staging/wimax/id-table.c index 02eee37b7e31..0e6f4aa87bc9 100644 --- a/net/wimax/id-table.c +++ b/drivers/staging/wimax/id-table.c @@ -28,7 +28,7 @@ #include <net/genetlink.h> #include <linux/netdevice.h> #include <linux/list.h> -#include <linux/wimax.h> 
+#include "linux-wimax.h" #include "wimax-internal.h" diff --git a/include/linux/wimax/debug.h b/drivers/staging/wimax/linux-wimax-debug.h index cdae052bcdcd..5b5ec405143b 100644 --- a/include/linux/wimax/debug.h +++ b/drivers/staging/wimax/linux-wimax-debug.h @@ -60,7 +60,7 @@ * #define D_MODULENAME modulename * #define D_MASTER 10 * - * #include <linux/wimax/debug.h> + * #include "linux-wimax-debug.h" * * enum d_module { * D_SUBMODULE_DECLARE(submodule_1), diff --git a/include/uapi/linux/wimax.h b/drivers/staging/wimax/linux-wimax.h index 9f6b77af2f6d..9f6b77af2f6d 100644 --- a/include/uapi/linux/wimax.h +++ b/drivers/staging/wimax/linux-wimax.h diff --git a/include/net/wimax.h b/drivers/staging/wimax/net-wimax.h index f6e31d2f47aa..f578e345e2bd 100644 --- a/include/net/wimax.h +++ b/drivers/staging/wimax/net-wimax.h @@ -236,7 +236,7 @@ #ifndef __NET__WIMAX_H__ #define __NET__WIMAX_H__ -#include <linux/wimax.h> +#include "linux-wimax.h" #include <net/genetlink.h> #include <linux/netdevice.h> diff --git a/net/wimax/op-msg.c b/drivers/staging/wimax/op-msg.c index 6460b5785758..e20ac7d84e82 100644 --- a/net/wimax/op-msg.c +++ b/drivers/staging/wimax/op-msg.c @@ -60,7 +60,7 @@ #include <linux/slab.h> #include <net/genetlink.h> #include <linux/netdevice.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include <linux/security.h> #include <linux/export.h> #include "wimax-internal.h" diff --git a/net/wimax/op-reset.c b/drivers/staging/wimax/op-reset.c index 9899b2e56721..b3f000cbe112 100644 --- a/net/wimax/op-reset.c +++ b/drivers/staging/wimax/op-reset.c @@ -13,9 +13,9 @@ * disconnect and reconnect the device). */ -#include <net/wimax.h> +#include "net-wimax.h" #include <net/genetlink.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include <linux/security.h> #include <linux/export.h> #include "wimax-internal.h" diff --git a/net/wimax/op-rfkill.c b/drivers/staging/wimax/op-rfkill.c index 248d10b60b05..78b294481a59 100644 --- a/net/wimax/op-rfkill.c +++ b/drivers/staging/wimax/op-rfkill.c @@ -45,9 +45,9 @@ * wimax_rfkill_rm() [called by wimax_dev_add/rm()] */ -#include <net/wimax.h> +#include "net-wimax.h" #include <net/genetlink.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include <linux/security.h> #include <linux/rfkill.h> #include <linux/export.h> diff --git a/net/wimax/op-state-get.c b/drivers/staging/wimax/op-state-get.c index 5bc712de1563..c5bfbed505f5 100644 --- a/net/wimax/op-state-get.c +++ b/drivers/staging/wimax/op-state-get.c @@ -10,9 +10,9 @@ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> */ -#include <net/wimax.h> +#include "net-wimax.h" #include <net/genetlink.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include <linux/security.h> #include "wimax-internal.h" diff --git a/net/wimax/stack.c b/drivers/staging/wimax/stack.c index b6dd9d956ed8..ace24a6dfd2d 100644 --- a/net/wimax/stack.c +++ b/drivers/staging/wimax/stack.c @@ -39,7 +39,7 @@ #include <linux/gfp.h> #include <net/genetlink.h> #include <linux/netdevice.h> -#include <linux/wimax.h> +#include "linux-wimax.h" #include <linux/module.h> #include "wimax-internal.h" @@ -388,17 +388,24 @@ void wimax_dev_init(struct wimax_dev *wimax_dev) } EXPORT_SYMBOL_GPL(wimax_dev_init); +/* + * There are multiple enums reusing the same values, adding + * others is only possible if they use a compatible policy. 
+ */ static const struct nla_policy wimax_gnl_policy[WIMAX_GNL_ATTR_MAX + 1] = { - [WIMAX_GNL_RESET_IFIDX] = { .type = NLA_U32, }, - [WIMAX_GNL_RFKILL_IFIDX] = { .type = NLA_U32, }, - [WIMAX_GNL_RFKILL_STATE] = { - .type = NLA_U32 /* enum wimax_rf_state */ - }, - [WIMAX_GNL_STGET_IFIDX] = { .type = NLA_U32, }, - [WIMAX_GNL_MSG_IFIDX] = { .type = NLA_U32, }, - [WIMAX_GNL_MSG_DATA] = { - .type = NLA_UNSPEC, /* libnl doesn't grok BINARY yet */ - }, + /* + * WIMAX_GNL_RESET_IFIDX, WIMAX_GNL_RFKILL_IFIDX, + * WIMAX_GNL_STGET_IFIDX, WIMAX_GNL_MSG_IFIDX + */ + [1] = { .type = NLA_U32, }, + /* + * WIMAX_GNL_RFKILL_STATE, WIMAX_GNL_MSG_PIPE_NAME + */ + [2] = { .type = NLA_U32, }, /* enum wimax_rf_state */ + /* + * WIMAX_GNL_MSG_DATA + */ + [3] = { .type = NLA_UNSPEC, }, /* libnl doesn't grok BINARY yet */ }; static const struct genl_small_ops wimax_gnl_ops[] = { diff --git a/net/wimax/wimax-internal.h b/drivers/staging/wimax/wimax-internal.h index 40751207296c..a6b6990642a1 100644 --- a/net/wimax/wimax-internal.h +++ b/drivers/staging/wimax/wimax-internal.h @@ -22,7 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> -#include <net/wimax.h> +#include "net-wimax.h" /* diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 5d5ff2203fa2..d7493016cd46 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -186,6 +186,7 @@ struct atmdev_ops { /* only send is required */ void __user *arg); #endif int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); + int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, unsigned long addr); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b16bf48aab6..581b2a2e78eb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,9 +36,11 @@ struct seq_operations; struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; +struct kobject; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +extern struct kobject *btf_kobj; typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); @@ -310,6 +312,7 @@ enum bpf_return_type { RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -1294,6 +1297,10 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +enum bpf_iter_feature { + BPF_ITER_RESCHED = BIT(0), +}; + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; @@ -1302,6 +1309,7 @@ struct bpf_iter_reg { bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; u32 ctx_arg_info_size; + u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aaacb6aafc87..0d1c33ace398 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -7,6 +7,7 @@ #ifndef _LINUX_BPF_LSM_H #define _LINUX_BPF_LSM_H +#include <linux/sched.h> #include <linux/bpf.h> #include <linux/lsm_hooks.h> @@ -26,6 +27,8 @@ extern 
struct lsm_blob_sizes bpf_lsm_blob_sizes; int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); +bool bpf_lsm_is_sleepable_hook(u32 btf_id); + static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) { @@ -35,12 +38,29 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + if (unlikely(!task->security)) + return NULL; + + return task->security + bpf_lsm_blob_sizes.lbs_task; +} + extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ +static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { @@ -53,10 +73,20 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + return NULL; +} + static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2e6f568377f1..99f7fd657d87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e83ef6f6bf43..306869d4743b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,7 @@ struct bpf_reg_state { enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ - u16 range; + int range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index eeae59d3ceb7..35d21fddaf2d 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -89,7 +89,7 @@ static inline int nla_put_u64_0pad(struct sk_buff *skb, int attrtype, u64 value) nla_get_u64, nla_put_u64_0pad, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, nla_put, false) + nla_strscpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, nla_put, false) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 770408b2fdaf..5e8cc9c3d45a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3417,6 +3417,8 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) #define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) +#define WLAN_AKM_SUITE_WFA_DPP SUITE(WLAN_OUI_WFA, 2) + #define WLAN_MAX_KEY_LEN 32 
#define WLAN_PMK_NAME_LEN 16 @@ -3427,6 +3429,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 +#define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 556caed00258..b979005ea39c 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -25,6 +25,7 @@ struct br_ip { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; #endif + unsigned char mac_addr[ETH_ALEN]; } dst; __be16 proto; __u16 vid; diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h deleted file mode 100644 index 52224de798aa..000000000000 --- a/include/linux/if_frad.h +++ /dev/null @@ -1,92 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * DLCI/FRAD Definitions for Frame Relay Access Devices. DLCI devices are - * created for each DLCI associated with a FRAD. The FRAD driver - * is not truly a network device, but the lower level device - * handler. This allows other FRAD manufacturers to use the DLCI - * code, including its RFC1490 encapsulation alongside the current - * implementation for the Sangoma cards. - * - * Version: @(#)if_ifrad.h 0.15 31 Mar 96 - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * 0.15 Mike McLagan changed structure defs (packed) - * re-arranged flags - * added DLCI_RET vars - */ -#ifndef _FRAD_H_ -#define _FRAD_H_ - -#include <uapi/linux/if_frad.h> - - -#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) - -/* these are the fields of an RFC 1490 header */ -struct frhdr -{ - unsigned char control; - - /* for IP packets, this can be the NLPID */ - unsigned char pad; - - unsigned char NLPID; - unsigned char OUI[3]; - __be16 PID; - -#define IP_NLPID pad -} __packed; - -/* see RFC 1490 for the definition of the following */ -#define FRAD_I_UI 0x03 - -#define FRAD_P_PADDING 0x00 -#define FRAD_P_Q933 0x08 -#define FRAD_P_SNAP 0x80 -#define FRAD_P_CLNP 0x81 -#define FRAD_P_IP 0xCC - -struct dlci_local -{ - struct net_device *master; - struct net_device *slave; - struct dlci_conf config; - int configured; - struct list_head list; - - /* callback function */ - void (*receive)(struct sk_buff *skb, struct net_device *); -}; - -struct frad_local -{ - /* devices which this FRAD is slaved to */ - struct net_device *master[CONFIG_DLCI_MAX]; - short dlci[CONFIG_DLCI_MAX]; - - struct frad_conf config; - int configured; /* has this device been configured */ - int initialized; /* mem_start, port, irq set ? 
 */ - /* callback functions */ - int (*activate)(struct net_device *, struct net_device *); - int (*deactivate)(struct net_device *, struct net_device *); - int (*assoc)(struct net_device *, struct net_device *); - int (*deassoc)(struct net_device *, struct net_device *); - int (*dlci_conf)(struct net_device *, struct net_device *, int get); - - /* fields that are used by the Sangoma SDLA cards */ - struct timer_list timer; - struct net_device *dev; - int type; /* adapter type */ - int state; /* state of the S502/8 control latch */ - int buffer; /* current buffer for S508 firmware */ -}; - -#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ - -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - -#endif diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3515ca64e638..53aa0343bf69 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -105,7 +105,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) -#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ @@ -126,7 +126,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ - IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index ff7b7607c8cf..52b1610eae68 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -25,6 +25,9 @@ #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 +/* Marvell 88E1111 in Finisar SFP module with modified PHY ID */ +#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 + /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do * not have a model ID. 
So the switch driver traps reads to the ID2 * register and returns the switch family ID diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..b9bb7dff32e2 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -737,4 +737,11 @@ int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, struct sk_buff *skb, size_t len, enum mhi_flags mflags); +/** + * mhi_queue_is_full - Determine whether queueing new elements is possible + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + */ +bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir); + #endif /* _MHI_H_ */ diff --git a/include/linux/module.h b/include/linux/module.h index 6264617bab4d..c4e7a887f469 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -475,6 +475,10 @@ struct module { unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + unsigned int btf_data_size; + void *btf_data; +#endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 0b17c4322b09..934de56644e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -207,8 +207,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ - NETIF_F_GSO_SCTP) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ + NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* * If one device supports one of these features, then enable them diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..03433a4c929e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1490,7 +1490,7 @@ struct net_device_ops { }; /** - * enum net_device_priv_flags - &struct net_device priv_flags + * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. 
These flags are invisible to @@ -2557,6 +2557,18 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l u64_stats_update_end(&tstats->syncp); } +static inline void dev_sw_netstats_tx_add(struct net_device *dev, + unsigned int packets, + unsigned int len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += len; + tstats->tx_packets += packets; + u64_stats_update_end(&tstats->syncp); +} + static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); @@ -2584,6 +2596,20 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL) +#define devm_netdev_alloc_pcpu_stats(dev, type) \ +({ \ + typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) + enum netdev_lag_tx_type { NETDEV_LAG_TX_TYPE_UNKNOWN, NETDEV_LAG_TX_TYPE_RANDOM, @@ -3576,7 +3602,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) } /** - * netif_subqueue_stopped - test status of subqueue + * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * @@ -3590,6 +3616,13 @@ static inline bool __netif_subqueue_stopped(const struct net_device *dev, return netif_tx_queue_stopped(txq); } +/** + * netif_subqueue_stopped - test status of subqueue + * @dev: network device + * @skb: sub queue buffer pointer + * + * Check individual transmit queue of a device with multiple transmit queues. 
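+ * + * A minimal, illustrative use from a driver's ndo_start_xmit() + * (a sketch, not taken from any in-tree driver): + * + *	if (netif_subqueue_stopped(dev, skb)) + *		return NETDEV_TX_BUSY;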
+ */ static inline bool netif_subqueue_stopped(const struct net_device *dev, struct sk_buff *skb) { @@ -4501,6 +4534,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); +void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab192720e2d6..46d9a0c26c67 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -198,6 +198,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* The max revision number supported by any set type + 1 */ +#define IPSET_REVISION_MAX 9 + /* The core set type structure */ struct ip_set_type { struct list_head list; @@ -215,6 +218,8 @@ struct ip_set_type { u8 family; /* Type revisions */ u8 revision_min, revision_max; + /* Revision-specific supported (create) flags */ + u8 create_flags[IPSET_REVISION_MAX+1]; /* Set features to control swapping */ u16 features; diff --git a/include/linux/phy.h b/include/linux/phy.h index 56563e5e0dc7..8849a00a093f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1480,6 +1480,7 @@ int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_loopback(struct phy_device *phydev, bool enable); int genphy_soft_reset(struct phy_device *phydev); +irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev); static inline int genphy_config_aneg(struct phy_device *phydev) { @@ -1540,8 +1541,10 @@ void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); +void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); +void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h new file mode 100644 index 000000000000..388846766bb2 --- /dev/null +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * Hirschmann Hellcreek TSN switch platform data. 
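+ * + * (Board support code is expected to hand this structure to the + * hellcreek DSA switch driver as ordinary platform data.)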
+ * + * Copyright (C) 2020 Linutronix GmbH + * Author Kurt Kanzenbach <kurt@linutronix.de> + */ + +#ifndef _HIRSCHMANN_HELLCREEK_H_ +#define _HIRSCHMANN_HELLCREEK_H_ + +#include <linux/types.h> + +struct hellcreek_platform_data { + int num_ports; /* Number of switch ports */ + int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */ + int qbv_support; /* Qbv support on front TSN ports */ + int qbv_on_cpu_port; /* Qbv support on the CPU port */ + int qbu_support; /* Qbu support on front TSN ports */ + u16 module_id; /* Module identification */ +}; + +#endif /* _HIRSCHMANN_HELLCREEK_H_ */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index d3e8ba5c7125..0d47fd33b228 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,19 @@ #include <linux/pps_kernel.h> #include <linux/ptp_clock.h> +/** + * struct ptp_clock_request - request PTP clock event + * + * @type: The type of the request. + * EXTTS: Configure external trigger timestamping + * PEROUT: Configure periodic output signal (e.g. PPS) + * PPS: trigger internal PPS event for input + * into kernel PPS subsystem + * @extts: describes configuration for external trigger timestamping. + * This is only valid when event == PTP_CLK_REQ_EXTTS. + * @perout: describes configuration for periodic output. + * This is only valid when event == PTP_CLK_REQ_PEROUT. + */ struct ptp_clock_request { enum { diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76731230bbc5..bb1926589693 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -482,11 +482,13 @@ enum sctp_error { * 11 Restart of an association with new addresses * 12 User Initiated Abort * 13 Protocol Violation + * 14 Restart of an Association with New Encapsulation Port */ SCTP_ERROR_RESTART = cpu_to_be16(0x0b), SCTP_ERROR_USER_ABORT = cpu_to_be16(0x0c), SCTP_ERROR_PROTO_VIOLATION = cpu_to_be16(0x0d), + SCTP_ERROR_NEW_ENCAP_PORT = cpu_to_be16(0x0e), /* ADDIP Section 3.3 New Error Causes * @@ -793,4 +795,22 @@ enum { SCTP_FLOWLABEL_VAL_MASK = 0xfffff }; +/* UDP Encapsulation + * draft-tuexen-tsvwg-sctp-udp-encaps-cons-03.html#section-4-4 + * + * The error cause indicating a "Restart of an Association with + * New Encapsulation Port" + * + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code = 14 | Cause Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Current Encapsulation Port | New Encapsulation Port | + * +-------------------------------+-------------------------------+ + */ +struct sctp_new_encap_port_hdr { + __be16 cur_port; + __be16 new_port; +}; + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/linux/sdla.h b/include/linux/sdla.h deleted file mode 100644 index 00e8b3b614f0..000000000000 --- a/include/linux/sdla.h +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Global definitions for the Frame relay interface. 
- * - * Version: @(#)if_ifrad.h 0.20 13 Apr 96 - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * 0.15 Mike McLagan Structure packing - * - * 0.20 Mike McLagan New flags for S508 buffer handling - */ -#ifndef SDLA_H -#define SDLA_H - -#include <uapi/linux/sdla.h> - - -/* important Z80 window addresses */ -#define SDLA_CONTROL_WND 0xE000 - -#define SDLA_502_CMD_BUF 0xEF60 -#define SDLA_502_RCV_BUF 0xA900 -#define SDLA_502_TXN_AVAIL 0xFFF1 -#define SDLA_502_RCV_AVAIL 0xFFF2 -#define SDLA_502_EVENT_FLAGS 0xFFF3 -#define SDLA_502_MDM_STATUS 0xFFF4 -#define SDLA_502_IRQ_INTERFACE 0xFFFD -#define SDLA_502_IRQ_PERMISSION 0xFFFE -#define SDLA_502_DATA_OFS 0x0010 - -#define SDLA_508_CMD_BUF 0xE000 -#define SDLA_508_TXBUF_INFO 0xF100 -#define SDLA_508_RXBUF_INFO 0xF120 -#define SDLA_508_EVENT_FLAGS 0xF003 -#define SDLA_508_MDM_STATUS 0xF004 -#define SDLA_508_IRQ_INTERFACE 0xF010 -#define SDLA_508_IRQ_PERMISSION 0xF011 -#define SDLA_508_TSE_OFFSET 0xF012 - -/* Event flags */ -#define SDLA_EVENT_STATUS 0x01 -#define SDLA_EVENT_DLCI_STATUS 0x02 -#define SDLA_EVENT_BAD_DLCI 0x04 -#define SDLA_EVENT_LINK_DOWN 0x40 - -/* IRQ Trigger flags */ -#define SDLA_INTR_RX 0x01 -#define SDLA_INTR_TX 0x02 -#define SDLA_INTR_MODEM 0x04 -#define SDLA_INTR_COMPLETE 0x08 -#define SDLA_INTR_STATUS 0x10 -#define SDLA_INTR_TIMER 0x20 - -/* DLCI status bits */ -#define SDLA_DLCI_DELETED 0x01 -#define SDLA_DLCI_ACTIVE 0x02 -#define SDLA_DLCI_WAITING 0x04 -#define SDLA_DLCI_NEW 0x08 -#define SDLA_DLCI_INCLUDED 0x40 - -/* valid command codes */ -#define SDLA_INFORMATION_WRITE 0x01 -#define SDLA_INFORMATION_READ 0x02 -#define SDLA_ISSUE_IN_CHANNEL_SIGNAL 0x03 -#define SDLA_SET_DLCI_CONFIGURATION 0x10 -#define SDLA_READ_DLCI_CONFIGURATION 0x11 -#define SDLA_DISABLE_COMMUNICATIONS 0x12 -#define SDLA_ENABLE_COMMUNICATIONS 0x13 -#define SDLA_READ_DLC_STATUS 0x14 -#define SDLA_READ_DLC_STATISTICS 0x15 -#define SDLA_FLUSH_DLC_STATISTICS 0x16 -#define SDLA_LIST_ACTIVE_DLCI 0x17 -#define SDLA_FLUSH_INFORMATION_BUFFERS 0x18 -#define SDLA_ADD_DLCI 0x20 -#define SDLA_DELETE_DLCI 0x21 -#define SDLA_ACTIVATE_DLCI 0x22 -#define SDLA_DEACTIVATE_DLCI 0x23 -#define SDLA_READ_MODEM_STATUS 0x30 -#define SDLA_SET_MODEM_STATUS 0x31 -#define SDLA_READ_COMMS_ERR_STATS 0x32 -#define SDLA_FLUSH_COMMS_ERR_STATS 0x33 -#define SDLA_READ_CODE_VERSION 0x40 -#define SDLA_SET_IRQ_TRIGGER 0x50 -#define SDLA_GET_IRQ_TRIGGER 0x51 - -/* In channel signal types */ -#define SDLA_ICS_LINK_VERIFY 0x02 -#define SDLA_ICS_STATUS_ENQ 0x03 - -/* modem status flags */ -#define SDLA_MODEM_DTR_HIGH 0x01 -#define SDLA_MODEM_RTS_HIGH 0x02 -#define SDLA_MODEM_DCD_HIGH 0x08 -#define SDLA_MODEM_CTS_HIGH 0x20 - -/* used for RET_MODEM interpretation */ -#define SDLA_MODEM_DCD_LOW 0x01 -#define SDLA_MODEM_CTS_LOW 0x02 - -/* return codes */ -#define SDLA_RET_OK 0x00 -#define SDLA_RET_COMMUNICATIONS 0x01 -#define SDLA_RET_CHANNEL_INACTIVE 0x02 -#define SDLA_RET_DLCI_INACTIVE 0x03 -#define SDLA_RET_DLCI_CONFIG 0x04 -#define SDLA_RET_BUF_TOO_BIG 0x05 -#define SDLA_RET_NO_DATA 0x05 -#define SDLA_RET_BUF_OVERSIZE 0x06 -#define SDLA_RET_CIR_OVERFLOW 0x07 -#define SDLA_RET_NO_BUFS 0x08 -#define SDLA_RET_TIMEOUT 0x0A -#define SDLA_RET_MODEM 0x10 -#define SDLA_RET_CHANNEL_OFF 0x11 -#define SDLA_RET_CHANNEL_ON 0x12 -#define SDLA_RET_DLCI_STATUS 0x13 -#define SDLA_RET_DLCI_UNKNOWN 0x14 -#define SDLA_RET_COMMAND_INVALID 0x1F - -/* Configuration flags */ -#define SDLA_DIRECT_RECV 0x0080 -#define SDLA_TX_NO_EXCEPT 0x0020 -#define SDLA_NO_ICF_MSGS 0x1000 -#define 
SDLA_TX50_RX50 0x0000 -#define SDLA_TX70_RX30 0x2000 -#define SDLA_TX30_RX70 0x4000 - -/* IRQ selection flags */ -#define SDLA_IRQ_RECEIVE 0x01 -#define SDLA_IRQ_TRANSMIT 0x02 -#define SDLA_IRQ_MODEM_STAT 0x04 -#define SDLA_IRQ_COMMAND 0x08 -#define SDLA_IRQ_CHANNEL 0x10 -#define SDLA_IRQ_TIMER 0x20 - -/* definitions for PC memory mapping */ -#define SDLA_8K_WINDOW 0x01 -#define SDLA_S502_SEG_A 0x10 -#define SDLA_S502_SEG_C 0x20 -#define SDLA_S502_SEG_D 0x00 -#define SDLA_S502_SEG_E 0x30 -#define SDLA_S507_SEG_A 0x00 -#define SDLA_S507_SEG_B 0x40 -#define SDLA_S507_SEG_C 0x80 -#define SDLA_S507_SEG_E 0xC0 -#define SDLA_S508_SEG_A 0x00 -#define SDLA_S508_SEG_C 0x10 -#define SDLA_S508_SEG_D 0x08 -#define SDLA_S508_SEG_E 0x18 - -/* SDLA adapter port constants */ -#define SDLA_IO_EXTENTS 0x04 - -#define SDLA_REG_CONTROL 0x00 -#define SDLA_REG_PC_WINDOW 0x01 /* offset for PC window select latch */ -#define SDLA_REG_Z80_WINDOW 0x02 /* offset for Z80 window select latch */ -#define SDLA_REG_Z80_CONTROL 0x03 /* offset for Z80 control latch */ - -#define SDLA_S502_STS 0x00 /* status reg for 502, 502E, 507 */ -#define SDLA_S508_GNRL 0x00 /* general purp. reg for 508 */ -#define SDLA_S508_STS 0x01 /* status reg for 508 */ -#define SDLA_S508_IDR 0x02 /* ID reg for 508 */ - -/* control register flags */ -#define SDLA_S502A_START 0x00 /* start the CPU */ -#define SDLA_S502A_INTREQ 0x02 -#define SDLA_S502A_INTEN 0x04 -#define SDLA_S502A_HALT 0x08 /* halt the CPU */ -#define SDLA_S502A_NMI 0x10 /* issue an NMI to the CPU */ - -#define SDLA_S502E_CPUEN 0x01 -#define SDLA_S502E_ENABLE 0x02 -#define SDLA_S502E_INTACK 0x04 - -#define SDLA_S507_ENABLE 0x01 -#define SDLA_S507_IRQ3 0x00 -#define SDLA_S507_IRQ4 0x20 -#define SDLA_S507_IRQ5 0x40 -#define SDLA_S507_IRQ7 0x60 -#define SDLA_S507_IRQ10 0x80 -#define SDLA_S507_IRQ11 0xA0 -#define SDLA_S507_IRQ12 0xC0 -#define SDLA_S507_IRQ15 0xE0 - -#define SDLA_HALT 0x00 -#define SDLA_CPUEN 0x02 -#define SDLA_MEMEN 0x04 -#define SDLA_S507_EPROMWR 0x08 -#define SDLA_S507_EPROMCLK 0x10 -#define SDLA_S508_INTRQ 0x08 -#define SDLA_S508_INTEN 0x10 - -struct sdla_cmd { - char opp_flag; - char cmd; - short length; - char retval; - short dlci; - char flags; - short rxlost_int; - long rxlost_app; - char reserve[2]; - char data[SDLA_MAX_DATA]; /* transfer data buffer */ -} __attribute__((packed)); - -struct intr_info { - char flags; - short txlen; - char irq; - char flags2; - short timeout; -} __attribute__((packed)); - -/* found in the 508's control window at RXBUF_INFO */ -struct buf_info { - unsigned short rse_num; - unsigned long rse_base; - unsigned long rse_next; - unsigned long buf_base; - unsigned short reserved; - unsigned long buf_top; -} __attribute__((packed)); - -/* structure pointed to by rse_base in RXBUF_INFO struct */ -struct buf_entry { - char opp_flag; - short length; - short dlci; - char flags; - short timestamp; - short reserved[2]; - long buf_addr; -} __attribute__((packed)); - -#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a828cf99c521..0a1239819fd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4151,6 +4151,9 @@ enum skb_ext_id { #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, #endif +#if IS_ENABLED(CONFIG_KCOV) + SKB_EXT_KCOV_HANDLE, +#endif SKB_EXT_NUM, /* must be last */ }; @@ -4605,5 +4608,35 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } +#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 
kcov_handle) +{ + /* Do not allocate skb extensions only to set kcov_handle to zero + * (as it is zero by default). However, if the extensions are + * already allocated, update kcov_handle anyway since + * skb_set_kcov_handle can be called to zero a previously set + * value. + */ + if (skb_has_extensions(skb) || kcov_handle) { + u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); + + if (kcov_handle_ptr) + *kcov_handle_ptr = kcov_handle; + } +} + +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) +{ + u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); + + return kcov_handle ? *kcov_handle : 0; +} +#else +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) { } +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } +#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h new file mode 100644 index 000000000000..20d88b1defc3 --- /dev/null +++ b/include/linux/usb/r8152.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 Realtek Semiconductor Corp. All rights reserved. + */ + +#ifndef __LINUX_R8152_H +#define __LINUX_R8152_H + +#define RTL8152_REQT_READ 0xc0 +#define RTL8152_REQT_WRITE 0x40 +#define RTL8152_REQ_GET_REGS 0x05 +#define RTL8152_REQ_SET_REGS 0x05 + +#define BYTE_EN_DWORD 0xff +#define BYTE_EN_WORD 0x33 +#define BYTE_EN_BYTE 0x11 +#define BYTE_EN_SIX_BYTES 0x3f +#define BYTE_EN_START_MASK 0x0f +#define BYTE_EN_END_MASK 0xf0 + +#define MCU_TYPE_PLA 0x0100 +#define MCU_TYPE_USB 0x0000 + +/* Define these values to match your device */ +#define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e +#define VENDOR_ID_SAMSUNG 0x04e8 +#define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_LINKSYS 0x13b1 +#define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 + +#if IS_REACHABLE(CONFIG_USB_RTL8152) +extern u8 rtl8152_get_version(struct usb_interface *intf); +#endif + +#endif /* __LINUX_R8152_H */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 2e4f7721fc4e..88a7673894d5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -65,8 +65,6 @@ struct usbnet { struct usb_anchor deferred; struct tasklet_struct bh; - struct pcpu_sw_netstats __percpu *stats64; - struct work_struct kevent; unsigned long flags; # define EVENT_TX_HALT 0 @@ -285,7 +283,5 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags); extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); -extern void usbnet_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); #endif /* __LINUX_USB_USBNET_H */ diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 3c516dd07caf..0e85713f56df 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +extern const struct bpf_func_proto bpf_sk_storage_get_tracing_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto; struct bpf_local_storage_elem; struct bpf_sk_storage_diag; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d5ab8d99739f..ab249ca5d5d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1008,6 +1008,21 @@ struct survey_info { 
* @sae_pwd: password for SAE authentication (for devices supporting SAE * offload) * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) + * @sae_pwe: The mechanisms allowed for SAE PWE derivation: + * + * NL80211_SAE_PWE_UNSPECIFIED + * Not-specified, used to indicate userspace did not specify any + * preference. The driver should follow its internal policy in + * such a scenario. + * + * NL80211_SAE_PWE_HUNT_AND_PECK + * Allow hunting-and-pecking loop only + * + * NL80211_SAE_PWE_HASH_TO_ELEMENT + * Allow hash-to-element only + * + * NL80211_SAE_PWE_BOTH + * Allow either hunting-and-pecking loop or hash-to-element */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -1026,6 +1041,7 @@ struct cfg80211_crypto_settings { const u8 *psk; const u8 *sae_pwd; u8 sae_pwd_len; + enum nl80211_sae_pwe_mechanism sae_pwe; }; /** @@ -3736,8 +3752,6 @@ struct mgmt_frame_regs { * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * - * @set_wds_peer: set the WDS peer for a WDS interface - * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state * @@ -4058,9 +4072,6 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm); - int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - const u8 *addr); - void (*rfkill_poll)(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE diff --git a/include/net/dsa.h b/include/net/dsa.h index 35429a140dfa..4e60d2610f20 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -45,6 +45,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_OCELOT_VALUE 15 #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 +#define DSA_TAG_PROTO_HELLCREEK_VALUE 18 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -65,6 +66,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, + DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, }; struct packet_type; @@ -535,6 +537,12 @@ struct dsa_switch_ops { struct ethtool_regs *regs, void *p); /* + * Upper device tracking. + */ + int (*port_prechangeupper)(struct dsa_switch *ds, int port, + struct netdev_notifier_changeupper_info *info); + + /* * Bridge integration */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); diff --git a/include/net/dst.h b/include/net/dst.h index 8ea8812b0b41..10f0a8399867 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -400,14 +400,12 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - struct neighbour *n = NULL; + struct neighbour *n; - /* The packets from tunnel devices (eg bareudp) may have only - * metadata in the dst pointer of skb. Hence a pointer check of - * neigh_lookup is needed. - */ - if (dst->ops->neigh_lookup) - n = dst->ops->neigh_lookup(dst, skb, NULL); + if (WARN_ON_ONCE(!dst->ops->neigh_lookup)) + return NULL; + + n = dst->ops->neigh_lookup(dst, skb, NULL); return IS_ERR(n) ? 
NULL : n; } diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 19c00d100096..c0854933e24f 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -118,6 +118,7 @@ enum ieee80211_radiotap_tx_flags { IEEE80211_RADIOTAP_F_TX_RTS = 0x0004, IEEE80211_RADIOTAP_F_TX_NOACK = 0x0008, IEEE80211_RADIOTAP_F_TX_NOSEQNO = 0x0010, + IEEE80211_RADIOTAP_F_TX_ORDER = 0x0020, }; /* for IEEE80211_RADIOTAP_MCS "have" flags */ diff --git a/include/net/ip.h b/include/net/ip.h index 2d6b985d11cc..e20874059f82 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -99,7 +99,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) /* return enslaved device index if relevant */ -static inline int inet_sdif(struct sk_buff *skb) +static inline int inet_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 61620677b034..548b65bd3973 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -274,8 +274,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); -void ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, __be16 flags, __be32 remote, __be32 local, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcdba96814a2..05c7524bab26 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -621,7 +621,8 @@ struct ieee80211_fils_discovery { * nontransmitted BSSIDs * @profile_periodicity: the least number of beacon frames need to be received * in order to discover all the nontransmitted BSSIDs in the set. - * @he_oper: HE operation information of the AP we are connected to + * @he_oper: HE operation information of the BSS (AP/Mesh) or of the AP we are + * connected to (STA) * @he_obss_pd: OBSS Packet Detection parameters. * @he_bss_color: BSS coloring settings, if BSS supports HE * @fils_discovery: FILS discovery configuration @@ -856,6 +857,9 @@ enum mac80211_tx_info_flags { * it can be sent out. * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that * has already been assigned to this frame. + * @IEEE80211_TX_CTRL_DONT_REORDER: This frame should not be reordered + * relative to other frames that have this flag set, independent + * of their QoS TID or other priority field values. * * These flags are used in tx_info->control.flags. 
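 * * A driver consumes these from the frame's tx_info in its TX path, * e.g. (an illustrative sketch; the queueing helper is made up): * *	if (info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER) *		queue_on_fixed_fifo(hw, skb);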
*/ @@ -868,6 +872,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6), IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), + IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), }; /* diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 753ba7e755d6..6e706d838e4e 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -29,7 +29,8 @@ struct mptcp_ext { use_ack:1, ack64:1, mpc_map:1, - __unused:2; + frozen:1, + __unused:1; /* one byte hole */ }; @@ -106,6 +107,19 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to, from->active_extensions = 0; } +static inline void mptcp_skb_ext_copy(struct sk_buff *to, + struct sk_buff *from) +{ + struct mptcp_ext *from_ext; + + from_ext = skb_ext_find(from, SKB_EXT_MPTCP); + if (!from_ext) + return; + + from_ext->frozen = 1; + skb_ext_copy(to, from); +} + static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext, const struct mptcp_ext *from_ext) { @@ -193,6 +207,11 @@ static inline void mptcp_skb_ext_move(struct sk_buff *to, { } +static inline void mptcp_skb_ext_copy(struct sk_buff *to, + struct sk_buff *from) +{ +} + static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index 40e0e0623f46..0d8ff84a2588 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -18,4 +18,14 @@ struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, const struct tcphdr *oth); +struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); +struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); + + #endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 4a3ef9ebdf6f..edcf6d1cd316 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -20,4 +20,13 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, const struct tcphdr *oth, unsigned int otcplen); +struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook); +struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code); + #endif /* _IPV6_NF_REJECT_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 7356f41d23ba..1ceec518ab49 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -142,7 +142,7 @@ * Attribute Misc: * nla_memcpy(dest, nla, count) copy attribute into memory * nla_memcmp(nla, data, size) compare attribute with memory area - * nla_strlcpy(dst, nla, size) copy attribute to a sized string + * nla_strscpy(dst, nla, size) copy attribute to a sized string * nla_strcmp(nla, str) compare attribute with string * * Attribute Parsing: @@ -506,7 +506,7 @@ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +ssize_t nla_strscpy(char *dst, const struct nlattr *nla, 
size_t dstsize); char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index d8d02e4188d1..a0f315effa94 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -22,6 +22,14 @@ struct netns_sctp { */ struct sock *ctl_sock; + /* UDP tunneling listening sock. */ + struct sock *udp4_sock; + struct sock *udp6_sock; + /* UDP tunneling listening port. */ + int udp_port; + /* UDP tunneling remote encap port. */ + int encap_port; + /* This is the global local address list. * We actively maintain this complete list of addresses on * the system by catching address add/delete events. diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2fd76a9b6dc8..226930d66b63 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -105,11 +105,49 @@ struct nexthop { }; enum nexthop_event_type { - NEXTHOP_EVENT_DEL + NEXTHOP_EVENT_DEL, + NEXTHOP_EVENT_REPLACE, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb); +struct nh_notifier_single_info { + struct net_device *dev; + u8 gw_family; + union { + __be32 ipv4; + struct in6_addr ipv6; + }; + u8 is_reject:1, + is_fdb:1, + has_encap:1; +}; + +struct nh_notifier_grp_entry_info { + u8 weight; + u32 id; + struct nh_notifier_single_info nh; +}; + +struct nh_notifier_grp_info { + u16 num_nh; + bool is_fdb; + struct nh_notifier_grp_entry_info nh_entries[]; +}; + +struct nh_notifier_info { + struct net *net; + struct netlink_ext_ack *extack; + u32 id; + bool is_grp; + union { + struct nh_notifier_single_info *nh; + struct nh_notifier_grp_info *nh_grp; + }; +}; + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 81d7773f96cd..b5b195305346 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); void page_pool_release_page(struct page_pool *pool, struct page *page); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, struct page *page) { } + +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ +} #endif void page_pool_put_page(struct page_pool *pool, struct page *page, @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) if (unlikely(pool->p.nid != new_nid)) page_pool_update_nid(pool, new_nid); } + +static inline void page_pool_ring_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); +} + +static inline void page_pool_ring_unlock(struct page_pool *pool) + 
__releases(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +} + #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d4d461236351..133f9ad4d4f9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -512,7 +512,7 @@ tcf_change_indev(struct net *net, struct nlattr *indev_tlv, char indev[IFNAMSIZ]; struct net_device *dev; - if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) { + if (nla_strscpy(indev, indev_tlv, IFNAMSIZ) < 0) { NL_SET_ERR_MSG_ATTR(extack, indev_tlv, "Interface name too long"); return -EINVAL; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 4ed32e6b0201..15b1b30f454e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -24,6 +24,11 @@ static inline void *qdisc_priv(struct Qdisc *q) return &q->privdata; } +static inline struct Qdisc *qdisc_from_priv(void *priv) +{ + return container_of(priv, struct Qdisc, privdata); +} + /* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 122d9e2d8dfd..14a0d22c9113 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -286,6 +286,8 @@ enum { SCTP_MAX_GABS = 16 }; * functions simpler to write. */ +#define SCTP_DEFAULT_UDP_PORT 9899 /* default UDP tunneling port */ + /* These are the values for pf exposure, UNUSED is to keep compatible with old * applications by default. */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4fc747b778eb..86f74f2fe6de 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -84,6 +84,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); +int sctp_udp_sock_start(struct net *net); +void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c @@ -576,10 +578,13 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, { __u32 overhead = sizeof(struct sctphdr) + extra; - if (sp) + if (sp) { overhead += sp->pf->af->net_header_len; - else + if (sp->udp_port) + overhead += sizeof(struct udphdr); + } else { overhead += sizeof(struct ipv6hdr); + } if (WARN_ON_ONCE(mtu && mtu <= overhead)) mtu = overhead; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5c491a3bc27e..fd223c94589a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -221,6 +221,9 @@ struct sctp_chunk *sctp_make_violation_paramlen( struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_new_encap_port( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport); struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, @@ -380,6 +383,7 @@ sctp_vtag_verify(const struct sctp_chunk *chunk, if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag) return 1; + chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; return 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0bdff38eb4bb..1aa585216f34 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -178,6 
+178,9 @@ struct sctp_sock { */ __u32 hbinterval; + __be16 udp_port; + __be16 encap_port; + /* This is the max_retrans value for new associations. */ __u16 pathmaxrxt; @@ -877,6 +880,8 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; + __be16 encap_port; + /* This is the max_retrans value for the transport and will * be initialized from the assocs value. This can be changed * using the SCTP_SET_PEER_ADDR_PARAMS socket option. @@ -1117,13 +1122,14 @@ static inline void sctp_outq_cork(struct sctp_outq *q) */ struct sctp_input_cb { union { - struct inet_skb_parm h4; + struct inet_skb_parm h4; #if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; + struct inet6_skb_parm h6; #endif } header; struct sctp_chunk *chunk; struct sctp_af *af; + __be16 encap_port; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) @@ -1790,6 +1796,8 @@ struct sctp_association { */ unsigned long hbinterval; + __be16 encap_port; + /* This is the max_retrans value for new transports in the * association. */ diff --git a/include/net/sock.h b/include/net/sock.h index a5c6ae78df77..1d29aeae74fd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -60,7 +60,7 @@ #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> - +#include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <net/dst.h> @@ -1264,13 +1264,17 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ +INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); + static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? 
- sk->sk_prot->stream_memory_free(sk, wake) : true; + INDIRECT_CALL_1(sk->sk_prot->stream_memory_free, + tcp_stream_memory_free, + sk, wake) : true; } static inline bool sk_stream_memory_free(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index d4ef5bf94168..d5d22c411918 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -322,6 +322,7 @@ void tcp_shutdown(struct sock *sk, int how); int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); +void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); @@ -329,6 +330,8 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); +struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); @@ -386,12 +389,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); -void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag); +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); +void __tcp_close(struct sock *sk, long timeout); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb); @@ -1965,18 +1969,7 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } -/* @wake is one when sk_stream_write_space() calls us. - * This sends EPOLLOUT only if notsent_bytes is half the limit. - * This mimics the strategy used in sock_def_write_space(). 
- */ -static inline bool tcp_stream_memory_free(const struct sock *sk, int wake) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 notsent_bytes = READ_ONCE(tp->write_seq) - - READ_ONCE(tp->snd_nxt); - - return (notsent_bytes << wake) < tcp_notsent_lowat(tp); -} +bool tcp_stream_memory_free(const struct sock *sk, int wake); #ifdef CONFIG_PROC_FS int tcp4_proc_init(void); diff --git a/include/net/udp.h b/include/net/udp.h index 295d52a73598..877832bed471 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -164,7 +164,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } -typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport, +typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, @@ -313,7 +313,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); -struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -324,7 +324,7 @@ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid diff --git a/include/net/xdp.h b/include/net/xdp.h index 3814fb631d52..7d48b2ae217a 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -104,6 +104,18 @@ struct xdp_frame { struct net_device *dev_rx; /* used by cpumap */ }; +#define XDP_BULK_QUEUE_SIZE 16 +struct xdp_frame_bulk { + int count; + void *xa; + void *q[XDP_BULK_QUEUE_SIZE]; +}; + +static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) +{ + /* bq->count will be zero'ed when bq->xa gets updated */ + bq->xa = NULL; +} static inline struct skb_shared_info * xdp_get_shared_info_from_frame(struct xdp_frame *frame) @@ -194,6 +206,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); +void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); +void xdp_return_frame_bulk(struct xdp_frame *xdpf, + struct xdp_frame_bulk *bq); /* When sending xdp_frame into the network stack, then there is no * return point callback, which is needed to release e.g. 
DMA-mapping @@ -245,6 +260,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf); -#define DEV_MAP_BULK_SIZE 16 +#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index 9f484113cfda..59eeba31c192 100644 --- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -689,7 +689,8 @@ enum qman_cb_dqrr_result { }; typedef enum qman_cb_dqrr_result (*qman_cb_dqrr)(struct qman_portal *qm, struct qman_fq *fq, - const struct qm_dqrr_entry *dqrr); + const struct qm_dqrr_entry *dqrr, + bool sched_napi); /* * This callback type is used when handling ERNs, FQRNs and FQRLs via MR. They diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e9db9577441..ea1de185f2e4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -571,18 +571,21 @@ struct ocelot_vcap_block { int pol_lpr; }; +struct ocelot_vlan { + bool valid; + u16 vid; +}; + struct ocelot_port { struct ocelot *ocelot; struct regmap *target; bool vlan_aware; - - /* Ingress default VLAN (pvid) */ - u16 pvid; - - /* Egress default VLAN (vid) */ - u16 vid; + /* VLAN that untagged frames are classified to, on ingress */ + struct ocelot_vlan pvid_vlan; + /* The VLAN ID that will be transmitted as untagged, on egress */ + struct ocelot_vlan native_vlan; u8 ptp_cmd; struct sk_buff_head tx_skbs; @@ -632,6 +635,7 @@ struct ocelot { u32 *lags; struct list_head multicast; + struct list_head pgids; struct list_head dummy_rules; struct ocelot_vcap_block block[3]; @@ -743,6 +747,8 @@ int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid); int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid); +int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, + bool untagged); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged); int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6ceac3f7d62..162999b12790 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,50 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be a task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. 
*value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3945,9 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4418,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { diff --git a/include/uapi/linux/cfm_bridge.h b/include/uapi/linux/cfm_bridge.h new file mode 100644 index 000000000000..3c1cbd1db2f5 --- /dev/null +++ b/include/uapi/linux/cfm_bridge.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CFM_BRIDGE_H_ +#define _UAPI_LINUX_CFM_BRIDGE_H_ + +#include <linux/types.h> +#include <linux/if_ether.h> + +#define ETHER_HEADER_LENGTH (6+6+4+2) +#define CFM_MAID_LENGTH 48 +#define CFM_CCM_PDU_LENGTH 75 +#define CFM_PORT_STATUS_TLV_LENGTH 4 +#define CFM_IF_STATUS_TLV_LENGTH 4 +#define CFM_IF_STATUS_TLV_TYPE 4 +#define CFM_PORT_STATUS_TLV_TYPE 2 +#define CFM_ENDE_TLV_TYPE 0 +#define CFM_CCM_MAX_FRAME_LENGTH (ETHER_HEADER_LENGTH+\ + CFM_CCM_PDU_LENGTH+\ + CFM_PORT_STATUS_TLV_LENGTH+\ + CFM_IF_STATUS_TLV_LENGTH) +#define CFM_FRAME_PRIO 7 +#define CFM_CCM_TLV_OFFSET 70 +#define CFM_CCM_PDU_MAID_OFFSET 10 +#define CFM_CCM_PDU_MEPID_OFFSET 8 +#define CFM_CCM_PDU_SEQNR_OFFSET 4 +#define CFM_CCM_PDU_TLV_OFFSET 74 +#define CFM_CCM_ITU_RESERVED_SIZE 16 + +struct br_cfm_common_hdr { + __u8 mdlevel_version; + __u8 opcode; + __u8 flags; + __u8 tlv_offset; +}; + +enum br_cfm_opcodes { + BR_CFM_OPCODE_CCM = 0x1, +}; + +/* MEP domain */ +enum br_cfm_domain { + BR_CFM_PORT, + BR_CFM_VLAN, +}; + +/* MEP direction */ +enum br_cfm_mep_direction { + BR_CFM_MEP_DIRECTION_DOWN, + BR_CFM_MEP_DIRECTION_UP, +}; + +/* CCM interval supported. 
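+ * The enumerators correspond to the standard CCM transmission periods + * of IEEE 802.1Q CFM, from 3.3 ms up to 10 min; NONE means that CCM + * transmission is disabled.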
*/ +enum br_cfm_ccm_interval { + BR_CFM_CCM_INTERVAL_NONE, + BR_CFM_CCM_INTERVAL_3_3_MS, + BR_CFM_CCM_INTERVAL_10_MS, + BR_CFM_CCM_INTERVAL_100_MS, + BR_CFM_CCM_INTERVAL_1_SEC, + BR_CFM_CCM_INTERVAL_10_SEC, + BR_CFM_CCM_INTERVAL_1_MIN, + BR_CFM_CCM_INTERVAL_10_MIN, +}; + +#endif diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 4c687686aa8f..13d59c51ef5b 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -121,6 +121,7 @@ enum { IFLA_BRIDGE_VLAN_INFO, IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, + IFLA_BRIDGE_CFM, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -328,6 +329,130 @@ struct br_mrp_start_in_test { __u16 in_id; }; +enum { + IFLA_BRIDGE_CFM_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE, + IFLA_BRIDGE_CFM_MEP_DELETE, + IFLA_BRIDGE_CFM_MEP_CONFIG, + IFLA_BRIDGE_CFM_CC_CONFIG, + IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD, + IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE, + IFLA_BRIDGE_CFM_CC_RDI, + IFLA_BRIDGE_CFM_CC_CCM_TX, + IFLA_BRIDGE_CFM_MEP_CREATE_INFO, + IFLA_BRIDGE_CFM_MEP_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_RDI_INFO, + IFLA_BRIDGE_CFM_CC_CCM_TX_INFO, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO, + IFLA_BRIDGE_CFM_MEP_STATUS_INFO, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO, + __IFLA_BRIDGE_CFM_MAX, +}; + +#define IFLA_BRIDGE_CFM_MAX (__IFLA_BRIDGE_CFM_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN, + IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION, + IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX, + __IFLA_BRIDGE_CFM_MEP_CREATE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CREATE_MAX (__IFLA_BRIDGE_CFM_MEP_CREATE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE, + __IFLA_BRIDGE_CFM_MEP_DELETE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_DELETE_MAX (__IFLA_BRIDGE_CFM_MEP_DELETE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, + IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, + IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, + __IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CONFIG_MAX (__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, + __IFLA_BRIDGE_CFM_CC_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CONFIG_MAX (__IFLA_BRIDGE_CFM_CC_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_MEPID, + __IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX (__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_RDI_UNSPEC, + IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, + IFLA_BRIDGE_CFM_CC_RDI_RDI, + __IFLA_BRIDGE_CFM_CC_RDI_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_RDI_MAX (__IFLA_BRIDGE_CFM_CC_RDI_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC, + IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, + IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, + IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, + IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, + __IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_STATUS_UNSPEC, + 
IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, + __IFLA_BRIDGE_CFM_MEP_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_STATUS_MAX (__IFLA_BRIDGE_CFM_MEP_STATUS_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_STATUS_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, + __IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; @@ -526,6 +651,7 @@ struct br_mdb_entry { union { __be32 ip4; struct in6_addr ip6; + unsigned char mac_addr[ETH_ALEN]; } u; __be16 proto; } addr; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index d6de2b167448..a0b637911d3c 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -99,6 +99,7 @@ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_NCSI 0x88F8 /* NCSI protocol */ #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ +#define ETH_P_CFM 0x8902 /* Connectivity Fault Management */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_IBOE 0x8915 /* Infiniband over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_frad.h b/include/uapi/linux/if_frad.h deleted file mode 100644 index 3c6ee85f6262..000000000000 --- a/include/uapi/linux/if_frad.h +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * DLCI/FRAD Definitions for Frame Relay Access Devices. DLCI devices are - * created for each DLCI associated with a FRAD. The FRAD driver - * is not truly a network device, but the lower level device - * handler. This allows other FRAD manufacturers to use the DLCI - * code, including its RFC1490 encapsulation alongside the current - * implementation for the Sangoma cards. - * - * Version: @(#)if_ifrad.h 0.15 31 Mar 96 - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * 0.15 Mike McLagan changed structure defs (packed) - * re-arranged flags - * added DLCI_RET vars - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _UAPI_FRAD_H_ -#define _UAPI_FRAD_H_ - -#include <linux/if.h> - -/* Structures and constants associated with the DLCI device driver */ - -struct dlci_add -{ - char devname[IFNAMSIZ]; - short dlci; -}; - -#define DLCI_GET_CONF (SIOCDEVPRIVATE + 2) -#define DLCI_SET_CONF (SIOCDEVPRIVATE + 3) - -/* - * These are related to the Sangoma SDLA and should remain in order. - * Code within the SDLA module is based on the specifics of this - * structure. Change at your own peril. 
- */ -struct dlci_conf { - short flags; - short CIR_fwd; - short Bc_fwd; - short Be_fwd; - short CIR_bwd; - short Bc_bwd; - short Be_bwd; - -/* these are part of the status read */ - short Tc_fwd; - short Tc_bwd; - short Tf_max; - short Tb_max; - -/* add any new fields here above is a mirror of sdla_dlci_conf */ -}; - -#define DLCI_GET_SLAVE (SIOCDEVPRIVATE + 4) - -/* configuration flags for DLCI */ -#define DLCI_IGNORE_CIR_OUT 0x0001 -#define DLCI_ACCOUNT_CIR_IN 0x0002 -#define DLCI_BUFFER_IF 0x0008 - -#define DLCI_VALID_FLAGS 0x000B - -/* defines for the actual Frame Relay hardware */ -#define FRAD_GET_CONF (SIOCDEVPRIVATE) -#define FRAD_SET_CONF (SIOCDEVPRIVATE + 1) - -#define FRAD_LAST_IOCTL FRAD_SET_CONF - -/* - * Based on the setup for the Sangoma SDLA. If changes are - * necessary to this structure, a routine will need to be - * added to that module to copy fields. - */ -struct frad_conf -{ - short station; - short flags; - short kbaud; - short clocking; - short mtu; - short T391; - short T392; - short N391; - short N392; - short N393; - short CIR_fwd; - short Bc_fwd; - short Be_fwd; - short CIR_bwd; - short Bc_bwd; - short Be_bwd; - -/* Add new fields here, above is a mirror of the sdla_conf */ - -}; - -#define FRAD_STATION_CPE 0x0000 -#define FRAD_STATION_NODE 0x0001 - -#define FRAD_TX_IGNORE_CIR 0x0001 -#define FRAD_RX_ACCOUNT_CIR 0x0002 -#define FRAD_DROP_ABORTED 0x0004 -#define FRAD_BUFFERIF 0x0008 -#define FRAD_STATS 0x0010 -#define FRAD_MCI 0x0100 -#define FRAD_AUTODLCI 0x8000 -#define FRAD_VALID_FLAGS 0x811F - -#define FRAD_CLOCK_INT 0x0001 -#define FRAD_CLOCK_EXT 0x0000 - - -#endif /* _UAPI_FRAD_H_ */ diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 3d884d68eb30..c07caf7b40db 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -2,6 +2,7 @@ #ifndef __LINUX_IF_PACKET_H #define __LINUX_IF_PACKET_H +#include <asm/byteorder.h> #include <linux/types.h> struct sockaddr_pkt { @@ -296,6 +297,17 @@ struct packet_mreq { unsigned char mr_address[8]; }; +struct fanout_args { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 id; + __u16 type_flags; +#else + __u16 type_flags; + __u16 id; +#endif + __u32 max_num_members; +}; + #define PACKET_MR_MULTICAST 0 #define PACKET_MR_PROMISC 1 #define PACKET_MR_ALLMULTI 2 diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 11a72a938eb1..6397d75899bc 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -92,11 +92,11 @@ enum { /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ - IPSET_ATTR_GC, + IPSET_ATTR_INITVAL, /* was unused IPSET_ATTR_GC */ IPSET_ATTR_HASHSIZE, IPSET_ATTR_MAXELEM, IPSET_ATTR_NETMASK, - IPSET_ATTR_PROBES, + IPSET_ATTR_BUCKETSIZE, /* was unused IPSET_ATTR_PROBES */ IPSET_ATTR_RESIZE, IPSET_ATTR_SIZE, /* Kernel-only */ @@ -214,6 +214,8 @@ enum ipset_cadt_flags { enum ipset_create_flags { IPSET_CREATE_FLAG_BIT_FORCEADD = 0, IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD), + IPSET_CREATE_FLAG_BIT_BUCKETSIZE = 1, + IPSET_CREATE_FLAG_BUCKETSIZE = (1 << IPSET_CREATE_FLAG_BIT_BUCKETSIZE), IPSET_CREATE_FLAG_BIT_MAX = 7, }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 47700a2b9af9..3e0d4a038ab6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -757,7 +757,8 @@ * of any other interfaces, and other interfaces will again take * precedence when they 
are used. * - * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. + * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface + * (no longer supported). * * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform * multicast to unicast conversion. When enabled, all multicast packets @@ -1750,8 +1751,9 @@ enum nl80211_commands { * specify just a single bitrate, which is to be used for the beacon. * The driver must also specify support for this with the extended * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, - * NL80211_EXT_FEATURE_BEACON_RATE_HT and - * NL80211_EXT_FEATURE_BEACON_RATE_VHT. + * NL80211_EXT_FEATURE_BEACON_RATE_HT, + * NL80211_EXT_FEATURE_BEACON_RATE_VHT and + * NL80211_EXT_FEATURE_BEACON_RATE_HE. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. @@ -2527,6 +2529,11 @@ enum nl80211_commands { * override mask. Used with NL80211_ATTR_S1G_CAPABILITY in * NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT. * + * @NL80211_ATTR_SAE_PWE: Indicates the mechanism(s) allowed for SAE PWE + * derivation in WPA3-Personal networks which are using SAE authentication. + * This is a u8 attribute that encapsulates one of the values from + * &enum nl80211_sae_pwe_mechanism. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3016,6 +3023,8 @@ enum nl80211_attrs { NL80211_ATTR_S1G_CAPABILITY, NL80211_ATTR_S1G_CAPABILITY_MASK, + NL80211_ATTR_SAE_PWE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5896,6 +5905,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports * unsolicited broadcast probe response transmission * + * @NL80211_EXT_FEATURE_BEACON_RATE_HE: Driver supports beacon rate + * configuration (AP/mesh) with HE rates. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5956,6 +5968,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD_AP, NL80211_EXT_FEATURE_FILS_DISCOVERY, NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_BEACON_RATE_HE, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -7124,4 +7137,23 @@ enum nl80211_unsol_bcast_probe_resp_attributes { NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX = __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1 }; + +/** + * enum nl80211_sae_pwe_mechanism - The mechanism(s) allowed for SAE PWE + * derivation. Applicable only when WPA3-Personal SAE authentication is + * used. + * + * @NL80211_SAE_PWE_UNSPECIFIED: not specified, used internally to indicate that + * attribute is not present from userspace. + * @NL80211_SAE_PWE_HUNT_AND_PECK: hunting-and-pecking loop only + * @NL80211_SAE_PWE_HASH_TO_ELEMENT: hash-to-element only + * @NL80211_SAE_PWE_BOTH: both hunting-and-pecking loop and hash-to-element + * can be used. 
+ */ +enum nl80211_sae_pwe_mechanism { + NL80211_SAE_PWE_UNSPECIFIED, + NL80211_SAE_PWE_HUNT_AND_PECK, + NL80211_SAE_PWE_HASH_TO_ELEMENT, + NL80211_SAE_PWE_BOTH, +}; #endif /* __LINUX_NL80211_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 9b814c92de12..2ffbef5da6c1 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -396,11 +396,13 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ -#define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ #define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) /* Macros to handle hexthops */ @@ -770,8 +772,12 @@ enum { * actions in a dump. All dump responses will contain the number of actions * being dumped stored in for user app's consumption in TCA_ROOT_COUNT * + * TCA_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * */ #define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_FLAG_TERSE_DUMP (1 << 1) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) @@ -779,6 +785,8 @@ enum { #define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) #define RTEXT_FILTER_SKIP_STATS (1 << 3) #define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) /* End of information exported to user level */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 28ad40d9acba..cb78e7a739da 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -140,6 +140,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ECN_SUPPORTED 130 #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE +#define SCTP_REMOTE_UDP_ENCAPS_PORT 132 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1197,6 +1198,12 @@ struct sctp_event { uint8_t se_on; }; +struct sctp_udpencaps { + sctp_assoc_t sue_assoc_id; + struct sockaddr_storage sue_address; + uint16_t sue_port; +}; + /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, diff --git a/include/uapi/linux/sdla.h b/include/uapi/linux/sdla.h deleted file mode 100644 index 1e3735be6511..000000000000 --- a/include/uapi/linux/sdla.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Global definitions for the Frame relay interface. - * - * Version: @(#)if_ifrad.h 0.20 13 Apr 96 - * - * Author: Mike McLagan <mike.mclagan@linux.org> - * - * Changes: - * 0.15 Mike McLagan Structure packing - * - * 0.20 Mike McLagan New flags for S508 buffer handling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _UAPISDLA_H -#define _UAPISDLA_H - -/* adapter type */ -#define SDLA_TYPES -#define SDLA_S502A 5020 -#define SDLA_S502E 5021 -#define SDLA_S503 5030 -#define SDLA_S507 5070 -#define SDLA_S508 5080 -#define SDLA_S509 5090 -#define SDLA_UNKNOWN -1 - -/* port selection flags for the S508 */ -#define SDLA_S508_PORT_V35 0x00 -#define SDLA_S508_PORT_RS232 0x02 - -/* Z80 CPU speeds */ -#define SDLA_CPU_3M 0x00 -#define SDLA_CPU_5M 0x01 -#define SDLA_CPU_7M 0x02 -#define SDLA_CPU_8M 0x03 -#define SDLA_CPU_10M 0x04 -#define SDLA_CPU_16M 0x05 -#define SDLA_CPU_12M 0x06 - -/* some private IOCTLs */ -#define SDLA_IDENTIFY (FRAD_LAST_IOCTL + 1) -#define SDLA_CPUSPEED (FRAD_LAST_IOCTL + 2) -#define SDLA_PROTOCOL (FRAD_LAST_IOCTL + 3) - -#define SDLA_CLEARMEM (FRAD_LAST_IOCTL + 4) -#define SDLA_WRITEMEM (FRAD_LAST_IOCTL + 5) -#define SDLA_READMEM (FRAD_LAST_IOCTL + 6) - -struct sdla_mem { - int addr; - int len; - void __user *data; -}; - -#define SDLA_START (FRAD_LAST_IOCTL + 7) -#define SDLA_STOP (FRAD_LAST_IOCTL + 8) - -/* some offsets in the Z80's memory space */ -#define SDLA_NMIADDR 0x0000 -#define SDLA_CONF_ADDR 0x0010 -#define SDLA_S502A_NMIADDR 0x0066 -#define SDLA_CODE_BASEADDR 0x0100 -#define SDLA_WINDOW_SIZE 0x2000 -#define SDLA_ADDR_MASK 0x1FFF - -/* largest handleable block of data */ -#define SDLA_MAX_DATA 4080 -#define SDLA_MAX_MTU 4072 /* MAX_DATA - sizeof(fradhdr) */ -#define SDLA_MAX_DLCI 24 - -/* this should be the same as frad_conf */ -struct sdla_conf { - short station; - short config; - short kbaud; - short clocking; - short max_frm; - short T391; - short T392; - short N391; - short N392; - short N393; - short CIR_fwd; - short Bc_fwd; - short Be_fwd; - short CIR_bwd; - short Bc_bwd; - short Be_bwd; -}; - -/* this should be the same as dlci_conf */ -struct sdla_dlci_conf { - short config; - short CIR_fwd; - short Bc_fwd; - short Be_fwd; - short CIR_bwd; - short Bc_bwd; - short Be_bwd; - short Tc_fwd; - short Tc_bwd; - short Tf_max; - short Tb_max; -}; - - -#endif /* _UAPISDLA_H */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index f84e7bcad6de..26fc60ce9298 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -159,6 +159,7 @@ enum UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ UDP_MIB_CSUMERRORS, /* InCsumErrors */ UDP_MIB_IGNOREDMULTI, /* IgnoredMulti */ + UDP_MIB_MEMERRORS, /* MemErrors */ __UDP_MIB_MAX }; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1b9f71ee6aa..d1249340fd6b 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o +obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 8f10e30ea0b0..5454161407f1 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,15 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +static bool bpf_iter_support_resched(struct seq_file *seq) +{ + struct bpf_iter_priv_data *iter_priv; + + iter_priv = container_of(seq->private, struct bpf_iter_priv_data, + target_private); + return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED; +} 
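/*
 * Editor's aside -- an illustration, not part of this patch: with
 * BPF_ITER_RESCHED set on an iterator target, bpf_seq_read() above may now
 * call cond_resched() between objects, so dumping a very large set (say,
 * every task on a busy machine) no longer monopolizes the CPU. A minimal
 * program for the "task" target, which opts into the feature later in this
 * patch, could look like the sketch below. It assumes a libbpf-style build
 * with a generated vmlinux.h; BPF_SEQ_PRINTF is the convenience macro from
 * recent libbpf's bpf_tracing.h, and all names here are illustrative.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;

	/* a NULL task signals the end of the iteration */
	if (!task)
		return 0;

	/* one line per task, read by user space from the iterator fd */
	BPF_SEQ_PRINTF(seq, "%8d %s\n", task->pid, task->comm);
	return 0;
}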
+ /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 @@ -83,6 +92,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; int err = 0, num_objs = 0; + bool can_resched; void *p; mutex_lock(&seq->lock); @@ -135,6 +145,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, goto done; } + can_resched = bpf_iter_support_resched(seq); while (1) { loff_t pos = seq->index; @@ -180,6 +191,9 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } break; } + + if (can_resched) + cond_resched(); } stop: offs = seq->count; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 56cc5a915f67..553107f4706a 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -63,11 +63,99 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } } +/* The set of hooks which are called without pagefaults disabled and are allowed + * to "sleep" and thus can be used for sleepable BPF programs. + */ +BTF_SET_START(sleepable_lsm_hooks) +BTF_ID(func, bpf_lsm_bpf) +BTF_ID(func, bpf_lsm_bpf_map) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog) +BTF_ID(func, bpf_lsm_bprm_check_security) +BTF_ID(func, bpf_lsm_bprm_committed_creds) +BTF_ID(func, bpf_lsm_bprm_committing_creds) +BTF_ID(func, bpf_lsm_bprm_creds_for_exec) +BTF_ID(func, bpf_lsm_bprm_creds_from_file) +BTF_ID(func, bpf_lsm_capget) +BTF_ID(func, bpf_lsm_capset) +BTF_ID(func, bpf_lsm_cred_prepare) +BTF_ID(func, bpf_lsm_file_ioctl) +BTF_ID(func, bpf_lsm_file_lock) +BTF_ID(func, bpf_lsm_file_open) +BTF_ID(func, bpf_lsm_file_receive) +BTF_ID(func, bpf_lsm_inet_conn_established) +BTF_ID(func, bpf_lsm_inode_create) +BTF_ID(func, bpf_lsm_inode_free_security) +BTF_ID(func, bpf_lsm_inode_getattr) +BTF_ID(func, bpf_lsm_inode_getxattr) +BTF_ID(func, bpf_lsm_inode_mknod) +BTF_ID(func, bpf_lsm_inode_need_killpriv) +BTF_ID(func, bpf_lsm_inode_post_setxattr) +BTF_ID(func, bpf_lsm_inode_readlink) +BTF_ID(func, bpf_lsm_inode_rename) +BTF_ID(func, bpf_lsm_inode_rmdir) +BTF_ID(func, bpf_lsm_inode_setattr) +BTF_ID(func, bpf_lsm_inode_setxattr) +BTF_ID(func, bpf_lsm_inode_symlink) +BTF_ID(func, bpf_lsm_inode_unlink) +BTF_ID(func, bpf_lsm_kernel_module_request) +BTF_ID(func, bpf_lsm_kernfs_init_security) +BTF_ID(func, bpf_lsm_key_free) +BTF_ID(func, bpf_lsm_mmap_file) +BTF_ID(func, bpf_lsm_netlink_send) +BTF_ID(func, bpf_lsm_path_notify) +BTF_ID(func, bpf_lsm_release_secctx) +BTF_ID(func, bpf_lsm_sb_alloc_security) +BTF_ID(func, bpf_lsm_sb_eat_lsm_opts) +BTF_ID(func, bpf_lsm_sb_kern_mount) +BTF_ID(func, bpf_lsm_sb_mount) +BTF_ID(func, bpf_lsm_sb_remount) +BTF_ID(func, bpf_lsm_sb_set_mnt_opts) +BTF_ID(func, bpf_lsm_sb_show_options) +BTF_ID(func, bpf_lsm_sb_statfs) +BTF_ID(func, bpf_lsm_sb_umount) +BTF_ID(func, bpf_lsm_settime) +BTF_ID(func, bpf_lsm_socket_accept) +BTF_ID(func, bpf_lsm_socket_bind) +BTF_ID(func, bpf_lsm_socket_connect) +BTF_ID(func, bpf_lsm_socket_create) 
+BTF_ID(func, bpf_lsm_socket_getpeername) +BTF_ID(func, bpf_lsm_socket_getpeersec_dgram) +BTF_ID(func, bpf_lsm_socket_getsockname) +BTF_ID(func, bpf_lsm_socket_getsockopt) +BTF_ID(func, bpf_lsm_socket_listen) +BTF_ID(func, bpf_lsm_socket_post_create) +BTF_ID(func, bpf_lsm_socket_recvmsg) +BTF_ID(func, bpf_lsm_socket_sendmsg) +BTF_ID(func, bpf_lsm_socket_shutdown) +BTF_ID(func, bpf_lsm_socket_socketpair) +BTF_ID(func, bpf_lsm_syslog) +BTF_ID(func, bpf_lsm_task_alloc) +BTF_ID(func, bpf_lsm_task_getsecid) +BTF_ID(func, bpf_lsm_task_prctl) +BTF_ID(func, bpf_lsm_task_setscheduler) +BTF_ID(func, bpf_lsm_task_to_inode) +BTF_SET_END(sleepable_lsm_hooks) + +bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return btf_id_set_contains(&sleepable_lsm_hooks, btf_id); +} + const struct bpf_prog_ops lsm_prog_ops = { }; diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c new file mode 100644 index 000000000000..4ef1959a78f2 --- /dev/null +++ b/kernel/bpf/bpf_task_storage.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Facebook + * Copyright 2020 Google LLC. + */ + +#include <linux/pid.h> +#include <linux/sched.h> +#include <linux/rculist.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bpf.h> +#include <linux/bpf_local_storage.h> +#include <linux/filter.h> +#include <uapi/linux/btf.h> +#include <linux/bpf_lsm.h> +#include <linux/btf_ids.h> +#include <linux/fdtable.h> + +DEFINE_BPF_STORAGE_CACHE(task_cache); + +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) +{ + struct task_struct *task = owner; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + return &bsb->storage; +} + +static struct bpf_local_storage_data * +task_storage_lookup(struct task_struct *task, struct bpf_map *map, + bool cacheit_lockit) +{ + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + struct bpf_storage_blob *bsb; + + bsb = bpf_task(task); + if (!bsb) + return NULL; + + task_storage = rcu_dereference(bsb->storage); + if (!task_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); +} + +void bpf_task_storage_free(struct task_struct *task) +{ + struct bpf_local_storage_elem *selem; + struct bpf_local_storage *local_storage; + bool free_task_storage = false; + struct bpf_storage_blob *bsb; + struct hlist_node *n; + + bsb = bpf_task(task); + if (!bsb) + return; + + rcu_read_lock(); + + local_storage = rcu_dereference(bsb->storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * local_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&local_storage->lock); + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + free_task_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false); + } + raw_spin_unlock_bh(&local_storage->lock); + rcu_read_unlock(); + + /* free_task_storage should always be true as long as + * local_storage->list was non-empty. 
+ */ + if (free_task_storage) + kfree_rcu(local_storage, rcu); +} + +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return ERR_CAST(pid); + + /* We should be in an RCU read side critical section, so it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + sdata = task_storage_lookup(task, map, true); + put_pid(pid); + return sdata ? sdata->data : NULL; +out: + put_pid(pid); + return ERR_PTR(err); +} + +static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, so it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task || !task_storage_ptr(task)) { + err = -ENOENT; + goto out; + } + + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, map_flags); + + err = PTR_ERR_OR_ZERO(sdata); +out: + put_pid(pid); + return err; +} + +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = task_storage_lookup(task, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata)); + + return 0; +} + +static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct task_struct *task; + unsigned int f_flags; + struct pid *pid; + int fd, err; + + fd = *(int *)key; + pid = pidfd_get_pid(fd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + /* We should be in an RCU read side critical section, so it should be safe + * to call pid_task. + */ + WARN_ON_ONCE(!rcu_read_lock_held()); + task = pid_task(pid, PIDTYPE_PID); + if (!task) { + err = -ENOENT; + goto out; + } + + err = task_storage_delete(task, map); +out: + put_pid(pid); + return err; +} + +BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags) +{ + struct bpf_local_storage_data *sdata; + + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + /* explicitly check that the task_storage_ptr is not + * NULL as task_storage_lookup returns NULL in this case and + * bpf_local_storage_update expects the owner to have a + * valid storage pointer. + */ + if (!task_storage_ptr(task)) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); + if (sdata) + return (unsigned long)sdata->data; + + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + sdata = bpf_local_storage_update( + task, (struct bpf_local_storage_map *)map, value, + BPF_NOEXIST); + return IS_ERR(sdata) ? 
(unsigned long)NULL : + (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + return task_storage_delete(task, map); +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_local_storage_map *smap; + + smap = bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); + return &smap->map; +} + +static void task_storage_map_free(struct bpf_map *map) +{ + struct bpf_local_storage_map *smap; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); + bpf_local_storage_map_free(smap); +} + +static int task_storage_map_btf_id; +const struct bpf_map_ops task_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = task_storage_map_alloc, + .map_free = task_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_pid_task_storage_lookup_elem, + .map_update_elem = bpf_pid_task_storage_update_elem, + .map_delete_elem = bpf_pid_task_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_name = "bpf_local_storage_map", + .map_btf_id = &task_storage_map_btf_id, + .map_owner_storage_ptr = task_storage_ptr, +}; + +BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct) + +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_task_storage_btf_ids[0], +}; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ed7d02e8bc93..6b2d508b33d4 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -22,7 +22,8 @@ #include <linux/skmsg.h> #include <linux/perf_event.h> #include <linux/bsearch.h> -#include <linux/btf_ids.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <net/sock.h> /* BTF (BPF Type Format) is the meta data format which describes @@ -204,12 +205,19 @@ struct btf { const char *strings; void *nohdr_data; struct btf_header hdr; - u32 nr_types; + u32 nr_types; /* includes VOID for base BTF */ u32 types_size; u32 data_size; refcount_t refcnt; u32 id; struct rcu_head rcu; + + /* split BTF support */ + struct btf *base_btf; + u32 start_id; /* first type ID in this BTF (0 for base BTF) */ + u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; }; enum verifier_phase { @@ -450,14 +458,27 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } +static u32 btf_nr_types_total(const struct btf *btf) +{ + u32 total = 0; + + while (btf) { + 
total += btf->nr_types; + btf = btf->base_btf; + } + + return total; +} + s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) { const struct btf_type *t; const char *tname; - u32 i; + u32 i, total; - for (i = 1; i <= btf->nr_types; i++) { - t = btf->types[i]; + total = btf_nr_types_total(btf); + for (i = 1; i < total; i++) { + t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) continue; @@ -600,8 +621,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { - return BTF_STR_OFFSET_VALID(offset) && - offset < btf->hdr.str_len; + if (!BTF_STR_OFFSET_VALID(offset)) + return false; + + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + return offset < btf->hdr.str_len; } static bool __btf_name_char_ok(char c, bool first, bool dot_ok) @@ -615,10 +642,22 @@ static bool __btf_name_char_ok(char c, bool first, bool dot_ok) return true; } +static const char *btf_str_by_offset(const struct btf *btf, u32 offset) +{ + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + if (offset < btf->hdr.str_len) + return &btf->strings[offset]; + + return NULL; +} + static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) { /* offset must be valid */ - const char *src = &btf->strings[offset]; + const char *src = btf_str_by_offset(btf, offset); const char *src_limit; if (!__btf_name_char_ok(*src, true, dot_ok)) @@ -651,27 +690,28 @@ static bool btf_name_valid_section(const struct btf *btf, u32 offset) static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { + const char *name; + if (!offset) return "(anon)"; - else if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - else - return "(invalid-name-offset)"; + + name = btf_str_by_offset(btf, offset); + return name ?: "(invalid-name-offset)"; } const char *btf_name_by_offset(const struct btf *btf, u32 offset) { - if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - - return NULL; + return btf_str_by_offset(btf, offset); } const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { - if (type_id > btf->nr_types) - return NULL; + while (type_id < btf->start_id) + btf = btf->base_btf; + type_id -= btf->start_id; + if (type_id >= btf->nr_types) + return NULL; return btf->types[type_id]; } @@ -1391,17 +1431,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) { struct btf *btf = env->btf; - /* < 2 because +1 for btf_void which is always in btf->types[0]. - * btf_void is not accounted in btf->nr_types because btf_void - * does not come from the BTF file. 
- */ - if (btf->types_size - btf->nr_types < 2) { + if (btf->types_size == btf->nr_types) { /* Expand 'types' array */ struct btf_type **new_types; u32 expand_by, new_size; - if (btf->types_size == BTF_MAX_TYPE) { + if (btf->start_id + btf->types_size == BTF_MAX_TYPE) { btf_verifier_log(env, "Exceeded max num of types"); return -E2BIG; } @@ -1415,18 +1451,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) if (!new_types) return -ENOMEM; - if (btf->nr_types == 0) - new_types[0] = &btf_void; - else + if (btf->nr_types == 0) { + if (!btf->base_btf) { + /* lazily init VOID type */ + new_types[0] = &btf_void; + btf->nr_types++; + } + } else { memcpy(new_types, btf->types, - sizeof(*btf->types) * (btf->nr_types + 1)); + sizeof(*btf->types) * btf->nr_types); + } kvfree(btf->types); btf->types = new_types; btf->types_size = new_size; } - btf->types[++(btf->nr_types)] = t; + btf->types[btf->nr_types++] = t; return 0; } @@ -1499,18 +1540,17 @@ static int env_resolve_init(struct btf_verifier_env *env) u32 *resolved_ids = NULL; u8 *visit_states = NULL; - /* +1 for btf_void */ - resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes), + resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes), GFP_KERNEL | __GFP_NOWARN); if (!resolved_sizes) goto nomem; - resolved_ids = kvcalloc(nr_types + 1, sizeof(*resolved_ids), + resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids), GFP_KERNEL | __GFP_NOWARN); if (!resolved_ids) goto nomem; - visit_states = kvcalloc(nr_types + 1, sizeof(*visit_states), + visit_states = kvcalloc(nr_types, sizeof(*visit_states), GFP_KERNEL | __GFP_NOWARN); if (!visit_states) goto nomem; @@ -1562,21 +1602,27 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, static bool env_type_is_resolved(const struct btf_verifier_env *env, u32 type_id) { - return env->visit_states[type_id] == RESOLVED; + /* base BTF types should be resolved by now */ + if (type_id < env->btf->start_id) + return true; + + return env->visit_states[type_id - env->btf->start_id] == RESOLVED; } static int env_stack_push(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { + const struct btf *btf = env->btf; struct resolve_vertex *v; if (env->top_stack == MAX_RESOLVE_DEPTH) return -E2BIG; - if (env->visit_states[type_id] != NOT_VISITED) + if (type_id < btf->start_id + || env->visit_states[type_id - btf->start_id] != NOT_VISITED) return -EEXIST; - env->visit_states[type_id] = VISITED; + env->visit_states[type_id - btf->start_id] = VISITED; v = &env->stack[env->top_stack++]; v->t = t; @@ -1606,6 +1652,7 @@ static void env_stack_pop_resolved(struct btf_verifier_env *env, u32 type_id = env->stack[--(env->top_stack)].type_id; struct btf *btf = env->btf; + type_id -= btf->start_id; /* adjust to local type id */ btf->resolved_sizes[type_id] = resolved_size; btf->resolved_ids[type_id] = resolved_type_id; env->visit_states[type_id] = RESOLVED; @@ -1710,14 +1757,30 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL); } +static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_ids[type_id - btf->start_id]; +} + /* The input param "type_id" must point to a needs_resolve type */ static const struct btf_type *btf_type_id_resolve(const struct btf *btf, u32 *type_id) { - *type_id = btf->resolved_ids[*type_id]; + *type_id = btf_resolved_type_id(btf, *type_id); return 
btf_type_by_id(btf, *type_id); } +static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_sizes[type_id - btf->start_id]; +} + const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size) { @@ -1732,7 +1795,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, if (btf_type_has_size(size_type)) { size = size_type->size; } else if (btf_type_is_array(size_type)) { - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); } else if (btf_type_is_ptr(size_type)) { size = sizeof(void *); } else { @@ -1740,14 +1803,14 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, !btf_type_is_var(size_type))) return NULL; - size_type_id = btf->resolved_ids[size_type_id]; + size_type_id = btf_resolved_type_id(btf, size_type_id); size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; else if (btf_type_has_size(size_type)) size = size_type->size; else if (btf_type_is_array(size_type)) - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); else if (btf_type_is_ptr(size_type)) size = sizeof(void *); else @@ -3799,7 +3862,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env) cur = btf->nohdr_data + hdr->type_off; end = cur + hdr->type_len; - env->log_type_id = 1; + env->log_type_id = btf->base_btf ? btf->start_id : 1; while (cur < end) { struct btf_type *t = cur; s32 meta_size; @@ -3826,8 +3889,8 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return false; if (btf_type_is_struct(t) || btf_type_is_datasec(t)) - return !btf->resolved_ids[type_id] && - !btf->resolved_sizes[type_id]; + return !btf_resolved_type_id(btf, type_id) && + !btf_resolved_type_size(btf, type_id); if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_var(t)) { @@ -3847,7 +3910,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); return elem_type && !btf_type_is_modifier(elem_type) && (array->nelems * elem_size == - btf->resolved_sizes[type_id]); + btf_resolved_type_size(btf, type_id)); } return false; @@ -3889,7 +3952,8 @@ static int btf_resolve(struct btf_verifier_env *env, static int btf_check_all_types(struct btf_verifier_env *env) { struct btf *btf = env->btf; - u32 type_id; + const struct btf_type *t; + u32 type_id, i; int err; err = env_resolve_init(env); @@ -3897,8 +3961,9 @@ static int btf_check_all_types(struct btf_verifier_env *env) return err; env->phase++; - for (type_id = 1; type_id <= btf->nr_types; type_id++) { - const struct btf_type *t = btf_type_by_id(btf, type_id); + for (i = btf->base_btf ? 
0 : 1; i < btf->nr_types; i++) { + type_id = btf->start_id + i; + t = btf_type_by_id(btf, type_id); env->log_type_id = type_id; if (btf_type_needs_resolve(t) && @@ -3935,7 +4000,7 @@ static int btf_parse_type_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->type_len) { + if (!env->btf->base_btf && !hdr->type_len) { btf_verifier_log(env, "No type found"); return -EINVAL; } @@ -3962,13 +4027,18 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || - start[0] || end[-1]) { + btf->strings = start; + + if (btf->base_btf && !hdr->str_len) + return 0; + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) { + btf_verifier_log(env, "Invalid string section"); + return -EINVAL; + } + if (!btf->base_btf && start[0]) { btf_verifier_log(env, "Invalid string section"); return -EINVAL; } - - btf->strings = start; return 0; } @@ -4363,6 +4433,8 @@ struct btf *btf_parse_vmlinux(void) btf->data = __start_BTF; btf->data_size = __stop_BTF - __start_BTF; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "vmlinux"); err = btf_parse_hdr(env); if (err) @@ -4388,6 +4460,81 @@ struct btf *btf_parse_vmlinux(void) bpf_struct_ops_init(btf, log); + refcount_set(&btf->refcnt, 1); + + err = btf_alloc_id(btf); + if (err) + goto errout; + + btf_verifier_env_free(env); + return btf; + +errout: + btf_verifier_env_free(env); + if (btf) { + kvfree(btf->types); + kfree(btf); + } + return ERR_PTR(err); +} + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf = NULL, *base_btf; + int err; + + base_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(base_btf)) + return base_btf; + if (!base_btf) + return ERR_PTR(-EINVAL); + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); + if (!btf) { + err = -ENOMEM; + goto errout; + } + env->btf = btf; + + btf->base_btf = base_btf; + btf->start_id = base_btf->nr_types; + btf->start_str_off = base_btf->hdr.str_len; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "%s", module_name); + + btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN); + if (!btf->data) { + err = -ENOMEM; + goto errout; + } + memcpy(btf->data, data, data_size); + btf->data_size = data_size; + + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + + err = btf_parse_str_sec(env); + if (err) + goto errout; + + err = btf_check_all_metas(env); + if (err) + goto errout; + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; @@ -4395,12 +4542,15 @@ struct btf *btf_parse_vmlinux(void) errout: btf_verifier_env_free(env); if (btf) { + kvfree(btf->data); kvfree(btf->types); kfree(btf); } return ERR_PTR(err); } +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ + struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) { struct bpf_prog *tgt_prog = prog->aux->dst_prog; @@ -4909,7 +5059,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!t) { - *bad_type = btf->types[0]; + *bad_type = btf_type_by_id(btf, 0); return -EINVAL; } if (btf_type_is_ptr(t)) @@ -5487,7 +5637,9 @@ int 
btf_get_info_by_fd(const struct btf *btf, struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; - u32 uinfo_len; + char __user *uname; + u32 uinfo_len, uname_len, name_len; + int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; @@ -5504,11 +5656,37 @@ int btf_get_info_by_fd(const struct btf *btf, return -EFAULT; info.btf_size = btf->data_size; + info.kernel_btf = btf->kernel_btf; + + uname = u64_to_user_ptr(info.name); + uname_len = info.name_len; + if (!uname ^ !uname_len) + return -EINVAL; + + name_len = strlen(btf->name); + info.name_len = name_len; + + if (uname) { + if (uname_len >= name_len + 1) { + if (copy_to_user(uname, btf->name, name_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(uname, btf->name, uname_len - 1)) + return -EFAULT; + if (put_user(zero, uname + uname_len - 1)) + return -EFAULT; + /* let user-space know about too short buffer */ + ret = -ENOSPC; + } + } + if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; - return 0; + return ret; } int btf_get_fd_by_id(u32 id) @@ -5548,3 +5726,126 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) { return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +struct btf_module { + struct list_head list; + struct module *module; + struct btf *btf; + struct bin_attribute *sysfs_attr; +}; + +static LIST_HEAD(btf_modules); +static DEFINE_MUTEX(btf_module_mutex); + +static ssize_t +btf_module_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + const struct btf *btf = bin_attr->private; + + memcpy(buf, btf->data + off, len); + return len; +} + +static int btf_module_notify(struct notifier_block *nb, unsigned long op, + void *module) +{ + struct btf_module *btf_mod, *tmp; + struct module *mod = module; + struct btf *btf; + int err = 0; + + if (mod->btf_data_size == 0 || + (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + goto out; + + switch (op) { + case MODULE_STATE_COMING: + btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL); + if (!btf_mod) { + err = -ENOMEM; + goto out; + } + btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size); + if (IS_ERR(btf)) { + pr_warn("failed to validate module [%s] BTF: %ld\n", + mod->name, PTR_ERR(btf)); + kfree(btf_mod); + err = PTR_ERR(btf); + goto out; + } + err = btf_alloc_id(btf); + if (err) { + btf_free(btf); + kfree(btf_mod); + goto out; + } + + mutex_lock(&btf_module_mutex); + btf_mod->module = module; + btf_mod->btf = btf; + list_add(&btf_mod->list, &btf_modules); + mutex_unlock(&btf_module_mutex); + + if (IS_ENABLED(CONFIG_SYSFS)) { + struct bin_attribute *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + goto out; + + sysfs_bin_attr_init(attr); + attr->attr.name = btf->name; + attr->attr.mode = 0444; + attr->size = btf->data_size; + attr->private = btf; + attr->read = btf_module_read; + + err = sysfs_create_bin_file(btf_kobj, attr); + if (err) { + pr_warn("failed to register module [%s] BTF in sysfs: %d\n", + mod->name, err); + kfree(attr); + err = 0; + goto out; + } + + btf_mod->sysfs_attr = attr; + } + + break; + case MODULE_STATE_GOING: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + list_del(&btf_mod->list); + if (btf_mod->sysfs_attr) + sysfs_remove_bin_file(btf_kobj, 
btf_mod->sysfs_attr); + btf_put(btf_mod->btf); + kfree(btf_mod->sysfs_attr); + kfree(btf_mod); + break; + } + mutex_unlock(&btf_module_mutex); + break; + } +out: + return notifier_from_errno(err); +} + +static struct notifier_block btf_module_nb = { + .notifier_call = btf_module_notify, +}; + +static int __init btf_module_init(void) +{ + register_module_notifier(&btf_module_nb); + return 0; +} + +fs_initcall(btf_module_init); +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 1fccba6e88c4..ec46266aaf1c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -86,6 +86,9 @@ struct bucket { }; }; +#define HASHTAB_MAP_LOCK_COUNT 8 +#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1) + struct bpf_htab { struct bpf_map map; struct bucket *buckets; @@ -99,6 +102,8 @@ struct bpf_htab { u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ u32 hashrnd; + struct lock_class_key lockdep_key; + int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT]; }; /* each htab element is struct htab_elem + key + value */ @@ -138,33 +143,53 @@ static void htab_init_buckets(struct bpf_htab *htab) for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); - if (htab_use_raw_lock(htab)) + if (htab_use_raw_lock(htab)) { raw_spin_lock_init(&htab->buckets[i].raw_lock); - else + lockdep_set_class(&htab->buckets[i].raw_lock, + &htab->lockdep_key); + } else { spin_lock_init(&htab->buckets[i].lock); + lockdep_set_class(&htab->buckets[i].lock, + &htab->lockdep_key); + } } } -static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab, - struct bucket *b) +static inline int htab_lock_bucket(const struct bpf_htab *htab, + struct bucket *b, u32 hash, + unsigned long *pflags) { unsigned long flags; + hash = hash & HASHTAB_MAP_LOCK_MASK; + + migrate_disable(); + if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { + __this_cpu_dec(*(htab->map_locked[hash])); + migrate_enable(); + return -EBUSY; + } + if (htab_use_raw_lock(htab)) raw_spin_lock_irqsave(&b->raw_lock, flags); else spin_lock_irqsave(&b->lock, flags); - return flags; + *pflags = flags; + + return 0; } static inline void htab_unlock_bucket(const struct bpf_htab *htab, - struct bucket *b, + struct bucket *b, u32 hash, unsigned long flags) { + hash = hash & HASHTAB_MAP_LOCK_MASK; if (htab_use_raw_lock(htab)) raw_spin_unlock_irqrestore(&b->raw_lock, flags); else spin_unlock_irqrestore(&b->lock, flags); + __this_cpu_dec(*(htab->map_locked[hash])); + migrate_enable(); } static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); @@ -390,17 +415,11 @@ static int htab_map_alloc_check(union bpf_attr *attr) attr->value_size == 0) return -EINVAL; - if (attr->key_size > MAX_BPF_STACK) - /* eBPF programs initialize keys on stack, so they cannot be - * larger than max stack size - */ - return -E2BIG; - - if (attr->value_size >= KMALLOC_MAX_SIZE - - MAX_BPF_STACK - sizeof(struct htab_elem)) - /* if value_size is bigger, the user space won't be able to - * access the elements via bpf syscall. This check also makes - * sure that the elem_size doesn't overflow and it's + if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - + sizeof(struct htab_elem)) + /* if key_size + value_size is bigger, the user space won't be + * able to access the elements via bpf syscall. 
This check + * also makes sure that the elem_size doesn't overflow and it's * kmalloc-able later in htab_map_update_elem() */ return -E2BIG; @@ -422,13 +441,15 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); struct bpf_htab *htab; + int err, i; u64 cost; - int err; htab = kzalloc(sizeof(*htab), GFP_USER); if (!htab) return ERR_PTR(-ENOMEM); + lockdep_register_key(&htab->lockdep_key); + bpf_map_init_from_attr(&htab->map, attr); if (percpu_lru) { @@ -480,6 +501,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (!htab->buckets) goto free_charge; + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) { + htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int), + sizeof(int), GFP_USER); + if (!htab->map_locked[i]) + goto free_map_locked; + } + if (htab->map.map_flags & BPF_F_ZERO_SEED) htab->hashrnd = 0; else @@ -490,7 +518,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (prealloc) { err = prealloc_init(htab); if (err) - goto free_buckets; + goto free_map_locked; if (!percpu && !lru) { /* lru itself can remove the least used element, so @@ -506,11 +534,14 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) free_prealloc: prealloc_destroy(htab); -free_buckets: +free_map_locked: + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) + free_percpu(htab->map_locked[i]); bpf_map_area_free(htab->buckets); free_charge: bpf_map_charge_finish(&htab->map.memory); free_htab: + lockdep_unregister_key(&htab->lockdep_key); kfree(htab); return ERR_PTR(err); } @@ -687,12 +718,15 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; + int ret; tgt_l = container_of(node, struct htab_elem, lru_node); b = __select_bucket(htab, tgt_l->hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags); + if (ret) + return false; hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { @@ -700,7 +734,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) break; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, tgt_l->hash, flags); return l == tgt_l; } @@ -998,7 +1032,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, */ } - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@ -1039,7 +1075,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@ -1077,7 +1113,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, return -ENOMEM; memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@ -1096,7 +1134,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (ret) bpf_lru_push_free(&htab->lru, &l_new->lru_node); @@ -1131,7 +1169,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, b = __select_bucket(htab, hash); head = &b->head; - 
flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@ -1154,7 +1194,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@ -1194,7 +1234,9 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, return -ENOMEM; } - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@ -1216,7 +1258,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (l_new) bpf_lru_push_free(&htab->lru, &l_new->lru_node); return ret; @@ -1244,7 +1286,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) struct htab_elem *l; unsigned long flags; u32 hash, key_size; - int ret = -ENOENT; + int ret; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); @@ -1254,17 +1296,20 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) b = __select_bucket(htab, hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l = lookup_elem_raw(head, hash, key, key_size); if (l) { hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); - ret = 0; + } else { + ret = -ENOENT; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@ -1276,7 +1321,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) struct htab_elem *l; unsigned long flags; u32 hash, key_size; - int ret = -ENOENT; + int ret; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); @@ -1286,16 +1331,18 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) b = __select_bucket(htab, hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l = lookup_elem_raw(head, hash, key, key_size); - if (l) { + if (l) hlist_nulls_del_rcu(&l->hash_node); - ret = 0; - } + else + ret = -ENOENT; - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (l) bpf_lru_push_free(&htab->lru, &l->lru_node); return ret; @@ -1321,6 +1368,7 @@ static void delete_all_elements(struct bpf_htab *htab) static void htab_map_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + int i; /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. * bpf_free_used_maps() is called after bpf prog is no longer executing. @@ -1338,6 +1386,9 @@ static void htab_map_free(struct bpf_map *map) free_percpu(htab->extra_elems); bpf_map_area_free(htab->buckets); + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) + free_percpu(htab->map_locked[i]); + lockdep_unregister_key(&htab->lockdep_key); kfree(htab); } @@ -1441,8 +1492,11 @@ again_nocopy: b = &htab->buckets[batch]; head = &b->head; /* do not grab the lock unless need it (bucket_cnt > 0). 
*/ - if (locked) - flags = htab_lock_bucket(htab, b); + if (locked) { + ret = htab_lock_bucket(htab, b, batch, &flags); + if (ret) + goto next_batch; + } bucket_cnt = 0; hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) @@ -1459,7 +1513,7 @@ again_nocopy: /* Note that since bucket_cnt > 0 here, it is implicit * that the locked was grabbed, so release it. */ - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); rcu_read_unlock(); bpf_enable_instrumentation(); goto after_loop; @@ -1470,7 +1524,7 @@ again_nocopy: /* Note that since bucket_cnt > 0 here, it is implicit * that the locked was grabbed, so release it. */ - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); rcu_read_unlock(); bpf_enable_instrumentation(); kvfree(keys); @@ -1523,7 +1577,7 @@ again_nocopy: dst_val += value_size; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); locked = false; while (node_to_free) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f50c9c19f1b..f3fe9f53f93c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && - map->map_type != BPF_MAP_TYPE_INODE_STORAGE) + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && + map->map_type != BPF_MAP_TYPE_TASK_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 11b3380887fa..ef6911aee3bb 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -26,7 +26,7 @@ static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { .read = btf_vmlinux_read, }; -static struct kobject *btf_kobj; +struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 5b6af30bfbcd..1fdb2fc196cd 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -337,6 +337,7 @@ static const struct bpf_iter_seq_info task_seq_info = { static struct bpf_iter_reg task_reg_info = { .target = "task", + .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), @@ -354,6 +355,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = { static struct bpf_iter_reg task_file_reg_info = { .target = "task_file", + .feature = BPF_ITER_RESCHED, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__task_file, task), diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1388bf733071..ab2d6a02aee0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2739,7 +2739,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, regno); return -EACCES; } - err = __check_mem_access(env, regno, off, size, reg->range, + + err = reg->range < 0 ? 
-EINVAL : + __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed); if (err) { verbose(env, "R%d offset is outside of the packet\n", regno); @@ -4469,6 +4471,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_inode_storage_delete) goto error; break; + case BPF_MAP_TYPE_TASK_STORAGE: + if (func_id != BPF_FUNC_task_storage_get && + func_id != BPF_FUNC_task_storage_delete) + goto error; + break; default: break; } @@ -4547,6 +4554,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) goto error; break; + case BPF_FUNC_task_storage_get: + case BPF_FUNC_task_storage_delete: + if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) + goto error; + break; default: break; } @@ -4687,6 +4699,32 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) __clear_all_pkt_pointers(env, vstate->frame[i]); } +enum { + AT_PKT_END = -1, + BEYOND_PKT_END = -2, +}; + +static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) +{ + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *reg = &state->regs[regn]; + + if (reg->type != PTR_TO_PACKET) + /* PTR_TO_PACKET_META is not supported yet */ + return; + + /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. + * How far beyond pkt_end it goes is unknown. + * if (!range_open) it's the case of pkt >= pkt_end + * if (range_open) it's the case of pkt > pkt_end + * hence this pointer is at least 1 byte bigger than pkt_end + */ + if (range_open) + reg->range = BEYOND_PKT_END; + else + reg->range = AT_PKT_END; +} + static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id) @@ -5176,11 +5214,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { + } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || + fn->ret_type == RET_PTR_TO_BTF_ID) { int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; + regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? + PTR_TO_BTF_ID : + PTR_TO_BTF_ID_OR_NULL; ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { verbose(env, "invalid return type %d of func %s#%d\n", @@ -6695,7 +6736,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, u16 new_range) + enum bpf_reg_type type, int new_range) { struct bpf_reg_state *reg; int i; @@ -6720,8 +6761,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, enum bpf_reg_type type, bool range_right_open) { - u16 new_range; - int i; + int new_range, i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -6972,6 +7012,67 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, return is_branch64_taken(reg, val, opcode); } +static int flip_opcode(u32 opcode) +{ + /* How can we transform "a <op> b" into "b <op> a"? 
*/ + static const u8 opcode_flip[16] = { + /* these stay the same */ + [BPF_JEQ >> 4] = BPF_JEQ, + [BPF_JNE >> 4] = BPF_JNE, + [BPF_JSET >> 4] = BPF_JSET, + /* these swap "lesser" and "greater" (L and G in the opcodes) */ + [BPF_JGE >> 4] = BPF_JLE, + [BPF_JGT >> 4] = BPF_JLT, + [BPF_JLE >> 4] = BPF_JGE, + [BPF_JLT >> 4] = BPF_JGT, + [BPF_JSGE >> 4] = BPF_JSLE, + [BPF_JSGT >> 4] = BPF_JSLT, + [BPF_JSLE >> 4] = BPF_JSGE, + [BPF_JSLT >> 4] = BPF_JSGT + }; + return opcode_flip[opcode >> 4]; +} + +static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg, + u8 opcode) +{ + struct bpf_reg_state *pkt; + + if (src_reg->type == PTR_TO_PACKET_END) { + pkt = dst_reg; + } else if (dst_reg->type == PTR_TO_PACKET_END) { + pkt = src_reg; + opcode = flip_opcode(opcode); + } else { + return -1; + } + + if (pkt->range >= 0) + return -1; + + switch (opcode) { + case BPF_JLE: + /* pkt <= pkt_end */ + fallthrough; + case BPF_JGT: + /* pkt > pkt_end */ + if (pkt->range == BEYOND_PKT_END) + /* pkt has at least one extra byte beyond pkt_end */ + return opcode == BPF_JGT; + break; + case BPF_JLT: + /* pkt < pkt_end */ + fallthrough; + case BPF_JGE: + /* pkt >= pkt_end */ + if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) + return opcode == BPF_JGE; + break; + } + return -1; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -7135,23 +7236,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, u64 val, u32 val32, u8 opcode, bool is_jmp32) { - /* How can we transform "a <op> b" into "b <op> a"? */ - static const u8 opcode_flip[16] = { - /* these stay the same */ - [BPF_JEQ >> 4] = BPF_JEQ, - [BPF_JNE >> 4] = BPF_JNE, - [BPF_JSET >> 4] = BPF_JSET, - /* these swap "lesser" and "greater" (L and G in the opcodes) */ - [BPF_JGE >> 4] = BPF_JLE, - [BPF_JGT >> 4] = BPF_JLT, - [BPF_JLE >> 4] = BPF_JGE, - [BPF_JLT >> 4] = BPF_JGT, - [BPF_JSGE >> 4] = BPF_JSLE, - [BPF_JSGT >> 4] = BPF_JSLT, - [BPF_JSLE >> 4] = BPF_JSGE, - [BPF_JSLT >> 4] = BPF_JSGT - }; - opcode = opcode_flip[opcode >> 4]; + opcode = flip_opcode(opcode); /* This uses zero as "not present in table"; luckily the zero opcode, * BPF_JA, can't get here.
*/ @@ -7333,6 +7418,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(other_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7340,6 +7426,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end > pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true); + mark_pkt_end(this_branch, insn->src_reg, false); } else { return false; } @@ -7352,6 +7439,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(this_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7359,6 +7447,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end < pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false); + mark_pkt_end(other_branch, insn->src_reg, true); } else { return false; } @@ -7371,6 +7460,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(other_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7378,6 +7468,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false); + mark_pkt_end(this_branch, insn->src_reg, true); } else { return false; } @@ -7390,6 +7481,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(this_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7397,6 +7489,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true); + mark_pkt_end(other_branch, insn->src_reg, false); } else { return false; } @@ -7496,6 +7589,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, src_reg->var_off.value, opcode, is_jmp32); + } else if (reg_is_pkt_pointer_any(dst_reg) && + reg_is_pkt_pointer_any(src_reg) && + !is_jmp32) { + pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); } if (pred >= 0) { @@ -7504,7 +7601,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!__is_pointer_value(false, dst_reg)) err = mark_chain_precision(env, insn->dst_reg); - if (BPF_SRC(insn->code) == BPF_X && !err) + if (BPF_SRC(insn->code) == BPF_X && !err && + !__is_pointer_value(false, src_reg)) err = mark_chain_precision(env, insn->src_reg); if (err) return err; @@ -9731,11 +9829,21 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, verbose(env, "trace type programs with run-time allocated hash maps are unsafe. 
Switch to preallocated hash maps.\n"); } - if ((is_tracing_prog_type(prog_type) || - prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && - map_value_has_spin_lock(map)) { - verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); - return -EINVAL; + if (map_value_has_spin_lock(map)) { + if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { + verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } + + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } + + if (prog->aux->sleepable) { + verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n"); + return -EINVAL; + } } if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && @@ -11466,20 +11574,6 @@ static int check_attach_modify_return(unsigned long addr, const char *func_name) return -EINVAL; } -/* non exhaustive list of sleepable bpf_lsm_*() functions */ -BTF_SET_START(btf_sleepable_lsm_hooks) -#ifdef CONFIG_BPF_LSM -BTF_ID(func, bpf_lsm_bprm_committed_creds) -#else -BTF_ID_UNUSED -#endif -BTF_SET_END(btf_sleepable_lsm_hooks) - -static int check_sleepable_lsm_hook(u32 btf_id) -{ - return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id); -} - /* list of non-sleepable functions that are otherwise on * ALLOW_ERROR_INJECTION list */ @@ -11701,7 +11795,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, /* LSM progs check that they are attached to bpf_lsm_*() funcs. * Only some of them are sleepable. */ - if (check_sleepable_lsm_hook(btf_id)) + if (bpf_lsm_is_sleepable_hook(btf_id)) ret = 0; break; default: diff --git a/kernel/kcov.c b/kernel/kcov.c index 6b8368be89c8..80bfe71bbe13 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -1023,6 +1023,8 @@ EXPORT_SYMBOL(kcov_remote_stop); /* See the comment before kcov_remote_start() for usage details. */ u64 kcov_common_handle(void) { + if (!in_task()) + return 0; return current->kcov_handle; } EXPORT_SYMBOL(kcov_common_handle); diff --git a/kernel/module.c b/kernel/module.c index a4fa44a652a7..f2996b02ab2e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -380,6 +380,35 @@ static void *section_objs(const struct load_info *info, return (void *)info->sechdrs[sec].sh_addr; } +/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */ +static unsigned int find_any_sec(const struct load_info *info, const char *name) +{ + unsigned int i; + + for (i = 1; i < info->hdr->e_shnum; i++) { + Elf_Shdr *shdr = &info->sechdrs[i]; + if (strcmp(info->secstrings + shdr->sh_name, name) == 0) + return i; + } + return 0; +} + +/* + * Find a module section, or NULL. Fill in number of "objects" in section. + * Ignores SHF_ALLOC flag. + */ +static __maybe_unused void *any_section_objs(const struct load_info *info, + const char *name, + size_t object_size, + unsigned int *num) +{ + unsigned int sec = find_any_sec(info, name); + + /* Section 0 has sh_addr 0 and sh_size 0. 
*/ + *num = info->sechdrs[sec].sh_size / object_size; + return (void *)info->sechdrs[sec].sh_addr; +} + /* Provided by the linker */ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; @@ -3250,6 +3279,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->bpf_raw_events), &mod->num_bpf_raw_events); #endif +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size); +#endif #ifdef CONFIG_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", sizeof(*mod->jump_entries), diff --git a/kernel/taskstats.c b/kernel/taskstats.c index a2802b6ff4bb..2b4898b4752e 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -346,7 +346,7 @@ static int parse(struct nlattr *na, struct cpumask *mask) data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; - nla_strlcpy(data, na, len); + nla_strscpy(data, na, len); ret = cpulist_parse(data, mask); kfree(data); return ret; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 048c655315f1..a18fee10e906 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -16,6 +16,9 @@ #include <linux/syscalls.h> #include <linux/error-injection.h> #include <linux/btf_ids.h> +#include <linux/bpf_lsm.h> + +#include <net/bpf_sk_storage.h> #include <uapi/linux/bpf.h> #include <uapi/linux/btf.h> @@ -1032,6 +1035,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_current_task_btf) +{ + return (unsigned long) current; +} + +BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct) + +static const struct bpf_func_proto bpf_get_current_task_btf_proto = { + .func = bpf_get_current_task_btf, + .gpl_only = true, + .ret_type = RET_PTR_TO_BTF_ID, + .ret_btf_id = &bpf_get_current_btf_ids[0], +}; + BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -1174,7 +1191,11 @@ BTF_SET_END(btf_allowlist_d_path) static bool bpf_d_path_allowed(const struct bpf_prog *prog) { - return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id); + if (prog->type == BPF_PROG_TYPE_LSM) + return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id); + + return btf_id_set_contains(&btf_allowlist_d_path, + prog->aux->attach_btf_id); } BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path) @@ -1275,6 +1296,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_task: return &bpf_get_current_task_proto; + case BPF_FUNC_get_current_task_btf: + return &bpf_get_current_task_btf_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_comm: @@ -1729,6 +1752,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_tcp_request_sock_proto; case BPF_FUNC_skc_to_udp6_sock: return &bpf_skc_to_udp6_sock_proto; + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_tracing_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_tracing_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? 
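The kernel/trace/bpf_trace.c hunk above adds bpf_get_current_task_btf(), whose RET_PTR_TO_BTF_ID return type (handled in the verifier hunk earlier) gives programs a BTF-typed, never-NULL task pointer, while the syscall.c and verifier.c hunks wire BPF_MAP_TYPE_TASK_STORAGE to the bpf_task_storage_get/delete helpers. Below is a minimal BPF-side sketch of how the two combine. It is illustrative only and not part of the patch series: the map name, the lsm/file_open hook and the counting logic are invented, and it assumes libbpf CO-RE headers with a vmlinux.h generated from a kernel carrying these changes.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative only, NOT part of this patch series. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} open_count SEC(".maps");	/* hypothetical per-task counter map */

SEC("lsm/file_open")
int BPF_PROG(count_file_open, struct file *file)
{
	/* BTF-typed task pointer; no bpf_probe_read() required */
	struct task_struct *task = bpf_get_current_task_btf();
	__u64 *cnt;

	cnt = bpf_task_storage_get(&open_count, task, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Because the helper's return type is the non-NULL PTR_TO_BTF_ID variant, the verifier accepts passing the task pointer straight into the task-storage helper without a NULL check.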
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c789b39ed527..826a205ffd1c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -274,6 +274,15 @@ config DEBUG_INFO_BTF Turning this on expects presence of pahole tool, which will convert DWARF type info into equivalent deduplicated BTF type info. +config PAHOLE_HAS_SPLIT_BTF + def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") + +config DEBUG_INFO_BTF_MODULES + def_bool y + depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF + help + Generate compact split BTF type information for kernel modules. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" help @@ -1870,6 +1879,7 @@ config KCOV depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC + select SKB_EXTENSIONS if NET help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). diff --git a/lib/nlattr.c b/lib/nlattr.c index 74019c8ebf6b..09aa181569e0 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -709,35 +709,47 @@ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) EXPORT_SYMBOL(nla_find); /** - * nla_strlcpy - Copy string attribute payload into a sized buffer - * @dst: where to copy the string to - * @nla: attribute to copy the string from - * @dstsize: size of destination buffer + * nla_strscpy - Copy string attribute payload into a sized buffer + * @dst: Where to copy the string to. + * @nla: Attribute to copy the string from. + * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. - * The result is always a valid NUL-terminated string. Unlike - * strlcpy the destination buffer is always padded out. + * Unlike strlcpy the destination buffer is always padded out. * - * Returns the length of the source buffer. + * Return: + * * srclen - Returns @nla length (not including the trailing %NUL). + * * -E2BIG - If @dstsize is 0 or greater than U16_MAX or @nla length greater + * than @dstsize. */ -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize) { size_t srclen = nla_len(nla); char *src = nla_data(nla); + ssize_t ret; + size_t len; + + if (dstsize == 0 || WARN_ON_ONCE(dstsize > U16_MAX)) + return -E2BIG; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; - if (dstsize > 0) { - size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; - - memset(dst, 0, dstsize); - memcpy(dst, src, len); + if (srclen >= dstsize) { + len = dstsize - 1; + ret = -E2BIG; + } else { + len = srclen; + ret = len; } - return srclen; + memcpy(dst, src, len); + /* Zero pad end of dst. 
*/ + memset(dst + len, 0, dstsize - len); + + return ret; } -EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(nla_strscpy); /** * nla_strdup - Copy string attribute payload into a newly allocated buffer diff --git a/net/9p/client.c b/net/9p/client.c index 09f1ec589b80..785a7bb6a539 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -412,8 +412,9 @@ static void p9_tag_cleanup(struct p9_client *c) /** * p9_client_cb - call back from transport to client - * c: client state - * req: request received + * @c: client state + * @req: request received + * @status: request status, one of REQ_STATUS_* * */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) @@ -555,6 +556,7 @@ out_err: * p9_check_zc_errors - check 9p packet for error return and process it * @c: current client instance * @req: request to parse and check for error conditions + * @uidata: external buffer containing error * @in_hdrlen: Size of response protocol buffer. * * returns error code if one is discovered, otherwise returns 0 diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 3dff68f05fb9..6ea5ea548cd4 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -17,7 +17,9 @@ #include "trans_common.h" /** - * p9_release_pages - Release pages after the transaction. + * p9_release_pages - Release pages after the transaction. + * @pages: array of pages to be put + * @nr_pages: size of array */ void p9_release_pages(struct page **pages, int nr_pages) { diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8f528e783a6c..fa158397bb63 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -45,7 +45,7 @@ static struct p9_trans_module p9_fd_trans; * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) - * + * @privport: port is privileged */ struct p9_fd_opts { @@ -95,6 +95,8 @@ struct p9_poll_wait { * @err: error state * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent + * @rreq: read request + * @wreq: write request * @req: current request being processed (if any) * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2885ff9c76f0..af0a8a6cd3fd 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -99,6 +99,7 @@ struct p9_rdma_req; /** * struct p9_rdma_context - Keeps track of in-process WR * + * @cqe: completion queue entry * @busa: Bus address to unmap when the WR completes * @req: Keeps track of requests (send) * @rc: Keepts track of replies (receive) @@ -115,6 +116,7 @@ struct p9_rdma_context { /** * struct p9_rdma_opts - Collection of mount options * @port: port of connection + * @privport: Whether a privileged port may be used * @sq_depth: The requested depth of the SQ. This really doesn't need * to be any deeper than the number of threads used in the client * @rq_depth: The depth of the RQ. 
Should be greater than or equal to SQ depth diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a3cd90a74012..93f2f8654882 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -50,7 +50,11 @@ static atomic_t vp_pinned = ATOMIC_INIT(0); * @client: client instance * @vdev: virtio dev associated with this channel * @vq: virtio queue associated with this channel + * @ring_bufs_avail: flag to indicate there is some space available in the ring buf + * @vc_wq: wait queue for waiting for things to be added to ring buf + * @p9_max_pages: maximum number of pinned pages * @sg: scatter gather list which is used to pack a request (protected?) + * @chan_list: linked list of channels * * We keep all per-channel information in a structure. * This structure is allocated within the devices dev->mem space. @@ -74,8 +78,8 @@ struct virtio_chan { unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; - /* - * tag name to identify a mount null terminated + /** + * @tag: name to identify a mount, null terminated */ char *tag; @@ -204,6 +208,7 @@ static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at + * @limit: maximum number of pages in sg list. * @pdata: a list of pages to add into sg. * @nr_pages: number of pages to pack into the scatter/gather list * @offs: amount of data in the beginning of first page _not_ to pack diff --git a/net/Kconfig b/net/Kconfig index d6567162c1cf..f4c32d982af6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -386,8 +386,6 @@ source "net/mac80211/Kconfig" endif # WIRELESS -source "net/wimax/Kconfig" - source "net/rfkill/Kconfig" source "net/9p/Kconfig" source "net/caif/Kconfig" diff --git a/net/Makefile b/net/Makefile index 5744bf1997fd..d96b0aa8f39f 100644 --- a/net/Makefile +++ b/net/Makefile @@ -66,7 +66,6 @@ obj-$(CONFIG_MAC802154) += mac802154/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif -obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 45f584171de7..be18af481d7d 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -44,15 +44,15 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; /* Lists of aarp entries */ /** * struct aarp_entry - AARP entry - * @last_sent - Last time we xmitted the aarp request - * @packet_queue - Queue of frames wait for resolution - * @status - Used for proxy AARP - * expires_at - Entry expiry time - * target_addr - DDP Address - * dev - Device to use - * hwaddr - Physical i/f address of target/router - * xmit_count - When this hits 10 we give up - * next - Next entry in chain + * @last_sent: Last time we xmitted the aarp request + * @packet_queue: Queue of frames waiting for resolution + * @status: Used for proxy AARP + * @expires_at: Entry expiry time + * @target_addr: DDP Address + * @dev: Device to use + * @hwaddr: Physical i/f address of target/router + * @xmit_count: When this hits 10 we give up + * @next: Next entry in chain */ struct aarp_entry { /* These first two are only used for unresolved entries */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 1d48708c5a2e..ca1a0d07a087 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1407,9 +1407,10 @@ drop: /** * atalk_rcv - Receive a packet (in skb) from device dev - * @skb - 
packet received - * @dev - network device where the packet comes from - * @pt - packet type + * @skb: packet received + * @dev: network device where the packet comes from + * @pt: packet type + * @orig_dev: the original receive net device * * Receive a packet (in skb) from device dev. This has come from the SNAP * decoder, and on entry skb->transport_header is the DDP header, skb->len diff --git a/net/atm/raw.c b/net/atm/raw.c index b3ba44aab0ee..2b5f78a7ec3e 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -54,6 +54,8 @@ static int atm_send_aal0(struct atm_vcc *vcc, struct sk_buff *skb) kfree_skb(skb); return -EADDRNOTAVAIL; } + if (vcc->dev->ops->send_bh) + return vcc->dev->ops->send_bh(vcc, skb); return vcc->dev->ops->send(vcc, skb); } @@ -71,7 +73,10 @@ int atm_init_aal34(struct atm_vcc *vcc) vcc->push = atm_push_raw; vcc->pop = atm_pop_raw; vcc->push_oam = NULL; - vcc->send = vcc->dev->ops->send; + if (vcc->dev->ops->send_bh) + vcc->send = vcc->dev->ops->send_bh; + else + vcc->send = vcc->dev->ops->send; return 0; } @@ -80,7 +85,10 @@ int atm_init_aal5(struct atm_vcc *vcc) vcc->push = atm_push_raw; vcc->pop = atm_pop_raw; vcc->push_oam = NULL; - vcc->send = vcc->dev->ops->send; + if (vcc->dev->ops->send_bh) + vcc->send = vcc->dev->ops->send_bh; + else + vcc->send = vcc->dev->ops->send; return 0; } EXPORT_SYMBOL(atm_init_aal5); diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 80879196560c..3c8ded7d3e84 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -73,3 +73,14 @@ config BRIDGE_MRP Say N to exclude this support and reduce the binary size. If unsure, say N. + +config BRIDGE_CFM + bool "CFM protocol" + depends on BRIDGE + help + If you say Y here, then the Ethernet bridge will be able to run CFM + protocol according to 802.1Q section 12.14 + + Say N to exclude this support and reduce the binary size. + + If unsure, say N. 
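Returning to the lib/nlattr.c hunk a few files up: nla_strscpy() keeps nla_strlcpy()'s zero-padding behaviour but changes the return contract, reporting truncation as -E2BIG instead of returning the source length. The fragment below is a hypothetical caller, not part of the patch, showing the migration; the function name and the IFNAMSIZ-sized buffer are assumptions for the sketch.

/* Illustrative only, NOT part of this patch series. */
#include <linux/if.h>
#include <net/netlink.h>

static int example_copy_ifname(const struct nlattr *na, char name[IFNAMSIZ])
{
	ssize_t ret = nla_strscpy(name, na, IFNAMSIZ);

	/* nla_strlcpy() returned the source length, so truncation had to
	 * be detected by comparing against the buffer size. The new
	 * helper reports it directly; the destination is still NUL
	 * terminated and zero padded on success.
	 */
	if (ret == -E2BIG)
		return -EINVAL;		/* reject truncated names */
	return ret < 0 ? ret : 0;	/* 0 on success */
}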
diff --git a/net/bridge/Makefile b/net/bridge/Makefile index ccb394236fbd..4702702a74d3 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -27,3 +27,5 @@ bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o obj-$(CONFIG_NETFILTER) += netfilter/ bridge-$(CONFIG_BRIDGE_MRP) += br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o + +bridge-$(CONFIG_BRIDGE_CFM) += br_cfm.o br_cfm_netlink.o diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c new file mode 100644 index 000000000000..001064f7583d --- /dev/null +++ b/net/bridge/br_cfm.c @@ -0,0 +1,867 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/cfm_bridge.h> +#include <uapi/linux/cfm_bridge.h> +#include "br_private_cfm.h" + +static struct br_cfm_mep *br_mep_find(struct net_bridge *br, u32 instance) +{ + struct br_cfm_mep *mep; + + hlist_for_each_entry(mep, &br->mep_list, head) + if (mep->instance == instance) + return mep; + + return NULL; +} + +static struct br_cfm_mep *br_mep_find_ifindex(struct net_bridge *br, + u32 ifindex) +{ + struct br_cfm_mep *mep; + + hlist_for_each_entry_rcu(mep, &br->mep_list, head, + lockdep_rtnl_is_held()) + if (mep->create.ifindex == ifindex) + return mep; + + return NULL; +} + +static struct br_cfm_peer_mep *br_peer_mep_find(struct br_cfm_mep *mep, + u32 mepid) +{ + struct br_cfm_peer_mep *peer_mep; + + hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head, + lockdep_rtnl_is_held()) + if (peer_mep->mepid == mepid) + return peer_mep; + + return NULL; +} + +static struct net_bridge_port *br_mep_get_port(struct net_bridge *br, + u32 ifindex) +{ + struct net_bridge_port *port; + + list_for_each_entry(port, &br->port_list, list) + if (port->dev->ifindex == ifindex) + return port; + + return NULL; +} + +/* Calculate the CCM interval in us. */ +static u32 interval_to_us(enum br_cfm_ccm_interval interval) +{ + switch (interval) { + case BR_CFM_CCM_INTERVAL_NONE: + return 0; + case BR_CFM_CCM_INTERVAL_3_3_MS: + return 3300; + case BR_CFM_CCM_INTERVAL_10_MS: + return 10 * 1000; + case BR_CFM_CCM_INTERVAL_100_MS: + return 100 * 1000; + case BR_CFM_CCM_INTERVAL_1_SEC: + return 1000 * 1000; + case BR_CFM_CCM_INTERVAL_10_SEC: + return 10 * 1000 * 1000; + case BR_CFM_CCM_INTERVAL_1_MIN: + return 60 * 1000 * 1000; + case BR_CFM_CCM_INTERVAL_10_MIN: + return 10 * 60 * 1000 * 1000; + } + return 0; +} + +/* Convert the interface interval to CCM PDU value. */ +static u32 interval_to_pdu(enum br_cfm_ccm_interval interval) +{ + switch (interval) { + case BR_CFM_CCM_INTERVAL_NONE: + return 0; + case BR_CFM_CCM_INTERVAL_3_3_MS: + return 1; + case BR_CFM_CCM_INTERVAL_10_MS: + return 2; + case BR_CFM_CCM_INTERVAL_100_MS: + return 3; + case BR_CFM_CCM_INTERVAL_1_SEC: + return 4; + case BR_CFM_CCM_INTERVAL_10_SEC: + return 5; + case BR_CFM_CCM_INTERVAL_1_MIN: + return 6; + case BR_CFM_CCM_INTERVAL_10_MIN: + return 7; + } + return 0; +} + +/* Convert the CCM PDU value to interval on interface. 
*/ +static u32 pdu_to_interval(u32 value) +{ + switch (value) { + case 0: + return BR_CFM_CCM_INTERVAL_NONE; + case 1: + return BR_CFM_CCM_INTERVAL_3_3_MS; + case 2: + return BR_CFM_CCM_INTERVAL_10_MS; + case 3: + return BR_CFM_CCM_INTERVAL_100_MS; + case 4: + return BR_CFM_CCM_INTERVAL_1_SEC; + case 5: + return BR_CFM_CCM_INTERVAL_10_SEC; + case 6: + return BR_CFM_CCM_INTERVAL_1_MIN; + case 7: + return BR_CFM_CCM_INTERVAL_10_MIN; + } + return BR_CFM_CCM_INTERVAL_NONE; +} + +static void ccm_rx_timer_start(struct br_cfm_peer_mep *peer_mep) +{ + u32 interval_us; + + interval_us = interval_to_us(peer_mep->mep->cc_config.exp_interval); + /* Function ccm_rx_dwork must be called with 1/4 + * of the configured CC 'expected_interval' + * in order to detect CCM defect after 3.25 intervals. + */ + queue_delayed_work(system_wq, &peer_mep->ccm_rx_dwork, + usecs_to_jiffies(interval_us / 4)); +} + +static void br_cfm_notify(int event, const struct net_bridge_port *port) +{ + u32 filter = RTEXT_FILTER_CFM_STATUS; + + return br_info_notify(event, port->br, NULL, filter); +} + +static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep) +{ + memset(&peer_mep->cc_status, 0, sizeof(peer_mep->cc_status)); + peer_mep->ccm_rx_count_miss = 0; + + ccm_rx_timer_start(peer_mep); +} + +static void cc_peer_disable(struct br_cfm_peer_mep *peer_mep) +{ + cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); +} + +static struct sk_buff *ccm_frame_build(struct br_cfm_mep *mep, + const struct br_cfm_cc_ccm_tx_info *const tx_info) + +{ + struct br_cfm_common_hdr *common_hdr; + struct net_bridge_port *b_port; + struct br_cfm_maid *maid; + u8 *itu_reserved, *e_tlv; + struct ethhdr *eth_hdr; + struct sk_buff *skb; + __be32 *status_tlv; + __be32 *snumber; + __be16 *mepid; + + skb = dev_alloc_skb(CFM_CCM_MAX_FRAME_LENGTH); + if (!skb) + return NULL; + + rcu_read_lock(); + b_port = rcu_dereference(mep->b_port); + if (!b_port) { + kfree_skb(skb); + rcu_read_unlock(); + return NULL; + } + skb->dev = b_port->dev; + rcu_read_unlock(); + /* The device cannot be deleted until the work_queue functions have + * completed. This function is called from ccm_tx_work_expired(), + * which is a work_queue function. + */ + + skb->protocol = htons(ETH_P_CFM); + skb->priority = CFM_FRAME_PRIO; + + /* Ethernet header */ + eth_hdr = skb_put(skb, sizeof(*eth_hdr)); + ether_addr_copy(eth_hdr->h_dest, tx_info->dmac.addr); + ether_addr_copy(eth_hdr->h_source, mep->config.unicast_mac.addr); + eth_hdr->h_proto = htons(ETH_P_CFM); + + /* Common CFM Header */ + common_hdr = skb_put(skb, sizeof(*common_hdr)); + common_hdr->mdlevel_version = mep->config.mdlevel << 5; + common_hdr->opcode = BR_CFM_OPCODE_CCM; + common_hdr->flags = (mep->rdi << 7) | + interval_to_pdu(mep->cc_config.exp_interval); + common_hdr->tlv_offset = CFM_CCM_TLV_OFFSET; + + /* Sequence number */ + snumber = skb_put(skb, sizeof(*snumber)); + if (tx_info->seq_no_update) { + *snumber = cpu_to_be32(mep->ccm_tx_snumber); + mep->ccm_tx_snumber += 1; + } else { + *snumber = 0; + } + + mepid = skb_put(skb, sizeof(*mepid)); + *mepid = cpu_to_be16((u16)mep->config.mepid); + + maid = skb_put(skb, sizeof(*maid)); + memcpy(maid->data, mep->cc_config.exp_maid.data, sizeof(maid->data)); + + /* ITU reserved (CFM_CCM_ITU_RESERVED_SIZE octets) */ + itu_reserved = skb_put(skb, CFM_CCM_ITU_RESERVED_SIZE); + memset(itu_reserved, 0, CFM_CCM_ITU_RESERVED_SIZE); + + /* General CFM TLV format: + * TLV type: one byte + * TLV value length: two bytes + * TLV value: 'TLV value length' bytes + */ + + /* Port status TLV. The value length is 1. Total of 4 bytes. */ + if (tx_info->port_tlv) { + status_tlv = skb_put(skb, sizeof(*status_tlv)); + *status_tlv = cpu_to_be32((CFM_PORT_STATUS_TLV_TYPE << 24) | + (1 << 8) | /* Value length */ + (tx_info->port_tlv_value & 0xFF)); + } + + /* Interface status TLV. The value length is 1. Total of 4 bytes. */ + if (tx_info->if_tlv) { + status_tlv = skb_put(skb, sizeof(*status_tlv)); + *status_tlv = cpu_to_be32((CFM_IF_STATUS_TLV_TYPE << 24) | + (1 << 8) | /* Value length */ + (tx_info->if_tlv_value & 0xFF)); + } + + /* End TLV */ + e_tlv = skb_put(skb, sizeof(*e_tlv)); + *e_tlv = CFM_ENDE_TLV_TYPE; + + return skb; +} + +static void ccm_frame_tx(struct sk_buff *skb) +{ + skb_reset_network_header(skb); + dev_queue_xmit(skb); +} + +/* This function is called with the configured CC 'expected_interval' + * in order to drive CCM transmission when enabled. + */ +static void ccm_tx_work_expired(struct work_struct *work) +{ + struct delayed_work *del_work; + struct br_cfm_mep *mep; + struct sk_buff *skb; + u32 interval_us; + + del_work = to_delayed_work(work); + mep = container_of(del_work, struct br_cfm_mep, ccm_tx_dwork); + + if (time_before_eq(mep->ccm_tx_end, jiffies)) { + /* Transmission period has ended */ + mep->cc_ccm_tx_info.period = 0; + return; + } + + skb = ccm_frame_build(mep, &mep->cc_ccm_tx_info); + if (skb) + ccm_frame_tx(skb); + + interval_us = interval_to_us(mep->cc_config.exp_interval); + queue_delayed_work(system_wq, &mep->ccm_tx_dwork, + usecs_to_jiffies(interval_us)); +} + +/* This function is called with 1/4 of the configured CC 'expected_interval' + * in order to detect CCM defect after 3.25 intervals. + */ +static void ccm_rx_work_expired(struct work_struct *work) +{ + struct br_cfm_peer_mep *peer_mep; + struct net_bridge_port *b_port; + struct delayed_work *del_work; + + del_work = to_delayed_work(work); + peer_mep = container_of(del_work, struct br_cfm_peer_mep, ccm_rx_dwork); + + /* After 13 counts (4 * 3.25) the 3.25 intervals have expired */ + if (peer_mep->ccm_rx_count_miss < 13) { + /* 3.25 intervals have NOT expired without CCM reception */ + peer_mep->ccm_rx_count_miss++; + + /* Start timer again */ + ccm_rx_timer_start(peer_mep); + } else { + /* 3.25 intervals have expired without CCM reception. + * CCM defect detected + */ + peer_mep->cc_status.ccm_defect = true; + + /* Change in CCM defect status - notify */ + rcu_read_lock(); + b_port = rcu_dereference(peer_mep->mep->b_port); + if (b_port) + br_cfm_notify(RTM_NEWLINK, b_port); + rcu_read_unlock(); + } +} + +static u32 ccm_tlv_extract(struct sk_buff *skb, u32 index, + struct br_cfm_peer_mep *peer_mep) +{ + __be32 *s_tlv; + __be32 _s_tlv; + u32 h_s_tlv; + u8 *e_tlv; + u8 _e_tlv; + + e_tlv = skb_header_pointer(skb, index, sizeof(_e_tlv), &_e_tlv); + if (!e_tlv) + return 0; + + /* TLV is present - get the status TLV */ + s_tlv = skb_header_pointer(skb, + index, + sizeof(_s_tlv), &_s_tlv); + if (!s_tlv) + return 0; + + h_s_tlv = ntohl(*s_tlv); + if ((h_s_tlv >> 24) == CFM_IF_STATUS_TLV_TYPE) { + /* Interface status TLV */ + peer_mep->cc_status.tlv_seen = true; + peer_mep->cc_status.if_tlv_value = (h_s_tlv & 0xFF); + } + + if ((h_s_tlv >> 24) == CFM_PORT_STATUS_TLV_TYPE) { + /* Port status TLV */ + peer_mep->cc_status.tlv_seen = true; + peer_mep->cc_status.port_tlv_value = (h_s_tlv & 0xFF); + } + + /* The Sender ID TLV is not handled */ + /* The Organization-Specific TLV is not handled */ + + /* Return the length of this tlv.
+ * This is the length of the value field plus 3 bytes for size of type + * field and length field + */ + return ((h_s_tlv >> 8) & 0xFFFF) + 3; +} + +/* note: already called with rcu_read_lock */ +static int br_cfm_frame_rx(struct net_bridge_port *port, struct sk_buff *skb) +{ + u32 mdlevel, interval, size, index, max; + const struct br_cfm_common_hdr *hdr; + struct br_cfm_peer_mep *peer_mep; + const struct br_cfm_maid *maid; + struct br_cfm_common_hdr _hdr; + struct br_cfm_maid _maid; + struct br_cfm_mep *mep; + struct net_bridge *br; + __be32 *snumber; + __be32 _snumber; + __be16 *mepid; + __be16 _mepid; + + if (port->state == BR_STATE_DISABLED) + return 0; + + hdr = skb_header_pointer(skb, 0, sizeof(_hdr), &_hdr); + if (!hdr) + return 1; + + br = port->br; + mep = br_mep_find_ifindex(br, port->dev->ifindex); + if (unlikely(!mep)) + /* No MEP on this port - must be forwarded */ + return 0; + + mdlevel = hdr->mdlevel_version >> 5; + if (mdlevel > mep->config.mdlevel) + /* The level is above this MEP level - must be forwarded */ + return 0; + + if ((hdr->mdlevel_version & 0x1F) != 0) { + /* Invalid version */ + mep->status.version_unexp_seen = true; + return 1; + } + + if (mdlevel < mep->config.mdlevel) { + /* The level is below this MEP level */ + mep->status.rx_level_low_seen = true; + return 1; + } + + if (hdr->opcode == BR_CFM_OPCODE_CCM) { + /* CCM PDU received. */ + /* MA ID is after common header + sequence number + MEP ID */ + maid = skb_header_pointer(skb, + CFM_CCM_PDU_MAID_OFFSET, + sizeof(_maid), &_maid); + if (!maid) + return 1; + if (memcmp(maid->data, mep->cc_config.exp_maid.data, + sizeof(maid->data))) + /* MA ID not as expected */ + return 1; + + /* MEP ID is after common header + sequence number */ + mepid = skb_header_pointer(skb, + CFM_CCM_PDU_MEPID_OFFSET, + sizeof(_mepid), &_mepid); + if (!mepid) + return 1; + peer_mep = br_peer_mep_find(mep, (u32)ntohs(*mepid)); + if (!peer_mep) + return 1; + + /* Interval is in common header flags */ + interval = hdr->flags & 0x07; + if (mep->cc_config.exp_interval != pdu_to_interval(interval)) + /* Interval not as expected */ + return 1; + + /* A valid CCM frame is received */ + if (peer_mep->cc_status.ccm_defect) { + peer_mep->cc_status.ccm_defect = false; + + /* Change in CCM defect status - notify */ + br_cfm_notify(RTM_NEWLINK, port); + + /* Start CCM RX timer */ + ccm_rx_timer_start(peer_mep); + } + + peer_mep->cc_status.seen = true; + peer_mep->ccm_rx_count_miss = 0; + + /* RDI is in common header flags */ + peer_mep->cc_status.rdi = (hdr->flags & 0x80) ? 
true : false; + + /* Sequence number is after common header */ + snumber = skb_header_pointer(skb, + CFM_CCM_PDU_SEQNR_OFFSET, + sizeof(_snumber), &_snumber); + if (!snumber) + return 1; + if (ntohl(*snumber) != (mep->ccm_rx_snumber + 1)) + /* Unexpected sequence number */ + peer_mep->cc_status.seq_unexp_seen = true; + + mep->ccm_rx_snumber = ntohl(*snumber); + + /* TLV end is after common header + sequence number + MEP ID + + * MA ID + ITU reserved + */ + index = CFM_CCM_PDU_TLV_OFFSET; + max = 0; + do { /* Handle all TLVs */ + size = ccm_tlv_extract(skb, index, peer_mep); + index += size; + max += 1; + } while (size != 0 && max < 4); /* Max four TLVs possible */ + + return 1; + } + + mep->status.opcode_unexp_seen = true; + + return 1; +} + +static struct br_frame_type cfm_frame_type __read_mostly = { + .type = cpu_to_be16(ETH_P_CFM), + .frame_handler = br_cfm_frame_rx, +}; + +int br_cfm_mep_create(struct net_bridge *br, + const u32 instance, + struct br_cfm_mep_create *const create, + struct netlink_ext_ack *extack) +{ + struct net_bridge_port *p; + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + if (create->domain == BR_CFM_VLAN) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN domain not supported"); + return -EINVAL; + } + if (create->domain != BR_CFM_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid domain value"); + return -EINVAL; + } + if (create->direction == BR_CFM_MEP_DIRECTION_UP) { + NL_SET_ERR_MSG_MOD(extack, + "Up-MEP not supported"); + return -EINVAL; + } + if (create->direction != BR_CFM_MEP_DIRECTION_DOWN) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid direction value"); + return -EINVAL; + } + p = br_mep_get_port(br, create->ifindex); + if (!p) { + NL_SET_ERR_MSG_MOD(extack, + "Port is not related to bridge"); + return -EINVAL; + } + mep = br_mep_find(br, instance); + if (mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance already exists"); + return -EEXIST; + } + + /* In PORT domain only one instance can be created per port */ + if (create->domain == BR_CFM_PORT) { + mep = br_mep_find_ifindex(br, create->ifindex); + if (mep) { + NL_SET_ERR_MSG_MOD(extack, + "Only one Port MEP on a port allowed"); + return -EINVAL; + } + } + + mep = kzalloc(sizeof(*mep), GFP_KERNEL); + if (!mep) + return -ENOMEM; + + mep->create = *create; + mep->instance = instance; + rcu_assign_pointer(mep->b_port, p); + + INIT_HLIST_HEAD(&mep->peer_mep_list); + INIT_DELAYED_WORK(&mep->ccm_tx_dwork, ccm_tx_work_expired); + + if (hlist_empty(&br->mep_list)) + br_add_frame(br, &cfm_frame_type); + + hlist_add_tail_rcu(&mep->head, &br->mep_list); + + return 0; +} + +static void mep_delete_implementation(struct net_bridge *br, + struct br_cfm_mep *mep) +{ + struct br_cfm_peer_mep *peer_mep; + struct hlist_node *n_store; + + ASSERT_RTNL(); + + /* Empty and free peer MEP list */ + hlist_for_each_entry_safe(peer_mep, n_store, &mep->peer_mep_list, head) { + cancel_delayed_work_sync(&peer_mep->ccm_rx_dwork); + hlist_del_rcu(&peer_mep->head); + kfree_rcu(peer_mep, rcu); + } + + cancel_delayed_work_sync(&mep->ccm_tx_dwork); + + RCU_INIT_POINTER(mep->b_port, NULL); + hlist_del_rcu(&mep->head); + kfree_rcu(mep, rcu); + + if (hlist_empty(&br->mep_list)) + br_del_frame(br, &cfm_frame_type); +} + +int br_cfm_mep_delete(struct net_bridge *br, + const u32 instance, + struct netlink_ext_ack *extack) +{ + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + mep_delete_implementation(br, mep); + + return 0; +} + 
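The CCM receive machinery above is driven by two constants: ccm_rx_timer_start() re-arms the rx work item at one quarter of the expected interval, and ccm_rx_work_expired() declares a defect after 13 missed ticks, i.e. 3.25 expected intervals without a CCM PDU. A stand-alone arithmetic sketch of that relationship (plain userspace C, not kernel code; the 100 ms interval is just an example value, as for BR_CFM_CCM_INTERVAL_100_MS):

/* Stand-alone sketch, NOT kernel code. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int exp_interval_us = 100 * 1000;   /* expected CCM interval */
	unsigned int tick_us = exp_interval_us / 4;  /* ccm_rx_dwork period   */
	unsigned int misses = 13;                    /* defect threshold      */

	/* 13 ticks of interval/4 == 3.25 expected intervals */
	assert(4 * misses * tick_us == 13 * exp_interval_us);
	printf("CCM defect after %u us (%.2f intervals)\n",
	       misses * tick_us,
	       (double)(misses * tick_us) / exp_interval_us);
	return 0;
}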
+int br_cfm_mep_config_set(struct net_bridge *br, + const u32 instance, + const struct br_cfm_mep_config *const config, + struct netlink_ext_ack *extack) +{ + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + mep->config = *config; + + return 0; +} + +int br_cfm_cc_config_set(struct net_bridge *br, + const u32 instance, + const struct br_cfm_cc_config *const config, + struct netlink_ext_ack *extack) +{ + struct br_cfm_peer_mep *peer_mep; + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + /* Check for no change in configuration */ + if (memcmp(config, &mep->cc_config, sizeof(*config)) == 0) + return 0; + + if (config->enable && !mep->cc_config.enable) + /* CC is enabled */ + hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) + cc_peer_enable(peer_mep); + + if (!config->enable && mep->cc_config.enable) + /* CC is disabled */ + hlist_for_each_entry(peer_mep, &mep->peer_mep_list, head) + cc_peer_disable(peer_mep); + + mep->cc_config = *config; + mep->ccm_rx_snumber = 0; + mep->ccm_tx_snumber = 1; + + return 0; +} + +int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance, + u32 mepid, + struct netlink_ext_ack *extack) +{ + struct br_cfm_peer_mep *peer_mep; + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + peer_mep = br_peer_mep_find(mep, mepid); + if (peer_mep) { + NL_SET_ERR_MSG_MOD(extack, + "Peer MEP-ID already exists"); + return -EEXIST; + } + + peer_mep = kzalloc(sizeof(*peer_mep), GFP_KERNEL); + if (!peer_mep) + return -ENOMEM; + + peer_mep->mepid = mepid; + peer_mep->mep = mep; + INIT_DELAYED_WORK(&peer_mep->ccm_rx_dwork, ccm_rx_work_expired); + + if (mep->cc_config.enable) + cc_peer_enable(peer_mep); + + hlist_add_tail_rcu(&peer_mep->head, &mep->peer_mep_list); + + return 0; +} + +int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, + u32 mepid, + struct netlink_ext_ack *extack) +{ + struct br_cfm_peer_mep *peer_mep; + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + peer_mep = br_peer_mep_find(mep, mepid); + if (!peer_mep) { + NL_SET_ERR_MSG_MOD(extack, + "Peer MEP-ID does not exists"); + return -ENOENT; + } + + cc_peer_disable(peer_mep); + + hlist_del_rcu(&peer_mep->head); + kfree_rcu(peer_mep, rcu); + + return 0; +} + +int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance, + const bool rdi, struct netlink_ext_ack *extack) +{ + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + mep->rdi = rdi; + + return 0; +} + +int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance, + const struct br_cfm_cc_ccm_tx_info *const tx_info, + struct netlink_ext_ack *extack) +{ + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + mep = br_mep_find(br, instance); + if (!mep) { + NL_SET_ERR_MSG_MOD(extack, + "MEP instance does not exists"); + return -ENOENT; + } + + if (memcmp(tx_info, &mep->cc_ccm_tx_info, sizeof(*tx_info)) == 0) { + /* No change in tx_info. 
*/ + if (mep->cc_ccm_tx_info.period == 0) + /* Transmission is not enabled - just return */ + return 0; + + /* Transmission is ongoing, the end time is recalculated */ + mep->ccm_tx_end = jiffies + + usecs_to_jiffies(tx_info->period * 1000000); + return 0; + } + + if (tx_info->period == 0 && mep->cc_ccm_tx_info.period == 0) + /* Some change in info and transmission is not ongoing */ + goto save; + + if (tx_info->period != 0 && mep->cc_ccm_tx_info.period != 0) { + /* Some change in info and transmission is ongoing + * The end time is recalculated + */ + mep->ccm_tx_end = jiffies + + usecs_to_jiffies(tx_info->period * 1000000); + + goto save; + } + + if (tx_info->period == 0 && mep->cc_ccm_tx_info.period != 0) { + cancel_delayed_work_sync(&mep->ccm_tx_dwork); + goto save; + } + + /* Start delayed work to transmit CCM frames. It is done with zero delay + * to send first frame immediately + */ + mep->ccm_tx_end = jiffies + usecs_to_jiffies(tx_info->period * 1000000); + queue_delayed_work(system_wq, &mep->ccm_tx_dwork, 0); + +save: + mep->cc_ccm_tx_info = *tx_info; + + return 0; +} + +int br_cfm_mep_count(struct net_bridge *br, u32 *count) +{ + struct br_cfm_mep *mep; + + *count = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mep, &br->mep_list, head) + *count += 1; + rcu_read_unlock(); + + return 0; +} + +int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) +{ + struct br_cfm_peer_mep *peer_mep; + struct br_cfm_mep *mep; + + *count = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mep, &br->mep_list, head) + hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) + *count += 1; + rcu_read_unlock(); + + return 0; +} + +bool br_cfm_created(struct net_bridge *br) +{ + return !hlist_empty(&br->mep_list); +} + +/* Deletes the CFM instances on a specific bridge port + */ +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *port) +{ + struct hlist_node *n_store; + struct br_cfm_mep *mep; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(mep, n_store, &br->mep_list, head) + if (mep->create.ifindex == port->dev->ifindex) + mep_delete_implementation(br, mep); +} diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c new file mode 100644 index 000000000000..5c4c369f8536 --- /dev/null +++ b/net/bridge/br_cfm_netlink.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <net/genetlink.h> + +#include "br_private.h" +#include "br_private_cfm.h" + +static const struct nla_policy +br_cfm_mep_create_policy[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1] = { + [IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +br_cfm_mep_delete_policy[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1] = { + [IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +br_cfm_mep_config_policy[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1] = { + [IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = NLA_POLICY_ETH_ADDR, + [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL] = NLA_POLICY_MAX(NLA_U32, 7), + [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), +}; + +static const struct 
nla_policy +br_cfm_cc_config_policy[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1] = { + [IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID] = { + .type = NLA_BINARY, .len = CFM_MAID_LENGTH }, +}; + +static const struct nla_policy +br_cfm_cc_peer_mep_policy[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1] = { + [IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_PEER_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), +}; + +static const struct nla_policy +br_cfm_cc_rdi_policy[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1] = { + [IFLA_BRIDGE_CFM_CC_RDI_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_CC_RDI_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_RDI_RDI] = { .type = NLA_U32 }, +}; + +static const struct nla_policy +br_cfm_cc_ccm_tx_policy[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1] = { + [IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC] = NLA_POLICY_ETH_ADDR, + [IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE] = { .type = NLA_U8 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV] = { .type = NLA_U32 }, + [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = { .type = NLA_U8 }, +}; + +static const struct nla_policy +br_cfm_policy[IFLA_BRIDGE_CFM_MAX + 1] = { + [IFLA_BRIDGE_CFM_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_CFM_MEP_CREATE] = + NLA_POLICY_NESTED(br_cfm_mep_create_policy), + [IFLA_BRIDGE_CFM_MEP_DELETE] = + NLA_POLICY_NESTED(br_cfm_mep_delete_policy), + [IFLA_BRIDGE_CFM_MEP_CONFIG] = + NLA_POLICY_NESTED(br_cfm_mep_config_policy), + [IFLA_BRIDGE_CFM_CC_CONFIG] = + NLA_POLICY_NESTED(br_cfm_cc_config_policy), + [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD] = + NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), + [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE] = + NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), + [IFLA_BRIDGE_CFM_CC_RDI] = + NLA_POLICY_NESTED(br_cfm_cc_rdi_policy), + [IFLA_BRIDGE_CFM_CC_CCM_TX] = + NLA_POLICY_NESTED(br_cfm_cc_ccm_tx_policy), +}; + +static int br_mep_create_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1]; + struct br_cfm_mep_create create; + u32 instance; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CREATE_MAX, attr, + br_cfm_mep_create_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]) { + NL_SET_ERR_MSG_MOD(extack, "Missing DOMAIN attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]) { + NL_SET_ERR_MSG_MOD(extack, "Missing DIRECTION attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]) { + NL_SET_ERR_MSG_MOD(extack, "Missing IFINDEX attribute"); + return -EINVAL; + } + + memset(&create, 0, sizeof(create)); + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]); + create.domain = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]); + create.direction = 
nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]); + create.ifindex = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]); + + return br_cfm_mep_create(br, instance, &create, extack); +} + +static int br_mep_delete_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1]; + u32 instance; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_DELETE_MAX, attr, + br_cfm_mep_delete_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing INSTANCE attribute"); + return -EINVAL; + } + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]); + + return br_cfm_mep_delete(br, instance, extack); +} + +static int br_mep_config_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1]; + struct br_cfm_mep_config config; + u32 instance; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, attr, + br_cfm_mep_config_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC]) { + NL_SET_ERR_MSG_MOD(extack, "Missing UNICAST_MAC attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]) { + NL_SET_ERR_MSG_MOD(extack, "Missing MDLEVEL attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing MEPID attribute"); + return -EINVAL; + } + + memset(&config, 0, sizeof(config)); + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]); + nla_memcpy(&config.unicast_mac.addr, + tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC], + sizeof(config.unicast_mac.addr)); + config.mdlevel = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]); + config.mepid = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]); + + return br_cfm_mep_config_set(br, instance, &config, extack); +} + +static int br_cc_config_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1]; + struct br_cfm_cc_config config; + u32 instance; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CONFIG_MAX, attr, + br_cfm_cc_config_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing ENABLE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INTERVAL attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing MAID attribute"); + return -EINVAL; + } + + memset(&config, 0, sizeof(config)); + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]); + config.enable = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]); + config.exp_interval = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]); + nla_memcpy(&config.exp_maid.data, tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID], + sizeof(config.exp_maid.data)); + + return br_cfm_cc_config_set(br, instance, &config, extack); +} + +static int br_cc_peer_mep_add_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + 
struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; + u32 instance, peer_mep_id; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, + br_cfm_cc_peer_mep_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); + return -EINVAL; + } + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); + peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); + + return br_cfm_cc_peer_mep_add(br, instance, peer_mep_id, extack); +} + +static int br_cc_peer_mep_remove_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; + u32 instance, peer_mep_id; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, + br_cfm_cc_peer_mep_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); + return -EINVAL; + } + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); + peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); + + return br_cfm_cc_peer_mep_remove(br, instance, peer_mep_id, extack); +} + +static int br_cc_rdi_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1]; + u32 instance, rdi; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_RDI_MAX, attr, + br_cfm_cc_rdi_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]) { + NL_SET_ERR_MSG_MOD(extack, "Missing RDI attribute"); + return -EINVAL; + } + + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); + rdi = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]); + + return br_cfm_cc_rdi_set(br, instance, rdi, extack); +} + +static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1]; + struct br_cfm_cc_ccm_tx_info tx_info; + u32 instance; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, attr, + br_cfm_cc_ccm_tx_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC]) { + NL_SET_ERR_MSG_MOD(extack, "Missing DMAC attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing SEQ_NO_UPDATE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]) { + NL_SET_ERR_MSG_MOD(extack, "Missing PERIOD attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]) { + NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV_VALUE attribute"); + return -EINVAL; + } + if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]) { + NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV attribute"); + return 
-EINVAL;
+	}
+	if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV_VALUE attribute");
+		return -EINVAL;
+	}
+
+	memset(&tx_info, 0, sizeof(tx_info));
+
+	instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]);
+	nla_memcpy(&tx_info.dmac.addr,
+		   tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC],
+		   sizeof(tx_info.dmac.addr));
+	tx_info.seq_no_update = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]);
+	tx_info.period = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]);
+	tx_info.if_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]);
+	tx_info.if_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]);
+	tx_info.port_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]);
+	tx_info.port_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]);
+
+	return br_cfm_cc_ccm_tx(br, instance, &tx_info, extack);
+}
+
+int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
+		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_CFM_MAX + 1];
+	int err;
+
+	/* When this function is called for a port the br pointer is
+	 * invalid, therefore set br to point to the port's bridge
+	 */
+	if (p)
+		br = p->br;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MAX, attr,
+			       br_cfm_policy, extack);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_CFM_MEP_CREATE]) {
+		err = br_mep_create_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CREATE],
+					  extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_MEP_DELETE]) {
+		err = br_mep_delete_parse(br, tb[IFLA_BRIDGE_CFM_MEP_DELETE],
+					  extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_MEP_CONFIG]) {
+		err = br_mep_config_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CONFIG],
+					  extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_CC_CONFIG]) {
+		err = br_cc_config_parse(br, tb[IFLA_BRIDGE_CFM_CC_CONFIG],
+					 extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD]) {
+		err = br_cc_peer_mep_add_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD],
+					       extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE]) {
+		err = br_cc_peer_mep_remove_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE],
+						  extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_CC_RDI]) {
+		err = br_cc_rdi_parse(br, tb[IFLA_BRIDGE_CFM_CC_RDI],
+				      extack);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_CFM_CC_CCM_TX]) {
+		err = br_cc_ccm_tx_parse(br, tb[IFLA_BRIDGE_CFM_CC_CCM_TX],
+					 extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+	struct br_cfm_peer_mep *peer_mep;
+	struct br_cfm_mep *mep;
+	struct nlattr *tb;
+
+	hlist_for_each_entry_rcu(mep, &br->mep_list, head) {
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INFO);
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE,
+				mep->instance))
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN,
+				mep->create.domain))
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION,
+				mep->create.direction))
+			goto nla_put_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX,
+				mep->create.ifindex))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, tb);
+
+		tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INFO);
+
+		if (!tb)
+			goto nla_info_failure;
+
+		if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE,
+				mep->instance))
+			goto nla_put_failure;
+
+		if (nla_put(skb, 
IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, + sizeof(mep->config.unicast_mac.addr), + mep->config.unicast_mac.addr)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, + mep->config.mdlevel)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, + mep->config.mepid)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + + tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INFO); + + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, + mep->cc_config.enable)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, + mep->cc_config.exp_interval)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, + sizeof(mep->cc_config.exp_maid.data), + mep->cc_config.exp_maid.data)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + + tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_RDI_INFO); + + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_RDI, + mep->rdi)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + + tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INFO); + + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, + sizeof(mep->cc_ccm_tx_info.dmac), + mep->cc_ccm_tx_info.dmac.addr)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, + mep->cc_ccm_tx_info.seq_no_update)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, + mep->cc_ccm_tx_info.period)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, + mep->cc_ccm_tx_info.if_tlv)) + goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, + mep->cc_ccm_tx_info.if_tlv_value)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, + mep->cc_ccm_tx_info.port_tlv)) + goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, + mep->cc_ccm_tx_info.port_tlv_value)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + + hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { + tb = nla_nest_start(skb, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO); + + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEPID, + peer_mep->mepid)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + } + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tb); + +nla_info_failure: + return -EMSGSIZE; +} + +int br_cfm_status_fill_info(struct sk_buff *skb, + struct net_bridge *br, + bool getlink) +{ + struct br_cfm_peer_mep *peer_mep; + struct br_cfm_mep *mep; + struct nlattr *tb; + + hlist_for_each_entry_rcu(mep, &br->mep_list, head) { + tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INFO); + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, + mep->status.opcode_unexp_seen)) + goto nla_put_failure; + + if (nla_put_u32(skb, + 
IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, + mep->status.version_unexp_seen)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, + mep->status.rx_level_low_seen)) + goto nla_put_failure; + + /* Only clear if this is a GETLINK */ + if (getlink) { + /* Clear all 'seen' indications */ + mep->status.opcode_unexp_seen = false; + mep->status.version_unexp_seen = false; + mep->status.rx_level_low_seen = false; + } + + nla_nest_end(skb, tb); + + hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { + tb = nla_nest_start(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO); + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, + mep->instance)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, + peer_mep->mepid)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, + peer_mep->cc_status.ccm_defect)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, + peer_mep->cc_status.rdi)) + goto nla_put_failure; + + if (nla_put_u8(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, + peer_mep->cc_status.port_tlv_value)) + goto nla_put_failure; + + if (nla_put_u8(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, + peer_mep->cc_status.if_tlv_value)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, + peer_mep->cc_status.seen)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, + peer_mep->cc_status.tlv_seen)) + goto nla_put_failure; + + if (nla_put_u32(skb, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, + peer_mep->cc_status.seq_unexp_seen)) + goto nla_put_failure; + + if (getlink) { /* Only clear if this is a GETLINK */ + /* Clear all 'seen' indications */ + peer_mep->cc_status.seen = false; + peer_mep->cc_status.tlv_seen = false; + peer_mep->cc_status.seq_unexp_seen = false; + } + + nla_nest_end(skb, tb); + } + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tb); + +nla_info_failure: + return -EMSGSIZE; +} diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7730c8f3cb53..77bcc84875af 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -93,7 +93,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb))) + br_multicast_querier_exists(br, eth_hdr(skb), mdst)) br_multicast_flood(mdst, skb, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); @@ -455,8 +455,12 @@ void br_dev_setup(struct net_device *dev) spin_lock_init(&br->lock); INIT_LIST_HEAD(&br->port_list); INIT_HLIST_HEAD(&br->fdb_list); + INIT_HLIST_HEAD(&br->frame_type_list); #if IS_ENABLED(CONFIG_BRIDGE_MRP) - INIT_LIST_HEAD(&br->mrp_list); + INIT_HLIST_HEAD(&br->mrp_list); +#endif +#if IS_ENABLED(CONFIG_BRIDGE_CFM) + INIT_HLIST_HEAD(&br->mep_list); #endif spin_lock_init(&br->hash_lock); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a0e9a7937412..f7d2f472ae24 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -334,6 +334,7 @@ static void del_nbp(struct net_bridge_port *p) spin_unlock_bh(&br->lock); br_mrp_port_del(br, p); + br_cfm_port_del(br, p); br_ifinfo_notify(RTM_DELLINK, NULL, p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 59a318b9f646..21808985f268 100644 --- a/net/bridge/br_input.c +++ 
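
The interface above is driven from userspace with an RTM_SETLINK on the bridge device, the CFM operations nested under IFLA_AF_SPEC (see the IFLA_BRIDGE_CFM case added to br_afspec() further down). A minimal sketch of a CC_CCM_TX request built with libmnl follows; socket setup, includes and the message header are omitted, the DMAC is only an example value, and every attribute is mandatory because br_cc_ccm_tx_parse() rejects a request missing any of them:

	/* illustrative userspace sketch, not part of this patch */
	static const unsigned char ccm_dmac[ETH_ALEN] = {
		0x01, 0x80, 0xc2, 0x00, 0x00, 0x30	/* example group MAC */
	};
	struct nlattr *af, *cfm, *tx;

	af = mnl_attr_nest_start(nlh, IFLA_AF_SPEC);
	cfm = mnl_attr_nest_start(nlh, IFLA_BRIDGE_CFM);
	tx = mnl_attr_nest_start(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX);
	mnl_attr_put_u32(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, 1);
	mnl_attr_put(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, ETH_ALEN, ccm_dmac);
	mnl_attr_put_u32(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, 1);
	mnl_attr_put_u32(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, 10);	/* seconds */
	mnl_attr_put_u32(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, 0);
	mnl_attr_put_u8(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, 0);
	mnl_attr_put_u32(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, 0);
	mnl_attr_put_u8(nlh, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, 0);
	mnl_attr_nest_end(nlh, tx);
	mnl_attr_nest_end(nlh, cfm);
	mnl_attr_nest_end(nlh, af);

The period acts as a keep-alive window: br_cfm_cc_ccm_tx() transmits CCM frames for PERIOD seconds and stops unless the request is repeated before the window expires, while a period of zero cancels an ongoing transmission.
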
b/net/bridge/br_input.c @@ -134,7 +134,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb case BR_PKT_MULTICAST: mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && - br_multicast_querier_exists(br, eth_hdr(skb))) { + br_multicast_querier_exists(br, eth_hdr(skb), mdst)) { if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; @@ -254,6 +254,21 @@ frame_finish: return RX_HANDLER_CONSUMED; } +/* Return 0 if the frame was not processed otherwise 1 + * note: already called with rcu_read_lock + */ +static int br_process_frame_type(struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct br_frame_type *tmp; + + hlist_for_each_entry_rcu(tmp, &p->br->frame_type_list, list) + if (unlikely(tmp->type == skb->protocol)) + return tmp->frame_handler(p, skb); + + return 0; +} + /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -343,7 +358,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } } - if (unlikely(br_mrp_process(p, skb))) + if (unlikely(br_process_frame_type(p, skb))) return RX_HANDLER_PASS; forward: @@ -380,3 +395,19 @@ rx_handler_func_t *br_get_rx_handler(const struct net_device *dev) return br_handle_frame; } + +void br_add_frame(struct net_bridge *br, struct br_frame_type *ft) +{ + hlist_add_head_rcu(&ft->list, &br->frame_type_list); +} + +void br_del_frame(struct net_bridge *br, struct br_frame_type *ft) +{ + struct br_frame_type *tmp; + + hlist_for_each_entry(tmp, &br->frame_type_list, list) + if (ft == tmp) { + hlist_del_rcu(&ft->list); + return; + } +} diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e15bab19a012..8846c5bcd075 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -87,6 +87,8 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip, ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]); break; #endif + default: + ether_addr_copy(ip->dst.mac_addr, entry->addr.u.mac_addr); } } @@ -174,9 +176,11 @@ static int __mdb_fill_info(struct sk_buff *skb, if (mp->addr.proto == htons(ETH_P_IP)) e.addr.u.ip4 = mp->addr.dst.ip4; #if IS_ENABLED(CONFIG_IPV6) - if (mp->addr.proto == htons(ETH_P_IPV6)) + else if (mp->addr.proto == htons(ETH_P_IPV6)) e.addr.u.ip6 = mp->addr.dst.ip6; #endif + else + ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); e.addr.proto = mp->addr.proto; nest_ent = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); @@ -210,6 +214,8 @@ static int __mdb_fill_info(struct sk_buff *skb, } break; #endif + default: + ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); } if (p) { if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol)) @@ -562,9 +568,12 @@ void br_mdb_notify(struct net_device *dev, if (mp->addr.proto == htons(ETH_P_IP)) ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr); #if IS_ENABLED(CONFIG_IPV6) - else + else if (mp->addr.proto == htons(ETH_P_IPV6)) ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr); #endif + else + ether_addr_copy(mdb.addr, mp->addr.dst.mac_addr); + mdb.obj.orig_dev = pg->key.port->dev; switch (type) { case RTM_NEWMDB: @@ -693,6 +702,12 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry, return false; } #endif + } else if (entry->addr.proto == 0) { + /* L2 mdb */ + if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { + NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast"); + return false; + } } else { NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol"); return false; @@ -831,6 +846,7 @@ static int 
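
With __mdb_entry_to_br_ip() and is_valid_mdb_entry() extended above, an MDB group may now be a plain L2 multicast MAC: proto == 0 selects the L2 branch and the address lives in addr.u.mac_addr. A sketch of the shape such an entry takes; the ifindex is a placeholder and the group MAC is an arbitrary example, while the permanent-state requirement is enforced in br_mdb_add_group() just below:

	/* illustrative sketch, not part of this patch */
	static const unsigned char l2_grp[ETH_ALEN] = {
		0x01, 0x21, 0x6c, 0x00, 0x00, 0x42	/* any multicast MAC */
	};
	struct br_mdb_entry entry = {};

	entry.ifindex = port_ifindex;	/* assumed: destination port */
	entry.state = MDB_PERMANENT;	/* only permanent L2 entries allowed */
	entry.addr.proto = 0;		/* 0 marks an L2 (non-IP) group */
	memcpy(entry.addr.u.mac_addr, l2_grp, ETH_ALEN);
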
br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group __rcu **pp; struct br_ip group, star_group; unsigned long now = jiffies; + unsigned char flags = 0; u8 filter_mode; int err; @@ -849,6 +865,11 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, } } + if (br_group_is_l2(&group) && entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); + return -EINVAL; + } + mp = br_mdb_ip_get(br, &group); if (!mp) { mp = br_multicast_new_group(br, &group); @@ -884,7 +905,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, filter_mode = br_multicast_is_star_g(&group) ? MCAST_EXCLUDE : MCAST_INCLUDE; - p = br_multicast_new_port_group(port, &group, *pp, entry->state, NULL, + if (entry->state == MDB_PERMANENT) + flags |= MDB_PG_FLAGS_PERMANENT; + + p = br_multicast_new_port_group(port, &group, *pp, flags, NULL, filter_mode, RTPROT_STATIC); if (unlikely(!p)) { NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group"); diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index b36689e6e7cb..bb12fbf9aaf2 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -6,6 +6,13 @@ static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 }; static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x3 }; +static int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb); + +static struct br_frame_type mrp_frame_type __read_mostly = { + .type = cpu_to_be16(ETH_P_MRP), + .frame_handler = br_mrp_process, +}; + static bool br_mrp_is_ring_port(struct net_bridge_port *p_port, struct net_bridge_port *s_port, struct net_bridge_port *port) @@ -47,8 +54,8 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) struct br_mrp *res = NULL; struct br_mrp *mrp; - list_for_each_entry_rcu(mrp, &br->mrp_list, list, - lockdep_rtnl_is_held()) { + hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { if (mrp->ring_id == ring_id) { res = mrp; break; @@ -63,8 +70,8 @@ static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id) struct br_mrp *res = NULL; struct br_mrp *mrp; - list_for_each_entry_rcu(mrp, &br->mrp_list, list, - lockdep_rtnl_is_held()) { + hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { if (mrp->in_id == in_id) { res = mrp; break; @@ -78,8 +85,8 @@ static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) { struct br_mrp *mrp; - list_for_each_entry_rcu(mrp, &br->mrp_list, list, - lockdep_rtnl_is_held()) { + hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { struct net_bridge_port *p; p = rtnl_dereference(mrp->p_port); @@ -104,8 +111,8 @@ static struct br_mrp *br_mrp_find_port(struct net_bridge *br, struct br_mrp *res = NULL; struct br_mrp *mrp; - list_for_each_entry_rcu(mrp, &br->mrp_list, list, - lockdep_rtnl_is_held()) { + hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { if (rcu_access_pointer(mrp->p_port) == p || rcu_access_pointer(mrp->s_port) == p || rcu_access_pointer(mrp->i_port) == p) { @@ -443,8 +450,11 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) rcu_assign_pointer(mrp->i_port, NULL); } - list_del_rcu(&mrp->list); + hlist_del_rcu(&mrp->list); kfree_rcu(mrp, rcu); + + if (hlist_empty(&br->mrp_list)) + br_del_frame(br, &mrp_frame_type); } /* Adds a new MRP instance. 
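
br_mrp.c here is the first user of the generic frame-type hook added in br_input.c: it registers mrp_frame_type when the first MRP instance is created and unregisters it when the last one is deleted, so br_handle_frame() only walks a non-empty list. A hypothetical second consumer would follow the same pattern; ETH_P_CFM and the instance list name are assumptions chosen for illustration, and both call sites run under rtnl as the MRP sites do:

	/* illustrative sketch, not part of this patch */
	static int my_proto_rcv(struct net_bridge_port *p, struct sk_buff *skb)
	{
		/* return 1 once the frame is consumed, 0 for normal forwarding */
		return 0;
	}

	static struct br_frame_type my_frame_type __read_mostly = {
		.type = cpu_to_be16(ETH_P_CFM),	/* assumed EtherType */
		.frame_handler = my_proto_rcv,
	};

	/* on creation of the first protocol instance */
	if (hlist_empty(&br->my_instance_list))
		br_add_frame(br, &my_frame_type);

	/* on deletion of the last protocol instance */
	if (hlist_empty(&br->my_instance_list))
		br_del_frame(br, &my_frame_type);
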
@@ -493,9 +503,12 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) spin_unlock_bh(&br->lock); rcu_assign_pointer(mrp->s_port, p); + if (hlist_empty(&br->mrp_list)) + br_add_frame(br, &mrp_frame_type); + INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired); INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired); - list_add_tail_rcu(&mrp->list, &br->mrp_list); + hlist_add_tail_rcu(&mrp->list, &br->mrp_list); err = br_mrp_switchdev_add(br, mrp); if (err) @@ -1172,20 +1185,18 @@ no_forward: * normal forwarding. * note: already called with rcu_read_lock */ -int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) +static int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) { /* If there is no MRP instance do normal forwarding */ if (likely(!(p->flags & BR_MRP_AWARE))) goto out; - if (unlikely(skb->protocol == htons(ETH_P_MRP))) - return br_mrp_rcv(p, skb, p->dev); - + return br_mrp_rcv(p, skb, p->dev); out: return 0; } bool br_mrp_enabled(struct net_bridge *br) { - return !list_empty(&br->mrp_list); + return !hlist_empty(&br->mrp_list); } diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 2a2fdf3500c5..ce6f63c77cc0 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -453,7 +453,7 @@ int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) if (!mrp_tb) return -EMSGSIZE; - list_for_each_entry_rcu(mrp, &br->mrp_list, list) { + hlist_for_each_entry_rcu(mrp, &br->mrp_list, list) { struct net_bridge_port *p; tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eae898c3cff7..484820c223a3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -179,7 +179,8 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, break; #endif default: - return NULL; + ip.proto = 0; + ether_addr_copy(ip.dst.mac_addr, eth_hdr(skb)->h_dest); } return br_mdb_ip_get_rcu(br, &ip); @@ -1203,6 +1204,10 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) if (notify) br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB); } + + if (br_group_is_l2(&mp->addr)) + return; + mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); } @@ -1254,8 +1259,8 @@ __br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode, - RTPROT_KERNEL); + p = br_multicast_new_port_group(port, group, *pp, 0, src, + filter_mode, RTPROT_KERNEL); if (unlikely(!p)) { p = ERR_PTR(-ENOMEM); goto out; @@ -3690,7 +3695,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) memset(ð, 0, sizeof(eth)); eth.h_proto = htons(proto); - ret = br_multicast_querier_exists(br, ð); + ret = br_multicast_querier_exists(br, ð, NULL); unlock: rcu_read_unlock(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 92d64abffa87..49700ce0e919 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include "br_private.h" #include "br_private_stp.h" +#include "br_private_cfm.h" #include "br_private_tunnel.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, @@ -93,9 +94,11 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, { struct net_bridge_vlan_group *vg = NULL; struct net_bridge_port *p = NULL; - struct net_bridge *br; - int num_vlan_infos; + struct net_bridge *br = NULL; + u32 num_cfm_peer_mep_infos; + u32 num_cfm_mep_infos; size_t vinfo_sz 
= 0; + int num_vlan_infos; rcu_read_lock(); if (netif_is_bridge_port(dev)) { @@ -114,6 +117,49 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, /* Each VLAN is returned in bridge_vlan_info along with flags */ vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + if (!(filter_mask & RTEXT_FILTER_CFM_STATUS)) + return vinfo_sz; + + if (!br) + return vinfo_sz; + + /* CFM status info must be added */ + br_cfm_mep_count(br, &num_cfm_mep_infos); + br_cfm_peer_mep_count(br, &num_cfm_peer_mep_infos); + + vinfo_sz += nla_total_size(0); /* IFLA_BRIDGE_CFM */ + /* For each status struct the MEP instance (u32) is added */ + /* MEP instance (u32) + br_cfm_mep_status */ + vinfo_sz += num_cfm_mep_infos * + /*IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE */ + (nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN */ + + nla_total_size(sizeof(u32))); + /* MEP instance (u32) + br_cfm_cc_peer_status */ + vinfo_sz += num_cfm_peer_mep_infos * + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE */ + (nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE */ + + nla_total_size(sizeof(u8)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE */ + + nla_total_size(sizeof(u8)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN */ + + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN */ + + nla_total_size(sizeof(u32))); + return vinfo_sz; } @@ -377,7 +423,8 @@ nla_put_failure: static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, u32 pid, u32 seq, int event, unsigned int flags, - u32 filter_mask, const struct net_device *dev) + u32 filter_mask, const struct net_device *dev, + bool getlink) { u8 operstate = netif_running(dev) ? 
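
A worked example of the size accounting above, assuming the usual NLA_HDRLEN of 4 and 4-byte attribute alignment:

	/* nla_total_size(sizeof(u32)) == nla_total_size(sizeof(u8)) == 8
	 *   per MEP:      4 x u32          ->  4 * 8         =  32 bytes
	 *   per peer MEP: 7 x u32 + 2 x u8 ->  7 * 8 + 2 * 8 =  72 bytes
	 * Two MEPs with one peer each reserve 4 + 2 * 32 + 2 * 72 = 212 bytes,
	 * the leading 4 being nla_total_size(0) for the IFLA_BRIDGE_CFM nest.
	 */
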
dev->operstate : IF_OPER_DOWN; struct nlattr *af = NULL; @@ -426,7 +473,9 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (filter_mask & (RTEXT_FILTER_BRVLAN | RTEXT_FILTER_BRVLAN_COMPRESSED | - RTEXT_FILTER_MRP)) { + RTEXT_FILTER_MRP | + RTEXT_FILTER_CFM_CONFIG | + RTEXT_FILTER_CFM_STATUS)) { af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) goto nla_put_failure; @@ -475,6 +524,36 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } + if (filter_mask & (RTEXT_FILTER_CFM_CONFIG | RTEXT_FILTER_CFM_STATUS)) { + struct nlattr *cfm_nest = NULL; + int err; + + if (!br_cfm_created(br) || port) + goto done; + + cfm_nest = nla_nest_start(skb, IFLA_BRIDGE_CFM); + if (!cfm_nest) + goto nla_put_failure; + + if (filter_mask & RTEXT_FILTER_CFM_CONFIG) { + rcu_read_lock(); + err = br_cfm_config_fill_info(skb, br); + rcu_read_unlock(); + if (err) + goto nla_put_failure; + } + + if (filter_mask & RTEXT_FILTER_CFM_STATUS) { + rcu_read_lock(); + err = br_cfm_status_fill_info(skb, br, getlink); + rcu_read_unlock(); + if (err) + goto nla_put_failure; + } + + nla_nest_end(skb, cfm_nest); + } + done: if (af) nla_nest_end(skb, af); @@ -486,11 +565,9 @@ nla_put_failure: return -EMSGSIZE; } -/* Notify listeners of a change in bridge or port information */ -void br_ifinfo_notify(int event, const struct net_bridge *br, - const struct net_bridge_port *port) +void br_info_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port, u32 filter) { - u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; struct net_device *dev; struct sk_buff *skb; int err = -ENOBUFS; @@ -515,7 +592,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev, false); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -528,6 +605,15 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } +/* Notify listeners of a change in bridge or port information */ +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port) +{ + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + + return br_info_notify(event, br, port, filter); +} + /* * Dump information about all ports, in response to GETLINK */ @@ -538,11 +624,13 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) && - !(filter_mask & RTEXT_FILTER_MRP)) + !(filter_mask & RTEXT_FILTER_MRP) && + !(filter_mask & RTEXT_FILTER_CFM_CONFIG) && + !(filter_mask & RTEXT_FILTER_CFM_STATUS)) return 0; return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, - filter_mask, dev); + filter_mask, dev, true); } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, @@ -700,6 +788,11 @@ static int br_afspec(struct net_bridge *br, if (err) return err; break; + case IFLA_BRIDGE_CFM: + err = br_cfm_parse(br, p, attr, cmd, extack); + if (err) + return err; + break; } } @@ -1631,7 +1724,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; - struct br_vlan_stats stats; + struct pcpu_sw_netstats stats; if (++vl_idx < *prividx) continue; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 345118e35c42..f44f46a305aa 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -89,14 
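
On the read side the CFM TLVs are requested with an RTM_GETLINK dump carrying IFLA_EXT_MASK, mirroring the filter_mask checks in br_getlink() above. A hedged libmnl sketch, with buffer allocation and the receive loop omitted; note that br_cfm_status_fill_info() clears the one-shot 'seen' flags on GETLINK, so each dump consumes them and the caller must latch what it reads:

	/* illustrative userspace sketch, not part of this patch */
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct ifinfomsg *ifi;

	nlh->nlmsg_type = RTM_GETLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	ifi = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifi));
	ifi->ifi_family = AF_BRIDGE;
	mnl_attr_put_u32(nlh, IFLA_EXT_MASK,
			 RTEXT_FILTER_CFM_CONFIG | RTEXT_FILTER_CFM_STATUS);
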
+89,6 @@ struct bridge_mcast_stats { }; #endif -struct br_vlan_stats { - u64 rx_bytes; - u64 rx_packets; - u64 tx_bytes; - u64 tx_packets; - struct u64_stats_sync syncp; -}; - struct br_tunnel_info { __be64 tunnel_id; struct metadata_dst *tunnel_dst; @@ -137,7 +129,7 @@ struct net_bridge_vlan { u16 flags; u16 priv_flags; u8 state; - struct br_vlan_stats __percpu *stats; + struct pcpu_sw_netstats __percpu *stats; union { struct net_bridge *br; struct net_bridge_port *port; @@ -383,7 +375,7 @@ enum net_bridge_opts { struct net_bridge { spinlock_t lock; spinlock_t hash_lock; - struct list_head port_list; + struct hlist_head frame_type_list; struct net_device *dev; struct pcpu_sw_netstats __percpu *stats; unsigned long options; @@ -395,6 +387,7 @@ struct net_bridge { #endif struct rhashtable fdb_hash_tbl; + struct list_head port_list; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { struct rtable fake_rtable; @@ -481,7 +474,10 @@ struct net_bridge { struct hlist_head fdb_list; #if IS_ENABLED(CONFIG_BRIDGE_MRP) - struct list_head mrp_list; + struct hlist_head mrp_list; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_CFM) + struct hlist_head mep_list; #endif }; @@ -755,6 +751,16 @@ int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev); int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); rx_handler_func_t *br_get_rx_handler(const struct net_device *dev); +struct br_frame_type { + __be16 type; + int (*frame_handler)(struct net_bridge_port *port, + struct sk_buff *skb); + struct hlist_node list; +}; + +void br_add_frame(struct net_bridge *br, struct br_frame_type *ft); +void br_del_frame(struct net_bridge *br, struct br_frame_type *ft); + static inline bool br_rx_handler_check_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev); @@ -840,6 +846,11 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); +static inline bool br_group_is_l2(const struct br_ip *group) +{ + return group->proto == 0; +} + #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) @@ -871,7 +882,8 @@ __br_multicast_querier_exists(struct net_bridge *br, } static inline bool br_multicast_querier_exists(struct net_bridge *br, - struct ethhdr *eth) + struct ethhdr *eth, + const struct net_bridge_mdb_entry *mdb) { switch (eth->h_proto) { case (htons(ETH_P_IP)): @@ -883,7 +895,7 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, &br->ip6_other_query, true); #endif default: - return false; + return !!mdb && br_group_is_l2(&mdb->addr); } } @@ -993,7 +1005,8 @@ static inline bool br_multicast_is_router(struct net_bridge *br) } static inline bool br_multicast_querier_exists(struct net_bridge *br, - struct ethhdr *eth) + struct ethhdr *eth, + const struct net_bridge_mdb_entry *mdb) { return false; } @@ -1072,7 +1085,7 @@ void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); void br_vlan_get_stats(const struct net_bridge_vlan *v, - struct br_vlan_stats *stats); + struct pcpu_sw_netstats *stats); void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr); @@ -1268,7 +1281,7 @@ static inline struct 
net_bridge_vlan_group *nbp_vlan_group_rcu( } static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, - struct br_vlan_stats *stats) + struct pcpu_sw_netstats *stats) { } @@ -1417,7 +1430,6 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) #if IS_ENABLED(CONFIG_BRIDGE_MRP) int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); -int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb); bool br_mrp_enabled(struct net_bridge *br); void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p); int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br); @@ -1429,11 +1441,6 @@ static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, return -EOPNOTSUPP; } -static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) -{ - return 0; -} - static inline bool br_mrp_enabled(struct net_bridge *br) { return false; @@ -1451,12 +1458,67 @@ static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) #endif +/* br_cfm.c */ +#if IS_ENABLED(CONFIG_BRIDGE_CFM) +int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, + struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); +bool br_cfm_created(struct net_bridge *br); +void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p); +int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br); +int br_cfm_status_fill_info(struct sk_buff *skb, + struct net_bridge *br, + bool getlink); +int br_cfm_mep_count(struct net_bridge *br, u32 *count); +int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count); +#else +static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, + struct nlattr *attr, int cmd, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline bool br_cfm_created(struct net_bridge *br) +{ + return false; +} + +static inline void br_cfm_port_del(struct net_bridge *br, + struct net_bridge_port *p) +{ +} + +static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br) +{ + return -EOPNOTSUPP; +} + +static inline int br_cfm_status_fill_info(struct sk_buff *skb, + struct net_bridge *br, + bool getlink) +{ + return -EOPNOTSUPP; +} + +static inline int br_cfm_mep_count(struct net_bridge *br, u32 *count) +{ + return -EOPNOTSUPP; +} + +static inline int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) +{ + return -EOPNOTSUPP; +} +#endif + /* br_netlink.c */ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, const struct net_bridge *br, const struct net_bridge_port *port); +void br_info_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port, u32 filter); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags, struct netlink_ext_ack *extack); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); diff --git a/net/bridge/br_private_cfm.h b/net/bridge/br_private_cfm.h new file mode 100644 index 000000000000..a43a5e7fa2c3 --- /dev/null +++ b/net/bridge/br_private_cfm.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _BR_PRIVATE_CFM_H_ +#define _BR_PRIVATE_CFM_H_ + +#include "br_private.h" +#include <uapi/linux/cfm_bridge.h> + +struct br_cfm_mep_create { + enum br_cfm_domain domain; /* Domain for this MEP */ + enum br_cfm_mep_direction direction; /* Up or Down MEP direction 
*/ + u32 ifindex; /* Residence port */ +}; + +int br_cfm_mep_create(struct net_bridge *br, + const u32 instance, + struct br_cfm_mep_create *const create, + struct netlink_ext_ack *extack); + +int br_cfm_mep_delete(struct net_bridge *br, + const u32 instance, + struct netlink_ext_ack *extack); + +struct br_cfm_mep_config { + u32 mdlevel; + u32 mepid; /* MEPID for this MEP */ + struct mac_addr unicast_mac; /* The MEP unicast MAC */ +}; + +int br_cfm_mep_config_set(struct net_bridge *br, + const u32 instance, + const struct br_cfm_mep_config *const config, + struct netlink_ext_ack *extack); + +struct br_cfm_maid { + u8 data[CFM_MAID_LENGTH]; +}; + +struct br_cfm_cc_config { + /* Expected received CCM PDU MAID. */ + struct br_cfm_maid exp_maid; + + /* Expected received CCM PDU interval. */ + /* Transmitting CCM PDU interval when CCM tx is enabled. */ + enum br_cfm_ccm_interval exp_interval; + + bool enable; /* Enable/disable CCM PDU handling */ +}; + +int br_cfm_cc_config_set(struct net_bridge *br, + const u32 instance, + const struct br_cfm_cc_config *const config, + struct netlink_ext_ack *extack); + +int br_cfm_cc_peer_mep_add(struct net_bridge *br, const u32 instance, + u32 peer_mep_id, + struct netlink_ext_ack *extack); +int br_cfm_cc_peer_mep_remove(struct net_bridge *br, const u32 instance, + u32 peer_mep_id, + struct netlink_ext_ack *extack); + +/* Transmitted CCM Remote Defect Indication status set. + * This RDI is inserted in transmitted CCM PDUs if CCM transmission is enabled. + * See br_cfm_cc_ccm_tx() with interval != BR_CFM_CCM_INTERVAL_NONE + */ +int br_cfm_cc_rdi_set(struct net_bridge *br, const u32 instance, + const bool rdi, struct netlink_ext_ack *extack); + +/* OAM PDU Tx information */ +struct br_cfm_cc_ccm_tx_info { + struct mac_addr dmac; + /* The CCM will be transmitted for this period in seconds. + * Call br_cfm_cc_ccm_tx before timeout to keep transmission alive. + * When period is zero any ongoing transmission will be stopped. + */ + u32 period; + + bool seq_no_update; /* Update Tx CCM sequence number */ + bool if_tlv; /* Insert Interface Status TLV */ + u8 if_tlv_value; /* Interface Status TLV value */ + bool port_tlv; /* Insert Port Status TLV */ + u8 port_tlv_value; /* Port Status TLV value */ + /* Sender ID TLV ?? + * Organization-Specific TLV ?? + */ +}; + +int br_cfm_cc_ccm_tx(struct net_bridge *br, const u32 instance, + const struct br_cfm_cc_ccm_tx_info *const tx_info, + struct netlink_ext_ack *extack); + +struct br_cfm_mep_status { + /* Indications that an OAM PDU has been seen. */ + bool opcode_unexp_seen; /* RX of OAM PDU with unexpected opcode */ + bool version_unexp_seen; /* RX of OAM PDU with unexpected version */ + bool rx_level_low_seen; /* Rx of OAM PDU with level low */ +}; + +struct br_cfm_cc_peer_status { + /* This CCM related status is based on the latest received CCM PDU. */ + u8 port_tlv_value; /* Port Status TLV value */ + u8 if_tlv_value; /* Interface Status TLV value */ + + /* CCM has not been received for 3.25 intervals */ + u8 ccm_defect:1; + + /* (RDI == 1) for last received CCM PDU */ + u8 rdi:1; + + /* Indications that a CCM PDU has been seen. 
*/ + u8 seen:1; /* CCM PDU received */ + u8 tlv_seen:1; /* CCM PDU with TLV received */ + /* CCM PDU with unexpected sequence number received */ + u8 seq_unexp_seen:1; +}; + +struct br_cfm_mep { + /* list header of MEP instances */ + struct hlist_node head; + u32 instance; + struct br_cfm_mep_create create; + struct br_cfm_mep_config config; + struct br_cfm_cc_config cc_config; + struct br_cfm_cc_ccm_tx_info cc_ccm_tx_info; + /* List of multiple peer MEPs */ + struct hlist_head peer_mep_list; + struct net_bridge_port __rcu *b_port; + unsigned long ccm_tx_end; + struct delayed_work ccm_tx_dwork; + u32 ccm_tx_snumber; + u32 ccm_rx_snumber; + struct br_cfm_mep_status status; + bool rdi; + struct rcu_head rcu; +}; + +struct br_cfm_peer_mep { + struct hlist_node head; + struct br_cfm_mep *mep; + struct delayed_work ccm_rx_dwork; + u32 mepid; + struct br_cfm_cc_peer_status cc_status; + u32 ccm_rx_count_miss; + struct rcu_head rcu; +}; + +#endif /* _BR_PRIVATE_CFM_H_ */ diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index af0e9eff6549..1883118aae55 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -8,7 +8,7 @@ struct br_mrp { /* list of mrp instances */ - struct list_head list; + struct hlist_node list; struct net_bridge_port __rcu *p_port; struct net_bridge_port __rcu *s_port; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3e493eb85bb2..11f54a7c0d1d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -270,7 +270,8 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, goto out_filt; v->brvlan = masterv; if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { - v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + v->stats = + netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!v->stats) { err = -ENOMEM; goto out_filt; @@ -421,7 +422,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { - struct br_vlan_stats *stats; + struct pcpu_sw_netstats *stats; struct net_bridge_vlan *v; u16 vid; @@ -474,7 +475,7 @@ static bool __allowed_ingress(const struct net_bridge *br, struct sk_buff *skb, u16 *vid, u8 *state) { - struct br_vlan_stats *stats; + struct pcpu_sw_netstats *stats; struct net_bridge_vlan *v; bool tagged; @@ -708,7 +709,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed, if (!vlan) return -ENOMEM; - vlan->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); + vlan->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!vlan->stats) { kfree(vlan); return -ENOMEM; @@ -1262,14 +1263,14 @@ void nbp_vlan_flush(struct net_bridge_port *port) } void br_vlan_get_stats(const struct net_bridge_vlan *v, - struct br_vlan_stats *stats) + struct pcpu_sw_netstats *stats) { int i; memset(stats, 0, sizeof(*stats)); for_each_possible_cpu(i) { u64 rxpackets, rxbytes, txpackets, txbytes; - struct br_vlan_stats *cpu_stats; + struct pcpu_sw_netstats *cpu_stats; unsigned int start; cpu_stats = per_cpu_ptr(v->stats, i); @@ -1585,7 +1586,7 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) static bool br_vlan_stats_fill(struct sk_buff *skb, const struct net_bridge_vlan *v) { - struct br_vlan_stats stats; + struct pcpu_sw_netstats stats; struct nlattr *nest; nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_STATS); diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 5040fe43f4b4..e4d287afc2c9 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -17,7 +17,7 @@ 
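
The vlan statistics conversion above drops the private br_vlan_stats in favour of the generic struct pcpu_sw_netstats; the read side in br_vlan_get_stats() keeps the usual u64_stats snapshot idiom. A sketch of the per-CPU loop body, using the declarations visible in the hunk above:

	/* illustrative sketch of the read-side idiom */
	do {
		start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
		rxpackets = cpu_stats->rx_packets;
		rxbytes = cpu_stats->rx_bytes;
		txpackets = cpu_stats->tx_packets;
		txbytes = cpu_stats->tx_bytes;
	} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));

	stats->rx_packets += rxpackets;
	stats->rx_bytes += rxbytes;
	stats->tx_packets += txpackets;
	stats->tx_bytes += txbytes;
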
config NFT_BRIDGE_META config NFT_BRIDGE_REJECT tristate "Netfilter nf_tables bridge reject support" - depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6 + depends on NFT_REJECT help Add support to reject packets. diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index deae2c9a0f69..eba0efe64d05 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -39,30 +39,6 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, } } -static int nft_bridge_iphdr_validate(struct sk_buff *skb) -{ - struct iphdr *iph; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return 0; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return 0; - - len = ntohs(iph->tot_len); - if (skb->len < len) - return 0; - else if (len < (iph->ihl*4)) - return 0; - - if (!pskb_may_pull(skb, iph->ihl*4)) - return 0; - - return 1; -} - /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ @@ -72,29 +48,11 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net, int hook) { struct sk_buff *nskb; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _oth; - if (!nft_bridge_iphdr_validate(oldskb)) - return; - - oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); - if (!oth) - return; - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); if (!nskb) return; - skb_reserve(nskb, LL_MAX_HEADER); - niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv4.sysctl_ip_default_ttl); - nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - nft_reject_br_push_etherhdr(oldskb, nskb); br_forward(br_port_get_rcu(dev), nskb, false, true); @@ -106,139 +64,32 @@ static void nft_reject_br_send_v4_unreach(struct net *net, int hook, u8 code) { struct sk_buff *nskb; - struct iphdr *niph; - struct icmphdr *icmph; - unsigned int len; - __wsum csum; - u8 proto; - - if (!nft_bridge_iphdr_validate(oldskb)) - return; - - /* IP header checks: fragment. */ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - /* RFC says return as much as we can without exceeding 576 bytes. 
*/ - len = min_t(unsigned int, 536, oldskb->len); - - if (!pskb_may_pull(oldskb, len)) - return; - - if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) - return; - - proto = ip_hdr(oldskb)->protocol; - - if (!skb_csum_unnecessary(oldskb) && - nf_reject_verify_csum(proto) && - nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) - return; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + - LL_MAX_HEADER + len, GFP_ATOMIC); + nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); if (!nskb) return; - skb_reserve(nskb, LL_MAX_HEADER); - niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - net->ipv4.sysctl_ip_default_ttl); - - skb_reset_transport_header(nskb); - icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); - icmph->type = ICMP_DEST_UNREACH; - icmph->code = code; - - skb_put_data(nskb, skb_network_header(oldskb), len); - - csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); - icmph->checksum = csum_fold(csum); - - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - nft_reject_br_push_etherhdr(oldskb, nskb); br_forward(br_port_get_rcu(dev), nskb, false, true); } -static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) -{ - struct ipv6hdr *hdr; - u32 pkt_len; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return 0; - - hdr = ipv6_hdr(skb); - if (hdr->version != 6) - return 0; - - pkt_len = ntohs(hdr->payload_len); - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return 0; - - return 1; -} - static void nft_reject_br_send_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, int hook) { struct sk_buff *nskb; - const struct tcphdr *oth; - struct tcphdr _oth; - unsigned int otcplen; - struct ipv6hdr *nip6h; - if (!nft_bridge_ip6hdr_validate(oldskb)) - return; - - oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); - if (!oth) - return; - - nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); if (!nskb) return; - skb_reserve(nskb, LL_MAX_HEADER); - nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv6.devconf_all->hop_limit); - nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); - nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); - nft_reject_br_push_etherhdr(oldskb, nskb); br_forward(br_port_get_rcu(dev), nskb, false, true); } -static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) -{ - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - int thoff; - __be16 fo; - u8 proto = ip6h->nexthdr; - - if (skb_csum_unnecessary(skb)) - return true; - - if (ip6h->payload_len && - pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) - return false; - - ip6h = ipv6_hdr(skb); - thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); - if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) - return false; - - if (!nf_reject_verify_csum(proto)) - return true; - - return nf_ip6_checksum(skb, hook, thoff, proto) == 0; -} static void nft_reject_br_send_v6_unreach(struct net *net, struct sk_buff *oldskb, @@ -246,49 +97,11 @@ static void nft_reject_br_send_v6_unreach(struct net *net, int hook, u8 code) { struct sk_buff *nskb; - struct ipv6hdr *nip6h; - struct icmp6hdr *icmp6h; - unsigned int len; - - if (!nft_bridge_ip6hdr_validate(oldskb)) - return; - /* Include "As much of invoking packet as possible without the ICMPv6 - * packet exceeding the minimum IPv6 MTU" in the ICMP payload. 
- */ - len = min_t(unsigned int, 1220, oldskb->len); - - if (!pskb_may_pull(oldskb, len)) - return; - - if (!reject6_br_csum_ok(oldskb, hook)) - return; - - nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + - LL_MAX_HEADER + len, GFP_ATOMIC); + nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); if (!nskb) return; - skb_reserve(nskb, LL_MAX_HEADER); - nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, - net->ipv6.devconf_all->hop_limit); - - skb_reset_transport_header(nskb); - icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; - icmp6h->icmp6_code = code; - - skb_put_data(nskb, skb_network_header(oldskb), len); - nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); - - icmp6h->icmp6_cksum = - csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, - nskb->len - sizeof(struct ipv6hdr), - IPPROTO_ICMPV6, - csum_partial(icmp6h, - nskb->len - sizeof(struct ipv6hdr), - 0)); - nft_reject_br_push_etherhdr(oldskb, nskb); br_forward(br_port_get_rcu(dev), nskb, false, true); @@ -364,69 +177,13 @@ static int nft_reject_bridge_validate(const struct nft_ctx *ctx, (1 << NF_BR_LOCAL_IN)); } -static int nft_reject_bridge_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; - - if (tb[NFTA_REJECT_TYPE] == NULL) - return -EINVAL; - - priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - case NFT_REJECT_ICMPX_UNREACH: - if (tb[NFTA_REJECT_ICMP_CODE] == NULL) - return -EINVAL; - - icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); - if (priv->type == NFT_REJECT_ICMPX_UNREACH && - icmp_code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; - - priv->icmp_code = icmp_code; - break; - case NFT_REJECT_TCP_RST: - break; - default: - return -EINVAL; - } - return 0; -} - -static int nft_reject_bridge_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_reject *priv = nft_expr_priv(expr); - - if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) - goto nla_put_failure; - - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - case NFT_REJECT_ICMPX_UNREACH: - if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) - goto nla_put_failure; - break; - default: - break; - } - - return 0; - -nla_put_failure: - return -1; -} - static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_bridge_eval, - .init = nft_reject_bridge_init, - .dump = nft_reject_bridge_dump, + .init = nft_reject_init, + .dump = nft_reject_dump, .validate = nft_reject_bridge_validate, }; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index c907f0dc7f87..359908a7d3c1 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> +#include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/bpf_local_storage.h> #include <net/bpf_sk_storage.h> @@ -15,20 +16,8 @@ DEFINE_BPF_STORAGE_CACHE(sk_cache); -static int omem_charge(struct sock *sk, unsigned int size) -{ - /* same check as in sock_kmalloc() */ - if (size <= sysctl_optmem_max && - atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { - atomic_add(size, &sk->sk_omem_alloc); - return 0; - } - - return -ENOMEM; -} - static struct 
bpf_local_storage_data * -sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) +bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *sk_storage; struct bpf_local_storage_map *smap; @@ -41,11 +30,11 @@ sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit); } -static int sk_storage_delete(struct sock *sk, struct bpf_map *map) +static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) { struct bpf_local_storage_data *sdata; - sdata = sk_storage_lookup(sk, map, false); + sdata = bpf_sk_storage_lookup(sk, map, false); if (!sdata) return -ENOENT; @@ -94,7 +83,7 @@ void bpf_sk_storage_free(struct sock *sk) kfree_rcu(sk_storage, rcu); } -static void sk_storage_map_free(struct bpf_map *map) +static void bpf_sk_storage_map_free(struct bpf_map *map) { struct bpf_local_storage_map *smap; @@ -103,7 +92,7 @@ static void sk_storage_map_free(struct bpf_map *map) bpf_local_storage_map_free(smap); } -static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr) +static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) { struct bpf_local_storage_map *smap; @@ -130,7 +119,7 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { - sdata = sk_storage_lookup(sock->sk, map, true); + sdata = bpf_sk_storage_lookup(sock->sk, map, true); sockfd_put(sock); return sdata ? sdata->data : NULL; } @@ -166,7 +155,7 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { - err = sk_storage_delete(sock->sk, map); + err = bpf_sk_storage_del(sock->sk, map); sockfd_put(sock); return err; } @@ -272,7 +261,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) return (unsigned long)NULL; - sdata = sk_storage_lookup(sk, map, true); + sdata = bpf_sk_storage_lookup(sk, map, true); if (sdata) return (unsigned long)sdata->data; @@ -305,7 +294,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) if (refcount_inc_not_zero(&sk->sk_refcnt)) { int err; - err = sk_storage_delete(sk, map); + err = bpf_sk_storage_del(sk, map); sock_put(sk); return err; } @@ -313,14 +302,23 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } -static int sk_storage_charge(struct bpf_local_storage_map *smap, - void *owner, u32 size) +static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, + void *owner, u32 size) { - return omem_charge(owner, size); + struct sock *sk = (struct sock *)owner; + + /* same check as in sock_kmalloc() */ + if (size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + atomic_add(size, &sk->sk_omem_alloc); + return 0; + } + + return -ENOMEM; } -static void sk_storage_uncharge(struct bpf_local_storage_map *smap, - void *owner, u32 size) +static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap, + void *owner, u32 size) { struct sock *sk = owner; @@ -328,7 +326,7 @@ static void sk_storage_uncharge(struct bpf_local_storage_map *smap, } static struct bpf_local_storage __rcu ** -sk_storage_ptr(void *owner) +bpf_sk_storage_ptr(void *owner) { struct sock *sk = owner; @@ -339,8 +337,8 @@ static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = 
{ .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, - .map_alloc = sk_storage_map_alloc, - .map_free = sk_storage_map_free, + .map_alloc = bpf_sk_storage_map_alloc, + .map_free = bpf_sk_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, .map_update_elem = bpf_fd_sk_storage_update_elem, @@ -348,9 +346,9 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_check_btf = bpf_local_storage_map_check_btf, .map_btf_name = "bpf_local_storage_map", .map_btf_id = &sk_storage_map_btf_id, - .map_local_storage_charge = sk_storage_charge, - .map_local_storage_uncharge = sk_storage_uncharge, - .map_owner_storage_ptr = sk_storage_ptr, + .map_local_storage_charge = bpf_sk_storage_charge, + .map_local_storage_uncharge = bpf_sk_storage_uncharge, + .map_owner_storage_ptr = bpf_sk_storage_ptr, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { @@ -381,6 +379,79 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; +static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) +{ + const struct btf *btf_vmlinux; + const struct btf_type *t; + const char *tname; + u32 btf_id; + + if (prog->aux->dst_prog) + return false; + + /* Ensure the tracing program is not tracing + * any bpf_sk_storage*() function and also + * use the bpf_sk_storage_(get|delete) helper. + */ + switch (prog->expected_attach_type) { + case BPF_TRACE_RAW_TP: + /* bpf_sk_storage has no trace point */ + return true; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + btf_vmlinux = bpf_get_btf_vmlinux(); + btf_id = prog->aux->attach_btf_id; + t = btf_type_by_id(btf_vmlinux, btf_id); + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + return !!strncmp(tname, "bpf_sk_storage", + strlen("bpf_sk_storage")); + default: + return false; + } + + return false; +} + +BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, + void *, value, u64, flags) +{ + if (!in_serving_softirq() && !in_task()) + return (unsigned long)NULL; + + return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags); +} + +BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, + struct sock *, sk) +{ + if (!in_serving_softirq() && !in_task()) + return -EPERM; + + return ____bpf_sk_storage_delete(map, sk); +} + +const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { + .func = bpf_sk_storage_get_tracing, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, + .allowed = bpf_sk_storage_tracing_allowed, +}; + +const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { + .func = bpf_sk_storage_delete_tracing, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + .allowed = bpf_sk_storage_tracing_allowed, +}; + struct bpf_sk_storage_diag { u32 nr_maps; struct bpf_map *maps[]; diff --git a/net/core/datagram.c b/net/core/datagram.c index 9fcaa544f11a..81809fa735a7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -709,7 +709,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) EXPORT_SYMBOL(zerocopy_sg_from_iter); /** - * skb_copy_and_csum_datagram_iter - Copy datagram to an iovec iterator + 
* skb_copy_and_csum_datagram - Copy datagram to an iovec iterator * and update a checksum. * @skb: buffer to copy * @offset: offset in the buffer to start copying from diff --git a/net/core/dev.c b/net/core/dev.c index 82dc6b48e45f..4bfdcd6b20e8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3206,7 +3206,7 @@ int skb_checksum_help(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; - if (unlikely(skb_shinfo(skb)->gso_size)) { + if (unlikely(skb_is_gso(skb))) { skb_warn_bad_offload(skb); return -EINVAL; } @@ -6919,7 +6919,7 @@ bool netdev_has_upper_dev(struct net_device *dev, EXPORT_SYMBOL(netdev_has_upper_dev); /** - * netdev_has_upper_dev_all - Check if device is linked to an upper device + * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device * @dev: device * @upper_dev: upper device to check * @@ -8157,7 +8157,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private); /** - * netdev_lower_change - Dispatch event about lower device state change + * netdev_lower_state_changed - Dispatch event about lower device state change * @lower_dev: device * @lower_state_info: state to dispatch * @@ -8902,7 +8902,7 @@ static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) return dev->netdev_ops->ndo_bpf; default: return NULL; - }; + } } static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, @@ -10366,6 +10366,21 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); +/** + * dev_get_tstats64 - ndo_get_stats64 implementation + * @dev: device to get statistics from + * @s: place to store stats + * + * Populate @s from dev->stats and dev->tstats. Can be used as + * ndo_get_stats64() callback. + */ +void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s) +{ + netdev_stats_to_stats64(s, &dev->stats); + dev_fetch_sw_netstats(s, dev->tstats); +} +EXPORT_SYMBOL_GPL(dev_get_tstats64); + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) { struct netdev_queue *queue = dev_ingress_queue(dev); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 205e92e604ef..db8a0ff86f36 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -230,7 +230,7 @@ static int dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, unsigned int cmd) { const struct net_device_ops *ops = dev->netdev_ops; - int err = -EOPNOTSUPP; + int err; err = dsa_ndo_do_ioctl(dev, ifr, cmd); if (err == 0 || err != -EOPNOTSUPP) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 7bcfb16854cb..cd80ffed6d26 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -563,7 +563,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; nlrule->iifindex = -1; - nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->iifname); if (dev) nlrule->iifindex = dev->ifindex; @@ -573,7 +573,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; nlrule->oifindex = -1; - nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->oifname); if (dev) nlrule->oifindex = dev->ifindex; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e21950a2c897..6f1adba6695f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -48,7 +48,7 @@ void skb_flow_dissector_init(struct 
flow_dissector *flow_dissector, memset(flow_dissector, 0, sizeof(*flow_dissector)); for (i = 0; i < key_count; i++, key++) { - /* User should make sure that every key target offset is withing + /* User should make sure that every key target offset is within * boundaries of unsigned short. */ BUG_ON(key->offset > USHRT_MAX); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index ef98372facf6..f3c690b8c8e3 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,6 +11,8 @@ #include <linux/device.h> #include <net/page_pool.h> +#include <net/xdp.h> + #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/page-flags.h> @@ -362,8 +364,9 @@ static bool pool_page_reusable(struct page_pool *pool, struct page *page) * If the page refcnt != 1, then the page will be returned to memory * subsystem. */ -void page_pool_put_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +static __always_inline struct page * +__page_pool_put_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) { /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the @@ -379,15 +382,12 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, page_pool_dma_sync_for_device(pool, page, dma_sync_size); - if (allow_direct && in_serving_softirq()) - if (page_pool_recycle_in_cache(page, pool)) - return; + if (allow_direct && in_serving_softirq() && + page_pool_recycle_in_cache(page, pool)) + return NULL; - if (!page_pool_recycle_in_ring(pool, page)) { - /* Cache full, fallback to free pages */ - page_pool_return_page(pool, page); - } - return; + /* Page found as candidate for recycling */ + return page; } /* Fallback/non-XDP mode: API user have elevated refcnt. 
* @@ -405,9 +405,59 @@ void page_pool_put_page, /* Do not replace this with page_pool_return_page() */ page_pool_release_page(pool, page); put_page(page); + + return NULL; +} + +void page_pool_put_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) +{ + page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); + if (page && !page_pool_recycle_in_ring(pool, page)) { + /* Cache full, fallback to free pages */ + page_pool_return_page(pool, page); + } } EXPORT_SYMBOL(page_pool_put_page); +/* Caller must not use data area after call, as this function overwrites it */ +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ + int i, bulk_len = 0; + + for (i = 0; i < count; i++) { + struct page *page = virt_to_head_page(data[i]); + + page = __page_pool_put_page(pool, page, -1, false); + /* Approved for bulk recycling in ptr_ring cache */ + if (page) + data[bulk_len++] = page; + } + + if (unlikely(!bulk_len)) + return; + + /* Bulk producer into ptr_ring page_pool cache */ + page_pool_ring_lock(pool); + for (i = 0; i < bulk_len; i++) { + if (__ptr_ring_produce(&pool->ring, data[i])) + break; /* ring full */ + } + page_pool_ring_unlock(pool); + + /* Hopefully all pages were returned into ptr_ring */ + if (likely(i == bulk_len)) + return; + + /* ptr_ring cache full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation + */ + for (; i < bulk_len; i++) + page_pool_return_page(pool, data[i]); +} +EXPORT_SYMBOL(page_pool_put_page_bulk); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7d7223691783..60917ff4a00b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1939,7 +1939,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla if (linfo[IFLA_INFO_KIND]) { char kind[MODULE_NAME_LEN]; - nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); + nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); ops = rtnl_link_ops_get(kind); } @@ -2953,9 +2953,9 @@ static struct net_device *rtnl_dev_get(struct net *net, if (!ifname) { ifname = buffer; if (ifname_attr) - nla_strlcpy(ifname, ifname_attr, IFNAMSIZ); + nla_strscpy(ifname, ifname_attr, IFNAMSIZ); else if (altifname_attr) - nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ); + nla_strscpy(ifname, altifname_attr, ALTIFNAMSIZ); else return NULL; } @@ -2983,7 +2983,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else ifname[0] = '\0'; @@ -3264,7 +3264,7 @@ replay: return err; if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else ifname[0] = '\0'; @@ -3296,7 +3296,7 @@ replay: memset(linkinfo, 0, sizeof(linkinfo)); if (linkinfo[IFLA_INFO_KIND]) { - nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); ops = rtnl_link_ops_get(kind); } else { kind[0] = '\0'; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1ba8f0163744..ffe3dcc0ebea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -249,6 +249,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, fclones->skb2.fclone = SKB_FCLONE_CLONE; } + + skb_set_kcov_handle(skb,
kcov_common_handle()); + out: return skb; nodata: @@ -282,6 +285,8 @@ static struct sk_buff *__build_skb_around(struct sk_buff *skb, memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); + skb_set_kcov_handle(skb, kcov_common_handle()); + return skb; } @@ -837,7 +842,7 @@ EXPORT_SYMBOL(consume_skb); #endif /** - * consume_stateless_skb - free an skbuff, assuming it is stateless + * __consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * * Alike consume_skb(), but this variant assumes that this is the last @@ -4203,6 +4208,9 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif +#if IS_ENABLED(CONFIG_KCOV) + [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4220,6 +4228,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + #endif +#if IS_ENABLED(CONFIG_KCOV) + skb_ext_type_len[SKB_EXT_KCOV_HANDLE] + +#endif 0; } @@ -5430,7 +5441,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) goto err_free; skb_reset_network_header(skb); - skb_reset_transport_header(skb); + if (!skb_transport_header_was_set(skb)) + skb_reset_transport_header(skb); skb_reset_mac_len(skb); return skb; diff --git a/net/core/xdp.c b/net/core/xdp.c index 48aba933a5a8..3d330ebda893 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -380,6 +380,60 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); +/* XDP bulk APIs introduce a defer/flush mechanism to return + * pages belonging to the same xdp_mem_allocator object + * (identified via the mem.id field) in bulk to optimize + * I-cache and D-cache. + * The bulk queue size is set to 16 to be aligned to how + * XDP_REDIRECT bulking works. The bulk is flushed when + * it is full or when mem.id changes. + * xdp_frame_bulk is usually stored/allocated on the function + * call-stack to avoid locking penalties. 
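+ *
+ * A minimal usage sketch (an illustrative addition, not part of the
+ * original patch; get_next_completed_frame() is a hypothetical
+ * stand-in for the driver's own completion logic, and rcu_read_lock
+ * must be held as xdp_return_frame_bulk below requires):
+ *
+ *	struct xdp_frame_bulk bq;
+ *	struct xdp_frame *xdpf;
+ *
+ *	bq.xa = NULL;	/* count is zeroed on the first mem.id lookup */
+ *
+ *	rcu_read_lock();
+ *	while ((xdpf = get_next_completed_frame(txq)))
+ *		xdp_return_frame_bulk(xdpf, &bq);
+ *	xdp_flush_frame_bulk(&bq);	/* drain any partial bulk */
+ *	rcu_read_unlock();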
+ */ +void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) +{ + struct xdp_mem_allocator *xa = bq->xa; + + if (unlikely(!xa || !bq->count)) + return; + + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); + /* bq->xa is not cleared, to save a lookup if mem.id is the same in the next bulk */ + bq->count = 0; +} +EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); + +/* Must be called with rcu_read_lock held */ +void xdp_return_frame_bulk(struct xdp_frame *xdpf, + struct xdp_frame_bulk *bq) +{ + struct xdp_mem_info *mem = &xdpf->mem; + struct xdp_mem_allocator *xa; + + if (mem->type != MEM_TYPE_PAGE_POOL) { + __xdp_return(xdpf->data, &xdpf->mem, false); + return; + } + + xa = bq->xa; + if (unlikely(!xa)) { + xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); + bq->count = 0; + bq->xa = xa; + } + + if (bq->count == XDP_BULK_QUEUE_SIZE) + xdp_flush_frame_bulk(bq); + + if (unlikely(mem->id != xa->mem.id)) { + xdp_flush_frame_bulk(bq); + bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); + } + + bq->q[bq->count++] = xdpf->data; +} +EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); + void xdp_return_buff(struct xdp_buff *xdp) { __xdp_return(xdp->data, &xdp->rxq->mem, true); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 16014ad19406..084e159a12ba 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1827,6 +1827,8 @@ static int dcb_app_add(const struct dcb_app *app, int ifindex) /** * dcb_getapp - retrieve the DCBX application user priority + * @dev: network interface + * @app: application to get user priority of * * On success returns a non-zero 802.1p user priority bitmap * otherwise returns 0 as the invalid user priority bitmap to @@ -1849,6 +1851,8 @@ EXPORT_SYMBOL(dcb_getapp); /** * dcb_setapp - add CEE dcb application data to app list + * @dev: network interface + * @new: application data to add * * Priority 0 is an invalid priority in CEE spec. This routine * removes applications from the app list if the priority is @@ -1890,6 +1894,8 @@ EXPORT_SYMBOL(dcb_setapp); /** * dcb_ieee_getapp_mask - retrieve the IEEE DCB application priority + * @dev: network interface + * @app: where to store the retrieved application data * * Helper routine which on success returns a non-zero 802.1Qaz user * priority bitmap otherwise returns 0 to indicate the dcb_app was @@ -1912,6 +1918,8 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask); /** * dcb_ieee_setapp - add IEEE dcb application data to app list + * @dev: network interface + * @new: application data to add * * This adds Application data to the list. Multiple application * entries may exists for the same selector and protocol as long @@ -1946,6 +1954,8 @@ EXPORT_SYMBOL(dcb_ieee_setapp); /** * dcb_ieee_delapp - delete IEEE dcb application data from list + * @dev: network interface + * @del: application data to delete * * This removes a matching APP data from the APP list */ @@ -1975,7 +1985,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) } EXPORT_SYMBOL(dcb_ieee_delapp); -/** +/* * dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find mapping from * priorities to the DSCP values assigned to that priority. Initialize p_map * such that each map element holds a bit mask of DSCP values configured for @@ -2004,7 +2014,7 @@ void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev, } EXPORT_SYMBOL(dcb_ieee_getapp_prio_dscp_mask_map); -/** +/* * dcb_ieee_getapp_dscp_prio_mask_map - For a given device, find mapping from * DSCP values to the priorities assigned to that DSCP value.
Initialize p_map * such that each map element holds a bit mask of priorities configured for a @@ -2031,7 +2041,7 @@ dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev, } EXPORT_SYMBOL(dcb_ieee_getapp_dscp_prio_mask_map); -/** +/* * Per 802.1Q-2014, the selector value of 1 is used for matching on Ethernet * type, with valid PID values >= 1536. A special meaning is then assigned to * protocol value of 0: "default priority. For use when priority is not diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 8f3dd3b1d2d0..c4bbac99740d 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -242,6 +242,8 @@ static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, /** * dccp_ackvec_input - Register incoming packet in the buffer + * @av: Ack Vector to register packet to + * @skb: Packet to register */ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) { @@ -273,6 +275,9 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) /** * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection + * @av: Ack Vector record to clean + * @ackno: last Ack Vector which has been acknowledged + * * This routine is called when the peer acknowledges the receipt of Ack Vectors * up to and including @ackno. While based on section A.3 of RFC 4340, here * are additional precautions to prevent corrupted buffer state. In particular, diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 1e9bb121ba72..6beac5d348e2 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -76,7 +76,7 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return err; } -static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_fmt, const char *fmt,...) +static __printf(3, 4) struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_fmt, const char *fmt,...) { struct kmem_cache *slab; va_list args; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 3da1f77bd039..4d9823d6dced 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -181,6 +181,9 @@ MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); /** * ccid2_update_used_window - Track how much of cwnd is actually used + * @hc: socket to update window + * @new_wnd: new window values to add into the filter + * * This is done in addition to CWV. The sender needs to have an idea of how many * packets may be in flight, to set the local Sequence Window value accordingly * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the @@ -349,6 +352,8 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) /** * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm + * @sk: socket to perform estimator on + * * This code is almost identical with TCP's tcp_rtt_estimator(), since * - it has a higher sampling frequency (recommended by RFC 1323), * - the RTO does not collapse into RTT due to RTTVAR going towards zero, diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index b9ee1a4a8955..ca8670f78ac6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -79,6 +79,8 @@ static inline u64 rfc3390_initial_rate(struct sock *sk) /** * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst + * @hc: socket to have the send interval updated + * * This respects the granularity of X_inst (64 * bytes/second). 
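+ *
+ * A worked example (illustrative, not from the original comment):
+ * with a mean packet size s = 1460 bytes and an allowed rate
+ * X_inst = 125000 bytes/second, the new inter-packet interval is
+ * t_ipi = s / X_inst ~= 11.7 ms.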
*/ static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) @@ -99,6 +101,7 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) /** * ccid3_hc_tx_update_x - Update allowed sending rate X + * @sk: socket to be updated * @stamp: most recent time if available - can be left NULL. * * This function tracks draft rfc3448bis, check there for latest details. @@ -151,6 +154,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) /** * ccid3_hc_tx_update_s - Track the mean packet size `s' + * @hc: socket to be updated * @len: DCCP packet payload size in bytes * * cf. RFC 4342, 5.3 and RFC 3448, 4.1 @@ -259,6 +263,7 @@ out: /** * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets + * @sk: socket to send packet from * @skb: next packet candidate to send on @sk * * This function uses the convention of ccid_packet_dequeue_eval() and @@ -655,6 +660,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) /** * ccid3_first_li - Implements [RFC 5348, 6.3.1] + * @sk: socket to calculate loss interval for * * Determine the length of the first loss interval via inverse lookup. * Assume that X_recv can be computed by the throughput equation diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 67abad695e66..da95319842bb 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -79,6 +79,9 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) /** * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 + * @lh: histogram to update + * @skb: received packet triggering loss interval update + * * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev */ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index af08e2df7108..0cdda3c66fb5 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -385,6 +385,9 @@ static inline struct tfrc_rx_hist_entry * /** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal + * @h: receive histogram + * @skb: packet containing timestamp. + * * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able * to compute a sample with given data - calling function should check this. */ diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 788dd629c420..305f56804832 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -996,6 +996,8 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp) /** * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features + * @dreq: server socket to resolve + * * It is the server which resolves the dependencies once the CCID has been * fully negotiated. If no CCID has been negotiated, it uses the default CCID. */ @@ -1033,6 +1035,10 @@ static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen) /** * dccp_feat_prefer - Move preferred entry to the start of array + * @preferred_value: entry to move to start of array + * @array: array of preferred entries + * @array_len: size of the array + * * Reorder the @array_len elements in @array so that @preferred_value comes * first. Returns >0 to indicate that @preferred_value does occur in @array.
*/ diff --git a/net/dccp/output.c b/net/dccp/output.c index 50e6d5699bb2..b8a24734385e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -143,6 +143,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /** * dccp_determine_ccmps - Find out about CCID-specific packet-size limits + * @dp: socket to find packet size limits of + * * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), * since the RX CCID is restricted to feedback packets (Acks), which are small * in comparison with the data traffic. A value of 0 means "no current CCMPS". @@ -236,6 +238,8 @@ static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) /** * dccp_xmit_packet - Send data packet under control of CCID + * @sk: socket to send data packet on + * * Transmits next-queued payload and informs CCID to account for the packet. */ static void dccp_xmit_packet(struct sock *sk) @@ -296,6 +300,9 @@ static void dccp_xmit_packet(struct sock *sk) /** * dccp_flush_write_queue - Drain queue at end of connection + * @sk: socket to be drained + * @time_budget: time allowed to drain the queue + * * Since dccp_sendmsg queues packets without waiting for them to be sent, it may * happen that the TX queue is not empty at the end of a connection. We give the * HC-sender CCID a grace period of up to @time_budget jiffies. If this function @@ -367,6 +374,8 @@ void dccp_write_xmit(struct sock *sk) /** * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets + * @sk: socket to perform retransmit on + * * There are only four retransmittable packet types in DCCP: * - Request in client-REQUEST state (sec. 8.1.1), * - CloseReq in server-CLOSEREQ state (sec. 8.3), diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c index db2448c33a62..5ba204ec0aca 100644 --- a/net/dccp/qpolicy.c +++ b/net/dccp/qpolicy.c @@ -65,14 +65,16 @@ static bool qpolicy_prio_full(struct sock *sk) * @push: add a new @skb to the write queue * @full: indicates that no more packets will be admitted * @top: peeks at whatever the queueing policy defines as its `top' + * @params: parameter passed to policy operation */ -static struct dccp_qpolicy_operations { +struct dccp_qpolicy_operations { void (*push) (struct sock *sk, struct sk_buff *skb); bool (*full) (struct sock *sk); struct sk_buff* (*top) (struct sock *sk); __be32 params; +}; -} qpol_table[DCCPQ_POLICY_MAX] = { +static struct dccp_qpolicy_operations qpol_table[DCCPQ_POLICY_MAX] = { [DCCPQ_POLICY_SIMPLE] = { .push = qpolicy_simple_push, .full = qpolicy_simple_full, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index a934d2932373..db768f223ef7 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -215,13 +215,14 @@ out: /** * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface - * @data: Socket to act on + * @t: pointer to the tasklet associated with this handler * * See the comments above %ccid_dequeueing_decision for supported modes. 
*/ -static void dccp_write_xmitlet(unsigned long data) +static void dccp_write_xmitlet(struct tasklet_struct *t) { - struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = from_tasklet(dp, t, dccps_xmitlet); + struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -235,16 +236,15 @@ static void dccp_write_xmitlet(unsigned long data) static void dccp_write_xmit_timer(struct timer_list *t) { struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer); - struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; - dccp_write_xmitlet((unsigned long)sk); + dccp_write_xmitlet(&dp->dccps_xmitlet); } void dccp_init_xmit_timers(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); + tasklet_setup(&dp->dccps_xmitlet, dccp_write_xmitlet); timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 15d42353f1a3..d1c50a48614b 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -658,7 +658,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_dev = dn_db; if (tb[IFA_LABEL]) - nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); + nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 1f9b9b11008c..dfecd7b22fd7 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -56,20 +56,31 @@ config NET_DSA_TAG_BRCM_PREPEND Broadcom switches which places the tag before the Ethernet header (prepended). +config NET_DSA_TAG_HELLCREEK + tristate "Tag driver for Hirschmann Hellcreek TSN switches" + help + Say Y or M if you want to enable support for tagging frames + for the Hirschmann Hellcreek TSN switches. + config NET_DSA_TAG_GSWIP tristate "Tag driver for Lantiq / Intel GSWIP switches" help Say Y or M if you want to enable support for tagging frames for the Lantiq / Intel GSWIP switches. +config NET_DSA_TAG_DSA_COMMON + tristate + config NET_DSA_TAG_DSA tristate "Tag driver for Marvell switches using DSA headers" + select NET_DSA_TAG_DSA_COMMON help Say Y or M if you want to enable support for tagging frames for the Marvell switches which use DSA headers. config NET_DSA_TAG_EDSA tristate "Tag driver for Marvell switches using EtherType DSA headers" + select NET_DSA_TAG_DSA_COMMON help Say Y or M if you want to enable support for tagging frames for the Marvell switches which use EtherType DSA headers. 
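The hidden NET_DSA_TAG_DSA_COMMON symbol above is what lets the regular and EtherType DSA taggers share a single module: both user-visible options select it, and the combined tag_dsa.o then compiles each tagger conditionally. A minimal sketch of the resulting registration, distilled from the tag_dsa.c diff further below:

	static struct dsa_tag_driver *dsa_tag_drivers[] = {
	#if IS_ENABLED(CONFIG_NET_DSA_TAG_DSA)
		&DSA_TAG_DRIVER_NAME(dsa_netdev_ops),
	#endif
	#if IS_ENABLED(CONFIG_NET_DSA_TAG_EDSA)
		&DSA_TAG_DRIVER_NAME(edsa_netdev_ops),
	#endif
	};
	module_dsa_tag_drivers(dsa_tag_drivers);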
diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 4f47b2025ff5..0fb2b75a7ae3 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -7,9 +7,9 @@ dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o -obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o -obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o +obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2131bf2b3a67..a1b1dc8a4d87 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -201,7 +201,6 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, { struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; - struct pcpu_sw_netstats *s; struct dsa_slave_priv *p; if (unlikely(!cpu_dp)) { @@ -234,11 +233,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - s = this_cpu_ptr(p->stats64); - u64_stats_update_begin(&s->syncp); - s->rx_packets++; - s->rx_bytes += skb->len; - u64_stats_update_end(&s->syncp); + dev_sw_netstats_rx_add(skb->dev, skb->len); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 12998bf04e55..7c96aae9062c 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -78,8 +78,6 @@ struct dsa_slave_priv { struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); - struct pcpu_sw_netstats __percpu *stats64; - struct gro_cells gcells; /* DSA port data, such as switch, port index, etc. */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3bc5ca40c9fb..ff2266d2b998 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -548,17 +548,36 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL_GPL(dsa_enqueue_skb); +static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) +{ + int needed_headroom = dev->needed_headroom; + int needed_tailroom = dev->needed_tailroom; + + /* For tail taggers, we need to pad short frames ourselves, to ensure + * that the tail tag does not fail at its role of being at the end of + * the packet, once the master interface pads the frame. Account for + * that pad length here, and pad later. + */ + if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) + needed_tailroom += ETH_ZLEN - skb->len; + /* skb_headroom() returns unsigned int... */ + needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); + needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); + + if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) + /* No reallocation needed, yay! 
*/ + return 0; + + return pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); +} + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct pcpu_sw_netstats *s; struct sk_buff *nskb; - s = this_cpu_ptr(p->stats64); - u64_stats_update_begin(&s->syncp); - s->tx_packets++; - s->tx_bytes += skb->len; - u64_stats_update_end(&s->syncp); + dev_sw_netstats_tx_add(dev, 1, skb->len); DSA_SKB_CB(skb)->clone = NULL; @@ -567,6 +586,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) */ dsa_skb_tx_timestamp(p, skb); + if (dsa_realloc_skb(skb, dev)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* needed_tailroom should still be 'warm' in the cache line from + * dsa_realloc_skb(), which has also ensured that padding is safe. + */ + if (dev->needed_tailroom) + eth_skb_pad(skb); + /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. */ @@ -679,7 +709,6 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = dp->ds; struct pcpu_sw_netstats *s; unsigned int start; @@ -688,7 +717,7 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, for_each_possible_cpu(i) { u64 tx_packets, tx_bytes, rx_packets, rx_bytes; - s = per_cpu_ptr(p->stats64, i); + s = per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_irq(&s->syncp); tx_packets = s->tx_packets; @@ -1217,15 +1246,6 @@ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, return ds->ops->port_setup_tc(ds, dp->index, type, type_data); } -static void dsa_slave_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - netdev_stats_to_stats64(stats, &dev->stats); - dev_fetch_sw_netstats(stats, p->stats64); -} - static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { @@ -1601,7 +1621,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { #endif .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, - .ndo_get_stats64 = dsa_slave_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, @@ -1791,6 +1811,16 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->netdev_ops = &dsa_slave_netdev_ops; if (ds->ops->port_max_mtu) slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); + if (cpu_dp->tag_ops->tail_tag) + slave_dev->needed_tailroom = cpu_dp->tag_ops->overhead; + else + slave_dev->needed_headroom = cpu_dp->tag_ops->overhead; + /* Try to save one extra realloc later in the TX path (in the master) + * by also inheriting the master's needed headroom and tailroom. + * The 8021q driver also does this. 
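+ *
+ * Worked example (illustrative): a 4-byte DSA tag gives
+ * tag_ops->overhead = 4, so with a master that itself needs no
+ * extra headroom the slave ends up with needed_headroom = 4 and
+ * needed_tailroom = 0, and dsa_realloc_skb() normally takes its
+ * fast path with no per-packet reallocation.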
+ */ + slave_dev->needed_headroom += master->needed_headroom; + slave_dev->needed_tailroom += master->needed_tailroom; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, @@ -1801,8 +1831,8 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!p->stats64) { + slave_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!slave_dev->tstats) { free_netdev(slave_dev); return -ENOMEM; } @@ -1864,7 +1894,7 @@ out_phy: out_gcells: gro_cells_destroy(&p->gcells); out_free: - free_percpu(p->stats64); + free_percpu(slave_dev->tstats); free_netdev(slave_dev); port->slave = NULL; return ret; @@ -1886,7 +1916,7 @@ void dsa_slave_destroy(struct net_device *slave_dev) dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); - free_percpu(p->stats64); + free_percpu(slave_dev->tstats); free_netdev(slave_dev); } @@ -1987,10 +2017,22 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, switch (event) { case NETDEV_PRECHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; + struct dsa_switch *ds; + struct dsa_port *dp; + int err; if (!dsa_slave_dev_check(dev)) return dsa_prevent_bridging_8021q_upper(dev, ptr); + dp = dsa_slave_to_port(dev); + ds = dp->ds; + + if (ds->ops->port_prechangeupper) { + err = ds->ops->port_prechangeupper(ds, dp->index, info); + if (err) + return notifier_from_errno(err); + } + if (is_vlan_dev(info->upper_dev)) return dsa_slave_check_8021q_upper(dev, ptr); break; diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 55b00694cdba..002cf7f952e2 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -31,9 +31,6 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, __le16 *phdr; u16 hdr; - if (skb_cow_head(skb, AR9331_HDR_LEN) < 0) - return NULL; - phdr = skb_push(skb, AR9331_HDR_LEN); hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index ad72dff8d524..e934dace3922 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -66,9 +66,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; - if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) - return NULL; - /* The Ethernet switch we are interfaced with needs packets to be at * least 64 bytes (including FCS) otherwise they will be discarded when * they enter the switch port logic. When Broadcom tags are enabled, we diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 0b756fae68a5..112c7c6dd568 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -1,7 +1,48 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Regular and Ethertype DSA tagging * Copyright (c) 2008-2009 Marvell Semiconductor + * + * Regular DSA + * ----------- + + * For untagged (in 802.1Q terms) packets, the switch will splice in + * the tag between the SA and the ethertype of the original + * packet. Tagged frames will instead have their outermost .1Q tag + * converted to a DSA tag. It expects the same layout when receiving + * packets from the CPU. + * + * Example: + * + * .----.----.----.--------- + * Pu: | DA | SA | ET | Payload ... + * '----'----'----'--------- + * 6 6 2 N + * .----.----.--------.-----.----.--------- + * Pt: | DA | SA | 0x8100 | TCI | ET | Payload ... 
+ * '----'----'--------'-----'----'--------- + * 6 6 2 2 2 N + * .----.----.-----.----.--------- + * Pd: | DA | SA | DSA | ET | Payload ... + * '----'----'-----'----'--------- + * 6 6 4 2 N + * + * No matter if a packet is received untagged (Pu) or tagged (Pt), + * they will both have the same layout (Pd) when they are sent to the + * CPU. This is done by ignoring 802.3, replacing the ethertype field + * with more metadata, among which is a bit to signal if the original + * packet was tagged or not. + * + * Ethertype DSA + * ------------- + * Uses the exact same tag format as regular DSA, but also includes a + * proper ethertype field (which the mv88e6xxx driver sets to + * ETH_P_EDSA/0xdada) followed by two zero bytes: + * + * .----.----.--------.--------.-----.----.--------- + * | DA | SA | 0xdada | 0x0000 | DSA | ET | Payload ... + * '----'----'--------'--------'-----'----'--------- + * 6 6 2 2 4 2 N */ #include <linux/etherdevice.h> @@ -12,46 +53,104 @@ #define DSA_HLEN 4 -static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) +/** + * enum dsa_cmd - DSA Command + * @DSA_CMD_TO_CPU: Set on packets that were trapped or mirrored to + * the CPU port. This is needed to implement control protocols, + * e.g. STP and LLDP, that must not allow those control packets to + * be switched according to the normal rules. + * @DSA_CMD_FROM_CPU: Used by the CPU to send a packet to a specific + * port, ignoring all the barriers that the switch normally + * enforces (VLANs, STP port states etc.). No source address + * learning takes place. "sudo send packet" + * @DSA_CMD_TO_SNIFFER: Set on the copies of packets that matched some + * user configured ingress or egress monitor criteria. These are + * forwarded by the switch tree to the user configured ingress or + * egress monitor port, which can be set to the CPU port or a + * regular port. If the destination is a regular port, the tag + * will be removed before egressing the port. If the destination + * is the CPU port, the tag will not be removed. + * @DSA_CMD_FORWARD: This tag is used on all bulk traffic passing + * through the switch tree, including the flows that are directed + * towards the CPU. Its device/port tuple encodes the original + * source port on which the packet ingressed. It can also be used + * on transmit by the CPU to defer the forwarding decision to the + * hardware, based on the current config of PVT/VTU/ATU + * etc. Source address learning takes places if enabled on the + * receiving DSA/CPU port. + */ +enum dsa_cmd { + DSA_CMD_TO_CPU = 0, + DSA_CMD_FROM_CPU = 1, + DSA_CMD_TO_SNIFFER = 2, + DSA_CMD_FORWARD = 3 +}; + +/** + * enum dsa_code - TO_CPU Code + * + * @DSA_CODE_MGMT_TRAP: DA was classified as a management + * address. Typical examples include STP BPDUs and LLDP. + * @DSA_CODE_FRAME2REG: Response to a "remote management" request. + * @DSA_CODE_IGMP_MLD_TRAP: IGMP/MLD signaling. + * @DSA_CODE_POLICY_TRAP: Frame matched some policy configuration on + * the device. Typical examples are matching on DA/SA/VID and DHCP + * snooping. + * @DSA_CODE_ARP_MIRROR: The name says it all really. + * @DSA_CODE_POLICY_MIRROR: Same as @DSA_CODE_POLICY_TRAP, but the + * particular policy was set to trigger a mirror instead of a + * trap. + * @DSA_CODE_RESERVED_6: Unused on all devices up to at least 6393X. + * @DSA_CODE_RESERVED_7: Unused on all devices up to at least 6393X. + * + * A 3-bit code is used to relay why a particular frame was sent to + * the CPU. 
We only use this to determine if the packet was mirrored + * or trapped, i.e. whether the packet has been forwarded by hardware + * or not. + * + * This is the superset of all possible codes. Any particular device + * may only implement a subset. + */ +enum dsa_code { + DSA_CODE_MGMT_TRAP = 0, + DSA_CODE_FRAME2REG = 1, + DSA_CODE_IGMP_MLD_TRAP = 2, + DSA_CODE_POLICY_TRAP = 3, + DSA_CODE_ARP_MIRROR = 4, + DSA_CODE_POLICY_MIRROR = 5, + DSA_CODE_RESERVED_6 = 6, + DSA_CODE_RESERVED_7 = 7 +}; + +static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, + u8 extra) { struct dsa_port *dp = dsa_slave_to_port(dev); u8 *dsa_header; - /* - * Convert the outermost 802.1q tag to a DSA tag for tagged - * packets, or insert a DSA tag between the addresses and - * the ethertype field for untagged packets. - */ if (skb->protocol == htons(ETH_P_8021Q)) { - if (skb_cow_head(skb, 0) < 0) - return NULL; + if (extra) { + skb_push(skb, extra); + memmove(skb->data, skb->data + extra, 2 * ETH_ALEN); + } - /* - * Construct tagged FROM_CPU DSA tag from 802.1q tag. - */ - dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x60 | dp->ds->index; + /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */ + dsa_header = skb->data + 2 * ETH_ALEN + extra; + dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index; dsa_header[1] = dp->index << 3; - /* - * Move CFI field from byte 2 to byte 1. - */ + /* Move CFI field from byte 2 to byte 1. */ if (dsa_header[2] & 0x10) { dsa_header[1] |= 0x01; dsa_header[2] &= ~0x10; } } else { - if (skb_cow_head(skb, DSA_HLEN) < 0) - return NULL; - skb_push(skb, DSA_HLEN); + skb_push(skb, DSA_HLEN + extra); + memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN); - memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); - - /* - * Construct untagged FROM_CPU DSA tag. - */ - dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x40 | dp->ds->index; + /* Construct untagged FROM_CPU DSA tag. */ + dsa_header = skb->data + 2 * ETH_ALEN + extra; + dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index; dsa_header[1] = dp->index << 3; dsa_header[2] = 0x00; dsa_header[3] = 0x00; @@ -60,30 +159,60 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, + u8 extra) { + int source_device, source_port; + enum dsa_code code; + enum dsa_cmd cmd; u8 *dsa_header; - int source_device; - int source_port; - if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) - return NULL; - - /* - * The ethertype field is part of the DSA header. - */ + /* The ethertype field is part of the DSA header. */ dsa_header = skb->data - 2; - /* - * Check that frame type is either TO_CPU or FORWARD. - */ - if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) + cmd = dsa_header[0] >> 6; + switch (cmd) { + case DSA_CMD_FORWARD: + skb->offload_fwd_mark = 1; + break; + + case DSA_CMD_TO_CPU: + code = (dsa_header[1] & 0x6) | ((dsa_header[2] >> 4) & 1); + + switch (code) { + case DSA_CODE_FRAME2REG: + /* Remote management is not implemented yet, + * drop. + */ + return NULL; + case DSA_CODE_ARP_MIRROR: + case DSA_CODE_POLICY_MIRROR: + /* Mark mirrored packets to notify any upper + * device (like a bridge) that forwarding has + * already been done by hardware. 
+ */ + skb->offload_fwd_mark = 1; + break; + case DSA_CODE_MGMT_TRAP: + case DSA_CODE_IGMP_MLD_TRAP: + case DSA_CODE_POLICY_TRAP: + /* Traps have, by definition, not been + * forwarded by hardware, so don't mark them. + */ + break; + default: + /* Reserved code, this could be anything. Drop + * seems like the safest option. + */ + return NULL; + } + + break; + + default: return NULL; + } - /* - * Determine source device and port. - */ source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; @@ -91,16 +220,15 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; - /* - * Convert the DSA header to an 802.1q header if the 'tagged' - * bit in the DSA header is set. If the 'tagged' bit is clear, - * delete the DSA header entirely. + /* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q + * tag, and delete the ethertype (extra) if applicable. If the + * 'tagged' bit is cleared; delete the DSA tag, and ethertype + * if applicable. */ if (dsa_header[0] & 0x20) { u8 new_header[4]; - /* - * Insert 802.1q ethertype and copy the VLAN-related + /* Insert 802.1Q ethertype and copy the VLAN-related * fields, but clear the bit that will hold CFI (since * DSA uses that bit location for another purpose). */ @@ -109,16 +237,13 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, new_header[2] = dsa_header[2] & ~0x10; new_header[3] = dsa_header[3]; - /* - * Move CFI bit from its place in the DSA header to - * its 802.1q-designated place. + /* Move CFI bit from its place in the DSA header to + * its 802.1Q-designated place. */ if (dsa_header[1] & 0x01) new_header[2] |= 0x10; - /* - * Update packet checksum if skb is CHECKSUM_COMPLETE. - */ + /* Update packet checksum if skb is CHECKSUM_COMPLETE. */ if (skb->ip_summed == CHECKSUM_COMPLETE) { __wsum c = skb->csum; c = csum_add(c, csum_partial(new_header + 2, 2, 0)); @@ -127,30 +252,101 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, } memcpy(dsa_header, new_header, DSA_HLEN); + + if (extra) + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - extra, + 2 * ETH_ALEN); } else { - /* - * Remove DSA tag and update checksum. 
- */ skb_pull_rcsum(skb, DSA_HLEN); memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - DSA_HLEN, + skb->data - ETH_HLEN - DSA_HLEN - extra, 2 * ETH_ALEN); } - skb->offload_fwd_mark = 1; - return skb; } +#if IS_ENABLED(CONFIG_NET_DSA_TAG_DSA) + +static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + return dsa_xmit_ll(skb, dev, 0); +} + +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + return NULL; + + return dsa_rcv_ll(skb, dev, 0); +} + static const struct dsa_device_ops dsa_netdev_ops = { - .name = "dsa", - .proto = DSA_TAG_PROTO_DSA, - .xmit = dsa_xmit, - .rcv = dsa_rcv, + .name = "dsa", + .proto = DSA_TAG_PROTO_DSA, + .xmit = dsa_xmit, + .rcv = dsa_rcv, .overhead = DSA_HLEN, }; -MODULE_LICENSE("GPL"); +DSA_TAG_DRIVER(dsa_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_DSA); +#endif /* CONFIG_NET_DSA_TAG_DSA */ + +#if IS_ENABLED(CONFIG_NET_DSA_TAG_EDSA) + +#define EDSA_HLEN 8 + +static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + u8 *edsa_header; -module_dsa_tag_driver(dsa_netdev_ops); + skb = dsa_xmit_ll(skb, dev, EDSA_HLEN - DSA_HLEN); + if (!skb) + return NULL; + + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + return skb; +} + +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + return NULL; + + skb_pull_rcsum(skb, EDSA_HLEN - DSA_HLEN); + + return dsa_rcv_ll(skb, dev, EDSA_HLEN - DSA_HLEN); +} + +static const struct dsa_device_ops edsa_netdev_ops = { + .name = "edsa", + .proto = DSA_TAG_PROTO_EDSA, + .xmit = edsa_xmit, + .rcv = edsa_rcv, + .overhead = EDSA_HLEN, +}; + +DSA_TAG_DRIVER(edsa_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA); +#endif /* CONFIG_NET_DSA_TAG_EDSA */ + +static struct dsa_tag_driver *dsa_tag_drivers[] = { +#if IS_ENABLED(CONFIG_NET_DSA_TAG_DSA) + &DSA_TAG_DRIVER_NAME(dsa_netdev_ops), +#endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_EDSA) + &DSA_TAG_DRIVER_NAME(edsa_netdev_ops), +#endif +}; + +module_dsa_tag_drivers(dsa_tag_drivers); + +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c deleted file mode 100644 index 120614240319..000000000000 --- a/net/dsa/tag_edsa.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * net/dsa/tag_edsa.c - Ethertype DSA tagging - * Copyright (c) 2008-2009 Marvell Semiconductor - */ - -#include <linux/etherdevice.h> -#include <linux/list.h> -#include <linux/slab.h> - -#include "dsa_priv.h" - -#define DSA_HLEN 4 -#define EDSA_HLEN 8 - -#define FRAME_TYPE_TO_CPU 0x00 -#define FRAME_TYPE_FORWARD 0x03 - -#define TO_CPU_CODE_MGMT_TRAP 0x00 -#define TO_CPU_CODE_FRAME2REG 0x01 -#define TO_CPU_CODE_IGMP_MLD_TRAP 0x02 -#define TO_CPU_CODE_POLICY_TRAP 0x03 -#define TO_CPU_CODE_ARP_MIRROR 0x04 -#define TO_CPU_CODE_POLICY_MIRROR 0x05 - -static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - u8 *edsa_header; - - /* - * Convert the outermost 802.1q tag to a DSA tag and prepend - * a DSA ethertype field is the packet is tagged, or insert - * a DSA ethertype plus DSA tag between the addresses and the - * current ethertype field if the packet is untagged. 
- */ - if (skb->protocol == htons(ETH_P_8021Q)) { - if (skb_cow_head(skb, DSA_HLEN) < 0) - return NULL; - skb_push(skb, DSA_HLEN); - - memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); - - /* - * Construct tagged FROM_CPU DSA tag from 802.1q tag. - */ - edsa_header = skb->data + 2 * ETH_ALEN; - edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; - edsa_header[1] = ETH_P_EDSA & 0xff; - edsa_header[2] = 0x00; - edsa_header[3] = 0x00; - edsa_header[4] = 0x60 | dp->ds->index; - edsa_header[5] = dp->index << 3; - - /* - * Move CFI field from byte 6 to byte 5. - */ - if (edsa_header[6] & 0x10) { - edsa_header[5] |= 0x01; - edsa_header[6] &= ~0x10; - } - } else { - if (skb_cow_head(skb, EDSA_HLEN) < 0) - return NULL; - skb_push(skb, EDSA_HLEN); - - memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); - - /* - * Construct untagged FROM_CPU DSA tag. - */ - edsa_header = skb->data + 2 * ETH_ALEN; - edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; - edsa_header[1] = ETH_P_EDSA & 0xff; - edsa_header[2] = 0x00; - edsa_header[3] = 0x00; - edsa_header[4] = 0x40 | dp->ds->index; - edsa_header[5] = dp->index << 3; - edsa_header[6] = 0x00; - edsa_header[7] = 0x00; - } - - return skb; -} - -static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) -{ - u8 *edsa_header; - int frame_type; - int code; - int source_device; - int source_port; - - if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) - return NULL; - - /* - * Skip the two null bytes after the ethertype. - */ - edsa_header = skb->data + 2; - - /* - * Check that frame type is either TO_CPU or FORWARD. - */ - frame_type = edsa_header[0] >> 6; - - switch (frame_type) { - case FRAME_TYPE_TO_CPU: - code = (edsa_header[1] & 0x6) | ((edsa_header[2] >> 4) & 1); - - /* - * Mark the frame to never egress on any port of the same switch - * unless it's a trapped IGMP/MLD packet, in which case the - * bridge might want to forward it. - */ - if (code != TO_CPU_CODE_IGMP_MLD_TRAP) - skb->offload_fwd_mark = 1; - - break; - - case FRAME_TYPE_FORWARD: - skb->offload_fwd_mark = 1; - break; - - default: - return NULL; - } - - /* - * Determine source device and port. - */ - source_device = edsa_header[0] & 0x1f; - source_port = (edsa_header[1] >> 3) & 0x1f; - - skb->dev = dsa_master_find_slave(dev, source_device, source_port); - if (!skb->dev) - return NULL; - - /* - * If the 'tagged' bit is set, convert the DSA tag to a 802.1q - * tag and delete the ethertype part. If the 'tagged' bit is - * clear, delete the ethertype and the DSA tag parts. - */ - if (edsa_header[0] & 0x20) { - u8 new_header[4]; - - /* - * Insert 802.1q ethertype and copy the VLAN-related - * fields, but clear the bit that will hold CFI (since - * DSA uses that bit location for another purpose). - */ - new_header[0] = (ETH_P_8021Q >> 8) & 0xff; - new_header[1] = ETH_P_8021Q & 0xff; - new_header[2] = edsa_header[2] & ~0x10; - new_header[3] = edsa_header[3]; - - /* - * Move CFI bit from its place in the DSA header to - * its 802.1q-designated place. - */ - if (edsa_header[1] & 0x01) - new_header[2] |= 0x10; - - skb_pull_rcsum(skb, DSA_HLEN); - - /* - * Update packet checksum if skb is CHECKSUM_COMPLETE. 
- */ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - __wsum c = skb->csum; - c = csum_add(c, csum_partial(new_header + 2, 2, 0)); - c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); - skb->csum = c; - } - - memcpy(edsa_header, new_header, DSA_HLEN); - - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - DSA_HLEN, - 2 * ETH_ALEN); - } else { - /* - * Remove DSA tag and update checksum. - */ - skb_pull_rcsum(skb, EDSA_HLEN); - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - EDSA_HLEN, - 2 * ETH_ALEN); - } - - return skb; -} - -static const struct dsa_device_ops edsa_netdev_ops = { - .name = "edsa", - .proto = DSA_TAG_PROTO_EDSA, - .xmit = edsa_xmit, - .rcv = edsa_rcv, - .overhead = EDSA_HLEN, -}; - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA); - -module_dsa_tag_driver(edsa_netdev_ops); diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 408d4af390a0..2f5bd5e338ab 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -60,13 +60,8 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - int err; u8 *gswip_tag; - err = skb_cow_head(skb, GSWIP_TX_HEADER_LEN); - if (err) - return NULL; - skb_push(skb, GSWIP_TX_HEADER_LEN); gswip_tag = skb->data; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c new file mode 100644 index 000000000000..2061de06eafb --- /dev/null +++ b/net/dsa/tag_hellcreek.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * net/dsa/tag_hellcreek.c - Hirschmann Hellcreek switch tag format handling + * + * Copyright (C) 2019,2020 Linutronix GmbH + * Author Kurt Kanzenbach <kurt@linutronix.de> + * + * Based on tag_ksz.c. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <net/dsa.h> + +#include "dsa_priv.h" + +#define HELLCREEK_TAG_LEN 1 + +static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *tag; + + /* Tag encoding */ + tag = skb_put(skb, HELLCREEK_TAG_LEN); + *tag = BIT(dp->index); + + return skb; +} + +static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt) +{ + /* Tag decoding */ + u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; + unsigned int port = tag[0] & 0x03; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + netdev_warn(dev, "Failed to get source port: %d\n", port); + return NULL; + } + + pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); + + skb->offload_fwd_mark = true; + + return skb; +} + +static const struct dsa_device_ops hellcreek_netdev_ops = { + .name = "hellcreek", + .proto = DSA_TAG_PROTO_HELLCREEK, + .xmit = hellcreek_xmit, + .rcv = hellcreek_rcv, + .overhead = HELLCREEK_TAG_LEN, + .tail_tag = true, +}; + +MODULE_LICENSE("Dual MIT/GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_HELLCREEK); + +module_dsa_tag_driver(hellcreek_netdev_ops); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 0a5aa982c60d..4820dbcedfa2 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -14,46 +14,6 @@ #define KSZ_EGRESS_TAG_LEN 1 #define KSZ_INGRESS_TAG_LEN 1 -static struct sk_buff *ksz_common_xmit(struct sk_buff *skb, - struct net_device *dev, int len) -{ - struct sk_buff *nskb; - int padlen; - - padlen = (skb->len >= ETH_ZLEN) ? 
0 : ETH_ZLEN - skb->len; - - if (skb_tailroom(skb) >= padlen + len) { - /* Let dsa_slave_xmit() free skb */ - if (__skb_put_padto(skb, skb->len + padlen, false)) - return NULL; - - nskb = skb; - } else { - nskb = alloc_skb(NET_IP_ALIGN + skb->len + - padlen + len, GFP_ATOMIC); - if (!nskb) - return NULL; - skb_reserve(nskb, NET_IP_ALIGN); - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, - skb_network_header(skb) - skb->head); - skb_set_transport_header(nskb, - skb_transport_header(skb) - skb->head); - skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - - /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free - * skb - */ - if (skb_put_padto(nskb, nskb->len + padlen)) - return NULL; - - consume_skb(skb); - } - - return nskb; -} - static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int port, unsigned int len) @@ -90,23 +50,18 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; u8 *tag; u8 *addr; - nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN); - if (!nskb) - return NULL; - /* Tag encoding */ - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); *tag = 1 << dp->index; if (is_link_local_ether_addr(addr)) *tag |= KSZ8795_TAIL_TAG_OVERRIDE; - return nskb; + return skb; } static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, @@ -156,18 +111,13 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; __be16 *tag; u8 *addr; u16 val; - nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN); - if (!nskb) - return NULL; - /* Tag encoding */ - tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); val = BIT(dp->index); @@ -176,7 +126,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, *tag = cpu_to_be16(val); - return nskb; + return skb; } static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, @@ -213,24 +163,19 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; u8 *addr; u8 *tag; - nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN); - if (!nskb) - return NULL; - /* Tag encoding */ - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); - addr = skb_mac_header(nskb); + tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); + addr = skb_mac_header(skb); *tag = BIT(dp->index); if (is_link_local_ether_addr(addr)) *tag |= KSZ9893_TAIL_TAG_OVERRIDE; - return nskb; + return skb; } static const struct dsa_device_ops ksz9893_netdev_ops = { diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index ccfb6f641bbf..aa1318dccaf0 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -58,15 +58,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) __be16 *lan9303_tag; u16 tag; - /* insert a special VLAN tag between the MAC addresses - * and the current ethertype field. - */ - if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) { - dev_dbg(&dev->dev, - "Cannot make room for the special tag. 
Dropping packet\n"); - return NULL; - } - /* provide 'LAN9303_TAG_LEN' bytes additional space */ skb_push(skb, LAN9303_TAG_LEN); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 4cdd9cf428fb..38dcdded74c0 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -34,9 +34,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * table with VID. */ if (!skb_vlan_tagged(skb)) { - if (skb_cow_head(skb, MTK_HDR_LEN) < 0) - return NULL; - skb_push(skb, MTK_HDR_LEN); memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); is_vlan_skb = false; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 3b468aca5c53..16a1afd5b8e1 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -143,13 +143,6 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct ocelot_port *ocelot_port; u8 *prefix, *injection; u64 qos_class, rew_op; - int err; - - err = skb_cow_head(skb, OCELOT_TOTAL_TAG_LEN); - if (unlikely(err < 0)) { - netdev_err(netdev, "Cannot make room for tag.\n"); - return NULL; - } ocelot_port = ocelot->ports[dp->index]; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 1b9e8507112b..88181b52f480 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -34,9 +34,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) __be16 *phdr; u16 hdr; - if (skb_cow_head(skb, QCA_HDR_LEN) < 0) - return NULL; - skb_push(skb, QCA_HDR_LEN); memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 3a1cc24a4f0a..5b97ede56a0f 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -13,42 +13,15 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct sk_buff *nskb; - int padlen; u8 *trailer; - /* - * We have to make sure that the trailer ends up as the very - * last 4 bytes of the packet. This means that we have to pad - * the packet to the minimum ethernet frame size, if necessary, - * before adding the trailer. - */ - padlen = 0; - if (skb->len < 60) - padlen = 60 - skb->len; - - nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); - if (!nskb) - return NULL; - skb_reserve(nskb, NET_IP_ALIGN); - - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb_network_header(skb) - skb->head); - skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); - skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - consume_skb(skb); - - if (padlen) { - skb_put_zero(nskb, padlen); - } - - trailer = skb_put(nskb, 4); + trailer = skb_put(skb, 4); trailer[0] = 0x80; trailer[1] = 1 << dp->index; trailer[2] = 0x10; trailer[3] = 0x00; - return nskb; + return skb; } static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index dac65180c4ef..4106373180c6 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -272,7 +272,7 @@ void eth_header_cache_update(struct hh_cache *hh, EXPORT_SYMBOL(eth_header_cache_update); /** - * eth_header_parser_protocol - extract protocol from L2 header + * eth_header_parse_protocol - extract protocol from L2 header * @skb: packet to extract protocol from */ __be16 eth_header_parse_protocol(const struct sk_buff *skb) @@ -523,8 +523,8 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) EXPORT_SYMBOL(eth_platform_get_mac_address); /** - * Obtain the MAC address from an nvmem cell named 'mac-address' associated - * with given device. 
+ * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named + * 'mac-address' associated with given device. * * @dev: Device with which the mac-address cell is associated. * @addrbuf: Buffer to which the MAC address will be copied on success. diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index ec2cd7aab5ad..771688e1b0da 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2433,7 +2433,7 @@ static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt); default: return -EOPNOTSUPP; - }; + } } static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 6d091e419d3e..9c640d670ffe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -149,7 +149,7 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { char name[IFNAMSIZ + 1]; - nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], + nla_strscpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], sizeof(name)); dev = dev_get_by_name(&init_net, name); } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 123a6d39438f..75f67994fc85 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -650,8 +650,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in_device *in_dev; struct ifaddrmsg *ifm; struct in_ifaddr *ifa; - - int err = -EINVAL; + int err; ASSERT_RTNL(); @@ -881,7 +880,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); if (tb[IFA_LABEL]) - nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); + nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1f75dc686b6b..b5400cec4f69 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -973,7 +973,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) char tmp[TCP_CA_NAME_MAX]; bool ecn_ca = false; - nla_strlcpy(tmp, nla, sizeof(tmp)); + nla_strscpy(tmp, nla, sizeof(tmp)); val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) @@ -1641,9 +1641,8 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, break; } - *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); - if (nhc->nhc_flags & RTNH_F_OFFLOAD) - *flags |= RTNH_F_OFFLOAD; + *flags |= (nhc->nhc_flags & + (RTNH_F_ONLINK | RTNH_F_OFFLOAD | RTNH_F_TRAP)); if (!skip_oif && nhc->nhc_dev && nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ffc5332f1390..28117c05dc35 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2100,15 +2100,6 @@ static void __fib_info_notify_update(struct net *net, struct fib_table *tb, rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, KEYLENGTH - fa->fa_slen, tb->tb_id, info, NLM_F_REPLACE); - - /* call_fib_entry_notifiers will be removed when - * in-kernel notifier is implemented and supported - * for nexthop objects - */ - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - n->key, - KEYLENGTH - fa->fa_slen, fa, - NULL); } } } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e70291748889..a68bf4c6fe9b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -920,7 +920,7 @@ static 
const struct net_device_ops ipgre_netdev_ops = { .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; @@ -1275,7 +1275,7 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip_tunnel_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; @@ -1308,7 +1308,7 @@ static const struct net_device_ops erspan_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip_tunnel_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index e25be2d01a7a..7ca338fbe8ba 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -429,15 +429,6 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, } EXPORT_SYMBOL(skb_tunnel_check_pmtu); -/* Often modified stats are per cpu, other are shared (netdev->stats) */ -void ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - netdev_stats_to_stats64(tot, &dev->stats); - dev_fetch_sw_netstats(tot, dev->tstats); -} -EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); - static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { [LWTUNNEL_IP_UNSPEC] = { .strict_start_type = LWTUNNEL_IP_OPTS }, [LWTUNNEL_IP_ID] = { .type = NLA_U64 }, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index b957cbee2cf7..abc171e79d3e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -404,7 +404,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_start_xmit = vti_tunnel_xmit, .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = vti_tunnel_ctl, }; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 561f15b5a944..3cd13e1bc6a7 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1441,7 +1441,7 @@ static int __init ip_auto_config(void) int retries = CONF_OPEN_RETRIES; #endif int err; - unsigned int i; + unsigned int i, count; /* Initialise all name servers and NTP servers to NONE (but only if the * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded, @@ -1575,7 +1575,7 @@ static int __init ip_auto_config(void) if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); /* Name servers (if any): */ - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { + for (i = 0, count = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) { if (i == 0) pr_info(" nameserver%u=%pI4", @@ -1583,12 +1583,14 @@ static int __init ip_auto_config(void) else pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + + count++; } - if (i + 1 == CONF_NAMESERVERS_MAX) + if ((i + 1 == CONF_NAMESERVERS_MAX) && count > 0) pr_cont("\n"); } /* NTP servers (if any): */ - for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + for (i = 0, count = 0; i < CONF_NTP_SERVERS_MAX; i++) { if (ic_ntp_servers[i] != NONE) { if (i == 0) pr_info(" ntpserver%u=%pI4", @@ -1596,8 +1598,10 @@ 
static int __init ip_auto_config(void) else pr_cont(", ntpserver%u=%pI4", i, &ic_ntp_servers[i]); + + count++; } - if (i + 1 == CONF_NTP_SERVERS_MAX) + if ((i + 1 == CONF_NTP_SERVERS_MAX) && count > 0) pr_cont("\n"); } #endif /* !SILENT */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 75d35e76bec2..d5bfa087c23a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -347,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = { .ndo_start_xmit = ipip_tunnel_xmit, .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip_tunnel_ctl, }; diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 3205d5f7c8c9..25ea6ac44db9 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -31,7 +31,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; - nla_strlcpy(tmp, nla, sizeof(tmp)); + nla_strscpy(tmp, nla, sizeof(tmp)); val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) { NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm"); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 93b07739807b..4109055cdea6 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,6 +12,128 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_bridge.h> +static int nf_reject_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + +struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + const struct tcphdr *oth; + struct sk_buff *nskb; + struct iphdr *niph; + struct tcphdr _oth; + + if (!nf_reject_iphdr_validate(oldskb)) + return NULL; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return NULL; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv4.sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v4_tcp_reset); + +struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + __wsum csum; + u8 proto; + + if (!nf_reject_iphdr_validate(oldskb)) + return NULL; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return NULL; + + /* RFC says return as much as we can without exceeding 576 bytes. 
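 * (A 536-byte cap is comfortably conservative here: with the 20-byte
 * IPv4 header and 8-byte ICMP header prepended below, the reply tops
 * out at 564 bytes, safely inside that limit.)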
*/ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return NULL; + + if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len))) + return NULL; + + proto = ip_hdr(oldskb)->protocol; + + if (!skb_csum_unnecessary(oldskb) && + nf_reject_verify_csum(proto) && + nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto)) + return NULL; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + net->ipv4.sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v4_unreach); + const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *_oth, int hook) { @@ -124,7 +246,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (!oth) return; - if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(oldskb)) + if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && + nf_reject_fill_skb_dst(oldskb) < 0) return; if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -193,7 +316,8 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) if (iph->frag_off & htons(IP_OFFSET)) return; - if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(skb_in)) + if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) && + nf_reject_fill_skb_dst(skb_in) < 0) return; if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 0dc43ad28eb9..5e1b22d4f939 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -36,14 +36,145 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_FDB] = { .type = NLA_FLAG }, }; +static bool nexthop_notifiers_is_empty(struct net *net) +{ + return !net->nexthop.notifier_chain.head; +} + +static void +__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, + const struct nexthop *nh) +{ + struct nh_info *nhi = rtnl_dereference(nh->nh_info); + + nh_info->dev = nhi->fib_nhc.nhc_dev; + nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; + if (nh_info->gw_family == AF_INET) + nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; + else if (nh_info->gw_family == AF_INET6) + nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + + nh_info->is_reject = nhi->reject_nh; + nh_info->is_fdb = nhi->fdb_nh; + nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; +} + +static int nh_notifier_single_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); + if (!info->nh) + return -ENOMEM; + + __nh_notifier_single_info_init(info->nh, nh); + + return 0; +} + +static void nh_notifier_single_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh); +} + +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + u16 num_nh = nhg->num_nh; + int i; + + info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), + GFP_KERNEL); + if 
(!info->nh_grp) + return -ENOMEM; + + info->nh_grp->num_nh = num_nh; + info->nh_grp->is_fdb = nhg->fdb_nh; + + for (i = 0; i < num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp->nh_entries[i].id = nhge->nh->id; + info->nh_grp->nh_entries[i].weight = nhge->weight; + __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, + nhge->nh); + } + + return 0; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp); +} + +static int nh_notifier_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + info->id = nh->id; + info->is_grp = nh->is_group; + + if (info->is_grp) + return nh_notifier_grp_info_init(info, nh); + else + return nh_notifier_single_info_init(info, nh); +} + +static void nh_notifier_info_fini(struct nh_notifier_info *info) +{ + if (info->is_grp) + nh_notifier_grp_info_fini(info); + else + nh_notifier_single_info_fini(info); +} + static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, - struct nexthop *nh) + struct nexthop *nh, + struct netlink_ext_ack *extack) { + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; int err; + ASSERT_RTNL(); + + if (nexthop_notifiers_is_empty(net)) + return 0; + + err = nh_notifier_info_init(&info, nh); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); + return err; + } + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, - event_type, nh); + event_type, &info); + nh_notifier_info_fini(&info); + + return notifier_to_errno(err); +} + +static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh, + struct netlink_ext_ack *extack) +{ + struct nh_notifier_info info = { + .net = net, + .extack = extack, + }; + int err; + + err = nh_notifier_info_init(&info, nh); + if (err) + return err; + + err = nb->notifier_call(nb, event_type, &info); + nh_notifier_info_fini(&info); + return notifier_to_errno(err); } @@ -782,9 +913,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; + struct netlink_ext_ack extack; struct nexthop *nh = nhge->nh; struct nh_group *nhg, *newg; - int i, j; + int i, j, err; WARN_ON(!nh); @@ -832,6 +964,10 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, list_del(&nhge->nh_list); nexthop_put(nhge->nh); + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); + if (err) + pr_err("%s\n", extack._msg); + if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } @@ -907,7 +1043,7 @@ static void __remove_nexthop(struct net *net, struct nexthop *nh, static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { - call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); /* remove from the tree */ rb_erase(&nh->rb_node, &net->nexthop.rb_root); @@ -940,13 +1076,17 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, struct netlink_ext_ack *extack) { struct nh_group *oldg, *newg; - int i; + int i, err; if (!new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); @@ -985,31 +1125,54 @@ static int 
replace_nexthop_single(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { + u8 old_protocol, old_nh_flags; struct nh_info *oldi, *newi; + struct nh_grp_entry *nhge; + int err; if (new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); return -EINVAL; } + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); + if (err) + return err; + + /* Hardware flags were set on 'old' as 'new' is not in the red-black + * tree. Therefore, inherit the flags from 'old' to 'new'. + */ + new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); + oldi = rtnl_dereference(old->nh_info); newi = rtnl_dereference(new->nh_info); newi->nh_parent = old; oldi->nh_parent = new; + old_protocol = old->protocol; + old_nh_flags = old->nh_flags; + old->protocol = new->protocol; old->nh_flags = new->nh_flags; rcu_assign_pointer(old->nh_info, newi); rcu_assign_pointer(new->nh_info, oldi); + /* Send a replace notification for all the groups using the nexthop. */ + list_for_each_entry(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, + extack); + if (err) + goto err_notify; + } + /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially * update IPv4 indication in all the groups using the nexthop. */ if (oldi->family == AF_INET && newi->family == AF_INET6) { - struct nh_grp_entry *nhge; - list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; @@ -1020,6 +1183,21 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old, } return 0; + +err_notify: + rcu_assign_pointer(new->nh_info, newi); + rcu_assign_pointer(old->nh_info, oldi); + old->nh_flags = old_nh_flags; + old->protocol = old_protocol; + oldi->nh_parent = old; + newi->nh_parent = new; + list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { + struct nexthop *nhp = nhge->nh_parent; + + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack); + } + call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); + return err; } static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, @@ -1168,7 +1346,11 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - rc = 0; + + rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); + if (rc) + rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); + out: if (!rc) { nh_base_seq_inc(net); @@ -1957,10 +2139,40 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; -int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +static int nexthops_dump(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct rb_root *root = &net->nexthop.rb_root; + struct rb_node *node; + int err = 0; + + for (node = rb_first(root); node; node = rb_next(node)) { + struct nexthop *nh; + + nh = rb_entry(node, struct nexthop, rb_node); + err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, + extack); + if (err) + break; + } + + return err; +} + +int register_nexthop_notifier(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - return blocking_notifier_chain_register(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = nexthops_dump(net, nb, extack); + if (err) + goto unlock; + err = 
blocking_notifier_chain_register(&net->nexthop.notifier_chain, + nb); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(register_nexthop_notifier); @@ -1971,6 +2183,27 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) } EXPORT_SYMBOL(unregister_nexthop_notifier); +void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) +{ + struct nexthop *nexthop; + + rcu_read_lock(); + + nexthop = nexthop_find_by_id(net, id); + if (!nexthop) + goto out; + + nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); + if (offload) + nexthop->nh_flags |= RTNH_F_OFFLOAD; + if (trap) + nexthop->nh_flags |= RTNH_F_TRAP; + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL(nexthop_set_hw_flags); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8d5e1695b9aa..63cd370ea29d 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -167,6 +167,7 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI), + SNMP_MIB_ITEM("MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dc2a399cd9f4..c962f0d96d8d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1741,7 +1741,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1857,8 +1857,8 @@ static int __mkroute_input(struct sk_buff *skb, } rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2227,7 +2227,7 @@ local_input: rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2450,8 +2450,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); @@ -2872,6 +2872,9 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; + if (rt->dst.lwtstate && + lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rt->dst.tclassid && nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b2bc3d7fe9e8..b285b338a019 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -954,7 +954,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) * importantly be able to generate EPOLLOUT for Edge Trigger epoll() * users. 
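 * The function loses its static qualifier below so that it can be
 * reused outside this file; the MPTCP transmit path, which builds on
 * the tcp_build_frag() helper introduced here, is the likely consumer.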
*/ -static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) { if (skb && !skb->len) { tcp_unlink_write_queue(skb, sk); @@ -964,6 +964,68 @@ static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) } } +struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); + bool can_coalesce; + int copy, i; + + if (!skb || (copy = size_goal - skb->len) <= 0 || + !tcp_skb_can_collapse_to(skb)) { +new_segment: + if (!sk_stream_memory_free(sk)) + return NULL; + + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, + tcp_rtx_and_write_queues_empty(sk)); + if (!skb) + return NULL; + +#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif + skb_entail(sk, skb); + copy = size_goal; + } + + if (copy > *size) + copy = *size; + + i = skb_shinfo(skb)->nr_frags; + can_coalesce = skb_can_coalesce(skb, i, page, offset); + if (!can_coalesce && i >= sysctl_max_skb_frags) { + tcp_mark_push(tp, skb); + goto new_segment; + } + if (!sk_wmem_schedule(sk, copy)) + return NULL; + + if (can_coalesce) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + get_page(page); + skb_fill_page_desc(skb, i, page, offset, copy); + } + + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk_wmem_queued_add(sk, copy); + sk_mem_charge(sk, copy); + skb->ip_summed = CHECKSUM_PARTIAL; + WRITE_ONCE(tp->write_seq, tp->write_seq + copy); + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + *size = copy; + return skb; +} + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -999,60 +1061,13 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, goto out_err; while (size > 0) { - struct sk_buff *skb = tcp_write_queue_tail(sk); - int copy, i; - bool can_coalesce; + struct sk_buff *skb; + size_t copy = size; - if (!skb || (copy = size_goal - skb->len) <= 0 || - !tcp_skb_can_collapse_to(skb)) { -new_segment: - if (!sk_stream_memory_free(sk)) - goto wait_for_space; - - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, - tcp_rtx_and_write_queues_empty(sk)); - if (!skb) - goto wait_for_space; - -#ifdef CONFIG_TLS_DEVICE - skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); -#endif - skb_entail(sk, skb); - copy = size_goal; - } - - if (copy > size) - copy = size; - - i = skb_shinfo(skb)->nr_frags; - can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= sysctl_max_skb_frags) { - tcp_mark_push(tp, skb); - goto new_segment; - } - if (!sk_wmem_schedule(sk, copy)) + skb = tcp_build_frag(sk, size_goal, flags, page, offset, ©); + if (!skb) goto wait_for_space; - if (can_coalesce) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - get_page(page); - skb_fill_page_desc(skb, i, page, offset, copy); - } - - if (!(flags & MSG_NO_SHARED_FRAGS)) - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; - - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; - sk_wmem_queued_add(sk, copy); - sk_mem_charge(sk, copy); - skb->ip_summed = CHECKSUM_PARTIAL; - WRITE_ONCE(tp->write_seq, tp->write_seq + copy); - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); - if (!copied) TCP_SKB_CB(skb)->tcp_flags &= 
~TCPHDR_PSH; @@ -2405,13 +2420,12 @@ bool tcp_check_oom(struct sock *sk, int shift) return too_many_orphans || out_of_socket_memory; } -void tcp_close(struct sock *sk, long timeout) +void __tcp_close(struct sock *sk, long timeout) { struct sk_buff *skb; int data_was_unread = 0; int state; - lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TCP_LISTEN) { @@ -2575,6 +2589,12 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); +} + +void tcp_close(struct sock *sk, long timeout) +{ + lock_sock(sk); + __tcp_close(sk, timeout); release_sock(sk); sock_put(sk); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 389d1b340248..fb3a7750f623 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2546,7 +2546,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) * 1) If the packets in flight is larger than ssthresh, PRR spreads the * cwnd reductions across a full RTT. * 2) Otherwise PRR uses packet conservation to send as much as delivered. - * But when the retransmits are acked without further losses, PRR + * But when SND_UNA is acked without further losses, * slow starts cwnd up to ssthresh to speed up the recovery. */ static void tcp_init_cwnd_reduction(struct sock *sk) @@ -2563,7 +2563,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tcp_ecn_queue_cwr(tp); } -void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag) +void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; @@ -2577,8 +2577,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag) u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + tp->prior_cwnd - 1; sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; - } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) == - FLAG_RETRANS_DATA_ACKED) { + } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) { sndcnt = min_t(int, delta, max_t(int, tp->prr_delivered - tp->prr_out, newly_acked_sacked) + 1); @@ -3419,7 +3418,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ - tcp_cwnd_reduction(sk, acked_sacked, flag); + tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag); } else if (tcp_may_raise_cwnd(sk, flag)) { /* Advance cwnd if state allows */ tcp_cong_avoid(sk, ack, acked_sacked); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7352c097ae48..c2d5132c523c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2740,6 +2740,20 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +/* @wake is one when sk_stream_write_space() calls us. + * This sends EPOLLOUT only if notsent_bytes is half the limit. + * This mimics the strategy used in sock_def_write_space(). + */ +bool tcp_stream_memory_free(const struct sock *sk, int wake) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 notsent_bytes = READ_ONCE(tp->write_seq) - + READ_ONCE(tp->snd_nxt); + + return (notsent_bytes << wake) < tcp_notsent_lowat(tp); +} +EXPORT_SYMBOL(tcp_stream_memory_free); + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 8c643a4ffad1..e6459537d4d2 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -89,6 +89,7 @@ struct lp { /** * tcp_lp_init + * @sk: socket to initialize congestion control algorithm for * * Init all required variables. * Clone the handling from Vegas module implementation. 
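 *
 * None of these hooks run until the ops structure is registered with
 * the congestion-control core; a minimal registration sketch (assuming
 * the module's usual tcp_congestion_ops instance, here called tcp_lp)
 * looks like:
 *
 *	static int __init tcp_lp_register(void)
 *	{
 *		BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
 *		return tcp_register_congestion_control(&tcp_lp);
 *	}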
@@ -111,6 +112,7 @@ static void tcp_lp_init(struct sock *sk) /** * tcp_lp_cong_avoid + * @sk: socket to avoid congesting * * Implementation of cong_avoid. * Will only call newReno CA when away from inference. @@ -126,6 +128,7 @@ static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) /** * tcp_lp_remote_hz_estimator + * @sk: socket which needs an estimate for the remote HZs * * Estimate remote HZ. * We keep on updating the estimated value, where original TCP-LP @@ -176,6 +179,7 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) /** * tcp_lp_owd_calculator + * @sk: socket to calculate one way delay for * * Calculate one way delay (in relative format). * Original implement OWD as minus of remote time difference to local time @@ -210,6 +214,8 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) /** * tcp_lp_rtt_sample + * @sk: socket to add a rtt sample to + * @rtt: round trip time, which is ignored! * * Implementation of rtt_sample. * Will take the following action, @@ -254,6 +260,7 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) /** * tcp_lp_pkts_acked + * @sk: socket requiring congestion avoidance calculations * * Implementation of pkts_acked. * Deal with active drop under Early Congestion Indication. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bf48cd73e967..99905bc01d40 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1038,9 +1038,9 @@ static void tcp_tsq_handler(struct sock *sk) * transferring tsq->head because tcp_wfree() might * interrupt us (non NAPI drivers) */ -static void tcp_tasklet_func(unsigned long data) +static void tcp_tasklet_func(struct tasklet_struct *t) { - struct tsq_tasklet *tsq = (struct tsq_tasklet *)data; + struct tsq_tasklet *tsq = from_tasklet(tsq, t, tasklet); LIST_HEAD(list); unsigned long flags; struct list_head *q, *n; @@ -1125,9 +1125,7 @@ void __init tcp_tasklet_init(void) struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i); INIT_LIST_HEAD(&tsq->head); - tasklet_init(&tsq->tasklet, - tcp_tasklet_func, - (unsigned long)tsq); + tasklet_setup(&tsq->tasklet, tcp_tasklet_func); } } @@ -1569,6 +1567,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (!buff) return -ENOMEM; /* We'll just try again later. 
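 * (-ENOMEM is transient from the caller's point of view: the
 * retransmit timer will fire and the fragmentation attempt is
 * simply repeated.)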
*/ skb_copy_decrypted(buff, skb); + mptcp_skb_ext_copy(buff, skb); sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); @@ -2123,6 +2122,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (unlikely(!buff)) return -ENOMEM; skb_copy_decrypted(buff, skb); + mptcp_skb_ext_copy(buff, skb); sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); @@ -2393,6 +2393,7 @@ static int tcp_mtu_probe(struct sock *sk) skb = tcp_send_head(sk); skb_copy_decrypted(nskb, skb); + mptcp_skb_ext_copy(nskb, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index f65a3ddd0d58..177307a3081f 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -153,6 +153,7 @@ void tcp_rack_reo_timeout(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout, prior_inflight; + u32 lost = tp->lost; prior_inflight = tcp_packets_in_flight(tp); tcp_rack_detect_loss(sk, &timeout); @@ -160,7 +161,7 @@ void tcp_rack_reo_timeout(struct sock *sk) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { tcp_enter_recovery(sk, false); if (!inet_csk(sk)->icsk_ca_ops->cong_control) - tcp_cwnd_reduction(sk, 1, 0); + tcp_cwnd_reduction(sk, 1, tp->lost - lost, 0); } tcp_xmit_retransmit_queue(sk); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 09f0a23d1a01..a3f105227ccc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -541,7 +541,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, inet_sdif(skb), udptable, skb); } -struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct iphdr *iph = ip_hdr(skb); @@ -550,7 +550,6 @@ struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, iph->daddr, dport, inet_iif(skb), inet_sdif(skb), &udp_table, NULL); } -EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); /* Must be called under rcu_read_lock(). * Does increment socket refcount. 
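 * A minimal caller sketch (hypothetical) therefore pairs the lookup
 * with sock_put() once it is done with the socket:
 *
 *	rcu_read_lock();
 *	sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, dif);
 *	rcu_read_unlock();
 *	if (sk) {
 *		... use the socket ...
 *		sock_put(sk);
 *	}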
@@ -702,7 +701,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); - if (!sk) { + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udp_encap_needed_key)) { @@ -874,7 +873,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct udphdr *uh; - int err = 0; + int err; int is_udplite = IS_UDPLITE(sk); int offset = skb_transport_offset(skb); int len = skb->len - offset; @@ -2038,6 +2037,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rc == -ENOMEM) UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + else + UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, + is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); trace_udp_fail_queue_rcv_skb(rc, sk); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 1dbece34496e..b2cee9a307d4 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -30,7 +30,7 @@ static int udp_dump_one(struct udp_table *tbl, const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; - int err = -EINVAL; + int err; struct sock *sk = NULL; struct sk_buff *rep; struct net *net = sock_net(in_skb->sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index c62805cd3131..ff39e94781bf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -49,6 +49,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb_set_transport_header(skb, skb_inner_transport_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; @@ -67,6 +68,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); features &= skb->dev->hw_enc_features; + /* CRC checksum can't be handled by HW when it's a UDP tunneling packet. */ + features &= ~NETIF_F_SCTP_CRC; /* The only checksum offload we care about from here on out is the * outer one so strip the existing checksum feature flags and @@ -564,8 +567,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, { __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - int err = -ENOSYS; struct sock *sk; + int err; uh->len = newlen; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8b6eb384bac7..eff2cacd5209 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1997,6 +1997,7 @@ EXPORT_SYMBOL(ipv6_chk_prefix); * ipv6_dev_find - find the first device with a given source address. * @net: the net namespace * @addr: the source address + * @dev: used to find the L3 domain of interest * * The caller should be protected by RCU, or RTNL. */ diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 78f766019b7e..51184a70ac7e 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -423,7 +423,7 @@ static void calipso_doi_free_rcu(struct rcu_head *entry) /** * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine * @doi: the DOI value - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes a DOI definition from the CALIPSO engine. 
The NetLabel routines will @@ -1226,7 +1226,7 @@ static int calipso_req_setattr(struct request_sock *req, /** * calipso_req_delattr - Delete the CALIPSO option from a request socket - * @reg: the request socket + * @req: the request socket * * Description: * Removes the CALIPSO option from a request socket, if present. diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 374105e4394f..6126f8bf94b3 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -906,11 +906,6 @@ void ipv6_exthdrs_exit(void) /* * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input(). */ -static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) -{ - return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev); -} - static inline struct net *ipv6_skb_net(struct sk_buff *skb) { return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 931b186d2e48..8cf659994412 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1391,7 +1391,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1828,7 +1828,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1896,7 +1896,7 @@ static const struct net_device_ops ip6erspan_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 648db3fe508f..a7950baa05e5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -94,36 +94,6 @@ static inline int ip6_tnl_mpls_supported(void) return IS_ENABLED(CONFIG_MPLS); } -static struct net_device_stats *ip6_get_stats(struct net_device *dev) -{ - struct pcpu_sw_netstats tmp, sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - unsigned int start; - const struct pcpu_sw_netstats *tstats = - per_cpu_ptr(dev->tstats, i); - - do { - start = u64_stats_fetch_begin_irq(&tstats->syncp); - tmp.rx_packets = tstats->rx_packets; - tmp.rx_bytes = tstats->rx_bytes; - tmp.tx_packets = tstats->tx_packets; - tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); - - sum.rx_packets += tmp.rx_packets; - sum.rx_bytes += tmp.rx_bytes; - sum.tx_packets += tmp.tx_packets; - sum.tx_bytes += tmp.tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_ip6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -204,6 +174,7 @@ ip6_tnl_lookup(struct net *net, int link, /** * ip6_tnl_bucket - get head of list matching given tunnel parameters + * @ip6n: the private data for ip6_tunnel in the netns * @p: parameters containing tunnel end-points * * Description: @@ -230,6 +201,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) /** * 
ip6_tnl_link - add tunnel to hash table + * @ip6n: the private data for ip6_tunnel in the netns * @t: tunnel to be added **/ @@ -246,6 +218,7 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) /** * ip6_tnl_unlink - remove tunnel from hash table + * @ip6n: the private data for ip6_tunnel in the netns * @t: tunnel to be removed **/ @@ -417,6 +390,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) /** * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option * @skb: received socket buffer + * @raw: the ICMPv6 error message data * * Return: * 0 if none was found, @@ -485,14 +459,9 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); -/** - * ip6_tnl_err - tunnel error handler - * - * Description: - * ip6_tnl_err() should handle errors in the tunnel according - * to the specifications in RFC 2473. - **/ - +/* ip6_tnl_err() should handle errors in the tunnel according to the + * specifications in RFC 2473. + */ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) @@ -1835,7 +1804,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_start_xmit = ip6_tnl_start_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, - .ndo_get_stats = ip6_get_stats, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5f9c4fdc120d..0225fd694192 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -125,6 +125,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, /** * vti6_tnl_bucket - get head of list matching given tunnel parameters + * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: @@ -889,7 +890,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 8cd2782a31e4..6c8604390266 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -548,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage *p) + struct sockaddr_storage __user *p) { int err, i, count, copycount; const struct in6_addr *group; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4aef6baaa55e..aa35e6e37c1f 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,6 +12,140 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> +static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto = ip6h->nexthdr; + + if (skb_csum_unnecessary(skb)) + return true; + + if (ip6h->payload_len && + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + + ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + if (!nf_reject_verify_csum(proto)) + return true; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +static int nf_reject_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct 
ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + +struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nf_reject_ip6hdr_validate(oldskb)) + return NULL; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return NULL; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v6_tcp_reset); + +struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + + if (!nf_reject_ip6hdr_validate(oldskb)) + return NULL; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return NULL; + + if (!nf_reject_v6_csum_ok(oldskb, hook)) + return NULL; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return NULL; + + nskb->dev = (struct net_device *)dev; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + skb_put_data(nskb, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + return nskb; +} +EXPORT_SYMBOL_GPL(nf_reject_skb_v6_unreach); + const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, unsigned int *otcplen, int hook) @@ -170,7 +304,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; - if (hook == NF_INET_PRE_ROUTING) { + if (hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) { nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (!dst) return; @@ -268,7 +402,8 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - if (hooknum == NF_INET_PRE_ROUTING && nf_reject6_fill_skb_dst(skb_in)) + if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) && + nf_reject6_fill_skb_dst(skb_in) < 0) return; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bbff3e02e302..d6306aa46bb1 100644 --- a/net/ipv6/proc.c 
+++ b/net/ipv6/proc.c @@ -126,6 +126,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), + SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; @@ -137,6 +138,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e0ce7af8234..f91a689edafd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6039,11 +6039,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, struct sk_buff *skb; int err = -ENOBUFS; - /* call_fib6_entry_notifiers will be removed when in-kernel notifier - * is implemented and supported for nexthop objects - */ - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL); - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c index 307f336b5353..488aec9e1a74 100644 --- a/net/ipv6/rpl.c +++ b/net/ipv6/rpl.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Authors: * (C) 2020 Alexander Aring <alex.aring@gmail.com> */ diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 5fdf3ebb953f..233da43dd8d7 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Authors: * (C) 2020 Alexander Aring <alex.aring@gmail.com> */ diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5e7983cb6154..2da0ee703779 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1395,7 +1395,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, - .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29d9691359b9..9008f5796ad4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -276,7 +276,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, inet6_sdif(skb), udptable, skb); } -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -285,7 +285,6 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, &iph->daddr, dport, inet6_iif(skb), inet6_sdif(skb), &udp_table, NULL); } -EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); /* Must be called under rcu_read_lock(). * Does increment socket refcount. 
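The udp.c hunk above const-qualifies udp6_lib_lookup_skb() and drops its EXPORT_SYMBOL_GPL, presumably because no module callers remain. A minimal sketch of what the const-correct signature buys a caller; peek_udp6_sock() is a hypothetical helper, not part of the patch:

	#include <net/udp.h>

	/* Hypothetical read-only RX helper: with the const-qualified
	 * lookup it can pass its skb straight through without casting
	 * away constness.
	 */
	static struct sock *peek_udp6_sock(const struct sk_buff *skb)
	{
		const struct udphdr *uh = udp_hdr(skb);

		return udp6_lib_lookup_skb(skb, uh->source, uh->dest);
	}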
@@ -560,7 +559,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk) { + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { @@ -637,6 +636,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rc == -ENOMEM) UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + else + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_MEMERRORS, is_udplite); UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index f9e888d1b9af..c7bd7b1a04c1 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -28,10 +28,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int tnl_hlen; int err; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); @@ -48,6 +44,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) return __udp_gso_segment(skb, features); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 864326f150e2..ad7730b68772 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -241,6 +241,7 @@ EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); * L3 master device * @net: network namespace for device index lookup * @fl: flow struct + * @arg: store the table the rule matched with here */ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 1144cda2a0fc..912aa9bd5e29 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -909,6 +909,8 @@ static void llc_sk_init(struct sock *sk) * @net: network namespace * @family: upper layer protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? * * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7276e66ae435..454432ced0c9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2708,16 +2708,6 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, return 0; } -static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev, - const u8 *addr) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - memcpy(&sdata->u.wds.remote_addr, addr, ETH_ALEN); - - return 0; -} - static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -4138,7 +4128,6 @@ const struct cfg80211_ops mac80211_config_ops = { .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, - .set_wds_peer = ieee80211_set_wds_peer, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 8f48aff74c7b..b6c80a45b9f5 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -275,11 +275,11 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: width = vif->bss_conf.chandef.width; break; + case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_MONITOR: @@ -743,7 +743,6 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, continue; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: break; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index fe8a7a87e513..9fc8ce214322 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -574,9 +574,6 @@ static ssize_t ieee80211_if_parse_tsf( IEEE80211_IF_FILE_RW(tsf); -/* WDS attributes */ -IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); - #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -701,11 +698,6 @@ static void add_ibss_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(tsf, 0600); } -static void add_wds_files(struct ieee80211_sub_if_data *sdata) -{ - DEBUGFS_ADD(peer); -} - #ifdef CONFIG_MAC80211_MESH static void add_mesh_files(struct ieee80211_sub_if_data *sdata) @@ -805,9 +797,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_AP_VLAN: add_vlan_files(sdata); break; - case NL80211_IFTYPE_WDS: - add_wds_files(sdata); - break; default: break; } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 829dcad69c2c..6a51b8b58f9e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -274,7 +274,7 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf, "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n", q_depth[0], q_depth[1], q_depth[2], q_depth[3], q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1], - q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]), + q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]); rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a21226fb518..cde2e3f4fbcd 100644 --- a/net/mac80211/ieee80211_i.h +++ 
b/net/mac80211/ieee80211_i.h @@ -311,11 +311,6 @@ struct ieee80211_if_ap { bool multicast_to_unicast; }; -struct ieee80211_if_wds { - struct sta_info *sta; - u8 remote_addr[ETH_ALEN]; -}; - struct ieee80211_if_vlan { struct list_head list; /* write-protected with RTNL and local->mtx */ @@ -985,7 +980,6 @@ struct ieee80211_sub_if_data { union { struct ieee80211_if_ap ap; - struct ieee80211_if_wds wds; struct ieee80211_if_vlan vlan; struct ieee80211_if_managed mgd; struct ieee80211_if_ibss ibss; @@ -1795,7 +1789,7 @@ static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); -void ieee80211_tx_pending(unsigned long data); +void ieee80211_tx_pending(struct tasklet_struct *t); netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, @@ -2146,7 +2140,7 @@ void ieee80211_txq_remove_vlan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, struct txq_info *txqi); -void ieee80211_wake_txqs(unsigned long data); +void ieee80211_wake_txqs(struct tasklet_struct *t); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1be775979132..f5d4ceb72882 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -230,10 +230,6 @@ static inline int identical_mac_addr_allowed(int type1, int type2) type2 == NL80211_IFTYPE_MONITOR || type1 == NL80211_IFTYPE_P2P_DEVICE || type2 == NL80211_IFTYPE_P2P_DEVICE || - (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || - (type1 == NL80211_IFTYPE_WDS && - (type2 == NL80211_IFTYPE_WDS || - type2 == NL80211_IFTYPE_AP)) || (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) || (type1 == NL80211_IFTYPE_AP_VLAN && (type2 == NL80211_IFTYPE_AP || @@ -417,15 +413,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * In WDS mode a station must exist here and be flushed, for - * AP_VLANs stations may exist since there's nothing else that + * For AP_VLANs stations may exist since there's nothing else that * would have removed them, but in other modes there shouldn't * be any stations. */ flushed = sta_info_flush(sdata); - WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || - (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1))); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0); /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) @@ -552,8 +545,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * When we get here, the interface is marked down. 
* Free the remaining keys, if there are any * (which can happen in AP mode if userspace sets - * keys before the interface is operating, and maybe - * also in WDS mode) + * keys before the interface is operating) * * Force the key freeing to always synchronize_net() * to wait for the RX path in case it is using this @@ -1020,16 +1012,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct net_device *dev = wdev->netdev; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; u32 changed = 0; int res; u32 hw_reconf_flags = 0; switch (sdata->vif.type) { - case NL80211_IFTYPE_WDS: - if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - break; case NL80211_IFTYPE_AP_VLAN: { struct ieee80211_sub_if_data *master; @@ -1078,6 +1065,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: /* cannot happen */ WARN_ON(1); break; @@ -1196,7 +1184,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_OCB: netif_carrier_off(dev); break; - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: break; @@ -1218,28 +1205,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) set_bit(SDATA_STATE_RUNNING, &sdata->state); switch (sdata->vif.type) { - case NL80211_IFTYPE_WDS: - /* Create STA entry for the WDS peer */ - sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, - GFP_KERNEL); - if (!sta) { - res = -ENOMEM; - goto err_del_interface; - } - - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); - - res = sta_info_insert(sta); - if (res) { - /* STA has been freed */ - goto err_del_interface; - } - - rate_control_rate_init(sta); - netif_carrier_on(dev); - break; case NL80211_IFTYPE_P2P_DEVICE: rcu_assign_pointer(local->p2p_sdata, sdata); break; @@ -1356,6 +1321,7 @@ static void ieee80211_iface_work(struct work_struct *work) while ((skb = skb_dequeue(&sdata->skb_queue))) { struct ieee80211_mgmt *mgmt = (void *)skb->data; + kcov_remote_start_common(skb_get_kcov_handle(skb)); if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; @@ -1465,6 +1431,7 @@ static void ieee80211_iface_work(struct work_struct *work) } kfree_skb(skb); + kcov_remote_stop(); } /* then other type-dependent work */ @@ -1574,9 +1541,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->u.mntr.flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; - case NL80211_IFTYPE_WDS: - sdata->vif.bss_conf.bssid = NULL; - break; case NL80211_IFTYPE_NAN: idr_init(&sdata->u.nan.function_inst_ids); spin_lock_init(&sdata->u.nan.func_lock); @@ -1587,6 +1551,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); break; @@ -1631,9 +1596,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_OCB: /* * Could probably support everything - * but WDS here (WDS do_open can fail - * under memory pressure, which this - * code isn't prepared to handle). + * but here. 
*/ break; case NL80211_IFTYPE_P2P_CLIENT: @@ -1726,7 +1689,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, case NL80211_IFTYPE_MONITOR: /* doesn't matter */ break; - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: /* match up with an AP interface */ list_for_each_entry(sdata, &local->interfaces, list) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 523380aed92e..dee88ec566ad 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -220,9 +220,9 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_ERP_SLOT; } -static void ieee80211_tasklet_handler(unsigned long data) +static void ieee80211_tasklet_handler(struct tasklet_struct *t) { - struct ieee80211_local *local = (struct ieee80211_local *) data; + struct ieee80211_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -733,16 +733,12 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, skb_queue_head_init(&local->pending[i]); atomic_set(&local->agg_queue_stop[i], 0); } - tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, - (unsigned long)local); + tasklet_setup(&local->tx_pending_tasklet, ieee80211_tx_pending); if (ops->wake_tx_queue) - tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs, - (unsigned long)local); + tasklet_setup(&local->wake_txqs_tasklet, ieee80211_wake_txqs); - tasklet_init(&local->tasklet, - ieee80211_tasklet_handler, - (unsigned long) local); + tasklet_setup(&local->tasklet, ieee80211_tasklet_handler); skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); @@ -935,14 +931,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; } } else { - /* - * WDS is currently prohibited when channel contexts are used - * because there's no clear definition of which channel WDS - * type interfaces use - */ - if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)) - return -EINVAL; - /* DFS is not supported with multi-channel combinations yet */ for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ce5825d6f1d1..97095b7c9c64 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -667,6 +667,35 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) } } +static void +ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, + u8 *ie, u8 ie_len) +{ + struct ieee80211_supported_band *sband; + const u8 *cap; + const struct ieee80211_he_operation *he_oper = NULL; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + + if (!ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT) || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return; + + sdata->vif.bss_conf.he_support = true; + + cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); + if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3])) + he_oper = (void *)(cap + 3); + + if (he_oper) + sdata->vif.bss_conf.he_oper.params = + __le32_to_cpu(he_oper->he_oper_params); +} + /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame * @hdr: 802.11 frame header @@ -943,6 +972,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) bcn->tail_len = skb->len; memcpy(bcn->tail, skb->data, 
bcn->tail_len); + ieee80211_mesh_update_bss_params(sdata, bcn->tail, bcn->tail_len); bcn->meshconf = (struct ieee80211_meshconf_ie *) (bcn->tail + ifmsh->meshconf_offset); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 38c45e1dafd8..ae378a41c927 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -150,21 +150,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) case NL80211_IFTYPE_STATION: ieee80211_mgd_quiesce(sdata); break; - case NL80211_IFTYPE_WDS: - /* tear down aggregation sessions and remove STAs */ - mutex_lock(&local->sta_mtx); - sta = sdata->u.wds.sta; - if (sta && sta->uploaded) { - enum ieee80211_sta_state state; - - state = sta->sta_state; - for (; state > IEEE80211_STA_NOTEXIST; state--) - WARN_ON(drv_sta_state(local, sta->sdata, - sta, state, - state - 1)); - } - mutex_unlock(&local->sta_mtx); - break; default: break; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e2e5a406d58..062c2b45584e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1477,7 +1477,6 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && - rx->sdata->vif.type != NL80211_IFTYPE_WDS && rx->sdata->vif.type != NL80211_IFTYPE_OCB && (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))) { /* @@ -4080,10 +4079,6 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; return true; - case NL80211_IFTYPE_WDS: - if (bssid || !ieee80211_is_data(hdr->frame_control)) - return false; - return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2); case NL80211_IFTYPE_P2P_DEVICE: return ieee80211_is_public_action(hdr, skb->len) || ieee80211_is_probe_req(hdr->frame_control) || @@ -4742,6 +4737,8 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, status->rx_flags = 0; + kcov_remote_start_common(skb_get_kcov_handle(skb)); + /* * Frames with failed FCS/PLCP checksum are not returned, * all other frames are returned without radiotap header @@ -4749,15 +4746,15 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * Also, frames with less than 16 bytes are dropped. 
*/ skb = ieee80211_rx_monitor(local, skb, rate); - if (!skb) - return; - - ieee80211_tpt_led_trig_rx(local, - ((struct ieee80211_hdr *)skb->data)->frame_control, - skb->len); + if (skb) { + ieee80211_tpt_led_trig_rx(local, + ((struct ieee80211_hdr *)skb->data)->frame_control, + skb->len); - __ieee80211_rx_handle_packet(hw, pubsta, skb, list); + __ieee80211_rx_handle_packet(hw, pubsta, skb, list); + } + kcov_remote_stop(); return; drop: kfree_skb(skb); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 56a4d0d20a26..01eb08527817 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -319,9 +319,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type == NL80211_IFTYPE_OCB) return TX_CONTINUE; - if (tx->sdata->vif.type == NL80211_IFTYPE_WDS) - return TX_CONTINUE; - if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; @@ -2113,6 +2110,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, info->flags |= IEEE80211_TX_CTL_NO_ACK; if (txflags & IEEE80211_RADIOTAP_F_TX_NOSEQNO) info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO; + if (txflags & IEEE80211_RADIOTAP_F_TX_ORDER) + info->control.flags |= + IEEE80211_TX_CTRL_DONT_REORDER; break; case IEEE80211_RADIOTAP_RATE: @@ -2279,11 +2279,13 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, payload[7]); } - /* - * Initialize skb->priority for QoS frames. This is put in the TID field - * of the frame before passing it to the driver. + /* Initialize skb->priority for QoS frames. If the DONT_REORDER flag + * is set, stick to the default value for skb->priority to assure + * frames injected with this flag are not reordered relative to each + * other. */ - if (ieee80211_is_data_qos(hdr->frame_control)) { + if (ieee80211_is_data_qos(hdr->frame_control) && + !(info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) { u8 *p = ieee80211_get_qos_ctl(hdr); skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; } @@ -2295,8 +2297,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, * we handle as though they are non-injected frames. * This code here isn't entirely correct, the local MAC address * isn't always enough to find the interface to use; for proper - * VLAN/WDS support we will need a different mechanism (which - * likely isn't going to be monitor interfaces). + * VLAN support we have an nl80211-based mechanism. * * This is necessary, for example, for old hostapd versions that * don't use nl80211-based management TX/RX. 
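The IEEE80211_TX_CTRL_DONT_REORDER plumbing in the tx.c hunk above is driven from userspace through a radiotap TX-flags word on injected frames. A rough injector-side sketch, assuming the standard radiotap layout (present bit 15 = TX flags), the flag value 0x0020 for IEEE80211_RADIOTAP_F_TX_ORDER, and a little-endian host; the struct and function names are illustrative, not from the patch:

	#include <stdint.h>
	#include <string.h>

	/* Hypothetical radiotap header prepended to an injected frame;
	 * all radiotap fields are little endian on the wire.
	 */
	struct inject_radiotap_hdr {
		uint8_t  it_version;   /* always 0 */
		uint8_t  it_pad;
		uint16_t it_len;       /* total radiotap header length */
		uint32_t it_present;   /* bit 15: TX-flags field follows */
		uint16_t tx_flags;     /* assumed 0x0020 = keep ordering */
	} __attribute__((packed));

	static void fill_inject_hdr(struct inject_radiotap_hdr *h)
	{
		memset(h, 0, sizeof(*h));
		h->it_len = sizeof(*h);        /* 10 bytes */
		h->it_present = 1u << 15;      /* IEEE80211_RADIOTAP_TX_FLAGS */
		h->tx_flags = 0x0020;          /* IEEE80211_RADIOTAP_F_TX_ORDER */
	}

mac80211 then keeps such frames on a single queue (see the __ieee80211_select_queue() change below) instead of classifying them by TID, so injected frames carrying this flag are not reordered relative to each other.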
@@ -2307,8 +2308,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (!ieee80211_sdata_running(tmp_sdata)) continue; if (tmp_sdata->vif.type == NL80211_IFTYPE_MONITOR || - tmp_sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - tmp_sdata->vif.type == NL80211_IFTYPE_WDS) + tmp_sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; if (ether_addr_equal(tmp_sdata->vif.addr, hdr->addr2)) { sdata = tmp_sdata; @@ -2402,9 +2402,6 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, } sta = sta_info_get_bss(sdata, skb->data); break; - case NL80211_IFTYPE_WDS: - sta = sta_info_get(sdata, sdata->u.wds.remote_addr); - break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: /* determined much later */ @@ -2580,20 +2577,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, hdrlen = 24; band = chanctx_conf->def.chan->band; break; - case NL80211_IFTYPE_WDS: - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); - /* RA TA DA SA */ - memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); - memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 30; - /* - * This is the exception! WDS style interfaces are prohibited - * when channel contexts are in used so this must be valid - */ - band = local->hw.conf.chandef.chan->band; - break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: if (!is_multicast_ether_addr(skb->data)) { @@ -4418,9 +4401,10 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, /* * Transmit all pending packets. Called from tasklet. */ -void ieee80211_tx_pending(unsigned long data) +void ieee80211_tx_pending(struct tasklet_struct *t) { - struct ieee80211_local *local = (struct ieee80211_local *)data; + struct ieee80211_local *local = from_tasklet(local, t, + tx_pending_tasklet); unsigned long flags; int i; bool txok; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49342060490f..8c3c01a1b923 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -386,9 +386,10 @@ _ieee80211_wake_txqs(struct ieee80211_local *local, unsigned long *flags) rcu_read_unlock(); } -void ieee80211_wake_txqs(unsigned long data) +void ieee80211_wake_txqs(struct tasklet_struct *t) { - struct ieee80211_local *local = (struct ieee80211_local *)data; + struct ieee80211_local *local = from_tasklet(local, t, + wake_txqs_tasklet); unsigned long flags; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); @@ -2513,7 +2514,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) return res; } break; - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: @@ -2523,6 +2523,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: WARN_ON(1); break; } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 2fb99325135a..9ea6004abe1b 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -118,9 +118,11 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr) { struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u8 *p; - if (local->hw.queues < IEEE80211_NUM_ACS) + if ((info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER) || + local->hw.queues < IEEE80211_NUM_ACS) return 0; if (!ieee80211_is_data(hdr->frame_control)) { @@ -141,6 
+143,7 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct mac80211_qos_map *qos_map; bool qos; @@ -153,7 +156,7 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, else qos = false; - if (!qos) { + if (!qos || (info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) { skb->priority = 0; /* required for correct WPA/11i MIC */ return IEEE80211_AC_BE; } @@ -202,9 +205,6 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: ra = skb->data; break; - case NL80211_IFTYPE_WDS: - ra = sdata->u.wds.remote_addr; - break; case NL80211_IFTYPE_STATION: /* might be a TDLS station */ sta = sta_info_get(sdata, skb->data); @@ -249,6 +249,14 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, p = ieee80211_get_qos_ctl(hdr); + /* don't overwrite the QoS field of injected frames */ + if (info->flags & IEEE80211_TX_CTL_INJECTED) { + /* do take into account Ack policy of injected frames */ + if (*p & IEEE80211_QOS_CTL_ACK_POLICY_NOACK) + info->flags |= IEEE80211_TX_CTL_NO_ACK; + return; + } + /* set up the first byte */ /* diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 06ea0f8bfd5c..520cedc594e1 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -20,9 +20,9 @@ #include "ieee802154_i.h" #include "cfg.h" -static void ieee802154_tasklet_handler(unsigned long data) +static void ieee802154_tasklet_handler(struct tasklet_struct *t) { - struct ieee802154_local *local = (struct ieee802154_local *)data; + struct ieee802154_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue))) { @@ -91,9 +91,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); - tasklet_init(&local->tasklet, - ieee802154_tasklet_handler, - (unsigned long)local); + tasklet_setup(&local->tasklet, ieee802154_tasklet_handler); skb_queue_head_init(&local->skb_queue); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index f2868a8a50c3..47bab701555f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -377,6 +377,8 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!pskb_may_pull(skb, sizeof(*hdr))) goto err; + skb_dst_drop(skb); + /* Read and decode the label */ hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr); diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 54b888f94009..96ba616f59bf 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -18,6 +18,7 @@ struct mptcp_pernet { struct ctl_table_header *ctl_table_hdr; int mptcp_enabled; + unsigned int add_addr_timeout; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -30,6 +31,11 @@ int mptcp_is_enabled(struct net *net) return mptcp_get_pernet(net)->mptcp_enabled; } +unsigned int mptcp_get_add_addr_timeout(struct net *net) +{ + return mptcp_get_pernet(net)->add_addr_timeout; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -40,12 +46,19 @@ static struct ctl_table mptcp_sysctl_table[] = { */ .proc_handler = proc_dointvec, }, + { + .procname = "add_addr_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, {} }; static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; + 
pernet->add_addr_timeout = TCP_RTO_MAX; } static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -61,6 +74,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) } table[0].data = &pernet->mptcp_enabled; + table[1].data = &pernet->add_addr_timeout; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a044dd43411d..f2d1e27a2bc1 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -492,7 +492,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool ret = false; mpext = skb ? mptcp_get_ext(skb) : NULL; - snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable); + snd_data_fin_enable = mptcp_data_fin_enabled(msk); if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size; @@ -809,11 +809,14 @@ static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit) return cur_ack; } -static void update_una(struct mptcp_sock *msk, - struct mptcp_options_received *mp_opt) +static void ack_update_msk(struct mptcp_sock *msk, + const struct sock *ssk, + struct mptcp_options_received *mp_opt) { u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una); - u64 write_seq = READ_ONCE(msk->write_seq); + u64 new_wnd_end, wnd_end, old_wnd_end = atomic64_read(&msk->wnd_end); + u64 snd_nxt = READ_ONCE(msk->snd_nxt); + struct sock *sk = (struct sock *)msk; /* avoid ack expansion on update conflict, to reduce the risk of * wrongly expanding to a future ack sequence number, which is way @@ -822,15 +825,28 @@ static void update_una(struct mptcp_sock *msk, new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? Ignore. 
*/ - if (after64(new_snd_una, write_seq)) + if (after64(new_snd_una, snd_nxt)) new_snd_una = old_snd_una; + new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; + + while (after64(new_wnd_end, old_wnd_end)) { + wnd_end = old_wnd_end; + old_wnd_end = atomic64_cmpxchg(&msk->wnd_end, wnd_end, + new_wnd_end); + if (old_wnd_end == wnd_end) { + if (mptcp_send_head(sk)) + mptcp_schedule_work(sk); + break; + } + } + while (after64(new_snd_una, old_snd_una)) { snd_una = old_snd_una; old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una, new_snd_una); if (old_snd_una == snd_una) { - mptcp_data_acked((struct sock *)msk); + mptcp_data_acked(sk); break; } } @@ -930,7 +946,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) * monodirectional flows will stuck */ if (mp_opt.use_ack) - update_una(msk, &mp_opt); + ack_update_msk(msk, sk, &mp_opt); /* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index e19e1525ecbb..f9c88e2abb8e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -89,8 +89,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return false; msk->pm.status |= BIT(new_status); - if (schedule_work(&msk->work)) - sock_hold((struct sock *)msk); + mptcp_schedule_work((struct sock *)msk); return true; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 0d6f3d912891..f8a9d82a0ea8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -232,7 +232,8 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX); + sk_reset_timer(sk, timer, + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); spin_unlock_bh(&msk->pm.lock); @@ -264,6 +265,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); if (lookup_anno_list_by_saddr(msk, &entry->addr)) return false; @@ -279,7 +281,8 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry->retrans_times = 0; timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); - sk_reset_timer(sk, &add_entry->add_timer, jiffies + TCP_RTO_MAX); + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); return true; } @@ -413,14 +416,13 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - long timeout = 0; if (msk->pm.rm_id != subflow->remote_id) continue; spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow, timeout); + __mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_accepted--; @@ -449,14 +451,13 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - long timeout = 0; if (rm_id != subflow->local_id) continue; spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow, timeout); + __mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.local_addr_used--; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 88f2a7a0ccb8..8df013daea88 
100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -21,6 +21,7 @@ #include <net/transp_v6.h> #endif #include <net/mptcp.h> +#include <net/xfrm.h> #include "protocol.h" #include "mib.h" @@ -41,6 +42,9 @@ struct mptcp_skb_cb { static struct percpu_counter mptcp_sockets_allocated; +static void __mptcp_destroy_sock(struct sock *sk); +static void __mptcp_check_send_data_fin(struct sock *sk); + /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not * completed yet or has failed, return the subflow socket. * Otherwise return NULL. @@ -53,6 +57,12 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) return msk->subflow; } +/* Returns end sequence number of the receiver's advertised window */ +static u64 mptcp_wnd_end(const struct mptcp_sock *msk) +{ + return atomic64_read(&msk->wnd_end); +} + static bool mptcp_is_tcpsk(struct sock *sk) { struct socket *sock = sk->sk_socket; @@ -102,6 +112,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) msk->subflow = ssock; subflow = mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); + sock_hold(ssock->sk); subflow->request_mptcp = 1; /* accept() will wait on first subflow sk_wq, and we always wakes up @@ -169,6 +180,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) if (after64(seq, max_seq)) { /* out of window */ mptcp_drop(sk, skb); + pr_debug("oow by %ld", (unsigned long)seq - (unsigned long)max_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW); return; } @@ -323,6 +335,19 @@ static void mptcp_stop_timer(struct sock *sk) mptcp_sk(sk)->timer_ival = 0; } +static void mptcp_close_wake_up(struct sock *sk) +{ + if (sock_flag(sk, SOCK_DEAD)) + return; + + sk->sk_state_change(sk); + if (sk->sk_shutdown == SHUTDOWN_MASK || + sk->sk_state == TCP_CLOSE) + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + else + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); +} + static void mptcp_check_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -341,20 +366,14 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: inet_sk_state_store(sk, TCP_FIN_WAIT2); - sk->sk_state_change(sk); break; case TCP_CLOSING: case TCP_LAST_ACK: inet_sk_state_store(sk, TCP_CLOSE); - sk->sk_state_change(sk); break; } - if (sk->sk_shutdown == SHUTDOWN_MASK || - sk->sk_state == TCP_CLOSE) - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - else - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + mptcp_close_wake_up(sk); } } @@ -388,13 +407,27 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; } -static void mptcp_check_data_fin(struct sock *sk) +static void mptcp_send_ack(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + } +} + +static bool mptcp_check_data_fin(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); u64 rcv_data_fin_seq; + bool ret = false; if (__mptcp_check_fallback(msk) || !msk->first) - return; + return ret; /* Need to ack a DATA_FIN received from a peer while this side * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. 
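ack_update_msk() in the options.c hunk above advances both msk->snd_una and the new msk->wnd_end with the same lock-free idiom: retry atomic64_cmpxchg() until this CPU either installs the newer value or observes that another CPU has already moved past it. A standalone sketch of that monotonic-advance pattern; advance_seq64() is illustrative only, and after64() is assumed to be MPTCP's 64-bit sequence comparison from protocol.h:

	#include <linux/atomic.h>
	#include <linux/types.h>

	/* Sketch: move *cur forward to new_val, never backward, without
	 * taking a lock; concurrent updaters race and the newest value
	 * wins.
	 */
	static void advance_seq64(atomic64_t *cur, u64 new_val)
	{
		u64 old = atomic64_read(cur);

		while (after64(new_val, old)) {
			u64 prev = atomic64_cmpxchg(cur, old, new_val);

			if (prev == old)
				break;	/* we installed new_val */
			old = prev;	/* lost the race; re-check */
		}
	}

The wnd_end variant in the hunk additionally schedules the worker when the advertised window opens and mptcp_send_head() reports pending data, so a grown window immediately triggers more transmission.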
@@ -410,8 +443,6 @@ static void mptcp_check_data_fin(struct sock *sk) */ if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) { - struct mptcp_subflow_context *subflow; - WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); @@ -428,7 +459,6 @@ static void mptcp_check_data_fin(struct sock *sk) break; case TCP_FIN_WAIT2: inet_sk_state_store(sk, TCP_CLOSE); - // @@ Close subflows now? break; default: /* Other states not expected */ @@ -436,23 +466,12 @@ static void mptcp_check_data_fin(struct sock *sk) break; } + ret = true; mptcp_set_timeout(sk, NULL); - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - lock_sock(ssk); - tcp_send_ack(ssk); - release_sock(ssk); - } - - sk->sk_state_change(sk); - - if (sk->sk_shutdown == SHUTDOWN_MASK || - sk->sk_state == TCP_CLOSE) - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - else - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + mptcp_send_ack(msk); + mptcp_close_wake_up(sk); } + return ret; } static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, @@ -466,6 +485,18 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct tcp_sock *tp; u32 old_copied_seq; bool done = false; + int sk_rbuf; + + sk_rbuf = READ_ONCE(sk->sk_rcvbuf); + + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); + + if (unlikely(ssk_rbuf > sk_rbuf)) { + WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf); + sk_rbuf = ssk_rbuf; + } + } pr_debug("msk=%p ssk=%p", msk, ssk); tp = tcp_sk(ssk); @@ -528,7 +559,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, WRITE_ONCE(tp->copied_seq, seq); more_data_avail = mptcp_subflow_data_available(ssk); - if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) { + if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) { done = true; break; } @@ -608,9 +639,8 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) * this is not a good place to change state. Let the workqueue * do it. */ - if (mptcp_pending_data_fin(sk, NULL) && - schedule_work(&msk->work)) - sock_hold(sk); + if (mptcp_pending_data_fin(sk, NULL)) + mptcp_schedule_work(sk); } spin_unlock_bh(&sk->sk_lock.slock); @@ -622,6 +652,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); + int sk_rbuf, ssk_rbuf; bool wake; /* move_skbs_to_msk below can legitly clear the data_avail flag, @@ -632,12 +663,16 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (wake) set_bit(MPTCP_DATA_READY, &msk->flags); - if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) && - move_skbs_to_msk(msk, ssk)) + ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); + sk_rbuf = READ_ONCE(sk->sk_rcvbuf); + if (unlikely(ssk_rbuf > sk_rbuf)) + sk_rbuf = ssk_rbuf; + + /* over limit? 
can't append more skbs to msk */ + if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) goto wake; - /* don't schedule if mptcp sk is (still) over limit */ - if (atomic_read(&sk->sk_rmem_alloc) > READ_ONCE(sk->sk_rcvbuf)) + if (move_skbs_to_msk(msk, ssk)) goto wake; /* mptcp socket is owned, release_cb should retry */ @@ -675,6 +710,10 @@ static void mptcp_reset_timer(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); unsigned long tout; + /* prevent rescheduling on close */ + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + return; + /* should never be called with mptcp level timer cleared */ tout = READ_ONCE(mptcp_sk(sk)->timer_ival); if (WARN_ON_ONCE(!tout)) @@ -682,23 +721,33 @@ static void mptcp_reset_timer(struct sock *sk) sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout); } +bool mptcp_schedule_work(struct sock *sk) +{ + if (inet_sk_state_load(sk) != TCP_CLOSE && + schedule_work(&mptcp_sk(sk)->work)) { + /* each subflow already holds a reference to the sk, and the + * workqueue is invoked by a subflow, so sk can't go away here. + */ + sock_hold(sk); + return true; + } + return false; +} + void mptcp_data_acked(struct sock *sk) { mptcp_reset_timer(sk); - if ((!test_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags) || - (inet_sk_state_load(sk) != TCP_ESTABLISHED)) && - schedule_work(&mptcp_sk(sk)->work)) - sock_hold(sk); + if ((test_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags) || + mptcp_send_head(sk) || + (inet_sk_state_load(sk) != TCP_ESTABLISHED))) + mptcp_schedule_work(sk); } void mptcp_subflow_eof(struct sock *sk) { - struct mptcp_sock *msk = mptcp_sk(sk); - - if (!test_and_set_bit(MPTCP_WORK_EOF, &msk->flags) && - schedule_work(&msk->work)) - sock_hold(sk); + if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); } static void mptcp_check_for_eof(struct mptcp_sock *msk) @@ -709,8 +758,10 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) receivers += !subflow->rx_eof; + if (receivers) + return; - if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + if (!(sk->sk_shutdown & RCV_SHUTDOWN)) { /* hopefully temporary hack: propagate shutdown status * to msk, when all subflows agree on it */ @@ -720,6 +771,19 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); } + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + inet_sk_state_store(sk, TCP_CLOSE_WAIT); + break; + case TCP_FIN_WAIT1: + /* fallback sockets skip TCP_CLOSING - TCP will take care */ + inet_sk_state_store(sk, TCP_CLOSE); + break; + default: + return; + } + mptcp_close_wake_up(sk); } static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) @@ -754,8 +818,11 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq, if (!tcp_skb_can_collapse_to(skb)) return false; - /* can collapse only if MPTCP level sequence is in order */ - return mpext && mpext->data_seq + mpext->data_len == write_seq; + /* can collapse only if MPTCP level sequence is in order and this + * mapping has not been xmitted yet + */ + return mpext && mpext->data_seq + mpext->data_len == write_seq && + !mpext->frozen; } static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, @@ -763,6 +830,7 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, const struct mptcp_data_frag *df) { return df && pfrag->page == df->page && + pfrag->size - pfrag->offset > 0 && df->data_seq + df->data_len == msk->write_seq; } @@ -781,20 +849,6 @@ static void dfrag_clear(struct sock 
*sk, struct mptcp_data_frag *dfrag) put_page(dfrag->page); } -static bool mptcp_is_writeable(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - - if (!sk_stream_is_writeable((struct sock *)msk)) - return false; - - mptcp_for_each_subflow(msk, subflow) { - if (sk_stream_is_writeable(subflow->tcp_sock)) - return true; - } - return false; -} - static void mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -806,13 +860,16 @@ static void mptcp_clean_una(struct sock *sk) * plain TCP */ if (__mptcp_check_fallback(msk)) - atomic64_set(&msk->snd_una, msk->write_seq); + atomic64_set(&msk->snd_una, msk->snd_nxt); + snd_una = atomic64_read(&msk->snd_una); list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) break; + if (WARN_ON_ONCE(dfrag == msk->first_pending)) + break; dfrag_clear(sk, dfrag); cleaned = true; } @@ -821,31 +878,33 @@ static void mptcp_clean_una(struct sock *sk) if (dfrag && after64(snd_una, dfrag->data_seq)) { u64 delta = snd_una - dfrag->data_seq; - if (WARN_ON_ONCE(delta > dfrag->data_len)) + if (WARN_ON_ONCE(delta > dfrag->already_sent)) goto out; dfrag->data_seq += delta; dfrag->offset += delta; dfrag->data_len -= delta; + dfrag->already_sent -= delta; dfrag_uncharge(sk, delta); cleaned = true; } out: - if (cleaned) { + if (cleaned) sk_mem_reclaim_partial(sk); +} - /* Only wake up writers if a subflow is ready */ - if (mptcp_is_writeable(msk)) { - set_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags); - smp_mb__after_atomic(); +static void mptcp_clean_una_wakeup(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); - /* set SEND_SPACE before sk_stream_write_space clears - * NOSPACE - */ - sk_stream_write_space(sk); - } + mptcp_clean_una(sk); + + /* Only wake up writers if a subflow is ready */ + if (sk_stream_is_writeable(sk)) { + clear_bit(MPTCP_NOSPACE, &msk->flags); + sk_stream_write_space(sk); } } @@ -854,12 +913,23 @@ out: */ static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag) { + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + bool first = true; + if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag), pfrag, sk->sk_allocation))) return true; - sk->sk_prot->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + if (first) + tcp_enter_memory_pressure(ssk); + sk_stream_moderate_sndbuf(ssk); + first = false; + } return false; } @@ -875,149 +945,109 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag, dfrag->data_seq = msk->write_seq; dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag); dfrag->offset = offset + sizeof(struct mptcp_data_frag); + dfrag->already_sent = 0; dfrag->page = pfrag->page; return dfrag; } +struct mptcp_sendmsg_info { + int mss_now; + int size_goal; + u16 limit; + u16 sent; + unsigned int flags; +}; + +static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq, + int avail_size) +{ + u64 window_end = mptcp_wnd_end(msk); + + if (__mptcp_check_fallback(msk)) + return avail_size; + + if (!before64(data_seq + avail_size, window_end)) { + u64 allowed_size = window_end - data_seq; + + return min_t(unsigned int, allowed_size, avail_size); + } + + return avail_size; +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, - struct msghdr *msg, struct mptcp_data_frag *dfrag, - long *timeo, int *pmss_now, - int 
*ps_goal) + struct mptcp_data_frag *dfrag, + struct mptcp_sendmsg_info *info) { - int mss_now, avail_size, size_goal, offset, ret, frag_truesize = 0; - bool dfrag_collapsed, can_collapse = false; + u64 data_seq = dfrag->data_seq + info->sent; struct mptcp_sock *msk = mptcp_sk(sk); + bool zero_window_probe = false; struct mptcp_ext *mpext = NULL; - bool retransmission = !!dfrag; struct sk_buff *skb, *tail; - struct page_frag *pfrag; - struct page *page; - u64 *write_seq; - size_t psize; - - /* use the mptcp page cache so that we can easily move the data - * from one substream to another, but do per subflow memory accounting - * Note: pfrag is used only !retransmission, but the compiler if - * fooled into a warning if we don't init here - */ - pfrag = sk_page_frag(sk); - if (!retransmission) { - write_seq = &msk->write_seq; - page = pfrag->page; - } else { - write_seq = &dfrag->data_seq; - page = dfrag->page; - } + bool can_collapse = false; + int avail_size; + size_t ret; - /* compute copy limit */ - mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags); - *pmss_now = mss_now; - *ps_goal = size_goal; - avail_size = size_goal; + pr_debug("msk=%p ssk=%p sending dfrag at seq=%lld len=%d already sent=%d", + msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); + + /* compute send limit */ + info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); + avail_size = info->size_goal; skb = tcp_write_queue_tail(ssk); if (skb) { - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); - /* Limit the write to the size available in the * current skb, if any, so that we create at most a new skb. * Explicitly tells TCP internals to avoid collapsing on later * queue management operation, to avoid breaking the ext <-> * SSN association set here */ - can_collapse = (size_goal - skb->len > 0) && - mptcp_skb_can_collapse_to(*write_seq, skb, mpext); + mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + can_collapse = (info->size_goal - skb->len > 0) && + mptcp_skb_can_collapse_to(data_seq, skb, mpext); if (!can_collapse) TCP_SKB_CB(skb)->eor = 1; else - avail_size = size_goal - skb->len; + avail_size = info->size_goal - skb->len; } - if (!retransmission) { - /* reuse tail pfrag, if possible, or carve a new one from the - * page allocator - */ - dfrag = mptcp_rtx_tail(sk); - offset = pfrag->offset; - dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag); - if (!dfrag_collapsed) { - dfrag = mptcp_carve_data_frag(msk, pfrag, offset); - offset = dfrag->offset; - frag_truesize = dfrag->overhead; - } - psize = min_t(size_t, pfrag->size - offset, avail_size); - - /* Copy to page */ - pr_debug("left=%zu", msg_data_left(msg)); - psize = copy_page_from_iter(pfrag->page, offset, - min_t(size_t, msg_data_left(msg), - psize), - &msg->msg_iter); - pr_debug("left=%zu", msg_data_left(msg)); - if (!psize) - return -EINVAL; - - if (!sk_wmem_schedule(sk, psize + dfrag->overhead)) { - iov_iter_revert(&msg->msg_iter, psize); - return -ENOMEM; - } - } else { - offset = dfrag->offset; - psize = min_t(size_t, dfrag->data_len, avail_size); + /* Zero window and all data acked? Probe. 
*/ + avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size); + if (avail_size == 0) { + if (skb || atomic64_read(&msk->snd_una) != msk->snd_nxt) + return 0; + zero_window_probe = true; + data_seq = atomic64_read(&msk->snd_una) - 1; + avail_size = 1; } - /* tell the TCP stack to delay the push so that we can safely - * access the skb after the sendpages call - */ - ret = do_tcp_sendpages(ssk, page, offset, psize, - msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT); - if (ret <= 0) { - if (!retransmission) - iov_iter_revert(&msg->msg_iter, psize); - return ret; - } + if (WARN_ON_ONCE(info->sent > info->limit || + info->limit > dfrag->data_len)) + return 0; - frag_truesize += ret; - if (!retransmission) { - if (unlikely(ret < psize)) - iov_iter_revert(&msg->msg_iter, psize - ret); - - /* send successful, keep track of sent data for mptcp-level - * retransmission - */ - dfrag->data_len += ret; - if (!dfrag_collapsed) { - get_page(dfrag->page); - list_add_tail(&dfrag->list, &msk->rtx_queue); - sk_wmem_queued_add(sk, frag_truesize); - } else { - sk_wmem_queued_add(sk, ret); - } - - /* charge data on mptcp rtx queue to the master socket - * Note: we charge such data both to sk and ssk - */ - sk->sk_forward_alloc -= frag_truesize; + ret = info->limit - info->sent; + tail = tcp_build_frag(ssk, avail_size, info->flags, dfrag->page, + dfrag->offset + info->sent, &ret); + if (!tail) { + tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk)); + return -ENOMEM; } - /* if the tail skb extension is still the cached one, collapsing - * really happened. Note: we can't check for 'same skb' as the sk_buff - * hdr on tail can be transmitted, freed and re-allocated by the - * do_tcp_sendpages() call + /* if the tail skb is still the cached one, collapsing really happened. 
*/ - tail = tcp_write_queue_tail(ssk); - if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) { + if (skb == tail) { WARN_ON_ONCE(!can_collapse); mpext->data_len += ret; + WARN_ON_ONCE(zero_window_probe); goto out; } - skb = tcp_write_queue_tail(ssk); - mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext); + mpext = __skb_ext_set(tail, SKB_EXT_MPTCP, msk->cached_ext); msk->cached_ext = NULL; memset(mpext, 0, sizeof(*mpext)); - mpext->data_seq = *write_seq; + mpext->data_seq = data_seq; mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq; mpext->data_len = ret; mpext->use_map = 1; @@ -1027,12 +1057,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext->data_seq, mpext->subflow_seq, mpext->data_len, mpext->dsn64); + if (zero_window_probe) { + mptcp_subflow_ctx(ssk)->rel_write_seq += ret; + mpext->frozen = 1; + ret = 0; + tcp_push_pending_frames(ssk); + } out: - if (!retransmission) - pfrag->offset += frag_truesize; - WRITE_ONCE(*write_seq, *write_seq + ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; - return ret; } @@ -1040,17 +1072,25 @@ static void mptcp_nospace(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - clear_bit(MPTCP_SEND_SPACE, &msk->flags); + set_bit(MPTCP_NOSPACE, &msk->flags); smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool ssk_writeable = sk_stream_is_writeable(ssk); struct socket *sock = READ_ONCE(ssk->sk_socket); + if (ssk_writeable || !sock) + continue; + /* enables ssk->write_space() callbacks */ - if (sock) - set_bit(SOCK_NOSPACE, &sock->flags); + set_bit(SOCK_NOSPACE, &sock->flags); } + + /* mptcp_data_acked() could run just before we set the NOSPACE bit, + * so explicitly check for snd_una value + */ + mptcp_clean_una((struct sock *)msk); } static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) @@ -1154,21 +1194,86 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, return NULL; } -static void ssk_check_wmem(struct mptcp_sock *msk) +static void mptcp_push_release(struct sock *sk, struct sock *ssk, + struct mptcp_sendmsg_info *info) { - if (unlikely(!mptcp_is_writeable(msk))) - mptcp_nospace(msk); + mptcp_set_timeout(sk, ssk); + tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); + release_sock(ssk); +} + +static void mptcp_push_pending(struct sock *sk, unsigned int flags) +{ + struct sock *prev_ssk = NULL, *ssk = NULL; + struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_sendmsg_info info = { + .flags = flags, + }; + struct mptcp_data_frag *dfrag; + int len, copied = 0; + u32 sndbuf; + + while ((dfrag = mptcp_send_head(sk))) { + info.sent = dfrag->already_sent; + info.limit = dfrag->data_len; + len = dfrag->data_len - dfrag->already_sent; + while (len > 0) { + int ret = 0; + + prev_ssk = ssk; + __mptcp_flush_join_list(msk); + ssk = mptcp_subflow_get_send(msk, &sndbuf); + + /* do auto tuning */ + if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && + sndbuf > READ_ONCE(sk->sk_sndbuf)) + WRITE_ONCE(sk->sk_sndbuf, sndbuf); + + /* try to keep the subflow socket lock across + * consecutive xmit on the same socket + */ + if (ssk != prev_ssk && prev_ssk) + mptcp_push_release(sk, prev_ssk, &info); + if (!ssk) + goto out; + + if (ssk != prev_ssk || !prev_ssk) + lock_sock(ssk); + + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) { + mptcp_push_release(sk, ssk, &info); + goto out; + } + + info.sent += ret; + 
dfrag->already_sent += ret; + msk->snd_nxt += ret; + msk->snd_burst -= ret; + copied += ret; + len -= ret; + } + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + } + + /* at this point we still hold the socket lock for the last subflow we used */ + if (ssk) + mptcp_push_release(sk, ssk, &info); + +out: + /* start the timer, if it's not pending */ + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + if (copied) + __mptcp_check_send_data_fin(sk); +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { - int mss_now = 0, size_goal = 0, ret = 0; struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; size_t copied = 0; - struct sock *ssk; - u32 sndbuf; - bool tx_ok; + int ret = 0; long timeo; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+ * memory accounting will prevent excessive memory usage + * anyway */ - msk->snd_burst -= ret; - copied += ret; - - tx_ok = msg_data_left(msg); - if (!tx_ok) - break; - - if (!sk_stream_memory_free(ssk) || - !mptcp_page_frag_refill(ssk, pfrag) || - !mptcp_ext_cache_refill(msk)) { - tcp_push(ssk, msg->msg_flags, mss_now, - tcp_sk(ssk)->nonagle, size_goal); - mptcp_set_timeout(sk, ssk); - release_sock(ssk); - goto restart; + offset = dfrag->offset + dfrag->data_len; + psize = pfrag->size - offset; + psize = min_t(size_t, psize, msg_data_left(msg)); + if (!sk_wmem_schedule(sk, psize + frag_truesize)) + goto wait_for_memory; + + if (copy_page_from_iter(dfrag->page, offset, psize, + &msg->msg_iter) != psize) { + ret = -EFAULT; + goto out; } - /* memory is charged to mptcp level socket as well, i.e. - * if msg is very large, mptcp socket may run out of buffer - * space. mptcp_clean_una() will release data that has - * been acked at mptcp level in the mean time, so there is - * a good chance we can continue sending data right away. - * - * Normally, when the tcp subflow can accept more data, then - * so can the MPTCP socket. However, we need to cope with - * peers that might lag behind in their MPTCP-level - * acknowledgements, i.e. data might have been acked at - * tcp level only. So, we must also check the MPTCP socket - * limits before we send more data. + /* data successfully copied into the write queue */ + copied += psize; + dfrag->data_len += psize; + frag_truesize += psize; + pfrag->offset += frag_truesize; + WRITE_ONCE(msk->write_seq, msk->write_seq + psize); + + /* charge data on mptcp pending queue to the msk socket + * Note: we charge such data both to sk and ssk */ - if (unlikely(!sk_stream_memory_free(sk))) { - tcp_push(ssk, msg->msg_flags, mss_now, - tcp_sk(ssk)->nonagle, size_goal); - mptcp_clean_una(sk); - if (!sk_stream_memory_free(sk)) { - /* can't send more for now, need to wait for - * MPTCP-level ACKs from peer. - * - * Wakeup will happen via mptcp_clean_una(). - */ - mptcp_set_timeout(sk, ssk); - release_sock(ssk); - goto restart; - } + sk_wmem_queued_add(sk, frag_truesize); + sk->sk_forward_alloc -= frag_truesize; + if (!dfrag_collapsed) { + get_page(dfrag->page); + list_add_tail(&dfrag->list, &msk->rtx_queue); + if (!msk->first_pending) + WRITE_ONCE(msk->first_pending, dfrag); } - } + pr_debug("msk=%p dfrag at seq=%lld len=%d sent=%d new=%d", msk, + dfrag->data_seq, dfrag->data_len, dfrag->already_sent, + !dfrag_collapsed); - mptcp_set_timeout(sk, ssk); - if (copied) { - tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, - size_goal); + if (!mptcp_ext_cache_refill(msk)) + goto wait_for_memory; + continue; - /* start the timer, if it's not pending */ - if (!mptcp_timer_pending(sk)) +wait_for_memory: + mptcp_nospace(msk); + if (mptcp_timer_pending(sk)) mptcp_reset_timer(sk); + ret = sk_stream_wait_memory(sk, &timeo); + if (ret) + goto out; } - release_sock(ssk); + if (copied) + mptcp_push_pending(sk, msg->msg_flags); + out: - ssk_check_wmem(msk); release_sock(sk); return copied ?
: ret; } @@ -1476,17 +1543,19 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) __mptcp_flush_join_list(msk); do { struct sock *ssk = mptcp_subflow_recv_lookup(msk); + bool slowpath; if (!ssk) break; - lock_sock(ssk); + slowpath = lock_sock_fast(ssk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - release_sock(ssk); + unlock_sock_fast(ssk, slowpath); } while (!done); if (mptcp_ofo_queue(msk) || moved > 0) { - mptcp_check_data_fin((struct sock *)msk); + if (!mptcp_check_data_fin((struct sock *)msk)) + mptcp_send_ack(msk); return true; } return false; @@ -1598,12 +1667,11 @@ static void mptcp_retransmit_handler(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->write_seq)) { + if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->snd_nxt)) { mptcp_stop_timer(sk); } else { set_bit(MPTCP_WORK_RTX, &msk->flags); - if (schedule_work(&msk->work)) - sock_hold(sk); + mptcp_schedule_work(sk); } } @@ -1626,6 +1694,13 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } +static void mptcp_timeout_timer(struct timer_list *t) +{ + struct sock *sk = from_timer(sk, t, sk_timer); + + mptcp_schedule_work(sk); +} + /* Find an idle subflow. Return NULL if there is unacked data at tcp * level. * @@ -1639,7 +1714,7 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); if (__mptcp_check_fallback(msk)) - return msk->first; + return NULL; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -1672,20 +1747,43 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) * parent socket. */ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow, - long timeout) + struct mptcp_subflow_context *subflow) { - struct socket *sock = READ_ONCE(ssk->sk_socket); + bool dispose_socket = false; + struct socket *sock; list_del(&subflow->node); - if (sock && sock != sk->sk_socket) { - /* outgoing subflow */ - sock_release(sock); + lock_sock(ssk); + + /* if we are invoked by the msk cleanup code, the subflow is + * already orphaned + */ + sock = ssk->sk_socket; + if (sock) { + dispose_socket = sock != sk->sk_socket; + sock_orphan(ssk); + } + + /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops; + * the ssk has already been destroyed, we just need to release the + * reference owned by msk + */ + if (!inet_csk(ssk)->icsk_ulp_ops) { + kfree_rcu(subflow, rcu); } else { - /* incoming subflow */ - tcp_close(ssk, timeout); + /* otherwise ask tcp to dispose of ssk and subflow ctx */ + subflow->disposable = 1; + __tcp_close(ssk, 0); + + /* close acquired an extra ref */ + __sock_put(ssk); } + release_sock(ssk); + if (dispose_socket) + iput(SOCK_INODE(sock)); + + sock_put(ssk); } static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) @@ -1730,31 +1828,53 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (inet_sk_state_load(ssk) != TCP_CLOSE) continue; - __mptcp_close_ssk((struct sock *)msk, ssk, subflow, 0); + __mptcp_close_ssk((struct sock *)msk, ssk, subflow); + } +} + +static bool mptcp_check_close_timeout(const struct sock *sk) +{ + s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; + struct mptcp_subflow_context *subflow; + + if (delta >= TCP_TIMEWAIT_LEN) + return true; + + /* if all subflows are in closed status don't bother with additional + * timeout + */ + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + if
(inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != + TCP_CLOSE) + return false; } + return true; } static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; - int orig_len, orig_offset, mss_now = 0, size_goal = 0; + struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; - u64 orig_write_seq; size_t copied = 0; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT, - }; - long timeo = 0; + int state, ret; lock_sock(sk); - mptcp_clean_una(sk); + set_bit(MPTCP_WORKER_RUNNING, &msk->flags); + state = sk->sk_state; + if (unlikely(state == TCP_CLOSE)) + goto unlock; + + mptcp_clean_una_wakeup(sk); mptcp_check_data_fin_ack(sk); __mptcp_flush_join_list(msk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); __mptcp_move_skbs(msk); + if (mptcp_send_head(sk)) + mptcp_push_pending(sk, 0); if (msk->pm.status) pm_work(msk); @@ -1764,6 +1884,18 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_data_fin(sk); + /* if the msk data is completely acked, or the socket timed out, + * there is no point in keeping around an orphaned sk + */ + if (sock_flag(sk, SOCK_DEAD) && + (mptcp_check_close_timeout(sk) || + (state != sk->sk_state && + ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) { + inet_sk_state_store(sk, TCP_CLOSE); + __mptcp_destroy_sock(sk); + goto unlock; + } + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; @@ -1780,30 +1912,24 @@ static void mptcp_worker(struct work_struct *work) lock_sock(ssk); - orig_len = dfrag->data_len; - orig_offset = dfrag->offset; - orig_write_seq = dfrag->data_seq; - while (dfrag->data_len > 0) { - int ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, - &mss_now, &size_goal); - if (ret < 0) + /* limit retransmission to the bytes already sent on some subflows */ + info.sent = 0; + info.limit = dfrag->already_sent; + while (info.sent < dfrag->already_sent) { + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) break; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); copied += ret; - dfrag->data_len -= ret; - dfrag->offset += ret; + info.sent += ret; if (!mptcp_ext_cache_refill(msk)) break; } if (copied) - tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle, - size_goal); - - dfrag->data_seq = orig_write_seq; - dfrag->offset = orig_offset; - dfrag->data_len = orig_len; + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); mptcp_set_timeout(sk, ssk); release_sock(ssk); @@ -1813,6 +1939,7 @@ reset_unlock: mptcp_reset_timer(sk); unlock: + clear_bit(MPTCP_WORKER_RUNNING, &msk->flags); release_sock(sk); sock_put(sk); } @@ -1826,9 +1953,9 @@ static int __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->conn_list); INIT_LIST_HEAD(&msk->join_list); INIT_LIST_HEAD(&msk->rtx_queue); - __set_bit(MPTCP_SEND_SPACE, &msk->flags); INIT_WORK(&msk->work, mptcp_worker); msk->out_of_order_queue = RB_ROOT; + msk->first_pending = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -1837,7 +1964,7 @@ static int __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); - + timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); return 0; } @@ -1874,6 +2001,7 @@ static void __mptcp_clear_xmit(struct sock *sk) sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer); + WRITE_ONCE(msk->first_pending, NULL);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); } @@ -1882,8 +2010,12 @@ static void mptcp_cancel_work(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (cancel_work_sync(&msk->work)) - sock_put(sk); + /* if called by the work itself, do not try to cancel the work, or + * we will hang. + */ + if (!test_bit(MPTCP_WORKER_RUNNING, &msk->flags) && + cancel_work_sync(&msk->work)) + __sock_put(sk); } void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) @@ -1941,42 +2073,61 @@ static int mptcp_close_state(struct sock *sk) return next & TCP_ACTION_FIN; } -static void mptcp_close(struct sock *sk, long timeout) +static void __mptcp_check_send_data_fin(struct sock *sk) { - struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - LIST_HEAD(conn_list); - lock_sock(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu", + msk, msk->snd_data_fin_enable, !!mptcp_send_head(sk), + msk->snd_nxt, msk->write_seq); - if (sk->sk_state == TCP_LISTEN) { + /* we still have data to enqueue on the subflows, or we are not really + * shutting down; skip this + */ + if (!msk->snd_data_fin_enable || msk->snd_nxt + 1 != msk->write_seq || + mptcp_send_head(sk)) + return; + + WRITE_ONCE(msk->snd_nxt, msk->write_seq); + + /* fallback socket will not get data_fin/ack, can move to close now */ + if (__mptcp_check_fallback(msk) && sk->sk_state == TCP_LAST_ACK) { inet_sk_state_store(sk, TCP_CLOSE); - goto cleanup; - } else if (sk->sk_state == TCP_CLOSE) { - goto cleanup; + mptcp_close_wake_up(sk); } - if (__mptcp_check_fallback(msk)) { - goto update_state; - } else if (mptcp_close_state(sk)) { - pr_debug("Sending DATA_FIN sk=%p", sk); - WRITE_ONCE(msk->write_seq, msk->write_seq + 1); - WRITE_ONCE(msk->snd_data_fin_enable, 1); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); + __mptcp_flush_join_list(msk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - mptcp_subflow_shutdown(sk, tcp_sk, SHUTDOWN_MASK); - } + mptcp_subflow_shutdown(sk, tcp_sk, SEND_SHUTDOWN); } +} - sk_stream_wait_close(sk, timeout); +static void __mptcp_wr_shutdown(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); -update_state: - inet_sk_state_store(sk, TCP_CLOSE); + pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d", + msk, msk->snd_data_fin_enable, sk->sk_shutdown, sk->sk_state, + !!mptcp_send_head(sk)); + + /* will be ignored by fallback sockets */ + WRITE_ONCE(msk->write_seq, msk->write_seq + 1); + WRITE_ONCE(msk->snd_data_fin_enable, 1); + + __mptcp_check_send_data_fin(sk); +} + +static void __mptcp_destroy_sock(struct sock *sk) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); + LIST_HEAD(conn_list); + + pr_debug("msk=%p", msk); -cleanup: /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join().
*/ @@ -1986,19 +2137,74 @@ cleanup: list_splice_init(&msk->conn_list, &conn_list); __mptcp_clear_xmit(sk); - - release_sock(sk); + sk_stop_timer(sk, &sk->sk_timer); + msk->pm.status = 0; list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __mptcp_close_ssk(sk, ssk, subflow, timeout); + __mptcp_close_ssk(sk, ssk, subflow); } - mptcp_cancel_work(sk); + sk->sk_prot->destroy(sk); - __skb_queue_purge(&sk->sk_receive_queue); + sk_stream_kill_queues(sk); + xfrm_sk_free_policy(sk); + sk_refcnt_debug_release(sk); + sock_put(sk); +} - sk_common_release(sk); +static void mptcp_close(struct sock *sk, long timeout) +{ + struct mptcp_subflow_context *subflow; + bool do_cancel_work = false; + + lock_sock(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { + inet_sk_state_store(sk, TCP_CLOSE); + goto cleanup; + } + + if (mptcp_close_state(sk)) + __mptcp_wr_shutdown(sk); + + sk_stream_wait_close(sk, timeout); + +cleanup: + /* orphan all the subflows */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; + list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow, dispose_socket; + struct socket *sock; + + slow = lock_sock_fast(ssk); + sock = ssk->sk_socket; + dispose_socket = sock && sock != sk->sk_socket; + sock_orphan(ssk); + unlock_sock_fast(ssk, slow); + + /* for the outgoing subflows we additionally need to free + * the associated socket + */ + if (dispose_socket) + iput(SOCK_INODE(sock)); + } + sock_orphan(sk); + + sock_hold(sk); + pr_debug("msk=%p state=%d", sk, sk->sk_state); + if (sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + do_cancel_work = true; + } else { + sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); + } + release_sock(sk); + if (do_cancel_work) + mptcp_cancel_work(sk); + sock_put(sk); } static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) @@ -2069,7 +2275,10 @@ struct sock *mptcp_sk_clone(const struct sock *sk, WRITE_ONCE(msk->fully_established, false); msk->write_seq = subflow_req->idsn + 1; + msk->snd_nxt = msk->write_seq; atomic64_set(&msk->snd_una, msk->write_seq); + atomic64_set(&msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd); + if (mp_opt->mp_capable) { msk->can_ack = true; msk->remote_key = mp_opt->sndr_key; @@ -2102,6 +2311,8 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) TCP_INIT_CWND * tp->advmss); if (msk->rcvq_space.space == 0) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; + + atomic64_set(&msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, @@ -2150,6 +2361,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); + sock_hold(ssk); mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(new_mptcp_sock); @@ -2316,7 +2528,8 @@ static void mptcp_release_cb(struct sock *sk) struct sock *ssk; ssk = mptcp_subflow_recv_lookup(msk); - if (!ssk || !schedule_work(&msk->work)) + if (!ssk || sk->sk_state == TCP_CLOSE || + !schedule_work(&msk->work)) __sock_put(sk); } @@ -2377,6 +2590,7 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->local_key, subflow->local_key); WRITE_ONCE(msk->write_seq, subflow->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); 
WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->can_ack, 1); atomic64_set(&msk->snd_una, msk->write_seq); @@ -2395,9 +2609,9 @@ static void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -bool mptcp_finish_join(struct sock *sk) +bool mptcp_finish_join(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct sock *parent = (void *)msk; struct socket *parent_sock; @@ -2418,12 +2632,14 @@ bool mptcp_finish_join(struct sock *sk) /* active connections are already on conn_list, and we can't acquire * msk lock here. * use the join list lock as synchronization point and double-check - * msk status to avoid racing with mptcp_close() + * msk status to avoid racing with __mptcp_destroy_sock() */ spin_lock_bh(&msk->join_list_lock); ret = inet_sk_state_load(parent) == TCP_ESTABLISHED; - if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) + if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) { list_add_tail(&subflow->node, &msk->join_list); + sock_hold(ssk); + } spin_unlock_bh(&msk->join_list_lock); if (!ret) return false; @@ -2432,19 +2648,12 @@ bool mptcp_finish_join(struct sock *sk) * at close time */ parent_sock = READ_ONCE(parent->sk_socket); - if (parent_sock && !sk->sk_socket) - mptcp_sock_graft(sk, parent_sock); + if (parent_sock && !ssk->sk_socket) + mptcp_sock_graft(ssk, parent_sock); subflow->map_seq = READ_ONCE(msk->ack_seq); return true; } -static bool mptcp_memory_free(const struct sock *sk, int wake) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - return wake ? test_bit(MPTCP_SEND_SPACE, &msk->flags) : true; -} - static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -2465,7 +2674,6 @@ static struct proto mptcp_prot = { .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, - .stream_memory_free = mptcp_memory_free, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .sysctl_mem = sysctl_tcp_mem, @@ -2639,6 +2847,39 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) 0; } +static bool __mptcp_check_writeable(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + bool mptcp_writable; + + mptcp_clean_una(sk); + mptcp_writable = sk_stream_is_writeable(sk); + if (!mptcp_writable) + mptcp_nospace(msk); + + return mptcp_writable; +} + +static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + __poll_t ret = 0; + bool slow; + + if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) + return 0; + + if (sk_stream_is_writeable(sk)) + return EPOLLOUT | EPOLLWRNORM; + + slow = lock_sock_fast(sk); + if (__mptcp_check_writeable(msk)) + ret = EPOLLOUT | EPOLLWRNORM; + + unlock_sock_fast(sk, slow); + return ret; +} + static __poll_t mptcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { @@ -2657,8 +2898,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { mask |= mptcp_check_readable(msk); - if (test_bit(MPTCP_SEND_SPACE, &msk->flags)) - mask |= EPOLLOUT | EPOLLWRNORM; + mask |= mptcp_check_writeable(msk); } if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; @@ -2669,12 +2909,12 @@ static __poll_t mptcp_poll(struct file *file, 
struct socket *sock, static int mptcp_shutdown(struct socket *sock, int how) { struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct mptcp_subflow_context *subflow; + struct sock *sk = sock->sk; int ret = 0; pr_debug("sk=%p, how=%d", msk, how); - lock_sock(sock->sk); + lock_sock(sk); how++; if ((how & ~SHUTDOWN_MASK) || !how) { @@ -2683,45 +2923,22 @@ static int mptcp_shutdown(struct socket *sock, int how) } if (sock->state == SS_CONNECTING) { - if ((1 << sock->sk->sk_state) & + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) sock->state = SS_DISCONNECTING; else sock->state = SS_CONNECTED; } - /* If we've already sent a FIN, or it's a closed state, skip this. */ - if (__mptcp_check_fallback(msk)) { - if (how == SHUT_WR || how == SHUT_RDWR) - inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - - mptcp_subflow_shutdown(sock->sk, tcp_sk, how); - } - } else if ((how & SEND_SHUTDOWN) && - ((1 << sock->sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) && - mptcp_close_state(sock->sk)) { - __mptcp_flush_join_list(msk); - - WRITE_ONCE(msk->write_seq, msk->write_seq + 1); - WRITE_ONCE(msk->snd_data_fin_enable, 1); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - - mptcp_subflow_shutdown(sock->sk, tcp_sk, how); - } - } + sk->sk_shutdown |= how; + if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) + __mptcp_wr_shutdown(sk); /* Wake up anyone sleeping in poll. */ - sock->sk->sk_state_change(sock->sk); + sk->sk_state_change(sk); out_unlock: - release_sock(sock->sk); + release_sock(sk); return ret; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 13ab89dc1914..b4c8dbe9236b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -86,11 +86,19 @@ /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 -#define MPTCP_SEND_SPACE 1 +#define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_WORKER_RUNNING 6 + +static inline bool before64(__u64 seq1, __u64 seq2) +{ + return (__s64)(seq1 - seq2) < 0; +} + +#define after64(seq2, seq1) before64(seq1, seq2) struct mptcp_options_received { u64 sndr_key; @@ -187,9 +195,10 @@ struct mptcp_pm_data { struct mptcp_data_frag { struct list_head list; u64 data_seq; - int data_len; - int offset; - int overhead; + u16 data_len; + u16 offset; + u16 overhead; + u16 already_sent; struct page *page; }; @@ -200,11 +209,13 @@ struct mptcp_sock { u64 local_key; u64 remote_key; u64 write_seq; + u64 snd_nxt; u64 ack_seq; u64 rcv_data_fin_seq; struct sock *last_snd; int snd_burst; atomic64_t snd_una; + atomic64_t wnd_end; unsigned long timer_ival; u32 token; unsigned long flags; @@ -219,6 +230,7 @@ struct mptcp_sock { struct rb_root out_of_order_queue; struct list_head conn_list; struct list_head rtx_queue; + struct mptcp_data_frag *first_pending; struct list_head join_list; struct skb_ext *cached_ext; /* for the next sendmsg */ struct socket *subflow; /* outgoing connect/listener/!mp_capable */ @@ -240,11 +252,41 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) return (struct mptcp_sock *)sk; } +static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + + return READ_ONCE(msk->first_pending); +} + +static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) +{ + 
struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_data_frag *cur; + + cur = msk->first_pending; + return list_is_last(&cur->list, &msk->rtx_queue) ? NULL : + list_next_entry(cur, list); +} + +static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (!msk->first_pending) + return NULL; + + if (WARN_ON_ONCE(list_empty(&msk->rtx_queue))) + return NULL; + + return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); +} + static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (list_empty(&msk->rtx_queue)) + if (!before64(msk->snd_nxt, atomic64_read(&msk->snd_una))) return NULL; return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); @@ -312,7 +354,8 @@ struct mptcp_subflow_context { mpc_map : 1, backup : 1, rx_eof : 1, - can_ack : 1; /* only after processing the remote a key */ + can_ack : 1, /* only after processing the remote key */ + disposable : 1; /* ctx can be freed at ulp release time */ enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -362,14 +405,14 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) } int mptcp_is_enabled(struct net *net); +unsigned int mptcp_get_add_addr_timeout(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow, - long timeout); + struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); /* called with sk socket lock held */ @@ -407,9 +450,16 @@ static inline bool mptcp_is_fully_established(struct sock *sk) void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); +bool mptcp_schedule_work(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); +static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->snd_data_fin_enable) && + READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); +} + void mptcp_destroy_common(struct mptcp_sock *msk); void __init mptcp_token_init(void); @@ -494,13 +544,6 @@ static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); } -static inline bool before64(__u64 seq1, __u64 seq2) -{ - return (__s64)(seq1 - seq2) < 0; -} - -#define after64(seq2, seq1) before64(seq1, seq2) - void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ac4a1fe3550b..794259789194 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -997,17 +997,16 @@ static void subflow_data_ready(struct sock *sk) static void subflow_write_space(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct socket *sock = READ_ONCE(sk->sk_socket); struct sock *parent = subflow->conn; if (!sk_stream_is_writeable(sk)) return; - if (sk_stream_is_writeable(parent)) { - set_bit(MPTCP_SEND_SPACE,
&mptcp_sk(parent)->flags); - smp_mb__after_atomic(); - /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */ - sk_stream_write_space(parent); - } + if (sock && sk_stream_is_writeable(parent)) + clear_bit(SOCK_NOSPACE, &sock->flags); + + sk_stream_write_space(parent); } static struct inet_connection_sock_af_ops * @@ -1125,6 +1124,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (err && err != -EINPROGRESS) goto failed; + sock_hold(ssk); spin_lock_bh(&msk->join_list_lock); list_add_tail(&subflow->node, &msk->join_list); spin_unlock_bh(&msk->join_list_lock); @@ -1132,6 +1132,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, return err; failed: + subflow->disposable = 1; sock_release(sf); return err; } @@ -1254,7 +1255,6 @@ static void subflow_state_change(struct sock *sk) mptcp_data_ready(parent, sk); if (__mptcp_check_fallback(mptcp_sk(parent)) && - !(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; mptcp_subflow_eof(parent); @@ -1297,17 +1297,26 @@ out: return err; } -static void subflow_ulp_release(struct sock *sk) +static void subflow_ulp_release(struct sock *ssk) { - struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk); + struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); + bool release = true; + struct sock *sk; if (!ctx) return; - if (ctx->conn) - sock_put(ctx->conn); + sk = ctx->conn; + if (sk) { + /* if the msk has been orphaned, keep the ctx + * alive, will be freed by mptcp_done() + */ + release = ctx->disposable; + sock_put(sk); + } - kfree_rcu(ctx, rcu); + if (release) + kfree_rcu(ctx, rcu); } static void subflow_ulp_clone(const struct request_sock *req, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 52370211e46b..49fbef0d99be 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -682,6 +682,16 @@ config NFT_FIB_NETDEV The lookup will be delegated to the IPv4 or IPv6 FIB depending on the protocol of the packet. +config NFT_REJECT_NETDEV + depends on NFT_REJECT_IPV4 + depends on NFT_REJECT_IPV6 + tristate "Netfilter nf_tables netdev REJECT support" + help + This option enables the REJECT support from the netdev table. + The return packet generation will be delegated to the IPv4 + or IPv6 ICMP or TCP RST implementation depending on the + protocol of the packet. 
+ endif # NF_TABLES_NETDEV endif # NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0e0ded87e27b..33da7bf1b68e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -101,6 +101,7 @@ obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o +obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7cff6e5e7445..c7eaa3776238 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1110,6 +1110,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl, ret = -IPSET_ERR_PROTOCOL; goto put_out; } + /* Set create flags depending on the type revision */ + set->flags |= set->type->create_flags[revision]; ret = set->type->create(net, set, tb, flags); if (ret != 0) @@ -1240,10 +1242,12 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { + u32 flags = flag_exist(nlh); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { - ret = -ENOENT; + if (!(flags & IPSET_FLAG_EXIST)) + ret = -ENOENT; goto out; } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 521e970be402..5f1208ad049e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -37,18 +37,18 @@ */ /* Number of elements to store in an initial array block */ -#define AHASH_INIT_SIZE 4 +#define AHASH_INIT_SIZE 2 /* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE) +#define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 +#define AHASH_MAX(h) ((h)->bucketsize) + /* Max number of elements can be tuned */ #ifdef IP_SET_HASH_WITH_MULTI -#define AHASH_MAX(h) ((h)->ahash_max) - static u8 -tune_ahash_max(u8 curr, u32 multi) +tune_bucketsize(u8 curr, u32 multi) { u32 n; @@ -61,12 +61,10 @@ tune_ahash_max(u8 curr, u32 multi) */ return n > curr && n <= AHASH_MAX_TUNED ? 
n : curr; } - -#define TUNE_AHASH_MAX(h, multi) \ - ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) +#define TUNE_BUCKETSIZE(h, multi) \ + ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi)) #else -#define AHASH_MAX(h) AHASH_MAX_SIZE -#define TUNE_AHASH_MAX(h, multi) +#define TUNE_BUCKETSIZE(h, multi) #endif /* A hash bucket */ @@ -321,9 +319,7 @@ struct htype { #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; /* markmask value for mark mask to store */ #endif -#ifdef IP_SET_HASH_WITH_MULTI - u8 ahash_max; /* max elements in an array block */ -#endif + u8 bucketsize; /* max elements in an array block */ #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif @@ -950,7 +946,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto set_full; /* Create a new slot */ if (n->pos >= n->size) { - TUNE_AHASH_MAX(h, multi); + TUNE_BUCKETSIZE(h, multi); if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); @@ -1305,6 +1301,11 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif + if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) { + if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) || + nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval))) + goto nla_put_failure; + } if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) @@ -1547,8 +1548,20 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #ifdef IP_SET_HASH_WITH_MARKMASK h->markmask = markmask; #endif - get_random_bytes(&h->initval, sizeof(h->initval)); - + if (tb[IPSET_ATTR_INITVAL]) + h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL])); + else + get_random_bytes(&h->initval, sizeof(h->initval)); + h->bucketsize = AHASH_MAX_SIZE; + if (tb[IPSET_ATTR_BUCKETSIZE]) { + h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]); + if (h->bucketsize < AHASH_INIT_SIZE) + h->bucketsize = AHASH_INIT_SIZE; + else if (h->bucketsize > AHASH_MAX_SIZE) + h->bucketsize = AHASH_MAX_SIZE; + else if (h->bucketsize % 2) + h->bucketsize += 1; + } t->htable_bits = hbits; t->maxelem = h->maxelem / ahash_numof_locks(hbits); RCU_INIT_POINTER(h->table, t); diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 5d6d68eaf6a9..d1bef23fd4f5 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,7 +23,8 @@ /* 1 Counters support */ /* 2 Comments support */ /* 3 Forceadd support */ -#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */ +/* 4 skbinfo support */ +#define IPSET_TYPE_REV_MAX 5 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -277,11 +278,13 @@ static struct ip_set_type hash_ip_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, diff --git 
a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index eceb7bc4a93a..467c59a83c0a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -23,7 +23,7 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 +#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>"); @@ -268,11 +268,13 @@ static struct ip_set_type hash_ipmac_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipmac_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index aba1df617d6e..18346d18aa16 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -21,7 +21,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Forceadd support */ -#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */ +/* 2 skbinfo support */ +#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>"); @@ -274,12 +275,14 @@ static struct ip_set_type hash_ipmark_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipmark_create, .create_policy = { [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 }, [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 1ff228717e29..e1ca11196515 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -25,7 +25,8 @@ /* 2 Counters support added */ /* 3 Comments support added */ /* 4 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ +/* 5 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -341,11 +342,13 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { 
.type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fa88afd812fa..ab179e064597 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -25,7 +25,8 @@ /* 2 Counters support added */ /* 3 Comments support added */ /* 4 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */ +/* 5 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -356,11 +357,13 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index eef6ecfcb409..8f075b44cf64 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -27,7 +27,8 @@ /* 4 Counters support added */ /* 5 Comments support added */ /* 6 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ +/* 7 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -513,11 +514,13 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 0b61593165ef..718814730acf 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -16,7 +16,7 @@ #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 0 +#define IPSET_TYPE_REV_MAX 1 /* bucketsize, initval support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -125,11 +125,13 @@ static struct ip_set_type hash_mac_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_mac_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + 
[IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 136cf0781d3a..c1a11f041ac6 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -24,7 +24,8 @@ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */ +/* 6 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 7 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -354,11 +355,13 @@ static struct ip_set_type hash_net_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index be5e95a0d876..ddd51c2e1cb3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -26,7 +26,8 @@ /* 4 Comments support added */ /* 5 Forceadd support added */ /* 6 skbinfo support added */ -#define IPSET_TYPE_REV_MAX 7 /* interface wildcard support added */ +/* 7 interface wildcard support added */ +#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -225,7 +226,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } - nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); + nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -442,7 +443,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], ip6_netmask(&e.ip, e.cidr); - nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); + nla_strscpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -470,11 +471,13 @@ static struct ip_set_type hash_netiface_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index da4ef910b12d..6532f0505e66 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ 
b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -22,7 +22,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */ +/* 2 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); @@ -459,11 +460,13 @@ static struct ip_set_type hash_netnet_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34448df80fb9..ec1564a1cb5a 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -26,7 +26,8 @@ /* 4 Counters support added */ /* 5 Comments support added */ /* 6 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */ +/* 7 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); @@ -460,11 +461,13 @@ static struct ip_set_type hash_netport_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 934c1712cba8..0e91d1e82f1c 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -23,7 +23,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 0 Comments support added */ /* 1 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */ +/* 2 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 3 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); @@ -558,11 +559,13 @@ static struct ip_set_type hash_netportnet_type __read_mostly = { .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, + .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_netportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, - [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, + [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 
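The bucketsize/initval changes repeated across the hash:* types above all funnel into the shared create path in ip_set_hash_gen.h: each set type bumps its top revision, advertises IPSET_CREATE_FLAG_BUCKETSIZE for that revision, and replaces IPSET_ATTR_PROBES with IPSET_ATTR_INITVAL and IPSET_ATTR_BUCKETSIZE in its create policy. As a rough illustration of the normalization that common code applies (a standalone userspace C sketch, not part of the patch; the constants are copied from the ip_set_hash_gen.h hunk), a missing attribute defaults to AHASH_MAX_SIZE, out-of-range values are clamped into [AHASH_INIT_SIZE, AHASH_MAX_SIZE], and odd values are rounded up to the next even size:

	#include <stdio.h>

	/* Constants as defined in the ip_set_hash_gen.h hunk above. */
	#define AHASH_INIT_SIZE 2
	#define AHASH_MAX_SIZE  (6 * AHASH_INIT_SIZE)

	/* Mirrors the clamping added to IPSET_TOKEN(HTYPE, _create)();
	 * a negative value stands in for "IPSET_ATTR_BUCKETSIZE absent".
	 */
	static unsigned char normalize_bucketsize(int requested)
	{
		unsigned char bucketsize;

		if (requested < 0)			/* attribute absent */
			return AHASH_MAX_SIZE;

		bucketsize = (unsigned char)requested;
		if (bucketsize < AHASH_INIT_SIZE)
			bucketsize = AHASH_INIT_SIZE;	/* clamp small values up */
		else if (bucketsize > AHASH_MAX_SIZE)
			bucketsize = AHASH_MAX_SIZE;	/* clamp large values down */
		else if (bucketsize % 2)
			bucketsize += 1;		/* keep bucket sizes even */
		return bucketsize;
	}

	int main(void)
	{
		int requests[] = { -1, 0, 1, 3, 7, 12, 64 };
		unsigned int i;

		for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
			printf("requested %d -> bucketsize %u\n",
			       requests[i],
			       (unsigned int)normalize_bucketsize(requests[i]));
		return 0;
	}

With these constants a request of 0 quietly becomes 2 and a request of 64 becomes 12; note that the create path never rejects an out-of-range bucket size, it only clamps it.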
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f58e98542be..65aa98fc5eb6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -581,7 +581,8 @@ struct nft_module_request { }; #ifdef CONFIG_MODULES -static int nft_request_module(struct net *net, const char *fmt, ...) +static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, + ...) { char module_name[MODULE_NAME_LEN]; struct nft_module_request *req; @@ -1281,7 +1282,7 @@ static struct nft_chain *nft_chain_lookup(struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); - nla_strlcpy(search, nla, sizeof(search)); + nla_strscpy(search, nla, sizeof(search)); WARN_ON(!rcu_read_lock_held() && !lockdep_commit_lock_is_held(net)); @@ -1721,7 +1722,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, goto err_hook_alloc; } - nla_strlcpy(ifname, attr, IFNAMSIZ); + nla_strscpy(ifname, attr, IFNAMSIZ); dev = __dev_get_by_name(net, ifname); if (!dev) { err = -ENOENT; @@ -5734,7 +5735,7 @@ struct nft_object *nft_obj_lookup(const struct net *net, struct rhlist_head *tmp, *list; struct nft_object *obj; - nla_strlcpy(search, nla, sizeof(search)); + nla_strscpy(search, nla, sizeof(search)); k.name = search; WARN_ON_ONCE(!rcu_read_lock_held() && diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 5bfec829c12f..5e511df8d709 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -112,7 +112,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, nfacct->flags = flags; } - nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX); + nla_strscpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 5b0d0a77379c..0f94fce1d3ed 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -146,7 +146,7 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; - nla_strlcpy(expect_policy->name, + nla_strscpy(expect_policy->name, tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); @@ -233,7 +233,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err1; - nla_strlcpy(helper->name, + nla_strscpy(helper->name, tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > sizeof_field(struct nf_conn_help, data)) { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 322bd674963e..a8c4d442231c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -990,7 +990,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, if (!priv->l4proto) return -ENOENT; - nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name)); + nla_strscpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name)); if (tb[NFTA_CT_HELPER_L3PROTO]) family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO])); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 57899454a530..a06a46b039c5 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -152,7 +152,7 @@ static int nft_log_init(const struct nft_ctx *ctx, priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); if (priv->prefix == NULL) return -ENOMEM; - nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + nla_strscpy(priv->prefix, nla, nla_len(nla) + 1); } else { 
priv->prefix = (char *)nft_log_null_prefix; } diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 61fb7e8afbf0..927ff8459bd9 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -40,6 +40,7 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -47,9 +48,17 @@ int nft_reject_init(const struct nft_ctx *ctx, priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: if (tb[NFTA_REJECT_ICMP_CODE] == NULL) return -EINVAL; - priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; case NFT_REJECT_TCP_RST: break; default: @@ -69,6 +78,7 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index cf8f2646e93c..32f3ea398ddf 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -58,60 +58,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, regs->verdict.code = NF_DROP; } -static int nft_reject_inet_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_reject_inet_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { - struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; - - if (tb[NFTA_REJECT_TYPE] == NULL) - return -EINVAL; - - priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - case NFT_REJECT_ICMPX_UNREACH: - if (tb[NFTA_REJECT_ICMP_CODE] == NULL) - return -EINVAL; - - icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); - if (priv->type == NFT_REJECT_ICMPX_UNREACH && - icmp_code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; - - priv->icmp_code = icmp_code; - break; - case NFT_REJECT_TCP_RST: - break; - default: - return -EINVAL; - } - return 0; -} - -static int nft_reject_inet_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_reject *priv = nft_expr_priv(expr); - - if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) - goto nla_put_failure; - - switch (priv->type) { - case NFT_REJECT_ICMP_UNREACH: - case NFT_REJECT_ICMPX_UNREACH: - if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) - goto nla_put_failure; - break; - default: - break; - } - - return 0; - -nla_put_failure: - return -1; + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_INGRESS)); } static struct nft_expr_type nft_reject_inet_type; @@ -119,9 +75,9 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .type = &nft_reject_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_inet_eval, - .init = nft_reject_inet_init, - .dump = nft_reject_inet_dump, - .validate = nft_reject_validate, + .init = nft_reject_init, + .dump = nft_reject_dump, + .validate = nft_reject_inet_validate, }; 
static struct nft_expr_type nft_reject_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c new file mode 100644 index 000000000000..d89f68754f42 --- /dev/null +++ b/net/netfilter/nft_reject_netdev.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Laura Garcia Liebana <nevola@gmail.com> + * Copyright (c) 2020 Jose M. Guisado <guigom@riseup.net> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> + +static void nft_reject_queue_xmit(struct sk_buff *nskb, struct sk_buff *oldskb) +{ + dev_hard_header(nskb, nskb->dev, ntohs(oldskb->protocol), + eth_hdr(oldskb)->h_source, eth_hdr(oldskb)->h_dest, + nskb->len); + dev_queue_xmit(nskb); +} + +static void nft_reject_netdev_send_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); + if (!nskb) + return; + + nft_reject_queue_xmit(nskb, oldskb); +} + +static void nft_reject_netdev_send_v4_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + + nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); + if (!nskb) + return; + + nft_reject_queue_xmit(nskb, oldskb); +} + +static void nft_reject_netdev_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook) +{ + struct sk_buff *nskb; + + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); + if (!nskb) + return; + + nft_reject_queue_xmit(nskb, oldskb); +} + + +static void nft_reject_netdev_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, + const struct net_device *dev, + int hook, u8 code) +{ + struct sk_buff *nskb; + + nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); + if (!nskb) + return; + + nft_reject_queue_xmit(nskb, oldskb); +} + +static void nft_reject_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct ethhdr *eth = eth_hdr(pkt->skb); + struct nft_reject *priv = nft_expr_priv(expr); + const unsigned char *dest = eth->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; + + switch (eth->h_proto) { + case htons(ETH_P_IP): + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_netdev_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_netdev_send_v4_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_netdev_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; + case htons(ETH_P_IPV6): + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_netdev_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_netdev_send_v6_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_netdev_send_v6_unreach(nft_net(pkt), 
pkt->skb, + nft_in(pkt), + nft_hook(pkt), + nft_reject_icmpv6_code(priv->icmp_code)); + break; + } + break; + default: + /* No explicit way to reject this protocol, drop it. */ + break; + } +out: + regs->verdict.code = NF_DROP; +} + +static int nft_reject_netdev_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); +} + +static struct nft_expr_type nft_reject_netdev_type; +static const struct nft_expr_ops nft_reject_netdev_ops = { + .type = &nft_reject_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_netdev_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, + .validate = nft_reject_netdev_validate, +}; + +static struct nft_expr_type nft_reject_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "reject", + .ops = &nft_reject_netdev_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_netdev_module_init(void) +{ + return nft_register_expr(&nft_reject_netdev_type); +} + +static void __exit nft_reject_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_reject_netdev_type); +} + +module_init(nft_reject_netdev_module_init); +module_exit(nft_reject_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Laura Garcia Liebana <nevola@gmail.com>"); +MODULE_AUTHOR("Jose M. Guisado <guigom@riseup.net>"); +MODULE_DESCRIPTION("Reject packets from netdev via nftables"); +MODULE_ALIAS_NFT_AF_EXPR(5, "reject"); diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 4e62f2ad3575..f28c8947c730 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -366,6 +366,7 @@ static const struct netlbl_calipso_ops *calipso_ops; /** * netlbl_calipso_ops_register - Register the CALIPSO operations + * @ops: ops to register * * Description: * Register the CALIPSO packet engine operations. diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index eb1d66d20afb..df1b41ed73fd 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -95,7 +95,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_free_entry; } - nla_strlcpy(entry->domain, + nla_strscpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } diff --git a/net/nfc/core.c b/net/nfc/core.c index eb377f87bcae..573c80c6ff7a 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -189,7 +189,8 @@ static const struct rfkill_ops nfc_rfkill_ops = { * nfc_start_poll - start polling for nfc targets * * @dev: The nfc device that must start polling - * @protocols: bitset of nfc protocols that must be used for polling + * @im_protocols: bitset of nfc initiator protocols to be used for polling + * @tm_protocols: bitset of nfc transport protocols to be used for polling * * The device remains polling for targets until a target is found or * the nfc_stop_poll function is called. @@ -436,6 +437,7 @@ error: * * @dev: The nfc device that found the target * @target_idx: index of the target that must be deactivated + * @mode: idle or sleep? 
*/ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) { @@ -703,7 +705,11 @@ EXPORT_SYMBOL(nfc_tm_deactivated); /** * nfc_alloc_send_skb - allocate a skb for data exchange responses * + * @dev: device sending the response + * @sk: socket sending the response + * @flags: MSG_DONTWAIT flag * @size: size to allocate + * @err: pointer to memory to store the error code */ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, unsigned int flags, unsigned int size, @@ -1039,6 +1045,8 @@ struct nfc_dev *nfc_get_device(unsigned int idx) * * @ops: device operations * @supported_protocols: NFC protocols supported by the device + * @tx_headroom: reserved space at beginning of skb + * @tx_tailroom: reserved space at end of skb */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index e3599ed4a7a8..da7e2112771f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -458,6 +458,9 @@ static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, /** * start_poll operation + * @nfc_dev: device to be polled + * @im_protocols: bitset of nfc initiator protocols to be used for polling + * @tm_protocols: bitset of nfc transport protocols to be used for polling * * For every supported protocol, the corresponding polling function is added * to the table of polling technologies (ddev->poll_techs[]) using diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 741da8f81c2b..4953ee5146e1 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1112,6 +1112,8 @@ static struct nfc_ops nci_nfc_ops = { * * @ops: device operations * @supported_protocols: NFC protocols supported by the device + * @tx_headroom: Reserved space at beginning of skb + * @tx_tailroom: Reserved space at end of skb */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index c18e76d6d8ba..6b275a387a92 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -363,16 +363,13 @@ exit: } static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, - u8 result, struct sk_buff *skb) + struct sk_buff *skb) { struct nci_conn_info *conn_info; - u8 status = result; conn_info = ndev->hci_dev->conn_info; - if (!conn_info) { - status = NCI_STATUS_REJECTED; + if (!conn_info) goto exit; - } conn_info->rx_skb = skb; @@ -388,7 +385,7 @@ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, { switch (type) { case NCI_HCI_HCP_RESPONSE: - nci_hci_resp_received(ndev, pipe, instruction, skb); + nci_hci_resp_received(ndev, pipe, skb); break; case NCI_HCI_HCP_COMMAND: nci_hci_cmd_received(ndev, pipe, instruction, skb); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 8709f3d4e7c4..573b38ad2f8e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1226,7 +1226,7 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], + nla_strscpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); rc = nfc_fw_download(dev, firmware_name); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b03d142ec82e..c7f34d6a9934 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -294,6 +294,10 @@ static bool icmp6hdr_ok(struct sk_buff *skb) /** * Parse vlan tag from vlan header. 
+ * @skb: skb containing frame to parse + * @key_vh: pointer to parsed vlan tag + * @untag_vlan: should the vlan header be removed from the frame + * * Returns ERROR on memory error. * Returns 0 if it encounters a non-vlan or incomplete packet. * Returns 1 after successfully parsing vlan tag. diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 8fbefd52af7f..15424d26e85d 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -423,7 +423,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; meter = dp_meter_create(a); - if (IS_ERR_OR_NULL(meter)) + if (IS_ERR(meter)) return PTR_ERR(meter); reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 1e30d8df3ba5..5b2ee9c1c00b 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -35,21 +35,18 @@ internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { int len, err; + /* store len value because skb can be freed inside ovs_vport_receive() */ len = skb->len; + rcu_read_lock(); err = ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); - if (likely(!err)) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(netdev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { + if (likely(!err)) + dev_sw_netstats_tx_add(netdev, 1, len); + else netdev->stats.tx_errors++; - } + return NETDEV_TX_OK; } @@ -83,24 +80,12 @@ static void internal_dev_destructor(struct net_device *dev) ovs_vport_free(vport); } -static void -internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) -{ - memset(stats, 0, sizeof(*stats)); - stats->rx_errors = dev->stats.rx_errors; - stats->tx_errors = dev->stats.tx_errors; - stats->tx_dropped = dev->stats.tx_dropped; - stats->rx_dropped = dev->stats.rx_dropped; - - dev_fetch_sw_netstats(stats, dev->tstats); -} - static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_get_stats64 = internal_get_stats, + .ndo_get_stats64 = dev_get_tstats64, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 82d801f063b7..4ed7e52c7012 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -111,10 +111,12 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name) * * @priv_size: Size of private data area to allocate. * @ops: vport device ops + * @parms: information about new vport. * * Allocate and initialize a new vport defined by @ops. The vport will contain * a private data area of size @priv_size that can be accessed using - * vport_priv(). vports that are no longer needed should be released with + * vport_priv(). Some parameters of the vport will be initialized from @parms. + * @vports that are no longer needed should be released with * vport_free(). 
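The internal_dev conversion above swaps hand-rolled per-cpu counter updates for the generic helpers dev_sw_netstats_tx_add() and dev_get_tstats64(). The same shape for a hypothetical driver (foo_xmit() and foo_deliver() are illustrative names, and dev->tstats is assumed to be allocated):

    static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            /* Read len first: delivery may free the skb */
            int len = skb->len;

            if (likely(foo_deliver(skb) == 0))
                    dev_sw_netstats_tx_add(dev, 1, len);
            else
                    dev->stats.tx_errors++;

            return NETDEV_TX_OK;
    }

    static const struct net_device_ops foo_netdev_ops = {
            .ndo_start_xmit  = foo_xmit,
            /* Folds the per-cpu dev->tstats counters into the result */
            .ndo_get_stats64 = dev_get_tstats64,
    };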
*/ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cefbd50c1090..62ebfaa7adcb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1636,13 +1636,15 @@ static bool fanout_find_new_id(struct sock *sk, u16 *new_id) return false; } -static int fanout_add(struct sock *sk, u16 id, u16 type_flags) +static int fanout_add(struct sock *sk, struct fanout_args *args) { struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); + u16 type_flags = args->type_flags; struct packet_fanout *f, *match; u8 type = type_flags & 0xff; u8 flags = type_flags >> 8; + u16 id = args->id; int err; switch (type) { @@ -1700,11 +1702,21 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->flags != flags) - goto out; - if (!match) { + if (match) { + if (match->flags != flags) + goto out; + if (args->max_num_members && + args->max_num_members != match->max_num_members) + goto out; + } else { + if (args->max_num_members > PACKET_FANOUT_MAX) + goto out; + if (!args->max_num_members) + /* legacy PACKET_FANOUT_MAX */ + args->max_num_members = 256; err = -ENOMEM; - match = kzalloc(sizeof(*match), GFP_KERNEL); + match = kvzalloc(struct_size(match, arr, args->max_num_members), + GFP_KERNEL); if (!match) goto out; write_pnet(&match->net, sock_net(sk)); @@ -1720,6 +1732,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.id_match = match_fanout_group; + match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1730,7 +1743,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; - if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { + if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); po->fanout = match; po->rollover = rollover; @@ -1744,7 +1757,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (err && !refcount_read(&match->sk_ref)) { list_del(&match->list); - kfree(match); + kvfree(match); } out: @@ -3075,7 +3088,7 @@ static int packet_release(struct socket *sock) kfree(po->rollover); if (f) { fanout_release_data(f); - kfree(f); + kvfree(f); } /* * Now the socket is dead. No more input will appear. 
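PACKET_FANOUT above now accepts either the legacy 4-byte integer or a struct fanout_args, letting a group be sized past the old fixed 256-socket array, up to the new PACKET_FANOUT_MAX of 1 << 16. A userspace-style sketch; the group id and size are illustrative:

    #include <sys/socket.h>
    #include <linux/if_packet.h>

    static int join_fanout_group(int fd, unsigned short group_id)
    {
            struct fanout_args args = {
                    .id = group_id,
                    .type_flags = PACKET_FANOUT_HASH,
                    .max_num_members = 1024,    /* 0 keeps the legacy 256 */
            };

            /* Older kernels still accept the sizeof(int) form */
            return setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
                              &args, sizeof(args));
    }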
@@ -3866,14 +3879,14 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT: { - int val; + struct fanout_args args = { 0 }; - if (optlen != sizeof(val)) + if (optlen != sizeof(int) && optlen != sizeof(args)) return -EINVAL; - if (copy_from_sockptr(&val, optval, sizeof(val))) + if (copy_from_sockptr(&args, optval, optlen)) return -EFAULT; - return fanout_add(sk, val & 0xffff, val >> 16); + return fanout_add(sk, &args); } case PACKET_FANOUT_DATA: { diff --git a/net/packet/internal.h b/net/packet/internal.h index fd41ecb7f605..baafc3f3fa25 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -77,11 +77,12 @@ struct packet_ring_buffer { }; extern struct mutex fanout_mutex; -#define PACKET_FANOUT_MAX 256 +#define PACKET_FANOUT_MAX (1 << 16) struct packet_fanout { possible_net_t net; unsigned int num_members; + u32 max_num_members; u16 id; u8 type; u8 flags; @@ -90,10 +91,10 @@ struct packet_fanout { struct bpf_prog __rcu *bpf_prog; }; struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; + struct sock *arr[]; }; struct packet_rollover { diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index b8559c882431..56aaf8cb6527 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -517,10 +517,6 @@ static int ctrl_cmd_new_server(struct sockaddr_qrtr *from, port = from->sq_port; } - /* Don't accept spoofed messages */ - if (from->sq_node != node_id) - return -EINVAL; - srv = server_add(service, instance, node_id, port); if (!srv) return -EINVAL; @@ -559,10 +555,6 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, port = from->sq_port; } - /* Don't accept spoofed messages */ - if (from->sq_node != node_id) - return -EINVAL; - /* Local servers may only unregister themselves */ if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) return -EINVAL; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 957aa9263ba4..f4ab3ca6d73b 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -171,8 +171,13 @@ static void __qrtr_node_release(struct kref *kref) void __rcu **slot; spin_lock_irqsave(&qrtr_nodes_lock, flags); - if (node->nid != QRTR_EP_NID_AUTO) - radix_tree_delete(&qrtr_nodes, node->nid); + /* If the node is a bridge for other nodes, there are possibly + * multiple entries pointing to our released node, delete them all. 
+ */ + radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { + if (*slot == node) + radix_tree_iter_delete(&qrtr_nodes, &iter, slot); + } spin_unlock_irqrestore(&qrtr_nodes_lock, flags); list_del(&node->item); @@ -347,7 +352,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->src_port_id = cpu_to_le32(from->sq_port); if (to->sq_port == QRTR_PORT_CTRL) { hdr->dst_node_id = cpu_to_le32(node->nid); - hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST); + hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); } else { hdr->dst_node_id = cpu_to_le32(to->sq_node); hdr->dst_port_id = cpu_to_le32(to->sq_port); @@ -401,12 +406,13 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) { unsigned long flags; - if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) + if (nid == QRTR_EP_NID_AUTO) return; spin_lock_irqsave(&qrtr_nodes_lock, flags); radix_tree_insert(&qrtr_nodes, nid, node); - node->nid = nid; + if (node->nid == QRTR_EP_NID_AUTO) + node->nid = nid; spin_unlock_irqrestore(&qrtr_nodes_lock, flags); } @@ -494,6 +500,13 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) qrtr_node_assign(node, cb->src_node); + if (cb->type == QRTR_TYPE_NEW_SERVER) { + /* Remote node endpoint can bridge other distant nodes */ + const struct qrtr_ctrl_pkt *pkt = data + hdrlen; + + qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); + } + if (cb->type == QRTR_TYPE_RESUME_TX) { qrtr_tx_resume(node, skb); } else { @@ -519,18 +532,20 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_post); /** * qrtr_alloc_ctrl_packet() - allocate control packet skb * @pkt: reference to qrtr_ctrl_pkt pointer + * @flags: the type of memory to allocate * * Returns newly allocated sk_buff, or NULL on failure * * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and * on success returns a reference to the control packet in @pkt. 
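Since qrtr_node_assign() can now file the same endpoint under several node ids (a node may bridge distant nodes), release has to sweep every matching slot rather than delete a single key, as the loop above does. The pattern in isolation; release_all() is a hypothetical name and locking is omitted:

    static void release_all(struct radix_tree_root *root, void *obj)
    {
            struct radix_tree_iter iter;
            void __rcu **slot;

            /* Drop every slot still pointing at obj */
            radix_tree_for_each_slot(slot, root, &iter, 0) {
                    if (*slot == obj)
                            radix_tree_iter_delete(root, &iter, slot);
            }
    }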
*/ -static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt) +static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt, + gfp_t flags) { const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; - skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL); + skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, flags); if (!skb) return NULL; @@ -592,6 +607,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) struct qrtr_ctrl_pkt *pkt; struct qrtr_tx_flow *flow; struct sk_buff *skb; + unsigned long flags; void __rcu **slot; mutex_lock(&node->ep_lock); @@ -599,11 +615,18 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) mutex_unlock(&node->ep_lock); /* Notify the local controller about the event */ - skb = qrtr_alloc_ctrl_packet(&pkt); - if (skb) { - pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); - qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); + spin_lock_irqsave(&qrtr_nodes_lock, flags); + radix_tree_for_each_slot(slot, &qrtr_nodes, &iter, 0) { + if (*slot != node) + continue; + src.sq_node = iter.index; + skb = qrtr_alloc_ctrl_packet(&pkt, GFP_ATOMIC); + if (skb) { + pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); + qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); + } } + spin_unlock_irqrestore(&qrtr_nodes_lock, flags); /* Wake up any transmitters waiting for resume-tx from the node */ mutex_lock(&node->qrtr_tx_lock); @@ -656,7 +679,7 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) to.sq_node = QRTR_NODE_BCAST; to.sq_port = QRTR_PORT_CTRL; - skb = qrtr_alloc_ctrl_packet(&pkt); + skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (skb) { pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); pkt->client.node = cpu_to_le32(ipc->us.sq_node); @@ -982,7 +1005,7 @@ static int qrtr_send_resume_tx(struct qrtr_cb *cb) if (!node) return -EINVAL; - skb = qrtr_alloc_ctrl_packet(&pkt); + skb = qrtr_alloc_ctrl_packet(&pkt, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f66417d5d2c3..fc23f46a315c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -215,6 +215,36 @@ static size_t tcf_action_fill_size(const struct tc_action *act) return sz; } +static int +tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tc_cookie *cookie; + + if (nla_put_string(skb, TCA_KIND, a->ops->kind)) + goto nla_put_failure; + if (tcf_action_copy_stats(skb, a, 0)) + goto nla_put_failure; + if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index)) + goto nla_put_failure; + + rcu_read_lock(); + cookie = rcu_dereference(a->act_cookie); + if (cookie) { + if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { + rcu_read_unlock(); + goto nla_put_failure; + } + } + rcu_read_unlock(); + + return 0; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -248,7 +278,9 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, index--; goto nla_put_failure; } - err = tcf_action_dump_1(skb, p, 0, 0); + err = (act_flags & TCA_FLAG_TERSE_DUMP) ? 
+ tcf_action_dump_terse(skb, p, true) : + tcf_action_dump_1(skb, p, 0, 0); if (err < 0) { index--; nlmsg_trim(skb, nest); @@ -651,7 +683,7 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) return res; } -/*TCA_ACT_MAX_PRIO is 32, there count upto 32 */ +/*TCA_ACT_MAX_PRIO is 32, there count up to 32 */ #define TCA_ACT_MAX_PRIO_MASK 0x1FF int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) @@ -752,34 +784,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) return a->ops->dump(skb, a, bind, ref); } -static int -tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cookie *cookie; - - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) - goto nla_put_failure; - if (tcf_action_copy_stats(skb, a, 0)) - goto nla_put_failure; - - rcu_read_lock(); - cookie = rcu_dereference(a->act_cookie); - if (cookie) { - if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { - rcu_read_unlock(); - goto nla_put_failure; - } - } - rcu_read_unlock(); - - return 0; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -787,7 +791,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (tcf_action_dump_terse(skb, a)) + if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; if (a->hw_stats != TCA_ACT_HW_STATS_ANY && @@ -832,7 +836,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; - err = terse ? tcf_action_dump_terse(skb, a) : + err = terse ? tcf_action_dump_terse(skb, a, false) : tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; @@ -935,7 +939,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG(extack, "TC action kind must be specified"); goto err_out; } - if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) { + if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); goto err_out; } @@ -1469,7 +1473,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, } static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { - [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON), + [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON | + TCA_FLAG_TERSE_DUMP), [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, }; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index a4c7ba35a343..e48e980c3b93 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -65,7 +65,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, * In case a different well-known TC_ACT opcode has been * returned, it will overwrite the default one. * - * For everything else that is unkown, TC_ACT_UNSPEC is + * For everything else that is unknown, TC_ACT_UNSPEC is * returned. 
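TCA_ROOT_FLAGS is an NLA_BITFIELD32 attribute, so a dumper opts into the new terse mode by setting TCA_FLAG_TERSE_DUMP in both the value and the selector. A sketch of composing such a request, here with libmnl (an arbitrary choice; nlh is assumed to point at an RTM_GETACTION header under construction):

    #include <libmnl/libmnl.h>
    #include <linux/rtnetlink.h>

    static void request_terse_dump(struct nlmsghdr *nlh)
    {
            struct nla_bitfield32 flags = {
                    .value    = TCA_FLAG_TERSE_DUMP,
                    .selector = TCA_FLAG_TERSE_DUMP,
            };

            mnl_attr_put(nlh, TCA_ROOT_FLAGS, sizeof(flags), &flags);
    }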
*/ switch (filter_res) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8dc3bec0d325..ac7297f42355 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -166,7 +166,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (unlikely(!tname)) goto err1; if (tb[TCA_IPT_TABLE] == NULL || - nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ) + nla_strscpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); t = kmemdup(td, td->u.target_size, GFP_KERNEL); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a4f3d0f0daa9..726cc956d06f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -52,7 +52,7 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; - nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); + nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); return 0; } @@ -71,7 +71,7 @@ static int reset_policy(struct tc_action *a, const struct nlattr *defdata, spin_lock_bh(&d->tcf_lock); goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); - nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); + nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 838b3fd94d77..ff3e943febaa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -223,7 +223,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) static bool tcf_proto_check_kind(struct nlattr *kind, char *name) { if (kind) - return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ; + return nla_strscpy(name, kind, IFNAMSIZ) < 0; memset(name, 0, IFNAMSIZ); return false; } @@ -2940,7 +2940,6 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) struct tcf_chain *chain; long index_start; long index; - u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) @@ -2955,13 +2954,6 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; - /* If we work with block index, q is NULL and parent value - * will never be used in the following code. The check - * in tcf_fill_node prevents it. However, compiler does not - * see that far, so set parent to zero to silence the warning - * about parent being uninitialized. - */ - parent = 0; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; @@ -2971,13 +2963,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) return skb->len; - parent = tcm->tcm_parent; - if (!parent) { + if (!tcm->tcm_parent) q = dev->qdisc; - parent = q->handle; - } else { + else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); - } + if (!q) goto out; cops = q->ops->cl_ops; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index d36949d9382c..2e288f88ff02 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -238,7 +238,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) } } - /* Something went wrong if we are trying to replace a non-existant + /* Something went wrong if we are trying to replace a non-existent * node. Mind as well halt instead of silently failing. 
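The sch_atm change above is part of the tree-wide tasklet API conversion: callbacks now take the tasklet pointer itself and recover their container with from_tasklet(), a container_of() wrapper, instead of casting an unsigned long back to a pointer. The generic shape, for a hypothetical struct foo:

    struct foo {
            struct tasklet_struct task;
            /* ... driver state ... */
    };

    static void foo_tasklet_fn(struct tasklet_struct *t)
    {
            /* from_tasklet(var, tasklet, member) is container_of() */
            struct foo *foo = from_tasklet(foo, t, task);

            /* ... process foo ... */
    }

    static void foo_init(struct foo *foo)
    {
            tasklet_setup(&foo->task, foo_tasklet_fn);
    }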
*/ BUG_ON(1); diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index a4d09b1fb66a..f17b049ea530 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -41,7 +41,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, break; case TCF_EM_ALIGN_U32: - /* Worth checking boundries? The branching seems + /* Worth checking boundaries? The branching seems * to get worse. Visit again. */ val = get_unaligned_be32(ptr); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a76a2f5ed88..1a2d2471b078 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1170,7 +1170,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, #ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; - if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { + if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1c281cc81f57..007bd2d9f1ff 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -466,10 +466,10 @@ drop: __maybe_unused * non-ATM interfaces. */ -static void sch_atm_dequeue(unsigned long data) +static void sch_atm_dequeue(struct tasklet_struct *t) { - struct Qdisc *sch = (struct Qdisc *)data; - struct atm_qdisc_data *p = qdisc_priv(sch); + struct atm_qdisc_data *p = from_tasklet(p, t, task); + struct Qdisc *sch = qdisc_from_priv(p); struct atm_flow_data *flow; struct sk_buff *skb; @@ -563,7 +563,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); + tasklet_setup(&p->task, sch_atm_dequeue); return 0; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index c65077f0c0f3..5a457ff61acd 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -405,7 +405,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, /* We restart the measurement cycle if the following conditions are met * 1. If the delay has been low for 2 consecutive Tupdate periods * 2. Calculated drop probability is zero - * 3. If average dq_rate_estimator is enabled, we have atleast one + * 3. If average dq_rate_estimator is enabled, we have at least one * estimate for the avg_dq_rate ie., is a non-zero value */ if ((vars->qdelay < params->target / 2) && diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 39d7fa9569f8..5da599ff84a9 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -11,6 +11,7 @@ menuconfig IP_SCTP select CRYPTO_HMAC select CRYPTO_SHA1 select LIBCRC32C + select NET_UDP_TUNNEL help Stream Control Transmission Protocol diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fdb69d46276d..336df4b36655 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -99,6 +99,8 @@ static struct sctp_association *sctp_association_init( */ asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + asoc->encap_port = sp->encap_port; + /* Initialize path max retrans value. */ asoc->pathmaxrxt = sp->pathmaxrxt; @@ -624,6 +626,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, */ peer->hbinterval = asoc->hbinterval; + peer->encap_port = asoc->encap_port; + /* Set the path max_retrans. 
*/ peer->pathmaxrxt = asoc->pathmaxrxt; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8a58f42d6d19..c3e89c776e66 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -55,6 +55,7 @@ #include <net/inet_common.h> #include <net/inet_ecn.h> #include <net/sctp/sctp.h> +#include <net/udp_tunnel.h> #include <linux/uaccess.h> @@ -191,33 +192,53 @@ out: return ret; } -static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) +static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) { + struct dst_entry *dst = dst_clone(t->dst); + struct flowi6 *fl6 = &t->fl.u.ip6; struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 *fl6 = &transport->fl.u.ip6; __u8 tclass = np->tclass; - int res; + __be32 label; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); - if (transport->dscp & SCTP_DSCP_SET_MASK) - tclass = transport->dscp & SCTP_DSCP_VAL_MASK; + if (t->dscp & SCTP_DSCP_SET_MASK) + tclass = t->dscp & SCTP_DSCP_VAL_MASK; if (INET_ECN_is_capable(tclass)) IP6_ECN_flow_xmit(sk, fl6->flowlabel); - if (!(transport->param_flags & SPP_PMTUD_ENABLE)) + if (!(t->param_flags & SPP_PMTUD_ENABLE)) skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - rcu_read_lock(); - res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), - tclass, sk->sk_priority); - rcu_read_unlock(); - return res; + if (!t->encap_port || !sctp_sk(sk)->udp_port) { + int res; + + skb_dst_set(skb, dst); + rcu_read_lock(); + res = ip6_xmit(sk, skb, fl6, sk->sk_mark, + rcu_dereference(np->opt), + tclass, sk->sk_priority); + rcu_read_unlock(); + return res; + } + + if (skb_is_gso(skb)) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + + skb->encapsulation = 1; + skb_reset_inner_mac_header(skb); + skb_reset_inner_transport_header(skb); + skb_set_inner_ipproto(skb, IPPROTO_SCTP); + label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); + + return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, + &fl6->daddr, tclass, ip6_dst_hoplimit(dst), + label, sctp_sk(sk)->udp_port, t->encap_port, false); } /* Returns the dst cache entry for the given source and destination ip @@ -1053,6 +1074,7 @@ static struct inet_protosw sctpv6_stream_protosw = { static int sctp6_rcv(struct sk_buff *skb) { + SCTP_INPUT_CB(skb)->encap_port = 0; return sctp_rcv(skb) ? -1 : 0; } diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 74847d613835..ce281a9a2875 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -27,7 +27,11 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; - gso_reset_checksum(skb, ~0); + /* csum and csum_start in GSO CB may be needed to do the UDP + * checksum when it's a UDP tunneling packet. 
+ */ + SKB_GSO_CB(skb)->csum = (__force __wsum)~0; + SKB_GSO_CB(skb)->csum_start = skb_headroom(skb) + skb->len; return sctp_compute_cksum(skb, skb_transport_offset(skb)); } diff --git a/net/sctp/output.c b/net/sctp/output.c index 1441eaf460bb..6614c9fdc51e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -508,20 +508,14 @@ merge: sizeof(struct inet6_skb_parm))); skb_shinfo(head)->gso_segs = pkt_count; skb_shinfo(head)->gso_size = GSO_BY_FRAGS; - rcu_read_lock(); - if (skb_dst(head) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - rcu_read_unlock(); goto chksum; } if (sctp_checksum_disable) return 1; - if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) || - dst_xfrm(skb_dst(head)) || packet->ipfragok) { + if (!(tp->dst->dev->features & NETIF_F_SCTP_CRC) || + dst_xfrm(tp->dst) || packet->ipfragok || tp->encap_port) { struct sctphdr *sh = (struct sctphdr *)skb_transport_header(head); @@ -548,7 +542,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; - struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; struct sock *sk; @@ -585,13 +578,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->checksum = 0; /* drop packet if no dst */ - dst = dst_clone(tp->dst); - if (!dst) { + if (!tp->dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(head); goto out; } - skb_dst_set(head, dst); + + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + rcu_read_unlock(); /* pack up chunks */ pkt_count = sctp_packet_pack(packet, head, gso, gfp); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 25833238fe93..6f2bbfeec3a4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -44,6 +44,7 @@ #include <net/addrconf.h> #include <net/inet_common.h> #include <net/inet_ecn.h> +#include <net/udp_tunnel.h> #define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) @@ -840,6 +841,92 @@ static int sctp_ctl_sock_init(struct net *net) return 0; } +static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb) +{ + SCTP_INPUT_CB(skb)->encap_port = udp_hdr(skb)->source; + + skb_set_transport_header(skb, sizeof(struct udphdr)); + sctp_rcv(skb); + return 0; +} + +static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb) +{ + struct sctp_association *asoc; + struct sctp_transport *t; + int family; + + skb->transport_header += sizeof(struct udphdr); + family = (ip_hdr(skb)->version == 4) ? 
AF_INET : AF_INET6; + sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb), + &asoc, &t); + if (!sk) + return -ENOENT; + + sctp_err_finish(sk, t); + return 0; +} + +int sctp_udp_sock_start(struct net *net) +{ + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct udp_port_cfg udp_conf = {0}; + struct socket *sock; + int err; + + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.local_udp_port = htons(net->sctp.udp_port); + err = udp_sock_create(net, &udp_conf, &sock); + if (err) { + pr_err("Failed to create the SCTP UDP tunneling v4 sock\n"); + return err; + } + + tuncfg.encap_type = 1; + tuncfg.encap_rcv = sctp_udp_rcv; + tuncfg.encap_err_lookup = sctp_udp_err_lookup; + setup_udp_tunnel_sock(net, sock, &tuncfg); + net->sctp.udp4_sock = sock->sk; + +#if IS_ENABLED(CONFIG_IPV6) + memset(&udp_conf, 0, sizeof(udp_conf)); + + udp_conf.family = AF_INET6; + udp_conf.local_ip6 = in6addr_any; + udp_conf.local_udp_port = htons(net->sctp.udp_port); + udp_conf.use_udp6_rx_checksums = true; + udp_conf.ipv6_v6only = true; + err = udp_sock_create(net, &udp_conf, &sock); + if (err) { + pr_err("Failed to create the SCTP UDP tunneling v6 sock\n"); + udp_tunnel_sock_release(net->sctp.udp4_sock->sk_socket); + net->sctp.udp4_sock = NULL; + return err; + } + + tuncfg.encap_type = 1; + tuncfg.encap_rcv = sctp_udp_rcv; + tuncfg.encap_err_lookup = sctp_udp_err_lookup; + setup_udp_tunnel_sock(net, sock, &tuncfg); + net->sctp.udp6_sock = sock->sk; +#endif + + return 0; +} + +void sctp_udp_sock_stop(struct net *net) +{ + if (net->sctp.udp4_sock) { + udp_tunnel_sock_release(net->sctp.udp4_sock->sk_socket); + net->sctp.udp4_sock = NULL; + } + if (net->sctp.udp6_sock) { + udp_tunnel_sock_release(net->sctp.udp6_sock->sk_socket); + net->sctp.udp6_sock = NULL; + } +} + /* Register address family specific functions. */ int sctp_register_af(struct sctp_af *af) { @@ -971,25 +1058,44 @@ static int sctp_inet_supported_addrs(const struct sctp_sock *opt, } /* Wrapper routine that calls the ip transmit routine. */ -static inline int sctp_v4_xmit(struct sk_buff *skb, - struct sctp_transport *transport) +static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) { - struct inet_sock *inet = inet_sk(skb->sk); + struct dst_entry *dst = dst_clone(t->dst); + struct flowi4 *fl4 = &t->fl.u.ip4; + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); __u8 dscp = inet->tos; + __be16 df = 0; pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, - skb->len, &transport->fl.u.ip4.saddr, - &transport->fl.u.ip4.daddr); + skb->len, &fl4->saddr, &fl4->daddr); - if (transport->dscp & SCTP_DSCP_SET_MASK) - dscp = transport->dscp & SCTP_DSCP_VAL_MASK; + if (t->dscp & SCTP_DSCP_SET_MASK) + dscp = t->dscp & SCTP_DSCP_VAL_MASK; + + inet->pmtudisc = t->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO + : IP_PMTUDISC_DONT; + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? 
- IP_PMTUDISC_DO : IP_PMTUDISC_DONT; + if (!t->encap_port || !sctp_sk(sk)->udp_port) { + skb_dst_set(skb, dst); + return __ip_queue_xmit(sk, skb, &t->fl, dscp); + } - SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); + if (skb_is_gso(skb)) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; - return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp); + if (ip_dont_fragment(sk, dst) && !skb->ignore_df) + df = htons(IP_DF); + + skb->encapsulation = 1; + skb_reset_inner_mac_header(skb); + skb_reset_inner_transport_header(skb); + skb_set_inner_ipproto(skb, IPPROTO_SCTP); + udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr, + fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, + sctp_sk(sk)->udp_port, t->encap_port, false, false); + return 0; } static struct sctp_af sctp_af_inet; @@ -1054,9 +1160,15 @@ static struct inet_protosw sctp_stream_protosw = { .flags = SCTP_PROTOSW_FLAG }; +static int sctp4_rcv(struct sk_buff *skb) +{ + SCTP_INPUT_CB(skb)->encap_port = 0; + return sctp_rcv(skb); +} + /* Register with IP layer. */ static const struct net_protocol sctp_protocol = { - .handler = sctp_rcv, + .handler = sctp4_rcv, .err_handler = sctp_v4_err, .no_policy = 1, .netns_ok = 1, @@ -1271,6 +1383,12 @@ static int __net_init sctp_defaults_init(struct net *net) /* Enable ECN by default. */ net->sctp.ecn_enable = 1; + /* Set UDP tunneling listening port to 0 by default */ + net->sctp.udp_port = 0; + + /* Set remote encap port to 0 by default */ + net->sctp.encap_port = 0; + /* Set SCOPE policy to enabled */ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9a56ae2f3651..f77484df097b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1142,6 +1142,26 @@ nodata: return retval; } +struct sctp_chunk *sctp_make_new_encap_port(const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + struct sctp_new_encap_port_hdr nep; + struct sctp_chunk *retval; + + retval = sctp_make_abort(asoc, chunk, + sizeof(struct sctp_errhdr) + sizeof(nep)); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_NEW_ENCAP_PORT, sizeof(nep)); + nep.cur_port = SCTP_INPUT_CB(chunk->skb)->encap_port; + nep.new_port = chunk->transport->encap_port; + sctp_addto_chunk(retval, sizeof(nep), &nep); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport) @@ -2321,6 +2341,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * added as the primary transport. The source address seems to * be a better choice than any of the embedded addresses. 
*/ + asoc->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c669f8bd1eab..af2b7041fa4e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -87,6 +87,13 @@ static enum sctp_disposition sctp_sf_tabort_8_4_8( const union sctp_subtype type, void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition sctp_sf_new_encap_port( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); static enum sctp_disposition sctp_stop_t1_and_abort( @@ -1493,6 +1500,10 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + + if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) + return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); + /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; @@ -3392,6 +3403,45 @@ static enum sctp_disposition sctp_sf_tabort_8_4_8( sctp_packet_append_chunk(packet, abort); + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); + + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); + + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return SCTP_DISPOSITION_CONSUME; +} + +/* Handling of SCTP Packets Containing an INIT Chunk Matching an + * Existing Associations when the UDP encap port is incorrect. + * + * From Section 4 at draft-tuexen-tsvwg-sctp-udp-encaps-cons-03. + */ +static enum sctp_disposition sctp_sf_new_encap_port( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_packet *packet = NULL; + struct sctp_chunk *chunk = arg; + struct sctp_chunk *abort; + + packet = sctp_ootb_pkt_new(net, asoc, chunk); + if (!packet) + return SCTP_DISPOSITION_NOMEM; + + abort = sctp_make_new_encap_port(asoc, chunk); + if (!abort) { + sctp_ootb_pkt_free(packet); + return SCTP_DISPOSITION_NOMEM; + } + + abort->skb->sk = ep->base.sk; + + sctp_packet_append_chunk(packet, abort); + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); @@ -6268,6 +6318,8 @@ static struct sctp_packet *sctp_ootb_pkt_new( if (!transport) goto nomem; + transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; + /* Cache a route for the transport with the chunk's destination as * the source address. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 53d0a4161df3..a710917c5ac7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4417,6 +4417,55 @@ out: return retval; } +static int sctp_setsockopt_encap_port(struct sock *sk, + struct sctp_udpencaps *encap, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_transport *t; + __be16 encap_port; + + if (optlen != sizeof(*encap)) + return -EINVAL; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. 
+ */ + encap_port = (__force __be16)encap->sue_port; + if (!sctp_is_any(sk, (union sctp_addr *)&encap->sue_address)) { + t = sctp_addr_id2transport(sk, &encap->sue_address, + encap->sue_assoc_id); + if (!t) + return -EINVAL; + + t->encap_port = encap_port; + return 0; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. + */ + asoc = sctp_id2assoc(sk, encap->sue_assoc_id); + if (!asoc && encap->sue_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + /* If changes are for association, also apply encap_port to + * each transport. + */ + if (asoc) { + list_for_each_entry(t, &asoc->peer.transport_addr_list, + transports) + t->encap_port = encap_port; + + return 0; + } + + sctp_sk(sk)->encap_port = encap_port; + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4636,6 +4685,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_setsockopt_pf_expose(sk, kopt, optlen); break; + case SCTP_REMOTE_UDP_ENCAPS_PORT: + retval = sctp_setsockopt_encap_port(sk, kopt, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4876,6 +4928,8 @@ static int sctp_init_sock(struct sock *sk) * be modified via SCTP_PEER_ADDR_PARAMS */ sp->hbinterval = net->sctp.hb_interval; + sp->udp_port = htons(net->sctp.udp_port); + sp->encap_port = htons(net->sctp.encap_port); sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pf_retrans = net->sctp.pf_retrans; sp->ps_retrans = net->sctp.ps_retrans; @@ -7790,6 +7844,65 @@ out: return retval; } +static int sctp_getsockopt_encap_port(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_association *asoc; + struct sctp_udpencaps encap; + struct sctp_transport *t; + __be16 encap_port; + + if (len < sizeof(encap)) + return -EINVAL; + + len = sizeof(encap); + if (copy_from_user(&encap, optval, len)) + return -EFAULT; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(sk, (union sctp_addr *)&encap.sue_address)) { + t = sctp_addr_id2transport(sk, &encap.sue_address, + encap.sue_assoc_id); + if (!t) { + pr_debug("%s: failed no transport\n", __func__); + return -EINVAL; + } + + encap_port = t->encap_port; + goto out; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. 
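On the receive side the listening port comes from the new net.sctp.udp_port sysctl; on the transmit side the peer's encapsulation port is set per socket, transport, or association via the option above. A userspace-style sketch, assuming headers that already carry the new SCTP_REMOTE_UDP_ENCAPS_PORT definitions; 9899 is the IANA-registered SCTP-over-UDP port, and sue_port travels in network byte order:

    #include <string.h>
    #include <sys/socket.h>
    #include <arpa/inet.h>
    #include <netinet/sctp.h>

    static int enable_udp_encap(int fd)
    {
            struct sctp_udpencaps encaps;

            memset(&encaps, 0, sizeof(encaps));
            /* INADDR_ANY + SCTP_FUTURE_ASSOC: set the socket-wide default */
            encaps.sue_assoc_id = SCTP_FUTURE_ASSOC;
            encaps.sue_port = htons(9899);

            return setsockopt(fd, IPPROTO_SCTP, SCTP_REMOTE_UDP_ENCAPS_PORT,
                              &encaps, sizeof(encaps));
    }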
+ */ + asoc = sctp_id2assoc(sk, encap.sue_assoc_id); + if (!asoc && encap.sue_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + pr_debug("%s: failed no association\n", __func__); + return -EINVAL; + } + + if (asoc) { + encap_port = asoc->encap_port; + goto out; + } + + encap_port = sctp_sk(sk)->encap_port; + +out: + encap.sue_port = (__force uint16_t)encap_port; + if (copy_to_user(optval, &encap, len)) + return -EFAULT; + + if (put_user(len, optlen)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -8010,6 +8123,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen); break; + case SCTP_REMOTE_UDP_ENCAPS_PORT: + retval = sctp_getsockopt_encap_port(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index c16c80963e55..e92df779af73 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -36,6 +36,7 @@ static int rto_alpha_max = 1000; static int rto_beta_max = 1000; static int pf_expose_max = SCTP_PF_EXPOSE_MAX; static int ps_retrans_max = SCTP_PS_RETRANS_MAX; +static int udp_port_max = 65535; static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = @@ -48,6 +49,8 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, @@ -291,6 +294,24 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "udp_port", + .data = &init_net.sctp.udp_port, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_sctp_do_udp_port, + .extra1 = SYSCTL_ZERO, + .extra2 = &udp_port_max, + }, + { + .procname = "encap_port", + .data = &init_net.sctp.encap_port, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = SYSCTL_ZERO, + .extra2 = &udp_port_max, + }, + { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), @@ -477,6 +498,47 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + unsigned int min = *(unsigned int *)ctl->extra1; + unsigned int max = *(unsigned int *)ctl->extra2; + struct ctl_table tbl; + int ret, new_value; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.udp_port; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + struct sock *sk = net->sctp.ctl_sock; + + if (new_value > max || new_value < min) + return -EINVAL; + + net->sctp.udp_port = new_value; + sctp_udp_sock_stop(net); + if (new_value) { + ret = sctp_udp_sock_start(net); + if (ret) + net->sctp.udp_port = 0; + } + + /* Update the value in the control socket */ + lock_sock(sk); + sctp_sk(sk)->udp_port = 
htons(net->sctp.udp_port); + release_sock(sk); + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5dd4faaf7d6e..811819c849da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1347,6 +1347,7 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, { struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext; struct smc_clc_v2_extension *pclc_v2_ext; + int rc = SMC_CLC_DECL_PEERNOSMC; ini->smc_type_v1 = pclc->hdr.typev1; ini->smc_type_v2 = pclc->hdr.typev2; @@ -1354,29 +1355,30 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, if (pclc->hdr.version > SMC_V1) ini->smcd_version |= ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; + if (!(ini->smcd_version & SMC_V2)) { + rc = SMC_CLC_DECL_PEERNOSMC; + goto out; + } if (!smc_ism_v2_capable) { ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOISM2SUPP; goto out; } pclc_v2_ext = smc_get_clc_v2_ext(pclc); if (!pclc_v2_ext) { ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOV2EXT; goto out; } pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); - if (!pclc_smcd_v2_ext) + if (!pclc_smcd_v2_ext) { ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOV2DEXT; + } out: - if (!ini->smcd_version) { - if (pclc->hdr.typev1 == SMC_TYPE_B || - pclc->hdr.typev2 == SMC_TYPE_B) - return SMC_CLC_DECL_NOSMCDEV; - if (pclc->hdr.typev1 == SMC_TYPE_D || - pclc->hdr.typev2 == SMC_TYPE_D) - return SMC_CLC_DECL_NOSMCDDEV; - return SMC_CLC_DECL_NOSMCRDEV; - } + if (!ini->smcd_version) + return rc; return 0; } @@ -1474,6 +1476,12 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, } } +static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini) +{ + if (!ini->rc) + ini->rc = rc; +} + static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) @@ -1484,7 +1492,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, unsigned int matches = 0; u8 smcd_version; u8 *eid = NULL; - int i; + int i, rc; if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) goto not_found; @@ -1493,8 +1501,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, smc_v2_ext = smc_get_clc_v2_ext(pclc); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); if (!smcd_v2_ext || - !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */ + !smc_v2_ext->hdr.flag.seid) { /* no system EID support for SMCD */ + smc_find_ism_store_rc(SMC_CLC_DECL_NOSEID, ini); goto not_found; + } mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) @@ -1526,9 +1536,12 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, ini->smcd_version = SMC_V2; ini->is_smcd = true; ini->ism_selected = i; - if (smc_listen_ism_init(new_smc, ini)) + rc = smc_listen_ism_init(new_smc, ini); + if (rc) { + smc_find_ism_store_rc(rc, ini); /* try next active ISM device */ continue; + } return; /* matching and usable V2 ISM device found */ } /* no V2 ISM device could be initialized */ @@ -1545,19 +1558,23 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, struct smc_init_info *ini) { struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); + int rc = 0; /* check if ISM V1 is available */ if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) goto not_found; ini->is_smcd = true; /* prepare ISM check */ ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); - if (smc_find_ism_device(new_smc, ini)) + rc = smc_find_ism_device(new_smc, ini); 
+ if (rc) goto not_found; ini->ism_selected = 0; - if (!smc_listen_ism_init(new_smc, ini)) + rc = smc_listen_ism_init(new_smc, ini); + if (!rc) return; /* V1 ISM device found */ not_found: + smc_find_ism_store_rc(rc, ini); ini->ism_dev[0] = NULL; ini->is_smcd = false; } @@ -1614,16 +1631,16 @@ static int smc_listen_find_device(struct smc_sock *new_smc, return 0; if (!(ini->smcd_version & SMC_V1)) - return SMC_CLC_DECL_NOSMCDEV; + return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV; /* check for matching IP prefix and subnet length */ rc = smc_listen_prfx_check(new_smc, pclc); if (rc) - return rc; + return ini->rc ?: rc; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) - return SMC_CLC_DECL_GETVLANERR; + return ini->rc ?: SMC_CLC_DECL_GETVLANERR; /* check for ISM device matching V1 proposed device */ smc_find_ism_v1_device_serv(new_smc, pclc, ini); @@ -1631,10 +1648,14 @@ static int smc_listen_find_device(struct smc_sock *new_smc, return 0; if (pclc->hdr.typev1 == SMC_TYPE_D) - return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */ + /* skip RDMA and decline */ + return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV; /* check if RDMA is available */ - return smc_find_rdma_v1_device_serv(new_smc, pclc, ini); + rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); + smc_find_ism_store_rc(rc, ini); + + return (!rc) ? 0 : ini->rc; } /* listen worker: finish RDMA setup */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b1ce6ccbfaec..f23f558054a7 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -389,9 +389,9 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) * Context: * - tasklet context */ -static void smcd_cdc_rx_tsklet(unsigned long data) +static void smcd_cdc_rx_tsklet(struct tasklet_struct *t) { - struct smc_connection *conn = (struct smc_connection *)data; + struct smc_connection *conn = from_tasklet(conn, t, rx_tsklet); struct smcd_cdc_msg *data_cdc; struct smcd_cdc_msg cdc; struct smc_sock *smc; @@ -411,7 +411,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data) */ void smcd_cdc_rx_init(struct smc_connection *conn) { - tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn); + tasklet_setup(&conn->rx_tsklet, smcd_cdc_rx_tsklet); } /***************************** init, exit, misc ******************************/ diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c579d1d5995a..49752c997c51 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -37,6 +37,11 @@ #define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */ #define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */ #define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */ +#define SMC_CLC_DECL_NOISM2SUPP 0x03030003 /* hardware has no ISMv2 support */ +#define SMC_CLC_DECL_NOV2EXT 0x03030004 /* peer sent no clc v2 extension */ +#define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. 
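
The smc_cdc.c hunk above is one instance of the tree-wide tasklet API conversion that recurs below in smc_wr.c and xfrm_input.c: tasklet_setup() replaces tasklet_init(), and the callback recovers its container with from_tasklet(), a type-checked container_of() wrapper. A minimal sketch with a hypothetical struct foo:

    #include <linux/interrupt.h>

    struct foo {
    	struct tasklet_struct tsk;
    	int pending;
    };

    /* New-style callback: receives the tasklet itself instead of an
     * opaque unsigned long, so no cast is needed.
     */
    static void foo_tasklet_fn(struct tasklet_struct *t)
    {
    	struct foo *f = from_tasklet(f, t, tsk);

    	f->pending = 0;
    }

    static void foo_init(struct foo *f)
    {
    	/* old: tasklet_init(&f->tsk, fn, (unsigned long)f); */
    	tasklet_setup(&f->tsk, foo_tasklet_fn);
    }
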
*/ +#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ +#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index f1e867ce2e63..9aee54a6bcba 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -301,6 +301,7 @@ struct smc_init_info { u8 first_contact_peer; u8 first_contact_local; unsigned short vlan_id; + u32 rc; /* SMC-R */ struct smc_clc_msg_local *ib_lcl; struct smc_ib_device *ib_dev; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 1e23cdd41eb1..cbc73a7e4d59 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -131,9 +131,9 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) wake_up(&link->wr_tx_wait); } -static void smc_wr_tx_tasklet_fn(unsigned long data) +static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t) { - struct smc_ib_device *dev = (struct smc_ib_device *)data; + struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet); struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; int i = 0, rc; int polled = 0; @@ -435,9 +435,9 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) } } -static void smc_wr_rx_tasklet_fn(unsigned long data) +static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t) { - struct smc_ib_device *dev = (struct smc_ib_device *)data; + struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet); struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; int polled = 0; int rc; @@ -698,10 +698,8 @@ void smc_wr_remove_dev(struct smc_ib_device *smcibdev) void smc_wr_add_dev(struct smc_ib_device *smcibdev) { - tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn, - (unsigned long)smcibdev); - tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn, - (unsigned long)smcibdev); + tasklet_setup(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn); + tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn); } int smc_wr_create_link(struct smc_link *lnk) diff --git a/net/socket.c b/net/socket.c index 6e6cccc2104f..152b1dcf93c6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -64,7 +64,6 @@ #include <linux/seq_file.h> #include <linux/mutex.h> #include <linux/if_bridge.h> -#include <linux/if_frad.h> #include <linux/if_vlan.h> #include <linux/ptp_classify.h> #include <linux/init.h> @@ -1027,17 +1026,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) } EXPORT_SYMBOL(vlan_ioctl_set); -static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook) (unsigned int, void __user *); - -void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) -{ - mutex_lock(&dlci_ioctl_mutex); - dlci_ioctl_hook = hook; - mutex_unlock(&dlci_ioctl_mutex); -} -EXPORT_SYMBOL(dlci_ioctl_set); - static long sock_do_ioctl(struct net *net, struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -1156,17 +1144,6 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; - case SIOCADDDLCI: - case SIOCDELDLCI: - err = -ENOPKG; - if (!dlci_ioctl_hook) - request_module("dlci"); - - mutex_lock(&dlci_ioctl_mutex); - if (dlci_ioctl_hook) - err = dlci_ioctl_hook(cmd, argp); - mutex_unlock(&dlci_ioctl_mutex); - break; case SIOCGSKNS: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -3427,8 +3404,6 @@ static int 
compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBRDELBR: case SIOCGIFVLAN: case SIOCSIFVLAN: - case SIOCADDDLCI: - case SIOCDELDLCI: case SIOCGSKNS: case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index eadc0ede928c..8241f5a4a01c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -781,7 +781,8 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, } /** - * rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication + * rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace + * communication * @parent: dentry of directory to create new "pipe" in * @name: name of pipe * @private: private data to associate with the pipe, for the caller's use diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 650414110452..2241d5a38f7b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -139,10 +139,7 @@ static int bearer_name_validate(const char *name, u32 if_len; /* copy bearer name & ensure length is OK */ - name_copy[TIPC_MAX_BEARER_NAME - 1] = 0; - /* need above in case non-Posix strncpy() doesn't pad with nulls */ - strncpy(name_copy, name, TIPC_MAX_BEARER_NAME); - if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0) + if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ diff --git a/net/tipc/core.c b/net/tipc/core.c index c2ff42900b53..5cc1f0307215 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -81,8 +81,6 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; - INIT_LIST_HEAD(&tn->dist_queue); - err = tipc_bcast_init(net); if (err) goto out_bclink; diff --git a/net/tipc/core.h b/net/tipc/core.h index 1d57a4d3b05e..df34dcdd0607 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -132,9 +132,6 @@ struct tipc_net { spinlock_t nametbl_lock; struct name_table *nametbl; - /* Name dist queue */ - struct list_head dist_queue; - /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; diff --git a/net/tipc/link.c b/net/tipc/link.c index 06b880da2a8e..97b1c6bd45dc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1260,7 +1260,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); return true; - }; + } } /* tipc_link_input - process packet that has passed link protocol check diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index fe4edce459ad..4cd90d5c84c8 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -244,24 +244,6 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) kfree_rcu(p, rcu); } -/** - * tipc_dist_queue_purge - remove deferred updates from a node that went down - */ -static void tipc_dist_queue_purge(struct net *net, u32 addr) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct distr_queue_item *e, *tmp; - - spin_lock_bh(&tn->nametbl_lock); - list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { - if (e->node != addr) - continue; - list_del(&e->next); - kfree(e); - } - spin_unlock_bh(&tn->nametbl_lock); -} - void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr, u16 capabilities) { @@ -272,7 +254,6 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); - tipc_dist_queue_purge(net, addr); spin_lock_bh(&tn->nametbl_lock); if 
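
The tipc bearer-name hunk above shows why strscpy() is preferred here: strncpy() neither guarantees NUL termination nor reports overflow, hence the old pre-terminate, copy, re-check dance. strscpy() copies at most size - 1 bytes, always terminates, and returns -E2BIG on truncation, so the overflow test collapses into the copy itself. Condensed sketch:

    #include <linux/string.h>

    char buf[TIPC_MAX_BEARER_NAME];

    if (strscpy(buf, name, sizeof(buf)) < 0)
    	return 0;	/* source too long: not a valid bearer name */
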
(!(capabilities & TIPC_NAMED_BCAST)) nt->rc_dests--; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1c7aa51cc2a3..82f154989418 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -118,7 +118,8 @@ static void tipc_tlv_init(struct sk_buff *skb, u16 type) skb_put(skb, sizeof(struct tlv_desc)); } -static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) +static __printf(2, 3) int tipc_tlv_sprintf(struct sk_buff *skb, + const char *fmt, ...) { int n; u16 len; @@ -588,7 +589,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return 0; tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", - nla_data(link[TIPC_NLA_LINK_NAME])); + (char *)nla_data(link[TIPC_NLA_LINK_NAME])); if (link[TIPC_NLA_LINK_BROADCAST]) { __fill_bc_link_stat(msg, prop, stats); @@ -695,7 +696,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], + nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, diff --git a/net/tipc/node.c b/net/tipc/node.c index d269ebe382e1..cd67b7d5169f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1638,7 +1638,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) return; default: return; - }; + } } /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e795a8a2955b..69c4b16e8184 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -658,8 +658,8 @@ static int tipc_release(struct socket *sock) * NOTE: This routine doesn't need to take the socket lock since it doesn't * access any non-constant socket information. */ -static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, - int uaddr_len) + +int tipc_sk_bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) { struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; @@ -691,13 +691,6 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, goto exit; } - if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && - (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) { - res = -EACCES; - goto exit; - } - res = (addr->scope >= 0) ? 
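
The __printf(2, 3) annotation added to tipc_tlv_sprintf() above is what makes the (char *) cast at the call site necessary: with format checking enabled, passing the void * returned by nla_data() to a %s conversion trips -Wformat. Shape of the annotation, with the name shortened for illustration:

    /* __printf(fmt_idx, first_vararg_idx) expands to
     * __attribute__((format(printf, fmt_idx, first_vararg_idx))),
     * letting the compiler type-check every caller's format arguments.
     */
    static __printf(2, 3) int tlv_sprintf(struct sk_buff *skb,
    				      const char *fmt, ...);

    tlv_sprintf(rep, "\nLink <%s>\n", (char *)nla_data(attr));
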
tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); @@ -706,6 +699,22 @@ exit: return res; } +static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen) +{ + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr; + + if (alen) { + if (alen < sizeof(struct sockaddr_tipc)) + return -EINVAL; + if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) { + pr_warn_once("Can't bind to reserved service type %u\n", + addr->addr.nameseq.type); + return -EACCES; + } + } + return tipc_sk_bind(sock, skaddr, alen); +} + /** * tipc_getname - get port ID of socket or peer socket * @sock: socket structure diff --git a/net/tipc/socket.h b/net/tipc/socket.h index b11575afc66f..02cdf166807d 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -74,7 +74,7 @@ int tipc_dump_done(struct netlink_callback *cb); u32 tipc_sock_get_portid(struct sock *sk); bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb); bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb); - +int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen); int tsk_set_importance(struct sock *sk, int imp); #endif diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 13f3143609f9..88ad39e47a98 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -520,12 +520,12 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) saddr.family = AF_TIPC; saddr.addrtype = TIPC_ADDR_NAMESEQ; - saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addr.nameseq.type = TIPC_TOP_SRV; saddr.addr.nameseq.lower = TIPC_TOP_SRV; saddr.addr.nameseq.upper = TIPC_TOP_SRV; saddr.scope = TIPC_NODE_SCOPE; - rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); if (rc < 0) goto err; rc = kernel_listen(lsock, 0); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d10916ab4526..d2834047c6db 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2072,8 +2072,7 @@ static long vsock_dev_do_ioctl(struct file *filp, break; default: - pr_err("Unknown ioctl %d\n", cmd); - retval = -EINVAL; + retval = -ENOIOCTLCMD; } return retval; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 22d1779ab2b1..e4030f1fbc60 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -530,10 +530,10 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: break; + case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); @@ -677,12 +677,12 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: /* Can NAN type be considered as beaconing interface? 
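
The tipc socket change above splits bind into a validating outer entry point and a policy-free inner one, so in-kernel callers such as the topology server (which, per the topsrv.c hunk, now calls tipc_sk_bind() directly) can still bind reserved service types like TIPC_TOP_SRV while userspace cannot. The pattern, condensed from the hunk:

    /* External entry: validate untrusted input, then delegate. */
    static int tipc_bind(struct socket *sock, struct sockaddr *sa, int alen)
    {
    	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)sa;

    	if (alen && addr->addr.nameseq.type < TIPC_RESERVED_TYPES)
    		return -EACCES;	/* userspace may not claim these */

    	return tipc_sk_bind(sock, sa, alen);	/* internal entry: no policy check */
    }
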
*/ case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); } @@ -1324,12 +1324,12 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); } diff --git a/net/wireless/core.c b/net/wireless/core.c index 240282c083aa..4b1f35e976e7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -631,10 +631,8 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) return -EINVAL; } -#ifndef CONFIG_WIRELESS_WDS if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; -#endif /* You can't even choose that many! */ if (WARN_ON(cnt < c->max_interfaces)) @@ -675,10 +673,8 @@ int wiphy_register(struct wiphy *wiphy) !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; -#ifndef CONFIG_WIRELESS_WDS if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; -#endif if (WARN_ON(wiphy->pmsr_capa && !wiphy->pmsr_capa->ftm.supported)) return -EINVAL; @@ -1202,9 +1198,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_OCB: __cfg80211_leave_ocb(rdev, dev); break; - case NL80211_IFTYPE_WDS: - /* must be handled by mac80211/driver, has no APIs */ - break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ @@ -1214,6 +1207,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: /* invalid */ break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a77174b99b07..8811a4b69f21 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -715,6 +715,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), [NL80211_ATTR_S1G_CAPABILITY_MASK] = NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), + [NL80211_ATTR_SAE_PWE] = + NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK, + NL80211_SAE_PWE_BOTH), }; /* policy for the key attributes */ @@ -1882,7 +1885,6 @@ static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } - CMD(set_wds_peer, SET_WDS_PEER); if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); @@ -2860,8 +2862,8 @@ static int parse_txq_params(struct nlattr *tb[], static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) { /* - * You can only set the channel explicitly for WDS interfaces, - * all others have their channel managed via their respective + * You can only set the channel explicitly for some interfaces, + * most have their channel managed via their respective * "establish a connection" command (connect, join, ...) 
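
Note that the wireless hunks above and below do not delete the NL80211_IFTYPE_WDS cases outright; they move them into the WARN_ON(1) group beside NL80211_IFTYPE_UNSPECIFIED. Keeping every enumerator listed, with no default: label, preserves the compiler's -Wswitch guarantee that a newly added iftype produces a warning until it is handled. Illustrative shape, not a verbatim hunk:

    switch (wdev->iftype) {
    case NL80211_IFTYPE_STATION:
    	/* real handling ... */
    	break;
    case NL80211_IFTYPE_UNSPECIFIED:
    case NL80211_IFTYPE_WDS:	/* no longer a valid runtime type */
    case NUM_NL80211_IFTYPES:
    	WARN_ON(1);
    	break;
    }
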
* * For AP/GO and mesh mode, the channel can be set with the @@ -3066,29 +3068,6 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) return __nl80211_set_channel(rdev, netdev, info); } -static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) -{ - struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; - const u8 *bssid; - - if (!info->attrs[NL80211_ATTR_MAC]) - return -EINVAL; - - if (netif_running(dev)) - return -EBUSY; - - if (!rdev->ops->set_wds_peer) - return -EOPNOTSUPP; - - if (wdev->iftype != NL80211_IFTYPE_WDS) - return -EOPNOTSUPP; - - bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - return rdev_set_wds_peer(rdev, dev, bssid); -} - static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; @@ -4595,7 +4574,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, struct nlattr *attrs[], enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask, - struct net_device *dev) + struct net_device *dev, + bool default_all_enabled) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4610,6 +4590,9 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, for (i = 0; i < NUM_NL80211_BANDS; i++) { const struct ieee80211_sta_he_cap *he_cap; + if (!default_all_enabled) + break; + sband = rdev->wiphy.bands[i]; if (!sband) @@ -4677,6 +4660,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, mask->control[band].ht_mcs)) return -EINVAL; } + if (tb[NL80211_TXRATE_VHT]) { if (!vht_set_mcs_mask( sband, @@ -4684,6 +4668,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, mask->control[band].vht_mcs)) return -EINVAL; } + if (tb[NL80211_TXRATE_GI]) { mask->control[band].gi = nla_get_u8(tb[NL80211_TXRATE_GI]); @@ -4695,6 +4680,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, nla_data(tb[NL80211_TXRATE_HE]), mask->control[band].he_mcs)) return -EINVAL; + if (tb[NL80211_TXRATE_HE_GI]) mask->control[band].he_gi = nla_get_u8(tb[NL80211_TXRATE_HE_GI]); @@ -4736,7 +4722,7 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, enum nl80211_band band, struct cfg80211_bitrate_mask *beacon_rate) { - u32 count_ht, count_vht, i; + u32 count_ht, count_vht, count_he, i; u32 rate = beacon_rate->control[band].legacy; /* Allow only one rate */ @@ -4769,7 +4755,21 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, return -EINVAL; } - if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) + count_he = 0; + for (i = 0; i < NL80211_HE_NSS_MAX; i++) { + if (hweight16(beacon_rate->control[band].he_mcs[i]) > 1) { + return -EINVAL; + } else if (beacon_rate->control[band].he_mcs[i]) { + count_he++; + if (count_he > 1) + return -EINVAL; + } + if (count_he && rate) + return -EINVAL; + } + + if ((count_ht && count_vht && count_he) || + (!rate && !count_ht && !count_vht && !count_he)) return -EINVAL; if (rate && @@ -4784,6 +4784,10 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_VHT)) return -EINVAL; + if (count_he && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_HE)) + return -EINVAL; return 0; } @@ -5244,7 +5248,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = 
nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, ¶ms.beacon_rate, - dev); + dev, false); if (err) return err; @@ -9732,6 +9736,12 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]); } + if (info->attrs[NL80211_ATTR_SAE_PWE]) + settings->sae_pwe = + nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]); + else + settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED; + return 0; } @@ -11088,7 +11098,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, - dev); + dev, true); if (err) return err; @@ -11697,7 +11707,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &setup.beacon_rate, - dev); + dev, false); if (err) return err; @@ -14477,7 +14487,8 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) { attr = NL80211_TID_CONFIG_ATTR_TX_RATE; err = nl80211_parse_tx_bitrate_mask(info, attrs, attr, - &tid_conf->txrate_mask, dev); + &tid_conf->txrate_mask, dev, + true); if (err) return err; @@ -15140,14 +15151,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { NL80211_FLAG_NEED_RTNL, }, { - .cmd = NL80211_CMD_SET_WDS_PEER, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_set_wds_peer, - .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, - }, - { .cmd = NL80211_CMD_JOIN_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 950d57494168..5e2f349c92a8 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -582,16 +582,6 @@ static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, return ret; } -static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *addr) -{ - int ret; - trace_rdev_set_wds_peer(&rdev->wiphy, dev, addr); - ret = rdev->ops->set_wds_peer(&rdev->wiphy, dev, addr); - trace_rdev_return_int(&rdev->wiphy, ret); - return ret; -} - static inline int rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8d0e49c46db3..3409f37d838b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -694,7 +694,7 @@ static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, struct cfg80211_scan_request *request) { - u8 i; + int i; u32 s_ssid; for (i = 0; i < request->n_ssids; i++) { diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6e218a0acd4e..817c6fef13be 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -838,11 +838,6 @@ DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, TP_ARGS(wiphy, netdev, mac) ); -DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_set_wds_peer, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), - TP_ARGS(wiphy, netdev, mac) -); - TRACE_EVENT(rdev_dump_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *mac), diff --git a/net/wireless/util.c b/net/wireless/util.c index f01746894a4e..5af88037f1fb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -550,8 +550,7 
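
The small scan.c type change above (u8 i becoming int i) guards against a classic counter-truncation bug: an 8-bit index compared against a wider bound wraps instead of terminating. A self-contained illustration, with do_work() and the bound as hypothetical stand-ins:

    u8 i;			/* wraps from 255 back to 0 */
    int n_ssids = 300;	/* bound wider than a u8 can reach */

    for (i = 0; i < n_ssids; i++)	/* condition never becomes false */
    	do_work(i);
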
@@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, return -1; break; case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - if (unlikely(iftype != NL80211_IFTYPE_WDS && - iftype != NL80211_IFTYPE_MESH_POINT && + if (unlikely(iftype != NL80211_IFTYPE_MESH_POINT && iftype != NL80211_IFTYPE_AP_VLAN && iftype != NL80211_IFTYPE_STATION)) return -1; @@ -1051,7 +1050,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MESH_POINT: /* bridging OK */ break; @@ -1063,6 +1061,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* not happening */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_NAN: WARN_ON(1); break; @@ -1276,20 +1275,22 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) { -#define SCALE 2048 - u16 mcs_divisors[12] = { - 34133, /* 16.666666... */ - 17067, /* 8.333333... */ - 11378, /* 5.555555... */ - 8533, /* 4.166666... */ - 5689, /* 2.777777... */ - 4267, /* 2.083333... */ - 3923, /* 1.851851... */ - 3413, /* 1.666666... */ - 2844, /* 1.388888... */ - 2560, /* 1.250000... */ - 2276, /* 1.111111... */ - 2048, /* 1.000000... */ +#define SCALE 6144 + u32 mcs_divisors[14] = { + 102399, /* 16.666666... */ + 51201, /* 8.333333... */ + 34134, /* 5.555555... */ + 25599, /* 4.166666... */ + 17067, /* 2.777777... */ + 12801, /* 2.083333... */ + 11769, /* 1.851851... */ + 10239, /* 1.666666... */ + 8532, /* 1.388888... */ + 7680, /* 1.250000... */ + 6828, /* 1.111111... */ + 6144, /* 1.000000... */ + 5690, /* 0.926106... */ + 5120, /* 0.833333... 
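
The widened mcs_divisors[] table in the hunk above stores reciprocals of the per-MCS rate multiplier, scaled by SCALE; bumping SCALE from 2048 to 6144 and the entries from u16 to u32 makes room for HE MCS 12 and 13. Each comment in the table is divisor/SCALE, so the applied multiplier is SCALE/divisor, e.g. 6144/5120 = 1.2 for MCS 13. A sketch of the application step under those assumptions:

    #include <linux/math64.h>

    /* bitrate = base_rate * SCALE / divisor[mcs] */
    static u32 apply_he_mcs(u32 base_rate, u32 divisor)
    {
    	u64 tmp = (u64)base_rate * 6144;	/* SCALE */

    	return (u32)div_u64(tmp, divisor);
    }
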
*/ }; u32 rates_160M[3] = { 960777777, 907400000, 816666666 }; u32 rates_969[3] = { 480388888, 453700000, 408333333 }; @@ -1301,7 +1302,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) u64 tmp; u32 result; - if (WARN_ON_ONCE(rate->mcs > 11)) + if (WARN_ON_ONCE(rate->mcs > 13)) return 0; if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2)) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 78f2927ead7f..b84a345b2653 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -49,9 +49,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, case IW_MODE_ADHOC: type = NL80211_IFTYPE_ADHOC; break; - case IW_MODE_REPEAT: - type = NL80211_IFTYPE_WDS; - break; case IW_MODE_MONITOR: type = NL80211_IFTYPE_MONITOR; break; @@ -1150,50 +1147,6 @@ static int cfg80211_wext_giwpower(struct net_device *dev, return 0; } -static int cfg80211_wds_wext_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *addr, char *extra) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int err; - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) - return -EINVAL; - - if (addr->sa_family != ARPHRD_ETHER) - return -EINVAL; - - if (netif_running(dev)) - return -EBUSY; - - if (!rdev->ops->set_wds_peer) - return -EOPNOTSUPP; - - err = rdev_set_wds_peer(rdev, dev, (u8 *)&addr->sa_data); - if (err) - return err; - - memcpy(&wdev->wext.bssid, (u8 *) &addr->sa_data, ETH_ALEN); - - return 0; -} - -static int cfg80211_wds_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *addr, char *extra) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) - return -EINVAL; - - addr->sa_family = ARPHRD_ETHER; - memcpy(&addr->sa_data, wdev->wext.bssid, ETH_ALEN); - - return 0; -} - static int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, struct iw_param *rate, char *extra) @@ -1371,8 +1324,6 @@ static int cfg80211_wext_siwap(struct net_device *dev, return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - case NL80211_IFTYPE_WDS: - return cfg80211_wds_wext_siwap(dev, info, ap_addr, extra); default: return -EOPNOTSUPP; } @@ -1389,8 +1340,6 @@ static int cfg80211_wext_giwap(struct net_device *dev, return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - case NL80211_IFTYPE_WDS: - return cfg80211_wds_wext_giwap(dev, info, ap_addr, extra); default: return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 37456d022cfa..be6351e3f3cd 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -760,9 +760,9 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) } EXPORT_SYMBOL(xfrm_input_resume); -static void xfrm_trans_reinject(unsigned long data) +static void xfrm_trans_reinject(struct tasklet_struct *t) { - struct xfrm_trans_tasklet *trans = (void *)data; + struct xfrm_trans_tasklet *trans = from_tasklet(trans, t, tasklet); struct sk_buff_head queue; struct sk_buff *skb; @@ -818,7 +818,6 @@ void __init xfrm_input_init(void) trans = &per_cpu(xfrm_trans_tasklet, i); __skb_queue_head_init(&trans->queue); - tasklet_init(&trans->tasklet, xfrm_trans_reinject, - (unsigned long)trans); + tasklet_setup(&trans->tasklet, 
xfrm_trans_reinject); } } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 9b8e292a7c6a..697cdcfbb5e1 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -319,12 +319,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) err = dst_output(xi->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += length; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); + dev_sw_netstats_tx_add(dev, 1, length); } else { stats->tx_errors++; stats->tx_aborted_errors++; @@ -538,15 +533,6 @@ static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) return err; } -static void xfrmi_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *s) -{ - dev_fetch_sw_netstats(s, dev->tstats); - - s->rx_dropped = dev->stats.rx_dropped; - s->tx_dropped = dev->stats.tx_dropped; -} - static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); @@ -554,12 +540,11 @@ static int xfrmi_get_iflink(const struct net_device *dev) return xi->p.link; } - static const struct net_device_ops xfrmi_netdev_ops = { .ndo_init = xfrmi_dev_init, .ndo_uninit = xfrmi_dev_uninit, .ndo_start_xmit = xfrmi_xmit, - .ndo_get_stats64 = xfrmi_get_stats64, + .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = xfrmi_get_iflink, }; diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index ff4c533dfac2..400e741a56eb 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -51,7 +51,6 @@ #include "cgroup_helpers.h" #include "hbm.h" #include "bpf_util.h" -#include <bpf/bpf.h> #include <bpf/libbpf.h> bool outFlag = true; diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh deleted file mode 100755 index 9e507c305c6e..000000000000 --- a/samples/bpf/test_ipip.sh +++ /dev/null @@ -1,179 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -function config_device { - ip netns add at_ns0 - ip netns add at_ns1 - ip netns add at_ns2 - ip link add veth0 type veth peer name veth0b - ip link add veth1 type veth peer name veth1b - ip link add veth2 type veth peer name veth2b - ip link set veth0b up - ip link set veth1b up - ip link set veth2b up - ip link set dev veth0b mtu 1500 - ip link set dev veth1b mtu 1500 - ip link set dev veth2b mtu 1500 - ip link set veth0 netns at_ns0 - ip link set veth1 netns at_ns1 - ip link set veth2 netns at_ns2 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1 - ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad - ip netns exec at_ns1 ip link set dev veth1 up - ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2 - ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad - ip netns exec at_ns2 ip link set dev veth2 up - ip link add br0 type bridge - ip link set br0 up - ip link set dev br0 mtu 1500 - ip link set veth0b master br0 - ip link set veth1b master br0 - ip link set veth2b master br0 -} - -function add_ipip_tunnel { - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns1 \ - ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200 - ip netns exec 
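
The xfrm_interface.c hunk above trades two chunks of driver boilerplate for common helpers: the open-coded per-cpu tx accounting becomes dev_sw_netstats_tx_add(), and the local ndo_get_stats64 wrapper around dev_fetch_sw_netstats() becomes the generic dev_get_tstats64. Before/after sketch of the tx path:

    /* before: hand-rolled u64_stats sequence */
    struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);

    u64_stats_update_begin(&tstats->syncp);
    tstats->tx_bytes += len;
    tstats->tx_packets++;
    u64_stats_update_end(&tstats->syncp);

    /* after: one call; arguments are (dev, packets, bytes) */
    dev_sw_netstats_tx_add(dev, 1, len);
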
at_ns1 ip link set dev $DEV_NS up - # same inner IP address in at_ns0 and at_ns1 - ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 - - ip netns exec at_ns2 ip link add dev $DEV type ipip external - ip netns exec at_ns2 ip link set dev $DEV up - ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ipip6_tunnel { - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns1 \ - ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64 - ip netns exec at_ns1 ip link set dev $DEV_NS up - # same inner IP address in at_ns0 and at_ns1 - ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 - - ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external - ip netns exec at_ns2 ip link set dev $DEV up - ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ip6ip6_tunnel { - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64 - ip netns exec at_ns1 \ - ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64 - ip netns exec at_ns1 ip link set dev $DEV_NS up - # same inner IP address in at_ns0 and at_ns1 - ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64 - - ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external - ip netns exec at_ns2 ip link set dev $DEV up - ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64 -} - -function attach_bpf { - DEV=$1 - SET_TUNNEL=$2 - GET_TUNNEL=$3 - ip netns exec at_ns2 tc qdisc add dev $DEV clsact - ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL - ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL -} - -function test_ipip { - DEV_NS=ipip_std - DEV=ipip_bpf - config_device -# tcpdump -nei br0 & - cat /sys/kernel/debug/tracing/trace_pipe & - - add_ipip_tunnel - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel - - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ip netns exec at_ns2 ping -c 1 10.1.1.100 - ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null - ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null - sleep 0.2 - # tcp check _same_ IP over different tunnels - ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 - ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 - cleanup -} - -# IPv4 over IPv6 tunnel -function test_ipip6 { - DEV_NS=ipip_std - DEV=ipip_bpf - config_device -# tcpdump -nei br0 & - cat /sys/kernel/debug/tracing/trace_pipe & - - add_ipip6_tunnel - attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel - - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ip netns exec at_ns2 ping -c 1 10.1.1.100 - ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null - ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null - sleep 0.2 - # tcp check _same_ IP over different tunnels - ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 - ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 - cleanup -} - -# IPv6 over IPv6 tunnel -function test_ip6ip6 { - DEV_NS=ipip_std - DEV=ipip_bpf - config_device -# tcpdump -nei br0 & - cat /sys/kernel/debug/tracing/trace_pipe & - - add_ip6ip6_tunnel - attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel - - ip netns exec 
at_ns0 ping -6 -c 1 2601:646::2 - ip netns exec at_ns2 ping -6 -c 1 2601:646::1 - ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null - ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null - sleep 0.2 - # tcp check _same_ IP over different tunnels - ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200 - ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201 - cleanup -} - -function cleanup { - set +ex - pkill iperf - ip netns delete at_ns0 - ip netns delete at_ns1 - ip netns delete at_ns2 - ip link del veth0 - ip link del veth1 - ip link del veth2 - ip link del br0 - pkill tcpdump - pkill cat - set -ex -} - -cleanup -echo "Testing IP tunnels..." -test_ipip -test_ipip6 -test_ip6ip6 -echo "*** PASS ***" diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index ae01baf96f4e..02b892421f7a 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -6,6 +6,7 @@ PHONY := __modfinal __modfinal: +include include/config/auto.conf include $(srctree)/scripts/Kbuild.include # for c_flags @@ -36,8 +37,23 @@ quiet_cmd_ld_ko_o = LD [M] $@ -T scripts/module.lds -o $@ $(filter %.o, $^); \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) -$(modules): %.ko: %.o %.mod.o scripts/module.lds FORCE - +$(call if_changed,ld_ko_o) +quiet_cmd_btf_ko = BTF [M] $@ + cmd_btf_ko = LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@ + +# Same as newer-prereqs, but allows to exclude specified extra dependencies +newer_prereqs_except = $(filter-out $(PHONY) $(1),$?) + +# Same as if_changed, but allows to exclude specified extra dependencies +if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ + $(cmd); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) + +# Re-generate module BTFs if either module's .ko or vmlinux changed +$(modules): %.ko: %.o %.mod.o scripts/module.lds vmlinux FORCE + +$(call if_changed_except,ld_ko_o,vmlinux) +ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +$(if $(newer-prereqs),$(call cmd,btf_ko)) +endif targets += $(modules) $(modules:.ko=.mod.o) diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index 788667d582ae..e5971fa74fd7 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -12,6 +12,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = { #include <linux/lsm_hook_defs.h> #undef LSM_HOOK LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), + LSM_HOOK_INIT(task_free, bpf_task_storage_free), }; static int __init bpf_lsm_init(void) @@ -23,6 +24,7 @@ static int __init bpf_lsm_init(void) struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = { .lbs_inode = sizeof(struct bpf_storage_blob), + .lbs_task = sizeof(struct bpf_storage_blob), }; DEFINE_LSM(bpf) = { diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 3e601bcfd461..944cb4b7c95d 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d -/bpftool-bootstrap +/bootstrap/ /bpftool bpftool*.8 bpf-helpers.* diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index dade10cdf295..3d52256ba75f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -50,7 +50,8 @@ MAP COMMANDS | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | 
**percpu_cgroup_storage** -| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** } +| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** + | **task_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f60e6ad3a1df..f897cb5fb12d 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -19,22 +19,39 @@ BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ LIBBPF_PATH = $(LIBBPF_OUTPUT) + BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ else + LIBBPF_OUTPUT = LIBBPF_PATH = $(BPF_DIR) + BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ endif LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ +LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a -BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +ifeq ($(BPFTOOL_VERSION),) +BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +endif + +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): + $(QUIET_MKDIR)mkdir -p $@ -$(LIBBPF): FORCE - $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a -$(LIBBPF)-clean: +$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ + ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ + +$(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT) $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null +$(LIBBPF_BOOTSTRAP)-clean: FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) + $(call QUIET_CLEAN, libbpf-bootstrap) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null + prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions @@ -92,6 +109,7 @@ CFLAGS += -DCOMPAT_NEED_REALLOCARRAY endif LIBS = $(LIBBPF) -lelf -lz +LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz ifeq ($(feature-libcap), 1) CFLAGS += -DUSE_LIBCAP LIBS += -lcap @@ -118,9 +136,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif -BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap) +BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool -BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) +BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ @@ -167,12 +185,16 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)feature.o: | zdep -$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS) +$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) + $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ + $(LIBS_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) +$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< + $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -180,11 +202,11 @@ feature-detect-clean: $(call QUIET_CLEAN, feature-detect) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBBPF)-clean feature-detect-clean +clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(call QUIET_CLEAN, bpftool) 
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h - $(Q)$(RM) -r -- $(OUTPUT)libbpf/ + $(Q)$(RM) -- $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h + $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 3f1da30c4da6..fdffbc64c65c 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -705,7 +705,7 @@ _bpftool() hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage' -- \ + struct_ops inode_storage task_storage' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 8ab142ff5eac..ed5e97157241 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -358,8 +358,12 @@ static int dump_btf_raw(const struct btf *btf, } } else { int cnt = btf__get_nr_types(btf); + int start_id = 1; - for (i = 1; i <= cnt; i++) { + if (base_btf) + start_id = btf__get_nr_types(base_btf) + 1; + + for (i = start_id; i <= cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -438,7 +442,6 @@ static int do_dump(int argc, char **argv) return -1; } src = GET_ARG(); - if (is_prefix(src, "map")) { struct bpf_map_info info = {}; __u32 len = sizeof(info); @@ -499,7 +502,7 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - btf = btf__parse(*argv, NULL); + btf = btf__parse_split(*argv, base_btf); if (IS_ERR(btf)) { err = -PTR_ERR(btf); btf = NULL; @@ -739,9 +742,14 @@ show_btf_plain(struct bpf_btf_info *info, int fd, struct btf_attach_table *btf_map_table) { struct btf_attach_point *obj; + const char *name = u64_to_ptr(info->name); int n; printf("%u: ", info->id); + if (info->kernel_btf) + printf("name [%s] ", name); + else if (name && name[0]) + printf("name %s ", name); printf("size %uB", info->btf_size); n = 0; @@ -768,6 +776,7 @@ show_btf_json(struct bpf_btf_info *info, int fd, struct btf_attach_table *btf_map_table) { struct btf_attach_point *obj; + const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ jsonw_uint_field(json_wtr, "id", info->id); @@ -793,6 +802,11 @@ show_btf_json(struct bpf_btf_info *info, int fd, emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); + + if (name && name[0]) + jsonw_string_field(json_wtr, "name", name); + jsonw_end_object(json_wtr); /* btf object */ } @@ -800,15 +814,30 @@ static int show_btf(int fd, struct btf_attach_table *btf_prog_table, struct btf_attach_table *btf_map_table) { - struct bpf_btf_info info = {}; + struct bpf_btf_info info; __u32 len = sizeof(info); + char name[64]; int err; + memset(&info, 0, sizeof(info)); err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get BTF object info: %s", strerror(errno)); return -1; } + /* if kernel support emitting BTF object name, pass name pointer */ + if (info.name_len) { + memset(&info, 0, sizeof(info)); + info.name_len = sizeof(name); + info.name = ptr_to_u64(name); + len = sizeof(info); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get BTF object info: %s", strerror(errno)); + return -1; + } + } if (json_output) 
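
show_btf() above uses the standard two-pass idiom for variable-length fields returned by the kernel: the first bpf_obj_get_info_by_fd() call only reveals name_len, the second supplies a buffer to fill. A condensed userspace sketch (ptr_to_u64() is the usual cast helper, defined inline here):

    #include <string.h>
    #include <bpf/bpf.h>

    #define ptr_to_u64(p) ((__u64)(unsigned long)(p))

    static int btf_info_with_name(int fd, char name[64])
    {
    	struct bpf_btf_info info = {};
    	__u32 len = sizeof(info);

    	if (bpf_obj_get_info_by_fd(fd, &info, &len))
    		return -1;		/* pass 1: learn name_len */
    	if (!info.name_len)
    		return 0;		/* older kernel: no name support */

    	memset(&info, 0, sizeof(info));
    	info.name = ptr_to_u64(name);
    	info.name_len = 64;
    	len = sizeof(info);
    	return bpf_obj_get_info_by_fd(fd, &info, &len);	/* pass 2 */
    }
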
show_btf_json(&info, fd, btf_prog_table, btf_map_table); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 682daaa49e6a..b86f450e6fce 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -11,6 +11,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <bpf/btf.h> #include "main.h" @@ -28,6 +29,7 @@ bool show_pinned; bool block_mount; bool verifier_logs; bool relaxed_maps; +struct btf *base_btf; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; struct pinned_obj_table link_table; @@ -391,6 +393,7 @@ int main(int argc, char **argv) { "mapcompat", no_argument, NULL, 'm' }, { "nomount", no_argument, NULL, 'n' }, { "debug", no_argument, NULL, 'd' }, + { "base-btf", required_argument, NULL, 'B' }, { 0 } }; int opt, ret; @@ -407,7 +410,7 @@ int main(int argc, char **argv) hash_init(link_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjfmnd", + while ((opt = getopt_long(argc, argv, "VhpjfmndB:", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -441,6 +444,15 @@ int main(int argc, char **argv) libbpf_set_print(print_all_levels); verifier_logs = true; break; + case 'B': + base_btf = btf__parse(optarg, NULL); + if (libbpf_get_error(base_btf)) { + p_err("failed to parse base BTF at '%s': %ld\n", + optarg, libbpf_get_error(base_btf)); + base_btf = NULL; + return -1; + } + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -465,6 +477,7 @@ int main(int argc, char **argv) delete_pinned_obj_table(&map_table); delete_pinned_obj_table(&link_table); } + btf__free(base_btf); return ret; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c46e52137b87..76e91641262b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -90,6 +90,7 @@ extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; +extern struct btf *base_btf; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; extern struct pinned_obj_table link_table; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a7efbd84fbcc..b400364ee054 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -51,6 +51,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", [BPF_MAP_TYPE_RINGBUF] = "ringbuf", [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -1464,7 +1465,8 @@ static int do_help(int argc, char **argv) " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" - " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n" + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" + " task_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 66cb92136de4..bf656432ad73 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -18,15 +18,6 @@ else endif # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 
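
The new -B/--base-btf handling above parses the base with btf__parse(); split BTF for a module can then be loaded on top with btf__parse_split(), so type IDs and string offsets continue where the base ends. A usage sketch; the module path is only an example:

    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    static struct btf *load_module_btf(const char *mod_path)
    {
    	struct btf *base, *split;

    	base = btf__parse("/sys/kernel/btf/vmlinux", NULL);
    	if (libbpf_get_error(base))	/* this libbpf returns ERR_PTR-style errors */
    		return NULL;

    	split = btf__parse_split(mod_path, base);
    	if (libbpf_get_error(split)) {
    		btf__free(base);
    		return NULL;
    	}
    	return split;
    }
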
fb1337d69868..4d5ca54fcd4c 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -1,13 +1,18 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -OUTPUT := .output +include ../../scripts/Makefile.include + +OUTPUT ?= $(abspath .output)/ + CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip -DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) -BPFOBJ := $(OUTPUT)/libbpf.a -BPF_INCLUDE := $(OUTPUT) +BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ +BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a +BPF_INCLUDE := $(BPFOBJ_OUTPUT) INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ -I$(abspath ../../include/uapi) CFLAGS := -g -Wall @@ -18,13 +23,10 @@ VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ $(wildcard $(VMLINUX_BTF_PATHS)))) -abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = -msg = else Q = @ -msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 endif @@ -37,12 +39,15 @@ all: runqslower runqslower: $(OUTPUT)/runqslower clean: - $(call msg,CLEAN) - $(Q)rm -rf $(OUTPUT) runqslower + $(call QUIET_CLEAN, runqslower) + $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT) + $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h + $(Q)$(RM) $(OUTPUT)runqslower + $(Q)$(RM) -r .output $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) - $(call msg,BINARY,$@) - $(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ + $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ $(OUTPUT)/runqslower.bpf.o @@ -50,36 +55,30 @@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) - $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton $< > $@ + $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) - $(call msg,BPF,$@) - $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ + $(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ $(OUTPUT)/%.o: %.c | $(OUTPUT) - $(call msg,CC,$@) - $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ -$(OUTPUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $(OUTPUT) +$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT): + $(QUIET_MKDIR)mkdir -p $@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) - $(call msg,GEN,$@) $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ "specify its location." 
>&2; \ exit 1;\ fi - $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ + $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ $(abspath $@) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ -$(DEFAULT_BPFTOOL): - $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install +$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ + CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/build/Makefile b/tools/build/Makefile index 722f1700d96a..bae48e6fa995 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -15,10 +15,6 @@ endef $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,LD,$(CROSS_COMPILE)ld) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - export HOSTCC HOSTLD HOSTAR ifeq ($(V),1) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e6ceac3f7d62..162999b12790 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -157,6 +157,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, }; /* Note that tracing related programs such as @@ -3742,6 +3743,50 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be an task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. 
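
The helper documentation above pairs naturally in one program: bpf_get_current_task_btf() yields the BTF-typed task pointer that bpf_task_storage_get() takes as its key. A minimal BPF-side sketch (map, value, and hook names are illustrative; task storage maps require BPF_F_NO_PREALLOC and an int key, and at this point the helpers are available to LSM programs):

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    char LICENSE[] SEC("license") = "GPL";

    struct counters { __u64 opens; };

    struct {
    	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
    	__uint(map_flags, BPF_F_NO_PREALLOC);
    	__type(key, int);
    	__type(value, struct counters);
    } per_task SEC(".maps");

    SEC("lsm/file_open")
    int BPF_PROG(count_open, struct file *file)
    {
    	struct task_struct *task = bpf_get_current_task_btf();
    	struct counters *c;

    	c = bpf_task_storage_get(&per_task, task, 0,
    				 BPF_LOCAL_STORAGE_GET_F_CREATE);
    	if (c)
    		__sync_fetch_and_add(&c->opens, 1);
    	return 0;
    }
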
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3900,6 +3945,9 @@ union bpf_attr { FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ FN(redirect_peer), \ + FN(task_storage_get), \ + FN(task_storage_delete), \ + FN(get_current_task_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4418,6 +4466,9 @@ struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 231b07203e3d..2d0d064c6d31 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -78,10 +78,32 @@ struct btf { void *types_data; size_t types_data_cap; /* used size stored in hdr->type_len */ - /* type ID to `struct btf_type *` lookup index */ + /* type ID to `struct btf_type *` lookup index + * type_offs[0] corresponds to the first non-VOID type: + * - for base BTF it's type [1]; + * - for split BTF it's the first non-base BTF type. + */ __u32 *type_offs; size_t type_offs_cap; + /* number of types in this BTF instance: + * - doesn't include special [0] void type; + * - for split BTF counts number of types added on top of base BTF. + */ __u32 nr_types; + /* if not NULL, points to the base BTF on top of which the current + * split BTF is based + */ + struct btf *base_btf; + /* BTF type ID of the first type in this BTF instance: + * - for base BTF it's equal to 1; + * - for split BTF it's equal to biggest type ID of base BTF plus 1. + */ + int start_id; + /* logical string offset of this BTF instance: + * - for base BTF it's equal to 0; + * - for split BTF it's equal to total size of base BTF's string section size. + */ + int start_str_off; void *strs_data; size_t strs_data_cap; /* used size stored in hdr->str_len */ @@ -90,6 +112,14 @@ struct btf { struct hashmap *strs_hash; /* whether strings are already deduplicated */ bool strs_deduped; + /* extra indirection layer to make strings hashmap work with stable + * string offsets and ability to transparently choose between + * btf->strs_data or btf_dedup->strs_data as a source of strings. + * This is used for BTF strings dedup to transfer deduplicated strings + * data back to struct btf without re-building strings index. + */ + void **strs_data_ptr; + /* BTF object FD, if loaded into kernel */ int fd; @@ -168,7 +198,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) __u32 *p; p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types + 1, BTF_MAX_NR_TYPES, 1); + btf->nr_types, BTF_MAX_NR_TYPES, 1); if (!p) return -ENOMEM; @@ -215,22 +245,18 @@ static int btf_parse_hdr(struct btf *btf) return -EINVAL; } - if (meta_left < hdr->type_off) { - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); - return -EINVAL; - } - - if (meta_left < hdr->str_off) { - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); + if (meta_left < hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; } - if (hdr->type_off >= hdr->str_off) { - pr_debug("BTF type section offset >= string section offset. 
No type?\n"); + if (hdr->type_off + hdr->type_len > hdr->str_off) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", + hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; } - if (hdr->type_off & 0x02) { + if (hdr->type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } @@ -244,12 +270,16 @@ static int btf_parse_str_sec(struct btf *btf) const char *start = btf->strs_data; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || - start[0] || end[-1]) { + if (btf->base_btf && hdr->str_len == 0) + return 0; + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { + pr_debug("Invalid BTF string section\n"); + return -EINVAL; + } + if (!btf->base_btf && start[0]) { pr_debug("Invalid BTF string section\n"); return -EINVAL; } - return 0; } @@ -364,19 +394,9 @@ static int btf_parse_type_sec(struct btf *btf) struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; void *end_type = next_type + hdr->type_len; - int err, i = 0, type_size; - - /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), - * so ensure we can never properly use its offset from index by - * setting it to a large value - */ - err = btf_add_type_idx_entry(btf, UINT_MAX); - if (err) - return err; + int err, type_size; while (next_type + sizeof(struct btf_type) <= end_type) { - i++; - if (btf->swapped_endian) btf_bswap_type_base(next_type); @@ -384,7 +404,7 @@ static int btf_parse_type_sec(struct btf *btf) if (type_size < 0) return type_size; if (next_type + type_size > end_type) { - pr_warn("BTF type [%d] is malformed\n", i); + pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types); return -EINVAL; } @@ -409,7 +429,7 @@ static int btf_parse_type_sec(struct btf *btf) __u32 btf__get_nr_types(const struct btf *btf) { - return btf->nr_types; + return btf->start_id + btf->nr_types - 1; } /* internal helper returning non-const pointer to a type */ @@ -417,13 +437,14 @@ static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) { if (type_id == 0) return &btf_void; - - return btf->types_data + btf->type_offs[type_id]; + if (type_id < btf->start_id) + return btf_type_by_id(btf->base_btf, type_id); + return btf->types_data + btf->type_offs[type_id - btf->start_id]; } const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { - if (type_id > btf->nr_types) + if (type_id >= btf->start_id + btf->nr_types) return NULL; return btf_type_by_id((struct btf *)btf, type_id); } @@ -432,9 +453,13 @@ static int determine_ptr_size(const struct btf *btf) { const struct btf_type *t; const char *name; - int i; + int i, n; - for (i = 1; i <= btf->nr_types; i++) { + if (btf->base_btf && btf->base_btf->ptr_sz > 0) + return btf->base_btf->ptr_sz; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -717,7 +742,7 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new_empty(void) +static struct btf *btf_new_empty(struct btf *base_btf) { struct btf *btf; @@ -725,12 +750,21 @@ struct btf *btf__new_empty(void) if (!btf) return ERR_PTR(-ENOMEM); + btf->nr_types = 0; + btf->start_id = 1; + btf->start_str_off = 0; btf->fd = -1; btf->ptr_sz = sizeof(void *); btf->swapped_endian = false; + if (base_btf) { + btf->base_btf = base_btf; + btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_str_off = base_btf->hdr->str_len; + } + /* 
+1 for empty string at offset 0 */ - btf->raw_size = sizeof(struct btf_header) + 1; + btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1); btf->raw_data = calloc(1, btf->raw_size); if (!btf->raw_data) { free(btf); @@ -744,12 +778,22 @@ struct btf *btf__new_empty(void) btf->types_data = btf->raw_data + btf->hdr->hdr_len; btf->strs_data = btf->raw_data + btf->hdr->hdr_len; - btf->hdr->str_len = 1; /* empty string at offset 0 */ + btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ return btf; } -struct btf *btf__new(const void *data, __u32 size) +struct btf *btf__new_empty(void) +{ + return btf_new_empty(NULL); +} + +struct btf *btf__new_empty_split(struct btf *base_btf) +{ + return btf_new_empty(base_btf); +} + +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) { struct btf *btf; int err; @@ -758,6 +802,16 @@ struct btf *btf__new(const void *data, __u32 size) if (!btf) return ERR_PTR(-ENOMEM); + btf->nr_types = 0; + btf->start_id = 1; + btf->start_str_off = 0; + + if (base_btf) { + btf->base_btf = base_btf; + btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_str_off = base_btf->hdr->str_len; + } + btf->raw_data = malloc(size); if (!btf->raw_data) { err = -ENOMEM; @@ -790,7 +844,13 @@ done: return btf; } -struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +struct btf *btf__new(const void *data, __u32 size) +{ + return btf_new(data, size, NULL); +} + +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) { Elf_Data *btf_data = NULL, *btf_ext_data = NULL; int err = 0, fd = -1, idx = 0; @@ -868,7 +928,7 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) err = -ENOENT; goto done; } - btf = btf__new(btf_data->d_buf, btf_data->d_size); + btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); if (IS_ERR(btf)) goto done; @@ -913,7 +973,17 @@ done: return btf; } -struct btf *btf__parse_raw(const char *path) +struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +{ + return btf_parse_elf(path, NULL, btf_ext); +} + +struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf) +{ + return btf_parse_elf(path, base_btf, NULL); +} + +static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) { struct btf *btf = NULL; void *data = NULL; @@ -967,7 +1037,7 @@ struct btf *btf__parse_raw(const char *path) } /* finally parse BTF data */ - btf = btf__new(data, sz); + btf = btf_new(data, sz, base_btf); err_out: free(data); @@ -976,18 +1046,38 @@ err_out: return err ? 
ERR_PTR(err) : btf; } -struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) +struct btf *btf__parse_raw(const char *path) +{ + return btf_parse_raw(path, NULL); +} + +struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) +{ + return btf_parse_raw(path, base_btf); +} + +static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; if (btf_ext) *btf_ext = NULL; - btf = btf__parse_raw(path); + btf = btf_parse_raw(path, base_btf); if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO) return btf; - return btf__parse_elf(path, btf_ext); + return btf_parse_elf(path, base_btf, btf_ext); +} + +struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) +{ + return btf_parse(path, NULL, btf_ext); +} + +struct btf *btf__parse_split(const char *path, struct btf *base_btf) +{ + return btf_parse(path, base_btf, NULL); } static int compare_vsi_off(const void *_a, const void *_b) @@ -1171,8 +1261,8 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi memcpy(p, btf->types_data, hdr->type_len); if (swap_endian) { - for (i = 1; i <= btf->nr_types; i++) { - t = p + btf->type_offs[i]; + for (i = 0; i < btf->nr_types; i++) { + t = p + btf->type_offs[i]; /* btf_bswap_type_rest() relies on native t->info, so * we swap base type info after we swapped all the * additional information @@ -1215,8 +1305,10 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { - if (offset < btf->hdr->str_len) - return btf->strs_data + offset; + if (offset < btf->start_str_off) + return btf__str_by_offset(btf->base_btf, offset); + else if (offset - btf->start_str_off < btf->hdr->str_len) + return btf->strs_data + (offset - btf->start_str_off); else return NULL; } @@ -1363,17 +1455,19 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, static size_t strs_hash_fn(const void *key, void *ctx) { - struct btf *btf = ctx; - const char *str = btf->strs_data + (long)key; + const struct btf *btf = ctx; + const char *strs = *btf->strs_data_ptr; + const char *str = strs + (long)key; return str_hash(str); } static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) { - struct btf *btf = ctx; - const char *str1 = btf->strs_data + (long)key1; - const char *str2 = btf->strs_data + (long)key2; + const struct btf *btf = ctx; + const char *strs = *btf->strs_data_ptr; + const char *str1 = strs + (long)key1; + const char *str2 = strs + (long)key2; return strcmp(str1, str2) == 0; } @@ -1418,6 +1512,9 @@ static int btf_ensure_modifiable(struct btf *btf) memcpy(types, btf->types_data, btf->hdr->type_len); memcpy(strs, btf->strs_data, btf->hdr->str_len); + /* make hashmap below use btf->strs_data as a source of strings */ + btf->strs_data_ptr = &btf->strs_data; + /* build lookup index for all strings */ hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); if (IS_ERR(hash)) { @@ -1448,7 +1545,10 @@ static int btf_ensure_modifiable(struct btf *btf) /* if BTF was created from scratch, all strings are guaranteed to be * unique and deduplicated */ - btf->strs_deduped = btf->hdr->str_len <= 1; + if (btf->hdr->str_len == 0) + btf->strs_deduped = true; + if (!btf->base_btf && btf->hdr->str_len == 1) + btf->strs_deduped = true; /* invalidate raw_data representation */ btf_invalidate_raw_data(btf); @@ -1480,6 +1580,14 @@ int btf__find_str(struct btf *btf, const char *s) long old_off, new_off, len; void *p; + if 
(btf->base_btf) { + int ret; + + ret = btf__find_str(btf->base_btf, s); + if (ret != -ENOENT) + return ret; + } + /* BTF needs to be in a modifiable state to build string lookup index */ if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1494,7 +1602,7 @@ int btf__find_str(struct btf *btf, const char *s) memcpy(p, s, len); if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off)) - return old_off; + return btf->start_str_off + old_off; return -ENOENT; } @@ -1510,6 +1618,14 @@ int btf__add_str(struct btf *btf, const char *s) void *p; int err; + if (btf->base_btf) { + int ret; + + ret = btf__find_str(btf->base_btf, s); + if (ret != -ENOENT) + return ret; + } + if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1536,12 +1652,12 @@ int btf__add_str(struct btf *btf, const char *s) err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off, HASHMAP_ADD, (const void **)&old_off, NULL); if (err == -EEXIST) - return old_off; /* duplicated string, return existing offset */ + return btf->start_str_off + old_off; /* duplicated string, return existing offset */ if (err) return err; btf->hdr->str_len += len; /* new unique string, adjust data length */ - return new_off; + return btf->start_str_off + new_off; } static void *btf_add_type_mem(struct btf *btf, size_t add_sz) @@ -1560,6 +1676,20 @@ static void btf_type_inc_vlen(struct btf_type *t) t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); } +static int btf_commit_type(struct btf *btf, int data_sz) +{ + int err; + + err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + if (err) + return err; + + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types++; + return btf->start_id + btf->nr_types - 1; +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; @@ -1572,7 +1702,7 @@ static void btf_type_inc_vlen(struct btf_type *t) int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding) { struct btf_type *t; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -1606,14 +1736,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding /* set INT info, we don't allow setting legacy bit offset/size */ *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8); - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* it's completely legal to append BTF types with type IDs pointing forward to @@ -1631,7 +1754,7 @@ static int validate_type_id(int id) static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) { struct btf_type *t; - int sz, name_off = 0, err; + int sz, name_off = 0; if (validate_type_id(ref_type_id)) return -EINVAL; @@ -1654,14 +1777,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref t->info = btf_type_info(kind, 0, 0); t->type = ref_type_id; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1689,7 +1805,7 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n { struct btf_type *t; struct btf_array *a; - int sz, err; + int sz; if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) return -EINVAL; @@ -1711,21 +1827,14 @@ 
int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n a->index_type = index_type_id; a->nelems = nr_elems; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* generic STRUCT/UNION append function */ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz) { struct btf_type *t; - int sz, err, name_off = 0; + int sz, name_off = 0; if (btf_ensure_modifiable(btf)) return -ENOMEM; @@ -1748,14 +1857,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 t->info = btf_type_info(kind, 0, 0); t->size = bytes_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1793,6 +1895,11 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz); } +static struct btf_type *btf_last_type(struct btf *btf) +{ + return btf_type_by_id(btf, btf__get_nr_types(btf)); +} + /* * Append new field for the current STRUCT/UNION type with: * - *name* - name of the field, can be NULL or empty for anonymous field; @@ -1814,7 +1921,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* last type should be union/struct */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_composite(t)) return -EINVAL; @@ -1849,7 +1956,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, m->offset = bit_offset | (bit_size << 24); /* btf_add_type_mem can invalidate t pointer */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); /* update parent type's vlen and kflag */ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); @@ -1874,7 +1981,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; - int sz, err, name_off = 0; + int sz, name_off = 0; /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) @@ -1899,14 +2006,7 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); t->size = byte_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -1926,7 +2026,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) /* last type should be BTF_KIND_ENUM */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_enum(t)) return -EINVAL; @@ -1953,7 +2053,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) v->val = value; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; @@ -2093,7 +2193,7 @@ int btf__add_func(struct btf *btf, const char *name, int btf__add_func_proto(struct btf *btf, int ret_type_id) { struct btf_type *t; - int sz, err; + int sz; if (validate_type_id(ret_type_id)) return -EINVAL; @@ -2113,14 +2213,7 @@ int 
btf__add_func_proto(struct btf *btf, int ret_type_id) t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0); t->type = ret_type_id; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2143,7 +2236,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) /* last type should be BTF_KIND_FUNC_PROTO */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_func_proto(t)) return -EINVAL; @@ -2166,7 +2259,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) p->type = type_id; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; @@ -2188,7 +2281,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) { struct btf_type *t; struct btf_var *v; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -2219,14 +2312,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) v = btf_var(t); v->linkage = linkage; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2244,7 +2330,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; - int sz, err, name_off; + int sz, name_off; /* non-empty name */ if (!name || !name[0]) @@ -2267,14 +2353,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0); t->size = byte_sz; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); - if (err) - return err; - - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; - btf->nr_types++; - return btf->nr_types; + return btf_commit_type(btf, sz); } /* @@ -2296,7 +2375,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ /* last type should be BTF_KIND_DATASEC */ if (btf->nr_types == 0) return -EINVAL; - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); if (!btf_is_datasec(t)) return -EINVAL; @@ -2317,7 +2396,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ v->size = byte_sz; /* update parent type's vlen */ - t = btf_type_by_id(btf, btf->nr_types); + t = btf_last_type(btf); btf_type_inc_vlen(t); btf->hdr->type_len += sz; @@ -2639,6 +2718,7 @@ struct btf_dedup; static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, const struct btf_dedup_opts *opts); static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_prep(struct btf_dedup *d); static int btf_dedup_strings(struct btf_dedup *d); static int btf_dedup_prim_types(struct btf_dedup *d); static int btf_dedup_struct_types(struct btf_dedup *d); @@ -2797,6 +2877,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, if (btf_ensure_modifiable(btf)) return -ENOMEM; + err = btf_dedup_prep(d); + if (err) { + pr_debug("btf_dedup_prep failed:%d\n", err); + goto done; + } err = btf_dedup_strings(d); if (err < 0) { pr_debug("btf_dedup_strings failed:%d\n", err); @@ -2859,21 +2944,20 @@ struct btf_dedup { __u32 *hypot_list; size_t hypot_cnt; size_t 
hypot_cap; + /* Whether hypothetical mapping, if successful, would need to adjust + * already canonicalized types (due to a new forward declaration to + * concrete type resolution). In such case, during split BTF dedup + * candidate type would still be considered as different, because base + * BTF is considered to be immutable. + */ + bool hypot_adjust_canon; /* Various option modifying behavior of algorithm */ struct btf_dedup_opts opts; -}; - -struct btf_str_ptr { - const char *str; - __u32 new_off; - bool used; -}; - -struct btf_str_ptrs { - struct btf_str_ptr *ptrs; - const char *data; - __u32 cnt; - __u32 cap; + /* temporary strings deduplication state */ + void *strs_data; + size_t strs_cap; + size_t strs_len; + struct hashmap* strs_hash; }; static long hash_combine(long h, long value) @@ -2914,6 +2998,7 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d) for (i = 0; i < d->hypot_cnt; i++) d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; d->hypot_cnt = 0; + d->hypot_adjust_canon = false; } static void btf_dedup_free(struct btf_dedup *d) @@ -2953,7 +3038,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; - int i, err = 0; + int i, err = 0, type_cnt; if (!d) return ERR_PTR(-ENOMEM); @@ -2973,14 +3058,15 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, goto done; } - d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + type_cnt = btf__get_nr_types(btf) + 1; + d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; goto done; } /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) { + for (i = 1; i < type_cnt; i++) { struct btf_type *t = btf_type_by_id(d->btf, i); /* VAR and DATASEC are never deduped and are self-canonical */ @@ -2990,12 +3076,12 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, d->map[i] = BTF_UNPROCESSED_ID; } - d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + d->hypot_map = malloc(sizeof(__u32) * type_cnt); if (!d->hypot_map) { err = -ENOMEM; goto done; } - for (i = 0; i <= btf->nr_types; i++) + for (i = 0; i < type_cnt; i++) d->hypot_map[i] = BTF_UNPROCESSED_ID; done: @@ -3019,8 +3105,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) int i, j, r, rec_size; struct btf_type *t; - for (i = 1; i <= d->btf->nr_types; i++) { - t = btf_type_by_id(d->btf, i); + for (i = 0; i < d->btf->nr_types; i++) { + t = btf_type_by_id(d->btf, d->btf->start_id + i); r = fn(&t->name_off, ctx); if (r) return r; @@ -3100,64 +3186,53 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) return 0; } -static int str_sort_by_content(const void *a1, const void *a2) +static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) { - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - return strcmp(p1->str, p2->str); -} - -static int str_sort_by_offset(const void *a1, const void *a2) -{ - const struct btf_str_ptr *p1 = a1; - const struct btf_str_ptr *p2 = a2; - - if (p1->str != p2->str) - return p1->str < p2->str ? -1 : 1; - return 0; -} - -static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) -{ - const struct btf_str_ptr *p = pelem; - - if (str_ptr != p->str) - return (const char *)str_ptr < p->str ? 
-1 : 1; - return 0; -} - -static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) -{ - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; + struct btf_dedup *d = ctx; + __u32 str_off = *str_off_ptr; + long old_off, new_off, len; + const char *s; + void *p; + int err; - if (*str_off_ptr == 0) + /* don't touch empty string or string in main BTF */ + if (str_off == 0 || str_off < d->btf->start_str_off) return 0; - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - s->used = true; - return 0; -} + s = btf__str_by_offset(d->btf, str_off); + if (d->btf->base_btf) { + err = btf__find_str(d->btf->base_btf, s); + if (err >= 0) { + *str_off_ptr = err; + return 0; + } + if (err != -ENOENT) + return err; + } -static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) -{ - struct btf_str_ptrs *strs; - struct btf_str_ptr *s; + len = strlen(s) + 1; - if (*str_off_ptr == 0) - return 0; + new_off = d->strs_len; + p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len); + if (!p) + return -ENOMEM; - strs = ctx; - s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, - sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); - if (!s) - return -EINVAL; - *str_off_ptr = s->new_off; + memcpy(p, s, len); + + /* Now attempt to add the string, but only if the string with the same + * contents doesn't exist already (HASHMAP_ADD strategy). If such + * string exists, we'll get its offset in old_off (that's old_key). + */ + err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off, + HASHMAP_ADD, (const void **)&old_off, NULL); + if (err == -EEXIST) { + *str_off_ptr = d->btf->start_str_off + old_off; + } else if (err) { + return err; + } else { + *str_off_ptr = d->btf->start_str_off + new_off; + d->strs_len += len; + } return 0; } @@ -3174,118 +3249,71 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) */ static int btf_dedup_strings(struct btf_dedup *d) { - char *start = d->btf->strs_data; - char *end = start + d->btf->hdr->str_len; - char *p = start, *tmp_strs = NULL; - struct btf_str_ptrs strs = { - .cnt = 0, - .cap = 0, - .ptrs = NULL, - .data = start, - }; - int i, j, err = 0, grp_idx; - bool grp_used; + char *s; + int err; if (d->btf->strs_deduped) return 0; - /* build index of all strings */ - while (p < end) { - if (strs.cnt + 1 > strs.cap) { - struct btf_str_ptr *new_ptrs; - - strs.cap += max(strs.cnt / 2, 16U); - new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0])); - if (!new_ptrs) { - err = -ENOMEM; - goto done; - } - strs.ptrs = new_ptrs; - } - - strs.ptrs[strs.cnt].str = p; - strs.ptrs[strs.cnt].used = false; - - p += strlen(p) + 1; - strs.cnt++; - } + /* temporarily switch to use btf_dedup's strs_data for strings for hash + * functions; later we'll just transfer hashmap to struct btf as is, + * along the strs_data + */ + d->btf->strs_data_ptr = &d->strs_data; - /* temporary storage for deduplicated strings */ - tmp_strs = malloc(d->btf->hdr->str_len); - if (!tmp_strs) { - err = -ENOMEM; - goto done; + d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf); + if (IS_ERR(d->strs_hash)) { + err = PTR_ERR(d->strs_hash); + d->strs_hash = NULL; + goto err_out; } - /* mark all used strings */ - strs.ptrs[0].used = true; - err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); - if (err) - goto done; - - /* sort strings by context, so that we can identify duplicates */ - qsort(strs.ptrs, strs.cnt, 
sizeof(strs.ptrs[0]), str_sort_by_content); - - /* - * iterate groups of equal strings and if any instance in a group was - * referenced, emit single instance and remember new offset - */ - p = tmp_strs; - grp_idx = 0; - grp_used = strs.ptrs[0].used; - /* iterate past end to avoid code duplication after loop */ - for (i = 1; i <= strs.cnt; i++) { - /* - * when i == strs.cnt, we want to skip string comparison and go - * straight to handling last group of strings (otherwise we'd - * need to handle last group after the loop w/ duplicated code) - */ - if (i < strs.cnt && - !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { - grp_used = grp_used || strs.ptrs[i].used; - continue; - } + if (!d->btf->base_btf) { + s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1); + if (!s) + return -ENOMEM; + /* initial empty string */ + s[0] = 0; + d->strs_len = 1; - /* - * this check would have been required after the loop to handle - * last group of strings, but due to <= condition in a loop - * we avoid that duplication + /* insert empty string; we won't be looking it up during strings + * dedup, but it's good to have it for generic BTF string lookups */ - if (grp_used) { - int new_off = p - tmp_strs; - __u32 len = strlen(strs.ptrs[grp_idx].str); - - memmove(p, strs.ptrs[grp_idx].str, len + 1); - for (j = grp_idx; j < i; j++) - strs.ptrs[j].new_off = new_off; - p += len + 1; - } - - if (i < strs.cnt) { - grp_idx = i; - grp_used = strs.ptrs[i].used; - } + err = hashmap__insert(d->strs_hash, (void *)0, (void *)0, + HASHMAP_ADD, NULL, NULL); + if (err) + goto err_out; } - /* replace original strings with deduped ones */ - d->btf->hdr->str_len = p - tmp_strs; - memmove(start, tmp_strs, d->btf->hdr->str_len); - end = start + d->btf->hdr->str_len; - - /* restore original order for further binary search lookups */ - qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); - /* remap string offsets */ - err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d); if (err) - goto done; + goto err_out; - d->btf->hdr->str_len = end - start; + /* replace BTF string data and hash with deduped ones */ + free(d->btf->strs_data); + hashmap__free(d->btf->strs_hash); + d->btf->strs_data = d->strs_data; + d->btf->strs_data_cap = d->strs_cap; + d->btf->hdr->str_len = d->strs_len; + d->btf->strs_hash = d->strs_hash; + /* now point strs_data_ptr back to btf->strs_data */ + d->btf->strs_data_ptr = &d->btf->strs_data; + + d->strs_data = d->strs_hash = NULL; + d->strs_len = d->strs_cap = 0; d->btf->strs_deduped = true; + return 0; + +err_out: + free(d->strs_data); + hashmap__free(d->strs_hash); + d->strs_data = d->strs_hash = NULL; + d->strs_len = d->strs_cap = 0; + + /* restore strings pointer for existing d->btf->strs_hash back */ + d->btf->strs_data_ptr = &d->strs_data; -done: - free(tmp_strs); - free(strs.ptrs); return err; } @@ -3550,6 +3578,66 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) return true; } +/* Prepare split BTF for deduplication by calculating hashes of base BTF's + * types and initializing the rest of the state (canonical type mapping) for + * the fixed base BTF part. 
+ */ +static int btf_dedup_prep(struct btf_dedup *d) +{ + struct btf_type *t; + int type_id; + long h; + + if (!d->btf->base_btf) + return 0; + + for (type_id = 1; type_id < d->btf->start_id; type_id++) { + t = btf_type_by_id(d->btf, type_id); + + /* all base BTF types are self-canonical by definition */ + d->map[type_id] = type_id; + + switch (btf_kind(t)) { + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + /* VAR and DATASEC are never hash/deduplicated */ + continue; + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + h = btf_hash_common(t); + break; + case BTF_KIND_INT: + h = btf_hash_int(t); + break; + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + h = btf_hash_struct(t); + break; + case BTF_KIND_ARRAY: + h = btf_hash_array(t); + break; + case BTF_KIND_FUNC_PROTO: + h = btf_hash_fnproto(t); + break; + default: + pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id); + return -EINVAL; + } + if (btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + } + + return 0; +} + /* * Deduplicate primitive types, that can't reference other types, by calculating * their type signature hash and comparing them with any possible canonical @@ -3643,8 +3731,8 @@ static int btf_dedup_prim_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_prim_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, d->btf->start_id + i); if (err) return err; } @@ -3697,6 +3785,19 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; } +/* Check if given two types are identical ARRAY definitions */ +static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +{ + struct btf_type *t1, *t2; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + if (!btf_is_array(t1) || !btf_is_array(t2)) + return 0; + + return btf_equal_array(t1, t2); +} + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -3807,8 +3908,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, canon_id = resolve_fwd_id(d, canon_id); hypot_type_id = d->hypot_map[canon_id]; - if (hypot_type_id <= BTF_MAX_NR_TYPES) - return hypot_type_id == cand_id; + if (hypot_type_id <= BTF_MAX_NR_TYPES) { + /* In some cases the compiler will generate different DWARF types + * for *identical* array type definitions and use them for + * different fields within the *same* struct. This breaks the + * type equivalence check, which assumes that the candidate type + * sub-graph has consistent, compiler-deduplicated types within + * a single CU. So work around that by explicitly + * allowing identical array types here.
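To make the array workaround concrete, a hypothetical C input (not from this patch) that can trigger it:

struct s {
	int f1[16];
	int f2[16];	/* identical to f1's type, yet the compiler may emit
			 * a second, distinct DWARF/BTF array type for it */
};

btf_dedup_identical_arrays() above lets the equivalence check accept the two int[16] array types as equal instead of failing the whole candidate graph.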
+ */ + return hypot_type_id == cand_id || + btf_dedup_identical_arrays(d, hypot_type_id, cand_id); + } if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) return -ENOMEM; @@ -3834,6 +3945,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, } else { real_kind = cand_kind; fwd_kind = btf_fwd_kind(canon_type); + /* we'd need to resolve base FWD to STRUCT/UNION */ + if (fwd_kind == real_kind && canon_id < d->btf->start_id) + d->hypot_adjust_canon = true; } return fwd_kind == real_kind; } @@ -3871,8 +3985,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return 0; cand_arr = btf_array(cand_type); canon_arr = btf_array(canon_type); - eq = btf_dedup_is_equiv(d, - cand_arr->index_type, canon_arr->index_type); + eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type); if (eq <= 0) return eq; return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); @@ -3955,16 +4068,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, */ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) { - __u32 cand_type_id, targ_type_id; + __u32 canon_type_id, targ_type_id; __u16 t_kind, c_kind; __u32 t_id, c_id; int i; for (i = 0; i < d->hypot_cnt; i++) { - cand_type_id = d->hypot_list[i]; - targ_type_id = d->hypot_map[cand_type_id]; + canon_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[canon_type_id]; t_id = resolve_type_id(d, targ_type_id); - c_id = resolve_type_id(d, cand_type_id); + c_id = resolve_type_id(d, canon_type_id); t_kind = btf_kind(btf__type_by_id(d->btf, t_id)); c_kind = btf_kind(btf__type_by_id(d->btf, c_id)); /* @@ -3979,9 +4092,26 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) * stability is not a requirement for STRUCT/UNION equivalence * checks, though. */ + + /* if it's the split BTF case, we still need to point base FWD + * to STRUCT/UNION in a split BTF, because FWDs from split BTF + * will be resolved against base FWD. If we don't point base + * canonical FWD to the resolved STRUCT/UNION, then all the + * FWDs in split BTF won't be correctly resolved to a proper + * STRUCT/UNION. + */ if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) d->map[c_id] = t_id; - else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + + /* if graph equivalence determined that we'd need to adjust + * base canonical types, then we need to only point base FWDs + * to STRUCTs/UNIONs and do no more modifications. For all + * other purposes the type graphs were not equivalent. 
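An illustration of the base-FWD case, with hypothetical type IDs: if base BTF carries only a forward declaration, say "[3] FWD 's' fwd_kind=struct", and the split BTF being deduplicated supplies the definition "[12] STRUCT 's' ...", full equivalence would require rewriting base type [3]. Since base BTF is immutable, hypot_adjust_canon is set: the base-FWD mapping just above (d->map[c_id] = t_id, i.e. d->map[3] = 12) is still recorded, but the continue that follows skips all other canonical-map updates, so the split STRUCT remains its own canonical type.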
+ */ + if (d->hypot_adjust_canon) + continue; + + if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) d->map[t_id] = c_id; if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && @@ -4065,8 +4195,10 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) return eq; if (!eq) continue; - new_id = cand_id; btf_dedup_merge_hypot_map(d); + if (d->hypot_adjust_canon) /* not really equivalent */ + continue; + new_id = cand_id; break; } @@ -4081,8 +4213,8 @@ static int btf_dedup_struct_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_struct_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, d->btf->start_id + i); if (err) return err; } @@ -4225,8 +4357,8 @@ static int btf_dedup_ref_types(struct btf_dedup *d) { int i, err; - for (i = 1; i <= d->btf->nr_types; i++) { - err = btf_dedup_ref_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, d->btf->start_id + i); if (err < 0) return err; } @@ -4250,39 +4382,44 @@ static int btf_dedup_ref_types(struct btf_dedup *d) static int btf_dedup_compact_types(struct btf_dedup *d) { __u32 *new_offs; - __u32 next_type_id = 1; + __u32 next_type_id = d->btf->start_id; + const struct btf_type *t; void *p; - int i, len; + int i, id, len; /* we are going to reuse hypot_map to store compaction remapping */ d->hypot_map[0] = 0; - for (i = 1; i <= d->btf->nr_types; i++) - d->hypot_map[i] = BTF_UNPROCESSED_ID; + /* base BTF types are not renumbered */ + for (id = 1; id < d->btf->start_id; id++) + d->hypot_map[id] = id; + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) + d->hypot_map[id] = BTF_UNPROCESSED_ID; p = d->btf->types_data; - for (i = 1; i <= d->btf->nr_types; i++) { - if (d->map[i] != i) + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) { + if (d->map[id] != id) continue; - len = btf_type_size(btf__type_by_id(d->btf, i)); + t = btf__type_by_id(d->btf, id); + len = btf_type_size(t); if (len < 0) return len; - memmove(p, btf__type_by_id(d->btf, i), len); - d->hypot_map[i] = next_type_id; - d->btf->type_offs[next_type_id] = p - d->btf->types_data; + memmove(p, t, len); + d->hypot_map[id] = next_type_id; + d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data; p += len; next_type_id++; } /* shrink struct btf's internal types index and update btf_header */ - d->btf->nr_types = next_type_id - 1; - d->btf->type_offs_cap = d->btf->nr_types + 1; + d->btf->nr_types = next_type_id - d->btf->start_id; + d->btf->type_offs_cap = d->btf->nr_types; d->btf->hdr->type_len = p - d->btf->types_data; new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, sizeof(*new_offs)); - if (!new_offs) + if (d->btf->type_offs_cap && !new_offs) return -ENOMEM; d->btf->type_offs = new_offs; d->btf->hdr->str_off = d->btf->hdr->type_len; @@ -4414,8 +4551,8 @@ static int btf_dedup_remap_types(struct btf_dedup *d) { int i, r; - for (i = 1; i <= d->btf->nr_types; i++) { - r = btf_dedup_remap_type(d, i); + for (i = 0; i < d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, d->btf->start_id + i); if (r < 0) return r; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 57247240a20a..1093f6fe6800 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -31,11 +31,19 @@ enum btf_endianness { }; LIBBPF_API void btf__free(struct btf *btf); + LIBBPF_API struct btf *btf__new(const void *data, __u32 size); +LIBBPF_API struct btf *btf__new_split(const void *data, __u32 
size, struct btf *base_btf); LIBBPF_API struct btf *btf__new_empty(void); +LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); +LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_raw(const char *path); +LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); + LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 4ebfadf45b47..29ff4807b909 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -337,3 +337,12 @@ LIBBPF_0.2.0 { perf_buffer__consume_buffer; xsk_socket__create_shared; } LIBBPF_0.1.0; + +LIBBPF_0.3.0 { + global: + btf__parse_elf_split; + btf__parse_raw_split; + btf__parse_split; + btf__new_empty_split; + btf__new_split; +} LIBBPF_0.2.0; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 5482a9b7ae2d..ecaae2927ab8 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -230,6 +230,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) break; case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 4ea9a833dde7..5cdb19036d7f 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -3,15 +3,6 @@ include ../scripts/Makefile.include include ../scripts/Makefile.arch # always use the host compiler -ifneq ($(LLVM),) -HOSTAR ?= llvm-ar -HOSTCC ?= clang -HOSTLD ?= ld.lld -else -HOSTAR ?= ar -HOSTCC ?= gcc -HOSTLD ?= ld -endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7ce3f2e8b9c7..62f3deb1d3a8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -175,10 +175,6 @@ endef LD += $(EXTRA_LDFLAGS) -HOSTCC ?= gcc -HOSTLD ?= ld -HOSTAR ?= ar - PKG_CONFIG = $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 54a2857c2510..331f6d30f472 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -54,7 +54,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) LD = $(CC) -HOSTCC = gcc # check if compiler option is supported cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index a7974638561c..1358e89cdf7d 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -59,6 +59,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) $(call allow-override,CXX,$(CROSS_COMPILE)g++) $(call allow-override,STRIP,$(CROSS_COMPILE)strip) +ifneq ($(LLVM),) +HOSTAR ?= llvm-ar +HOSTCC ?= clang +HOSTLD ?= ld.lld +else +HOSTAR ?= ar +HOSTCC ?= gcc +HOSTLD ?= ld +endif + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/testing/selftests/bpf/.gitignore 
b/tools/testing/selftests/bpf/.gitignore index 3ab1200e172f..395ae040ce1f 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -8,7 +8,6 @@ FEATURE-DUMP.libbpf fixdep test_dev_cgroup /test_progs* -test_tcpbpf_user test_verifier_log feature test_sock diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 542768f5195b..c1708ffa6b1c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ @@ -163,7 +163,6 @@ $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c -$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c @@ -387,7 +386,7 @@ TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ - flow_dissector_load.h + btf_helpers.c flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c new file mode 100644 index 000000000000..48f90490f922 --- /dev/null +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <stdio.h> +#include <errno.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> +#include "test_progs.h" + +static const char * const btf_kind_str_mapping[] = { + [BTF_KIND_UNKN] = "UNKNOWN", + [BTF_KIND_INT] = "INT", + [BTF_KIND_PTR] = "PTR", + [BTF_KIND_ARRAY] = "ARRAY", + [BTF_KIND_STRUCT] = "STRUCT", + [BTF_KIND_UNION] = "UNION", + [BTF_KIND_ENUM] = "ENUM", + [BTF_KIND_FWD] = "FWD", + [BTF_KIND_TYPEDEF] = "TYPEDEF", + [BTF_KIND_VOLATILE] = "VOLATILE", + [BTF_KIND_CONST] = "CONST", + [BTF_KIND_RESTRICT] = "RESTRICT", + [BTF_KIND_FUNC] = "FUNC", + [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", + [BTF_KIND_VAR] = "VAR", + [BTF_KIND_DATASEC] = "DATASEC", +}; + +static const char *btf_kind_str(__u16 kind) +{ + if (kind > BTF_KIND_DATASEC) + return "UNKNOWN"; + return btf_kind_str_mapping[kind]; +} + +static const char *btf_int_enc_str(__u8 encoding) +{ + switch (encoding) { + case 0: + return "(none)"; + case BTF_INT_SIGNED: + return "SIGNED"; + case BTF_INT_CHAR: + return "CHAR"; + case BTF_INT_BOOL: + return "BOOL"; + default: + return "UNKN"; + } +} + +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: + return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: + return "global-alloc"; + default: + return "(unknown)"; + } +} + +static const char *btf_func_linkage_str(const struct btf_type *t) +{ + switch (btf_vlen(t)) { + case BTF_FUNC_STATIC: + return "static"; + case BTF_FUNC_GLOBAL: + return "global"; + case BTF_FUNC_EXTERN: + return "extern"; + default: + return 
"(unknown)"; + } +} + +static const char *btf_str(const struct btf *btf, __u32 off) +{ + if (!off) + return "(anon)"; + return btf__str_by_offset(btf, off) ?: "(invalid)"; +} + +int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) +{ + const struct btf_type *t; + int kind, i; + __u32 vlen; + + t = btf__type_by_id(btf, id); + if (!t) + return -EINVAL; + + vlen = btf_vlen(t); + kind = btf_kind(t); + + fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off)); + + switch (kind) { + case BTF_KIND_INT: + fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s", + t->size, btf_int_offset(t), btf_int_bits(t), + btf_int_enc_str(btf_int_encoding(t))); + break; + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + fprintf(out, " type_id=%u", t->type); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *arr = btf_array(t); + + fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u", + arr->type, arr->index_type, arr->nelems); + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, m++) { + __u32 bit_off, bit_sz; + + bit_off = btf_member_bit_offset(t, i); + bit_sz = btf_member_bitfield_size(t, i); + fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u", + btf_str(btf, m->name_off), m->type, bit_off); + if (bit_sz) + fprintf(out, " bitfield_size=%u", bit_sz); + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *v = btf_enum(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, "\n\t'%s' val=%u", + btf_str(btf, v->name_off), v->val); + } + break; + } + case BTF_KIND_FWD: + fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct"); + break; + case BTF_KIND_FUNC: + fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t)); + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + + fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen); + for (i = 0; i < vlen; i++, p++) { + fprintf(out, "\n\t'%s' type_id=%u", + btf_str(btf, p->name_off), p->type); + } + break; + } + case BTF_KIND_VAR: + fprintf(out, " type_id=%u, linkage=%s", + t->type, btf_var_linkage_str(btf_var(t)->linkage)); + break; + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = btf_var_secinfos(t); + + fprintf(out, " size=%u vlen=%u", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, "\n\ttype_id=%u offset=%u size=%u", + v->type, v->offset, v->size); + } + break; + } + default: + break; + } + + return 0; +} + +/* Print raw BTF type dump into a local buffer and return string pointer back. 
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls + */ +const char *btf_type_raw_dump(const struct btf *btf, int type_id) +{ + static char buf[16 * 1024]; + FILE *buf_file; + + buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); + if (!buf_file) { + fprintf(stderr, "Failed to open memstream: %d\n", errno); + return NULL; + } + + fprintf_btf_type_raw(buf_file, btf, type_id); + fflush(buf_file); + fclose(buf_file); + + return buf; +} + +int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) +{ + int i; + bool ok = true; + + ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); + + for (i = 1; i <= nr_types; i++) { + if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) + ok = false; + } + + return ok; +} + +static void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +/* Print BTF-to-C dump into a local buffer and return string pointer back. + * Buffer *will* be overwritten by subsequent btf_type_c_dump() calls + */ +const char *btf_type_c_dump(const struct btf *btf) +{ + static char buf[16 * 1024]; + FILE *buf_file; + struct btf_dump *d = NULL; + struct btf_dump_opts opts = {}; + int err, i; + + buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); + if (!buf_file) { + fprintf(stderr, "Failed to open memstream: %d\n", errno); + return NULL; + } + + opts.ctx = buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (libbpf_get_error(d)) { + fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); + return NULL; + } + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + if (err) { + fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); + return NULL; + } + } + + fflush(buf_file); + fclose(buf_file); + return buf; +} diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h new file mode 100644 index 000000000000..295c0137d9bd --- /dev/null +++ b/tools/testing/selftests/bpf/btf_helpers.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#ifndef __BTF_HELPERS_H +#define __BTF_HELPERS_H + +#include <stdio.h> +#include <bpf/btf.h> + +int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id); +const char *btf_type_raw_dump(const struct btf *btf, int type_id); +int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]); + +#define VALIDATE_RAW_BTF(btf, raw_types...) 
\ + btf_validate_raw(btf, \ + sizeof((const char *[]){raw_types})/sizeof(void *),\ + (const char *[]){raw_types}) + +const char *btf_type_c_dump(const struct btf *btf); +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 93162484c2ca..8ae97e2a4b9d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -6652,7 +6652,7 @@ static void do_test_dedup(unsigned int test_num) const void *test_btf_data, *expect_btf_data; const char *ret_test_next_str, *ret_expect_next_str; const char *test_strs, *expect_strs; - const char *test_str_cur, *test_str_end; + const char *test_str_cur; const char *expect_str_cur, *expect_str_end; unsigned int raw_btf_size; void *raw_btf; @@ -6719,12 +6719,18 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test_str_cur = test_strs; - test_str_end = test_strs + test_hdr->str_len; expect_str_cur = expect_strs; expect_str_end = expect_strs + expect_hdr->str_len; - while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + while (expect_str_cur < expect_str_end) { size_t test_len, expect_len; + int off; + + off = btf__find_str(test_btf, expect_str_cur); + if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) { + err = -1; + goto done; + } + test_str_cur = btf__str_by_offset(test_btf, off); test_len = strlen(test_str_cur); expect_len = strlen(expect_str_cur); @@ -6741,15 +6747,8 @@ static void do_test_dedup(unsigned int test_num) err = -1; goto done; } - test_str_cur += test_len + 1; expect_str_cur += expect_len + 1; } - if (CHECK(test_str_cur != test_str_end, - "test_str_cur:%p != test_str_end:%p", - test_str_cur, test_str_end)) { - err = -1; - goto done; - } test_nr_types = btf__get_nr_types(test_btf); expect_nr_types = btf__get_nr_types(expect_btf); @@ -6775,10 +6774,21 @@ static void do_test_dedup(unsigned int test_num) err = -1; goto done; } - if (CHECK(memcmp((void *)test_type, - (void *)expect_type, - test_size), - "type #%d: contents differ", i)) { + if (CHECK(btf_kind(test_type) != btf_kind(expect_type), + "type %d kind: exp %d != got %u\n", + i, btf_kind(expect_type), btf_kind(test_type))) { + err = -1; + goto done; + } + if (CHECK(test_type->info != expect_type->info, + "type %d info: exp %d != got %u\n", + i, expect_type->info, test_type->info)) { + err = -1; + goto done; + } + if (CHECK(test_type->size != expect_type->size, + "type %d size/type: exp %d != got %u\n", + i, expect_type->size, test_type->size)) { err = -1; goto done; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c new file mode 100644 index 000000000000..64554fd33547 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" + +static void test_split_simple() { + const struct btf_type *t; + struct btf *btf1, *btf2; + int str_off, err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 
bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + ASSERT_STREQ(btf_type_c_dump(btf1), "\ +struct s1 {\n\ + int f1;\n\ +};\n\n", "c_dump"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + /* pointer size should be "inherited" from main BTF */ + ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz"); + + str_off = btf__find_str(btf2, "int"); + ASSERT_NEQ(str_off, -ENOENT, "str_int_missing"); + + t = btf__type_by_id(btf2, 1); + if (!ASSERT_OK_PTR(t, "int_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "int_kind"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name"); + + btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */ + btf__add_field(btf2, "f1", 6, 0, 0); /* struct s1 f1; */ + btf__add_field(btf2, "f2", 5, 32, 0); /* int f2; */ + btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */ + /* } */ + + /* duplicated int */ + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [5] int */ + + /* duplicated struct s1 */ + btf__add_struct(btf2, "s1", 4); /* [6] struct s1 { */ + btf__add_field(btf2, "f1", 5, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=16 vlen=3\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=5 bits_offset=32\n" + "\t'f3' type_id=2 bits_offset=64", + "[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[6] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=5 bits_offset=0"); + + ASSERT_STREQ(btf_type_c_dump(btf2), "\ +struct s1 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s1___2 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s2 {\n\ + struct s1___2 f1;\n\ + int f2;\n\ + int *f3;\n\ +};\n\n", "c_dump"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[4] STRUCT 's2' size=16 vlen=3\n" + "\t'f1' type_id=3 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32\n" + "\t'f3' type_id=2 bits_offset=64"); + + ASSERT_STREQ(btf_type_c_dump(btf2), "\ +struct s1 {\n\ + int f1;\n\ +};\n\ +\n\ +struct s2 {\n\ + struct s1 f1;\n\ + int f2;\n\ + int *f3;\n\ +};\n\n", "c_dump"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +static void test_split_fwd_resolve() { + struct btf *btf1, *btf2; + int err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 4); /* [2] ptr to struct s1 */ + btf__add_ptr(btf1, 5); /* [3] ptr to struct s2 */ + btf__add_struct(btf1, "s1", 16); /* [4] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf1, "f2", 3, 64, 0); /* struct s2 *f2; */ + /* } */ + btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' 
type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ + btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ + btf__add_ptr(btf2, 8); /* [9] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[7] PTR '(anon)' type_id=10", + "[8] FWD 's2' fwd_kind=struct", + "[9] PTR '(anon)' type_id=8", + "[10] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=4", + "[3] PTR '(anon)' type_id=5", + "[4] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=64", + "[5] STRUCT 's2' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +static void test_split_struct_duped() { + struct btf *btf1, *btf2; + int err; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 5); /* [2] ptr to struct s1 */ + btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT); /* [3] fwd for struct s2 */ + btf__add_ptr(btf1, 3); /* [4] ptr to fwd struct s2 */ + btf__add_struct(btf1, "s1", 16); /* [5] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf1, "f2", 4, 64, 0); /* struct s2 *f2; */ + /* } */ + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */ + btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */ + btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */ + btf__add_ptr(btf2, 11); /* [9] ptr to struct s2 */ + btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + /* } */ + btf__add_struct(btf2, "s2", 40); /* [11] struct s2 { */ + btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */ + btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */ + btf__add_field(btf2, "f3", 6, 128, 0); /* int f3; */ + btf__add_field(btf2, "f4", 10, 192, 0); /* struct s1 f4; */ + /* } */ 
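+	/* Editorial note: the two types added below reference struct s2 only
+	 * through the split BTF's own forward declaration ([8]); per the
+	 * post-dedup expectations further down, btf__dedup() should resolve
+	 * that fwd to the full struct s2 and remap the pointer accordingly.
+	 */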
+ btf__add_ptr(btf2, 8); /* [12] ptr to fwd struct s2 */ + btf__add_struct(btf2, "s3", 8); /* [13] struct s3 { */ + btf__add_field(btf2, "f1", 12, 0, 0); /* struct s2 *f1; (fwd) */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64", + "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[7] PTR '(anon)' type_id=10", + "[8] FWD 's2' fwd_kind=struct", + "[9] PTR '(anon)' type_id=11", + "[10] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64", + "[11] STRUCT 's2' size=40 vlen=4\n" + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64\n" + "\t'f3' type_id=6 bits_offset=128\n" + "\t'f4' type_id=10 bits_offset=192", + "[12] PTR '(anon)' type_id=8", + "[13] STRUCT 's3' size=8 vlen=1\n" + "\t'f1' type_id=12 bits_offset=0"); + + err = btf__dedup(btf2, NULL, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=5", + "[3] FWD 's2' fwd_kind=struct", + "[4] PTR '(anon)' type_id=3", + "[5] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=2 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=64", + "[6] PTR '(anon)' type_id=8", + "[7] PTR '(anon)' type_id=9", + "[8] STRUCT 's1' size=16 vlen=2\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=7 bits_offset=64", + "[9] STRUCT 's2' size=40 vlen=4\n" + "\t'f1' type_id=6 bits_offset=0\n" + "\t'f2' type_id=7 bits_offset=64\n" + "\t'f3' type_id=1 bits_offset=128\n" + "\t'f4' type_id=8 bits_offset=192", + "[10] STRUCT 's3' size=8 vlen=1\n" + "\t'f1' type_id=7 bits_offset=0"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + +void test_btf_dedup_split() +{ + if (test__start_subtest("split_simple")) + test_split_simple(); + if (test__start_subtest("split_struct_duped")) + test_split_struct_duped(); + if (test__start_subtest("split_fwd_resolve")) + test_split_fwd_resolve(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 86ccf37e26b3..762f6a9da8b5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -17,7 +17,7 @@ #include "test_btf_skc_cls_ingress.skel.h" static struct test_btf_skc_cls_ingress *skel; -struct sockaddr_in6 srv_sa6; +static struct sockaddr_in6 srv_sa6; static __u32 duration; #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c new file mode 100644 index 000000000000..ca7c2a91610a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <bpf/btf.h> + +static char *dump_buf; +static size_t dump_buf_sz; +static FILE *dump_buf_file; + +static void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +void test_btf_split() { + struct btf_dump_opts opts; + struct btf_dump *d = NULL; + const struct btf_type *t; + struct btf *btf1, *btf2; + int str_off, i, err; + + btf1 = btf__new_empty(); + if 
(!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */ + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + + btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + /* pointer size should be "inherited" from main BTF */ + ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz"); + + str_off = btf__find_str(btf2, "int"); + ASSERT_NEQ(str_off, -ENOENT, "str_int_missing"); + + t = btf__type_by_id(btf2, 1); + if (!ASSERT_OK_PTR(t, "int_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "int_kind"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name"); + + btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */ + btf__add_field(btf2, "f1", 3, 0, 0); /* struct s1 f1; */ + btf__add_field(btf2, "f2", 1, 32, 0); /* int f2; */ + btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */ + /* } */ + + t = btf__type_by_id(btf1, 4); + ASSERT_NULL(t, "split_type_in_main"); + + t = btf__type_by_id(btf2, 4); + if (!ASSERT_OK_PTR(t, "split_struct_type")) + goto cleanup; + ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind"); + ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen"); + ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name"); + + /* BTF-to-C dump of split BTF */ + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; + opts.ctx = dump_buf_file; + d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); + if (!ASSERT_OK_PTR(d, "btf_dump__new")) + goto cleanup; + for (i = 1; i <= btf__get_nr_types(btf2); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"struct s1 {\n" +" int f1;\n" +"};\n" +"\n" +"struct s2 {\n" +" struct s1 f1;\n" +" int f2;\n" +" int *f3;\n" +"};\n\n", "c_dump"); + +cleanup: + if (dump_buf_file) + fclose(dump_buf_file); + free(dump_buf); + btf_dump__free(d); + btf__free(btf1); + btf__free(btf2); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 314e1e7c36df..f36da15b134f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include <test_progs.h> #include <bpf/btf.h> +#include "btf_helpers.h" static int duration = 0; @@ -39,6 +40,8 @@ void test_btf_write() { ASSERT_EQ(t->size, 4, "int_sz"); ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); + ASSERT_STREQ(btf_type_raw_dump(btf, 1), + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump"); /* invalid int size */ id = btf__add_int(btf, "bad sz int", 7, 0); @@ -59,24 +62,32 @@ void test_btf_write() { t = btf__type_by_id(btf, 2); ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); ASSERT_EQ(t->type, 1, "ptr_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 2), + "[2] PTR '(anon)' type_id=1", "raw_dump"); id = btf__add_const(btf, 5); /* points forward to restrict */ ASSERT_EQ(id, 3, "const_id"); t = btf__type_by_id(btf, 3); ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); ASSERT_EQ(t->type, 5, "const_type"); + 
ASSERT_STREQ(btf_type_raw_dump(btf, 3), + "[3] CONST '(anon)' type_id=5", "raw_dump"); id = btf__add_volatile(btf, 3); ASSERT_EQ(id, 4, "volatile_id"); t = btf__type_by_id(btf, 4); ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); ASSERT_EQ(t->type, 3, "volatile_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 4), + "[4] VOLATILE '(anon)' type_id=3", "raw_dump"); id = btf__add_restrict(btf, 4); ASSERT_EQ(id, 5, "restrict_id"); t = btf__type_by_id(btf, 5); ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); ASSERT_EQ(t->type, 4, "restrict_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 5), + "[5] RESTRICT '(anon)' type_id=4", "raw_dump"); /* ARRAY */ id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ @@ -86,6 +97,8 @@ void test_btf_write() { ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); + ASSERT_STREQ(btf_type_raw_dump(btf, 6), + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump"); /* STRUCT */ err = btf__add_field(btf, "field", 1, 0, 0); @@ -113,6 +126,10 @@ void test_btf_write() { ASSERT_EQ(m->type, 1, "f2_type"); ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 7), + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump"); /* UNION */ id = btf__add_union(btf, "u1", 8); @@ -136,6 +153,9 @@ void test_btf_write() { ASSERT_EQ(m->type, 1, "f1_type"); ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 8), + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump"); /* ENUM */ id = btf__add_enum(btf, "e1", 4); @@ -156,6 +176,10 @@ void test_btf_write() { v = btf_enum(t) + 1; ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 9), + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", "raw_dump"); /* FWDs */ id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); @@ -164,6 +188,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); + ASSERT_STREQ(btf_type_raw_dump(btf, 10), + "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump"); id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); ASSERT_EQ(id, 11, "union_fwd_id"); @@ -171,6 +197,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); + ASSERT_STREQ(btf_type_raw_dump(btf, 11), + "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump"); id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); ASSERT_EQ(id, 12, "enum_fwd_id"); @@ -179,6 +207,8 @@ void test_btf_write() { ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 12), + "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -187,6 +217,8 @@ void test_btf_write() { ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); ASSERT_EQ(btf_kind(t), 
BTF_KIND_TYPEDEF, "typedef_kind"); ASSERT_EQ(t->type, 1, "typedef_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 13), + "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump"); /* FUNC & FUNC_PROTO */ id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); @@ -196,6 +228,8 @@ void test_btf_write() { ASSERT_EQ(t->type, 15, "func_type"); ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); + ASSERT_STREQ(btf_type_raw_dump(btf, 14), + "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump"); id = btf__add_func_proto(btf, 1); ASSERT_EQ(id, 15, "func_proto_id"); @@ -214,6 +248,10 @@ void test_btf_write() { p = btf_params(t) + 1; ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); ASSERT_EQ(p->type, 2, "p2_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 15), + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", "raw_dump"); /* VAR */ id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); @@ -223,6 +261,8 @@ void test_btf_write() { ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); ASSERT_EQ(t->type, 1, "var_type"); ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 16), + "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump"); /* DATASECT */ id = btf__add_datasec(btf, "datasec1", 12); @@ -239,6 +279,9 @@ void test_btf_write() { ASSERT_EQ(vi->type, 1, "v1_type"); ASSERT_EQ(vi->offset, 4, "v1_off"); ASSERT_EQ(vi->size, 8, "v1_sz"); + ASSERT_STREQ(btf_type_raw_dump(btf, 17), + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", "raw_dump"); btf__free(btf); } diff --git a/tools/testing/selftests/bpf/prog_tests/hash_large_key.c b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c new file mode 100644 index 000000000000..34684c0fc76d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_hash_large_key.skel.h" + +void test_hash_large_key(void) +{ + int err, value = 21, duration = 0, hash_map_fd; + struct test_hash_large_key *skel; + + struct bigelement { + int a; + char b[4096]; + long long c; + } key; + bzero(&key, sizeof(key)); + + skel = test_hash_large_key__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + return; + + hash_map_fd = bpf_map__fd(skel->maps.hash_map); + if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n")) + goto cleanup; + + err = test_hash_large_key__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) + goto cleanup; + + err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY); + if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno)) + goto cleanup; + + key.c = 1; + err = bpf_map_lookup_elem(hash_map_fd, &key, &value); + if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno)) + goto cleanup; + + CHECK_FAIL(value != 42); + +cleanup: + test_hash_large_key__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c new file mode 100644 index 000000000000..2b392590e8ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <sys/types.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "test_sk_storage_trace_itself.skel.h" 
+#include "test_sk_storage_tracing.skel.h" + +#define LO_ADDR6 "::1" +#define TEST_COMM "test_progs" + +struct sk_stg { + __u32 pid; + __u32 last_notclose_state; + char comm[16]; +}; + +static struct test_sk_storage_tracing *skel; +static __u32 duration; +static pid_t my_pid; + +static int check_sk_stg(int sk_fd, __u32 expected_state) +{ + struct sk_stg sk_stg; + int err; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd, + &sk_stg); + if (!ASSERT_OK(err, "map_lookup(sk_stg_map)")) + return -1; + + if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state, + "last_notclose_state")) + return -1; + + if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid")) + return -1; + + if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm")) + return -1; + + return 0; +} + +static void do_test(void) +{ + int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err; + char abyte; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(listen_fd == -1, "start_server", + "listen_fd:%d errno:%d\n", listen_fd, errno)) + return; + + active_fd = connect_to_fd(listen_fd, 0); + if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n", + active_fd, errno)) + goto out; + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map), + &active_fd, &value, 0); + if (!ASSERT_OK(err, "map_update(del_sk_stg_map)")) + goto out; + + passive_fd = accept(listen_fd, NULL, 0); + if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n", + passive_fd, errno)) + goto out; + + shutdown(active_fd, SHUT_WR); + err = read(passive_fd, &abyte, 1); + if (!ASSERT_OK(err, "read(passive_fd)")) + goto out; + + shutdown(passive_fd, SHUT_WR); + err = read(active_fd, &abyte, 1); + if (!ASSERT_OK(err, "read(active_fd)")) + goto out; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map), + &active_fd, &value); + if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)")) + goto out; + + err = check_sk_stg(listen_fd, BPF_TCP_LISTEN); + if (!ASSERT_OK(err, "listen_fd sk_stg")) + goto out; + + err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2); + if (!ASSERT_OK(err, "active_fd sk_stg")) + goto out; + + err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK); + ASSERT_OK(err, "passive_fd sk_stg"); + +out: + if (active_fd != -1) + close(active_fd); + if (passive_fd != -1) + close(passive_fd); + if (listen_fd != -1) + close(listen_fd); +} + +void test_sk_storage_tracing(void) +{ + struct test_sk_storage_trace_itself *skel_itself; + int err; + + my_pid = getpid(); + + skel_itself = test_sk_storage_trace_itself__open_and_load(); + + if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) { + test_sk_storage_trace_itself__destroy(skel_itself); + return; + } + + skel = test_sk_storage_tracing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing")) + return; + + err = test_sk_storage_tracing__attach(skel); + if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) { + test_sk_storage_tracing__destroy(skel); + return; + } + + do_test(); + + test_sk_storage_tracing__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index c85174cdcb77..08d19cafd5e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -18,12 +18,12 @@ #define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-hdr-opt-test" -struct bpf_test_option exp_passive_estab_in; -struct bpf_test_option exp_active_estab_in; -struct 
bpf_test_option exp_passive_fin_in;
-struct bpf_test_option exp_active_fin_in;
-struct hdr_stg exp_passive_hdr_stg;
-struct hdr_stg exp_active_hdr_stg = { .active = true, };
+static struct bpf_test_option exp_passive_estab_in;
+static struct bpf_test_option exp_active_estab_in;
+static struct bpf_test_option exp_passive_fin_in;
+static struct bpf_test_option exp_active_fin_in;
+static struct hdr_stg exp_passive_hdr_stg;
+static struct hdr_stg exp_active_hdr_stg = { .active = true, };
 
 static struct test_misc_tcp_hdr_options *misc_skel;
 static struct test_tcp_hdr_options *skel;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
new file mode 100644
index 000000000000..ab5281475f44
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_tcpbpf.h"
+#include "test_tcpbpf_kern.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-user-test"
+
+static __u32 duration;
+
+static void verify_result(struct tcpbpf_globals *result)
+{
+	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+				 (1 << BPF_SOCK_OPS_RWND_INIT) |
+				 (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+				 (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+				 (1 << BPF_SOCK_OPS_STATE_CB) |
+				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+	/* check global map */
+	CHECK(expected_events != result->event_map, "event_map",
+	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
+	      result->event_map, expected_events);
+
+	ASSERT_EQ(result->bytes_received, 501, "bytes_received");
+	ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result->data_segs_in, 1, "data_segs_in");
+	ASSERT_EQ(result->data_segs_out, 1, "data_segs_out");
+	ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+	ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv");
+	ASSERT_EQ(result->num_listen, 1, "num_listen");
+
+	/* 3 comes from one listening socket + both ends of the connection */
+	ASSERT_EQ(result->num_close_events, 3, "num_close_events");
+
+	/* check setsockopt for SAVE_SYN */
+	ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn");
+
+	/* check getsockopt for SAVED_SYN */
+	ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
+}
+
+static void run_test(struct tcpbpf_globals *result)
+{
+	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
+	char buf[1000];
+	int err = -1;
+	int i, rv;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n",
+		  listen_fd, errno))
+		goto done;
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)",
+		  "cli_fd:%d errno:%d\n", cli_fd, errno))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (CHECK(accept_fd == -1, "accept(listen_fd)",
+		  "accept_fd:%d errno:%d\n", accept_fd, errno))
+		goto done;
+
+	/* Send 1000B of '+'s from cli_fd -> accept_fd */
+	for (i = 0; i < 1000; i++)
+		buf[i] = '+';
+
+	rv = send(cli_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(accept_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/* Send 500B of '.'s from accept_fd -> cli_fd */
+	for (i = 0; i < 500; i++)
+		buf[i] = '.';
+
+	rv = send(accept_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(cli_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/*
+	 * shutdown accept first to guarantee correct ordering for
+	 * bytes_received and bytes_acked when we go to verify the results.
+	 */
+	shutdown(accept_fd, SHUT_WR);
+	err = recv(cli_fd, buf, 1, 0);
+	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno))
+		goto done;
+
+	shutdown(cli_fd, SHUT_WR);
+	err = recv(accept_fd, buf, 1, 0);
+	CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno);
+done:
+	if (accept_fd != -1)
+		close(accept_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+
+	if (!err)
+		verify_result(result);
+}
+
+void test_tcpbpf_user(void)
+{
+	struct test_tcpbpf_kern *skel;
+	int cg_fd = -1;
+
+	skel = test_tcpbpf_kern__open_and_load();
+	if (CHECK(!skel, "open and load skel", "failed"))
+		return;
+
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")",
+		  "cg_fd:%d errno:%d", cg_fd, errno))
+		goto err;
+
+	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
+		goto err;
+
+	run_test(&skel->bss->global);
+
+err:
+	if (cg_fd != -1)
+		close(cg_fd);
+	test_tcpbpf_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 91cd6f357246..fcca7ba1f368 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -4,30 +4,165 @@
  * Copyright (C) 2020 Google LLC.
  */
 
+#include <asm-generic/errno-base.h>
+#include <sys/stat.h>
 #include <test_progs.h>
 #include <linux/limits.h>
 
 #include "local_storage.skel.h"
 #include "network_helpers.h"
 
-int create_and_unlink_file(void)
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
+				      loff_t *off_out, size_t len,
+				      unsigned int flags)
 {
-	char fname[PATH_MAX] = "/tmp/fileXXXXXX";
-	int fd;
+	return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out,
+		       len, flags);
+}
+
+static unsigned int duration;
 
-	fd = mkstemp(fname);
-	if (fd < 0)
-		return fd;
+
+#define TEST_STORAGE_VALUE 0xbeefdead
 
-	close(fd);
-	unlink(fname);
-	return 0;
+struct storage {
+	void *inode;
+	unsigned int value;
+	/* Lock ensures that spin locked versions of local storage operations
+	 * also work, most operations in this test are still single-threaded
+	 */
+	struct bpf_spin_lock lock;
+};
+
+/* Copies an rm binary to a temp file. dest is a mkstemp template */
+static int copy_rm(char *dest)
+{
+	int fd_in, fd_out = -1, ret = 0;
+	struct stat stat;
+
+	fd_in = open("/bin/rm", O_RDONLY);
+	if (fd_in < 0)
+		return -errno;
+
+	fd_out = mkstemp(dest);
+	if (fd_out < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = fstat(fd_in, &stat);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	/* Set executable permission on the copied file */
+	ret = chmod(dest, 0100);
+	if (ret == -1)
+		ret = -errno;
+
+out:
+	close(fd_in);
+	close(fd_out);
+	return ret;
+}
+
+/* Fork and exec the provided rm binary; the child's pid is stored in
+ * *monitored_pid and its exit code is returned.
+ */
+static int run_self_unlink(int *monitored_pid, const char *rm_path)
+{
+	int child_pid, child_status, ret;
+	int null_fd;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		null_fd = open("/dev/null", O_WRONLY);
+		dup2(null_fd, STDOUT_FILENO);
+		dup2(null_fd, STDERR_FILENO);
+		close(null_fd);
+
+		*monitored_pid = getpid();
+		/* Use the copied rm binary to delete itself:
+		 * /tmp/copy_of_rm /tmp/copy_of_rm.
+		 */
+		ret = execlp(rm_path, rm_path, rm_path, NULL);
+		if (ret)
+			exit(errno);
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
+
+static bool check_syscall_operations(int map_fd, int obj_fd)
+{
+	struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
+		       lookup_val = { .value = 0, .lock = { 0 } };
+	int err;
+
+	/* Looking up a non-existent element should fail initially */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	/* Create a new element */
+	err = bpf_map_update_elem(map_fd, &obj_fd, &val,
+				  BPF_NOEXIST | BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* Lookup the newly created element */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
+		  errno))
+		return false;
+
+	/* Check the value of the newly created element */
+	if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem",
+		  "got value %x != exp value %x", lookup_val.value, val.value))
+		return false;
+
+	err = bpf_map_delete_elem(map_fd, &obj_fd);
+	if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* The lookup should fail, now that the element has been deleted */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	return true;
+}
 
 void test_test_local_storage(void)
 {
+	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
+	int err, serv_sk = -1, task_fd = -1, rm_fd = -1;
 	struct local_storage *skel = NULL;
-	int err, duration = 0, serv_sk = -1;
 
 	skel = local_storage__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -37,12 +172,46 @@ void test_test_local_storage(void)
 	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
 		goto close_prog;
 
-	skel->bss->monitored_pid = getpid();
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (CHECK(task_fd < 0, "pidfd_open",
+		  "failed to get pidfd err:%d, errno:%d", task_fd, errno))
+		goto close_prog;
+
+	if
(!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map), + task_fd)) + goto close_prog; + + err = copy_rm(tmp_exec_path); + if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) + goto close_prog; + + rm_fd = open(tmp_exec_path, O_RDONLY); + if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", + tmp_exec_path, rm_fd, errno)) + goto close_prog; - err = create_and_unlink_file(); - if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), + rm_fd)) goto close_prog; + /* Sets skel->bss->monitored_pid to the pid of the forked child + * forks a child process that executes tmp_exec_path and tries to + * unlink its executable. This operation should be denied by the loaded + * LSM program. + */ + err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); + if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) + goto close_prog_unlink; + + /* Set the process being monitored to be the current process */ + skel->bss->monitored_pid = getpid(); + + /* Remove the temporary created executable */ + err = unlink(tmp_exec_path); + if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, + errno)) + goto close_prog_unlink; + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", "inode_local_storage not set\n"); @@ -53,8 +222,15 @@ void test_test_local_storage(void) CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", "sk_local_storage not set\n"); - close(serv_sk); + if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), + serv_sk)) + goto close_prog; +close_prog_unlink: + unlink(tmp_exec_path); close_prog: + close(serv_sk); + close(rm_fd); + close(task_fd); local_storage__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c new file mode 100644 index 000000000000..cf1215531920 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <network_helpers.h> +#include "skb_pkt_end.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + __u32 duration, retval; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + if (CHECK(err || retval != 123, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + return -1; + return 0; +} + +void test_test_skb_pkt_end(void) +{ + struct skb_pkt_end *skb_pkt_end_skel = NULL; + __u32 duration = 0; + int err; + + skb_pkt_end_skel = skb_pkt_end__open_and_load(); + if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n")) + goto cleanup; + + err = skb_pkt_end__attach(skb_pkt_end_skel); + if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(skb_pkt_end_skel->progs.main_prog)) + goto cleanup; + +cleanup: + skb_pkt_end__destroy(skb_pkt_end_skel); +} diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 0758ba229ae0..3e3de130f28f 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -4,9 +4,8 @@ * Copyright 2020 Google LLC. 
*/ +#include "vmlinux.h" #include <errno.h> -#include <linux/bpf.h> -#include <stdbool.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -18,60 +17,68 @@ int monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; -struct dummy_storage { +struct local_storage { + struct inode *exec_inode; __u32 value; + struct bpf_spin_lock lock; }; struct { __uint(type, BPF_MAP_TYPE_INODE_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); __type(key, int); - __type(value, struct dummy_storage); + __type(value, struct local_storage); } inode_storage_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); __type(key, int); - __type(value, struct dummy_storage); + __type(value, struct local_storage); } sk_storage_map SEC(".maps"); -/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. - */ -struct sock {} __attribute__((preserve_access_index)); -struct sockaddr {} __attribute__((preserve_access_index)); -struct socket { - struct sock *sk; -} __attribute__((preserve_access_index)); - -struct inode {} __attribute__((preserve_access_index)); -struct dentry { - struct inode *d_inode; -} __attribute__((preserve_access_index)); -struct file { - struct inode *f_inode; -} __attribute__((preserve_access_index)); - +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct local_storage); +} task_storage_map SEC(".maps"); SEC("lsm/inode_unlink") int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; + bool is_self_unlink; + int err; if (pid != monitored_pid) return 0; + storage = bpf_task_storage_get(&task_storage_map, + bpf_get_current_task_btf(), 0, 0); + if (storage) { + /* Don't let an executable delete itself */ + bpf_spin_lock(&storage->lock); + is_self_unlink = storage->exec_inode == victim->d_inode; + bpf_spin_unlock(&storage->lock); + if (is_self_unlink) + return -EPERM; + } + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + bpf_spin_lock(&storage->lock); + if (storage->value != DUMMY_STORAGE_VALUE) inode_storage_result = -1; + bpf_spin_unlock(&storage->lock); - inode_storage_result = - bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + if (!err) + inode_storage_result = err; return 0; } @@ -81,20 +88,26 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, int addrlen) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; + int err; if (pid != monitored_pid) return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; - if (storage->value == DUMMY_STORAGE_VALUE) + bpf_spin_lock(&storage->lock); + if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; + bpf_spin_unlock(&storage->lock); + + err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + if (!err) + sk_storage_result = err; - sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); return 0; } @@ -103,17 +116,19 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int 
protocol, int kern) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; if (pid != monitored_pid) return 0; storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, - BPF_SK_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); return 0; } @@ -122,7 +137,7 @@ SEC("lsm/file_open") int BPF_PROG(file_open, struct file *file) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct dummy_storage *storage; + struct local_storage *storage; if (pid != monitored_pid) return 0; @@ -131,10 +146,30 @@ int BPF_PROG(file_open, struct file *file) return 0; storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return 0; + bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); return 0; } + +/* This uses the local storage to remember the inode of the binary that a + * process was originally executing. + */ +SEC("lsm/bprm_committed_creds") +void BPF_PROG(exec, struct linux_binprm *bprm) +{ + struct local_storage *storage; + + storage = bpf_task_storage_get(&task_storage_map, + bpf_get_current_task_btf(), 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (storage) { + bpf_spin_lock(&storage->lock); + storage->exec_inode = bprm->file->f_inode; + bpf_spin_unlock(&storage->lock); + } +} diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c new file mode 100644 index 000000000000..cf6823f42e80 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_PRESERVE_ACCESS_INDEX +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +#define NULL 0 +#define INLINE __always_inline + +#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end) + +#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr)) + +static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb) +{ + struct iphdr *ip = NULL; + struct ethhdr *eth; + + if (skb_shorter(skb, ETH_IPV4_TCP_SIZE)) + goto out; + + eth = (void *)(long)skb->data; + ip = (void *)(eth + 1); + +out: + return ip; +} + +SEC("classifier/cls") +int main_prog(struct __sk_buff *skb) +{ + struct iphdr *ip = NULL; + struct tcphdr *tcp; + __u8 proto = 0; + + if (!(ip = get_iphdr(skb))) + goto out; + + proto = ip->protocol; + + if (proto != IPPROTO_TCP) + goto out; + + tcp = (void*)(ip + 1); + if (tcp->dest != 0) + goto out; + if (!tcp) + goto out; + + return tcp->urg_ptr; +out: + return -1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c new file mode 100644 index 000000000000..473a22794a62 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, struct bigelement); + __type(value, __u32); +} hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct bigelement); +} 
key_map SEC(".maps"); + +struct bigelement { + int a; + char b[4096]; + long long c; +}; + +SEC("raw_tracepoint/sys_enter") +int bpf_hash_large_key_test(void *ctx) +{ + int zero = 0, err = 1, value = 42; + struct bigelement *key; + + key = bpf_map_lookup_elem(&key_map, &zero); + if (!key) + return 0; + + key->c = 1; + if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY)) + return 0; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c new file mode 100644 index 000000000000..59ef72d02a61 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +SEC("fentry/bpf_sk_storage_free") +int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk) +{ + int *value; + + value = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + + if (value) + *value = 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c new file mode 100644 index 000000000000..8e94e5c080aa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +struct sk_stg { + __u32 pid; + __u32 last_notclose_state; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sk_stg); +} sk_stg_map SEC(".maps"); + +/* Testing delete */ +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} del_sk_stg_map SEC(".maps"); + +char task_comm[16] = ""; + +SEC("tp_btf/inet_sock_set_state") +int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate, + int newstate) +{ + struct sk_stg *stg; + + if (newstate == BPF_TCP_CLOSE) + return 0; + + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!stg) + return 0; + + stg->last_notclose_state = newstate; + + bpf_sk_storage_delete(&del_sk_stg_map, sk); + + return 0; +} + +static void set_task_info(struct sock *sk) +{ + struct task_struct *task; + struct sk_stg *stg; + + stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!stg) + return; + + stg->pid = bpf_get_current_pid_tgid(); + + task = (struct task_struct *)bpf_get_current_task(); + bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm); + bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm); +} + +SEC("fentry/inet_csk_listen_start") +int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog) +{ + set_task_info(sk); + + return 0; +} + +SEC("fentry/tcp_connect") +int BPF_PROG(trace_tcp_connect, struct sock *sk) +{ + set_task_info(sk); + + return 0; +} + +SEC("fexit/inet_csk_accept") +int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern, + struct sock *accepted_sk) +{ + set_task_info(accepted_sk); + + 
return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 3e6912e4df3d..e85e49deba70 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -14,40 +14,7 @@ #include <bpf/bpf_endian.h> #include "test_tcpbpf.h" -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4); - __type(key, __u32); - __type(value, struct tcpbpf_globals); -} global_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 2); - __type(key, __u32); - __type(value, int); -} sockopt_results SEC(".maps"); - -static inline void update_event_map(int event) -{ - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (gp == NULL) { - struct tcpbpf_globals g = {0}; - - g.event_map |= (1 << event); - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } else { - g = *gp; - g.event_map |= (1 << event); - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } -} - +struct tcpbpf_globals global = {}; int _version SEC("version") = 1; SEC("sockops") @@ -105,29 +72,15 @@ int bpf_testcb(struct bpf_sock_ops *skops) op = (int) skops->op; - update_event_map(op); + global.event_map |= (1 << op); switch (op) { case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: /* Test failure to set largest cb flag (assumes not defined) */ - bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); /* Set callback */ - good_call_rv = bpf_sock_ops_cb_flags_set(skops, + global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); - /* Update results */ - { - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (!gp) - break; - g = *gp; - g.bad_cb_test_rv = bad_call_rv; - g.good_cb_test_rv = good_call_rv; - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); - } break; case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: skops->sk_txhash = 0x12345f; @@ -143,10 +96,8 @@ int bpf_testcb(struct bpf_sock_ops *skops) thdr = (struct tcphdr *)(header + offset); v = thdr->syn; - __u32 key = 1; - bpf_map_update_elem(&sockopt_results, &key, &v, - BPF_ANY); + global.tcp_saved_syn = v; } } break; @@ -156,25 +107,16 @@ int bpf_testcb(struct bpf_sock_ops *skops) break; case BPF_SOCK_OPS_STATE_CB: if (skops->args[1] == BPF_TCP_CLOSE) { - __u32 key = 0; - struct tcpbpf_globals g, *gp; - - gp = bpf_map_lookup_elem(&global_map, &key); - if (!gp) - break; - g = *gp; if (skops->args[0] == BPF_TCP_LISTEN) { - g.num_listen++; + global.num_listen++; } else { - g.total_retrans = skops->total_retrans; - g.data_segs_in = skops->data_segs_in; - g.data_segs_out = skops->data_segs_out; - g.bytes_received = skops->bytes_received; - g.bytes_acked = skops->bytes_acked; + global.total_retrans = skops->total_retrans; + global.data_segs_in = skops->data_segs_in; + global.data_segs_out = skops->data_segs_out; + global.bytes_received = skops->bytes_received; + global.bytes_acked = skops->bytes_acked; } - g.num_close_events++; - bpf_map_update_elem(&global_map, &key, &g, - BPF_ANY); + global.num_close_events++; } break; case BPF_SOCK_OPS_TCP_LISTEN_CB: @@ -182,9 +124,7 @@ int bpf_testcb(struct bpf_sock_ops *skops) v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, &save_syn, sizeof(save_syn)); /* Update global map w/ result of setsock opt */ - __u32 key = 0; - - 
bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+		global.tcp_save_syn = v;
 		break;
 	default:
 		rv = -1;
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index f48dbfe24ddc..a621b58ab079 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -15,7 +15,6 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
-#include <linux/tcp.h>
 #include <linux/socket.h>
 #include <linux/pkt_cls.h>
 #include <linux/erspan.h>
@@ -528,12 +527,11 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
@@ -541,16 +539,6 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	key.tunnel_ttl = 64;
 	if (iph->protocol == IPPROTO_ICMP) {
 		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-	} else {
-		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
-			return TC_ACT_SHOT;
-
-		if (tcp->dest == bpf_htons(5200))
-			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-		else if (tcp->dest == bpf_htons(5201))
-			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
-		else
-			return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
@@ -585,19 +573,20 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
-	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
@@ -634,35 +623,19 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct ipv6hdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
-	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
 	key.tunnel_ttl = 64;
 
-	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
-		key.remote_ipv6[3] = bpf_htonl(1);
-	} else {
-		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
-			ERROR(iph->nexthdr);
-			return TC_ACT_SHOT;
-		}
-
-		if (tcp->dest == bpf_htons(5200)) {
-			key.remote_ipv6[3] = bpf_htonl(1);
-		} else if (tcp->dest == bpf_htons(5201)) {
-			key.remote_ipv6[3] = bpf_htonl(2);
-		} else {
-			ERROR(tcp->dest);
-			return TC_ACT_SHOT;
-		}
+	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
deleted file mode 100755
index bfff82be3fc1..000000000000
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time -import subprocess -import select - -def read(sock, n): - buf = b'' - while len(buf) < n: - rem = n - len(buf) - try: s = sock.recv(rem) - except (socket.error) as e: return b'' - buf += s - return buf - -def send(sock, s): - total = len(s) - count = 0 - while count < total: - try: n = sock.send(s) - except (socket.error) as e: n = 0 - if n == 0: - return count; - count += n - return count - - -serverPort = int(sys.argv[1]) - -# create active socket -sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) -try: - sock.connect(('::1', serverPort)) -except socket.error as e: - sys.exit(1) - -buf = b'' -n = 0 -while n < 1000: - buf += b'+' - n += 1 - -sock.settimeout(1); -n = send(sock, buf) -n = read(sock, 500) -sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py deleted file mode 100755 index 42ab8882f00f..000000000000 --- a/tools/testing/selftests/bpf/tcp_server.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -# SPDX-License-Identifier: GPL-2.0 -# - -import sys, os, os.path, getopt -import socket, time -import subprocess -import select - -def read(sock, n): - buf = b'' - while len(buf) < n: - rem = n - len(buf) - try: s = sock.recv(rem) - except (socket.error) as e: return b'' - buf += s - return buf - -def send(sock, s): - total = len(s) - count = 0 - while count < total: - try: n = sock.send(s) - except (socket.error) as e: n = 0 - if n == 0: - return count; - count += n - return count - - -SERVER_PORT = 12877 -MAX_PORTS = 2 - -serverPort = SERVER_PORT -serverSocket = None - -# create passive socket -serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) - -try: serverSocket.bind(('::1', 0)) -except socket.error as msg: - print('bind fails: ' + str(msg)) - -sn = serverSocket.getsockname() -serverPort = sn[1] - -cmdStr = ("./tcp_client.py %d &") % (serverPort) -os.system(cmdStr) - -buf = b'' -n = 0 -while n < 500: - buf += b'.' 
- n += 1 - -serverSocket.listen(MAX_PORTS) -readList = [serverSocket] - -while True: - readyRead, readyWrite, inError = \ - select.select(readList, [], [], 2) - - if len(readyRead) > 0: - waitCount = 0 - for sock in readyRead: - if sock == serverSocket: - (clientSocket, address) = serverSocket.accept() - address = str(address[0]) - readList.append(clientSocket) - else: - sock.settimeout(1); - s = read(sock, 1000) - n = send(sock, buf) - sock.close() - serverSocket.close() - sys.exit(0) - else: - print('Select timeout!') - sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0d92ebcb335d..0ad3e6305ff0 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1223,9 +1223,10 @@ out_map_in_map: static void test_map_large(void) { + struct bigkey { int a; - char b[116]; + char b[4096]; long long c; } key; int fd, i, value; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 238f5f61189e..d6b14853f3bc 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -141,6 +141,17 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_NEQ(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act != ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld == expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + #define ASSERT_STREQ(actual, expected, name) ({ \ static int duration = 0; \ const char *___act = actual; \ diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 6220b95cbd02..0ed33521cbbb 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -14,5 +14,7 @@ struct tcpbpf_globals { __u64 bytes_acked; __u32 num_listen; __u32 num_close_events; + __u32 tcp_save_syn; + __u32 tcp_saved_syn; }; #endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c deleted file mode 100644 index 74a9e49988b6..000000000000 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <inttypes.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <linux/bpf.h> -#include <sys/types.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "bpf_util.h" -#include "cgroup_helpers.h" - -#include "test_tcpbpf.h" - -/* 3 comes from one listening socket + both ends of the connection */ -#define EXPECTED_CLOSE_EVENTS 3 - -#define EXPECT_EQ(expected, actual, fmt) \ - do { \ - if ((expected) != (actual)) { \ - printf(" Value of: " #actual "\n" \ - " Actual: %" fmt "\n" \ - " Expected: %" fmt "\n", \ - (actual), (expected)); \ - ret--; \ - } \ - } while (0) - -int verify_result(const struct tcpbpf_globals *result) -{ - __u32 expected_events; - int ret = 0; - - expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | - (1 << BPF_SOCK_OPS_RWND_INIT) | - (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | - (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | - (1 << BPF_SOCK_OPS_NEEDS_ECN) | - (1 << BPF_SOCK_OPS_STATE_CB) | - (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); - - EXPECT_EQ(expected_events, result->event_map, "#" PRIx32); - 
EXPECT_EQ(501ULL, result->bytes_received, "llu"); - EXPECT_EQ(1002ULL, result->bytes_acked, "llu"); - EXPECT_EQ(1, result->data_segs_in, PRIu32); - EXPECT_EQ(1, result->data_segs_out, PRIu32); - EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); - EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); - EXPECT_EQ(1, result->num_listen, PRIu32); - EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32); - - return ret; -} - -int verify_sockopt_result(int sock_map_fd) -{ - __u32 key = 0; - int ret = 0; - int res; - int rv; - - /* check setsockopt for SAVE_SYN */ - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(0, res, "d"); - key = 1; - /* check getsockopt for SAVED_SYN */ - rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); - EXPECT_EQ(0, rv, "d"); - EXPECT_EQ(1, res, "d"); - return ret; -} - -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -int main(int argc, char **argv) -{ - const char *file = "test_tcpbpf_kern.o"; - int prog_fd, map_fd, sock_map_fd; - struct tcpbpf_globals g = {0}; - const char *cg_path = "/foo"; - int error = EXIT_FAILURE; - struct bpf_object *obj; - int cg_fd = -1; - int retry = 10; - __u32 key = 0; - int rv; - - cg_fd = cgroup_setup_and_join(cg_path); - if (cg_fd < 0) - goto err; - - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { - printf("FAILED: load_bpf_file failed for: %s\n", file); - goto err; - } - - rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); - if (rv) { - printf("FAILED: bpf_prog_attach: %d (%s)\n", - error, strerror(errno)); - goto err; - } - - if (system("./tcp_server.py")) { - printf("FAILED: TCP server\n"); - goto err; - } - - map_fd = bpf_find_map(__func__, obj, "global_map"); - if (map_fd < 0) - goto err; - - sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); - if (sock_map_fd < 0) - goto err; - -retry_lookup: - rv = bpf_map_lookup_elem(map_fd, &key, &g); - if (rv != 0) { - printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); - goto err; - } - - if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) { - printf("Unexpected number of close events (%d), retrying!\n", - g.num_close_events); - usleep(100); - goto retry_lookup; - } - - if (verify_result(&g)) { - printf("FAILED: Wrong stats\n"); - goto err; - } - - if (verify_sockopt_result(sock_map_fd)) { - printf("FAILED: Wrong sockopt stats\n"); - goto err; - } - - printf("PASSED!\n"); - error = 0; -err: - bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); - close(cg_fd); - cleanup_cgroup_environment(); - return error; -} diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index bd12ec97a44d..1ccbe804e8e1 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -24,12 +24,12 @@ # Root namespace with metadata-mode tunnel + BPF # Device names and addresses: # veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) -# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay) +# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) # # Namespace at_ns0 with native tunnel # Device names and addresses: # veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) -# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay) +# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 
(overlay) # # # End-to-end ping packet flow @@ -250,7 +250,7 @@ add_ipip_tunnel() ip addr add dev $DEV 10.1.1.200/24 } -add_ipip6tnl_tunnel() +add_ip6tnl_tunnel() { ip netns exec at_ns0 ip addr add ::11/96 dev veth0 ip netns exec at_ns0 ip link set dev veth0 up @@ -262,11 +262,13 @@ add_ipip6tnl_tunnel() ip link add dev $DEV_NS type $TYPE \ local ::11 remote ::22 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 ip netns exec at_ns0 ip link set dev $DEV_NS up # root namespace ip link add dev $DEV type $TYPE external ip addr add dev $DEV 10.1.1.200/24 + ip addr add dev $DEV 1::22/96 ip link set dev $DEV up } @@ -534,7 +536,7 @@ test_ipip6() check $TYPE config_device - add_ipip6tnl_tunnel + add_ip6tnl_tunnel ip link set dev veth1 mtu 1500 attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel # underlay @@ -553,6 +555,34 @@ test_ipip6() echo -e ${GREEN}"PASS: $TYPE"${NC} } +test_ip6ip6() +{ + TYPE=ip6tnl + DEV_NS=ip6ip6tnl00 + DEV=ip6ip6tnl11 + ret=0 + + check $TYPE + config_device + add_ip6tnl_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + # underlay + ping6 $PING_ARG ::11 + # ip6 over ip6 + ping6 $PING_ARG 1::11 + check_err $? + ip netns exec at_ns0 ping6 $PING_ARG 1::22 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: ip6$TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: ip6$TYPE"${NC} +} + setup_xfrm_tunnel() { auth=0x$(printf '1%.0s' {1..40}) @@ -646,6 +676,7 @@ cleanup() ip link del veth1 2> /dev/null ip link del ipip11 2> /dev/null ip link del ipip6tnl11 2> /dev/null + ip link del ip6ip6tnl11 2> /dev/null ip link del gretap11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null @@ -742,6 +773,10 @@ bpf_tunnel_test() test_ipip6 errors=$(( $errors + $? )) + echo "Testing IP6IP6 tunnel..." + test_ip6ip6 + errors=$(( $errors + $? )) + echo "Testing IPSec tunnel..." test_xfrm_tunnel errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 2e16b8e268f2..2022c0f2cd75 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1089,3 +1089,45 @@ .errstr_unpriv = "R1 leaks addr", .result = REJECT, }, +{ + "pkt > pkt_end taken check", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end) + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. r4 = *(u32 *)(r1 + data) + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42 + BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1), // 8. if r3 > r2 goto 10 + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9) + BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0 + BPF_EXIT_INSN(), // 11. exit + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "pkt_end < pkt taken check", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end) + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. 
r4 = *(u32 *)(r1 + data) + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42 + BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0 + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4 + BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1), // 8. if r2 < r3 goto 10 + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9) + BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0 + BPF_EXIT_INSN(), // 11. exit + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 000000000000..be0c1b5ee6b8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? 
"Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? 
"Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? 
"Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6..fa5fa425d148 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh +TEST_PROGS += bareudp.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh new file mode 100755 index 000000000000..f366cadbc5e8 --- /dev/null +++ b/tools/testing/selftests/net/bareudp.sh @@ -0,0 +1,546 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Test various bareudp tunnel configurations. +# +# The bareudp module allows to tunnel network protocols like IP or MPLS over +# UDP, without adding any intermediate header. This scripts tests several +# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting +# IPv4, IPv6 or MPLS packets on the overlay). +# +# Network topology: +# +# * A chain of 4 network namespaces, connected with veth pairs. Each veth +# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to +# join its peer. +# +# * NS0 and NS3 are at the extremities of the chain. They have additional +# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0 +# and NS3, so that they can communicate using these overlay IP addresses. +# For IPv4 and IPv6 reachability tests, the route simply sets the peer's +# veth address as gateway. For MPLS reachability tests, an MPLS header is +# also pushed before the IP header. +# +# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to +# encapsulate the traffic into UDP. 
+# +# +-----------------------------------------------------------------------+ +# | NS0 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.100/32 | +# | * IPv6 address: 2001:db8::100/128 | +# | * IPv6 address: 2001:db8::200/128 | +# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 | +# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 | +# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 | +# | (encapsulated with MPLS label 203) | +# | | +# | veth01: | +# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 | +# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS1 | +# | | | +# | v | +# | veth10: | +# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 | +# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 | +# | | +# | bareudp_ns1: | +# | * Encapsulate IP or MPLS packets received on veth10 into UDP | +# | and send the resulting packets through veth12. | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth12 and send the inner packets through veth10. | +# | | +# | veth12: | +# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 | +# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test), over UDP +# | +# +---+-------------------------------------------------------------------+ +# | | NS2 | +# | | | +# | v | +# | veth21: | +# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 | +# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 | +# | | +# | bareudp_ns2: | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth21 and send the inner packets through veth23. | +# | * Encapsulate IP or MPLS packets received on veth23 into UDP | +# | and send the resulting packets through veth21. | +# | | +# | veth23: | +# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 | +# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS3 | +# | v | +# | veth32: | +# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 | +# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.103/32 | +# | * IPv6 address: 2001:db8::103/128 | +# | * IPv6 address: 2001:db8::203/128 | +# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 | +# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 | +# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 | +# | (encapsulated with MPLS label 200) | +# | | +# +-----------------------------------------------------------------------+ + +ERR=4 # Return 4 by default, which is the SKIP code for kselftest +PING6="ping" +PAUSE_ON_FAIL="no" + +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) +readonly NS2=$(mktemp -u ns2-XXXXXXXX) +readonly NS3=$(mktemp -u ns3-XXXXXXXX) + +# Exit the script after having removed the network namespaces it created +# +# Parameters: +# +# * The list of network namespaces to delete before exiting. 
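+#
+# Example (mirroring how create_namespaces() below recovers from a failed
+# "ip netns add", passing only the namespaces created so far):
+#
+#   exit_cleanup "${NS0}" "${NS1}"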
+#
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	if [ "${ERR}" -eq 4 ]; then
+		echo "Error: Setting up the testing environment failed." >&2
+	fi
+
+	exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+	ip netns add "${NS0}" || exit_cleanup
+	ip netns add "${NS1}" || exit_cleanup "${NS0}"
+	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The EXIT trap handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+# * $1: the netns the network interface resides in,
+# * $2: the network interface name,
+# * $3: the local IPv4 address to assign to this interface,
+# * $4: the IPv4 address of the remote network interface,
+# * $5: the local IPv6 address to assign to this interface,
+# * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+	local NS="${1}"; readonly NS
+	local DEV="${2}"; readonly DEV
+	local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+	local PEER_IP4="${4}"; readonly PEER_IP4
+	local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+	local PEER_IP6="${6}"; readonly PEER_IP6
+
+	ip -netns "${NS}" link set dev "${DEV}" up
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+# * set up the loopback device in all network namespaces (NS0..NS3),
+# * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+# NS1 with NS2, etc.),
+# * add an IPv4 and an IPv6 address on each veth interface,
+# * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+	iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+	iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+	iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+	iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+	iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+	iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+	tc -netns "${NS1}" qdisc add dev veth10 ingress
+	tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+# * add an overlay IP address on the loopback interface of each edge
+# namespace,
+# * route these IP addresses via the intermediate namespaces (for the MPLS
+# tests, this is also where MPLS encapsulation is done),
+# * add routes for these IP addresses (or MPLS labels) in the intermediate
+# namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
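+#
+# The least obvious of the three overlays is MPLS: as set up in
+# setup_overlay_mpls() below, NS0's egress route pushes label 203 before
+# handing the packet to its veth:
+#
+#   ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11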
+ +setup_overlay_ipv4() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 192.0.2.100/32 dev lo + ip -netns "${NS3}" address add 192.0.2.103/32 dev lo + ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11 + ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 + ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 + ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 + + # The intermediate namespaces don't have routes for the reverse path, + # as it will be handled by tc. So we need to ensure that rp_filter is + # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 +} + +setup_overlay_ipv6() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 2001:db8::100/128 dev lo + ip -netns "${NS3}" address add 2001:db8::103/128 dev lo + ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10 + ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33 +} + +setup_overlay_mpls() +{ + # Add specific overlay IP addresses, routed over MPLS + ip -netns "${NS0}" address add 2001:db8::200/128 dev lo + ip -netns "${NS3}" address add 2001:db8::203/128 dev lo + ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32 + + # Route the MPLS packets in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256 + ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256 + ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10 + ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33 +} + +# Run "ping" from NS0 and print the result +# +# Parameters: +# +# * $1: the variant of ping to use (normally either "ping" or "ping6"), +# * $2: the IP address to ping, +# * $3: a human readable description of the purpose of the test. +# +# If the test fails and PAUSE_ON_FAIL is active, the user is given the +# possibility to continue with the next test or to quit immediately. +# +ping_test_one() +{ + local PING="$1"; readonly PING + local IP="$2"; readonly IP + local MSG="$3"; readonly MSG + local RET + + printf "TEST: %-60s " "${MSG}" + + set +e + ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1 + RET=$? 
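+	# "set +e" above and "set -e" below bracket the ping so that a failed
+	# probe is recorded in RET and reported, instead of aborting the
+	# whole script.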
+ set -e + + if [ "${RET}" -eq 0 ]; then + printf "[ OK ]\n" + else + ERR=1 + printf "[FAIL]\n" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + printf "\nHit enter to continue, 'q' to quit\n" + read a + if [ "$a" = "q" ]; then + exit 1 + fi + fi + fi +} + +# Run reachability tests +# +# Parameters: +# +# * $1: human readable string describing the underlay protocol. +# +# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling +# function. +# +ping_test() +{ + local UNDERLAY="$1"; readonly UNDERLAY + local MODE + local MSG + + if [ "${MULTIPROTO}" = "multiproto" ]; then + MODE=" (multiproto mode)" + else + MODE="" + fi + + if [ $IPV4 ]; then + ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}" + fi + if [ $IPV6 ]; then + ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}" + fi + if [ $MPLS_UC ]; then + ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}" + fi +} + +# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6 +# +# Parameters: +# +# * $1: the packet type (protocol) to be handled by bareudp, +# * $2: a flag to activate or deactivate bareudp's "multiproto" mode. +# +test_overlay() +{ + local ETHERTYPE="$1"; readonly ETHERTYPE + local MULTIPROTO="$2"; readonly MULTIPROTO + local IPV4 + local IPV6 + local MPLS_UC + + case "${ETHERTYPE}" in + "ipv4") + IPV4="ipv4" + if [ "${MULTIPROTO}" = "multiproto" ]; then + IPV6="ipv6" + else + IPV6="" + fi + MPLS_UC="" + ;; + "ipv6") + IPV6="ipv6" + IPV4="" + MPLS_UC="" + ;; + "mpls_uc") + MPLS_UC="mpls_uc" + IPV4="" + IPV6="" + ;; + *) + exit 1 + ;; + esac + readonly IPV4 + readonly IPV6 + readonly MPLS_UC + + # Create the bareudp devices in the intermediate namespaces + ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + + # IPv4 over UDPv4 + if [ $IPV4 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv4 + if [ $IPV6 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv4 + if [ $MPLS_UC ]; then + ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1 + ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1 + + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev 
veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv4 underlay + ping_test "UDPv4" + + # Cleanup bareudp encapsulation instructions, as they were specific to + # the IPv4 underlay, before setting up and testing the IPv6 underlay + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + + # IPv4 over UDPv6 + if [ $IPV4 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv6 + if [ $IPV6 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv6 + if [ $MPLS_UC ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv6 underlay + ping_test "UDPv6" + + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + ip -netns "${NS1}" link delete bareudp_ns1 + ip -netns "${NS2}" link delete bareudp_ns2 +} + +check_features() +{ + ip link help 2>&1 | grep -q bareudp + if [ $? -ne 0 ]; then + echo "Missing bareudp support in iproute2" >&2 + exit_cleanup + fi + + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6" +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +check_features + +# Create namespaces before setting up the exit trap. +# Otherwise, exit_cleanup_all() could delete namespaces that were not created +# by this script. +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_underlay +setup_overlay_ipv4 +setup_overlay_ipv6 +setup_overlay_mpls + +test_overlay ipv4 nomultiproto +test_overlay ipv6 nomultiproto +test_overlay ipv4 multiproto +test_overlay mpls_uc nomultiproto + +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." 
>&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 4d5df8e1eee7..614d5477365a 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m CONFIG_NET_FOU=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_BAREUDP=m diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 88d2472ba151..675eff45b037 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,11 +1,37 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="reportleave_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ + v3exc_timeout_test v3star_ex_auto_add_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +ALL_GROUP="224.0.0.1" +ALL_MAC="01:00:5e:00:00:01" + +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3 +MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30 +MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30 +MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 +MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30 +MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" +# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" + source lib.sh h1_create() @@ -79,38 +105,7 @@ cleanup() vrf_cleanup } -# return 0 if the packet wasn't seen on host2_if or 1 if it was -mcast_packet_test() -{ - local mac=$1 - local ip=$2 - local host1_if=$3 - local host2_if=$4 - local seen=0 - - # Add an ACL on `host2_if` which will tell us whether the packet - # was received by it or not. 
- tc qdisc add dev $host2_if ingress - tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ - flower dst_mac $mac action drop - - $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q - sleep 1 - - tc -j -s filter show dev $host2_if ingress \ - | jq -e ".[] | select(.options.handle == 101) \ - | select(.options.actions[0].stats.packets == 1)" &> /dev/null - if [[ $? -eq 0 ]]; then - seen=1 - fi - - tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower - tc qdisc del dev $host2_if ingress - - return $seen -} - -reportleave_test() +v2reportleave_test() { RET=0 ip address add dev $h2 $TEST_GROUP/32 autojoin @@ -118,12 +113,12 @@ reportleave_test() sleep 5 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null - check_err $? "Report didn't create mdb entry for $TEST_GROUP" + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" - log_test "IGMP report $TEST_GROUP" + log_test "IGMPv2 report $TEST_GROUP" RET=0 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null @@ -136,10 +131,424 @@ reportleave_test() bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" - log_test "IGMP leave $TEST_GROUP" + log_test "IGMPv2 leave $TEST_GROUP" +} + +v3include_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + ip link set dev br0 type bridge mcast_igmp_version 3 + check_err $? "Could not change bridge IGMP version to 3" + + $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +v3exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("192.0.2.1" "192.0.2.2") + local Y=("192.0.2.20" "192.0.2.21") + + v3include_prepare $host1_if $mac $group + + $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.3\")" &>/dev/null + check_fail $? 
"Wrong *,G entry source list, 192.0.2.3 entry still exists" +} + +v3cleanup() +{ + local port=$1 + local group=$2 + + bridge mdb del dev br0 port $port grp $group + ip link set dev br0 type bridge mcast_igmp_version 2 +} + +v3include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_allow_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_include_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_exclude_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1") + local Y=("192.0.2.20" "192.0.2.30") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.21\")" &>/dev/null + check_fail $? 
"Wrong *,G entry source list, 192.0.2.21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_allow_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_exclude_test() +{ + RET=0 + local X=("192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_block_test() +{ + RET=0 + local X=("192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? 
"Wrong *,G entry source list, 192.0.2.1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> block" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_block_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.30") + local Y=("192.0.2.20" "192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_timeout_test() +{ + RET=0 + local X=("192.0.2.20" "192.0.2.30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 192.0.2.100 + + log_test "IGMPv3 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + v3cleanup $swp1 $TEST_GROUP +} + +v3star_ex_auto_add_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? 
"Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 192.0.2.3 + + log_test "IGMPv3 S,G port entry automatic add to a *,G port" + + v3cleanup $swp1 $TEST_GROUP + v3cleanup $swp2 $TEST_GROUP } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh new file mode 100755 index 000000000000..ffdcfa87ca2b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -0,0 +1,558 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ + mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ + mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ + mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="ff02::cc" +TEST_GROUP_MAC="33:33:00:00:00:cc" + +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3 +MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\ +00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ +00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ +00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\ +05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\ +00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\ +00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ +00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ +02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ +00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ +00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30 +MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 is_ex report: grp ff02::cc is_exclude 
2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
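+# Each payload above is a complete Ethernet frame carrying a raw MLDv2
+# report. The record type byte of every multicast address record selects
+# the semantics (RFC 3810, mirroring IGMPv3 in RFC 3376): 1 is_include,
+# 2 is_exclude, 3 to_include, 4 to_exclude, 5 allow, 6 block; e.g.
+# "01:00:00:03" in MZPKT_IS_INC is a MODE_IS_INCLUDE record carrying 3
+# source addresses.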
"Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +mldv2exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("2001:db8:1::1" "2001:db8:1::2") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2include_prepare $h1 + + $MZ $host1_if -c 1 $MZPKT_IS_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists" +} + +mldv2cleanup() +{ + local port=$1 + + bridge mdb del dev br0 port $port grp $TEST_GROUP + ip link set dev br0 type bridge mcast_mld_version 1 +} + +mldv2include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_allow_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> allow" + + mldv2cleanup $swp1 +} + +mldv2inc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC2 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_is_exclude_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2inc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1") + local Y=("2001:db8:1::20" "2001:db8:1::30") + + mldv2include_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? 
"Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_allow_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> allow" + + mldv2cleanup $swp1 +} + +mldv2exc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC3 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_include" + + mldv2cleanup $swp1 +} + +mldv2exc_is_exclude_test() +{ + RET=0 + local X=("2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2exc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? 
"Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2inc_block_test() +{ + RET=0 + local X=("2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 report $TEST_GROUP include -> block" + + mldv2cleanup $swp1 +} + +mldv2exc_block_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_timeout_test() +{ + RET=0 + local X=("2001:db8:1::20" "2001:db8:1::30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + mldv2exclude_prepare $h1 + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? 
"Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + mldv2cleanup $swp1 +} + +mldv2star_ex_auto_add_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + $MZ $h2 -c 1 $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 2001:db8:1::3 + + log_test "MLDv2 S,G port entry automatic add to a *,G port" + + mldv2cleanup $swp1 + mldv2cleanup $swp2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 927f9ba49e08..98ea37d26c44 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1270,3 +1270,110 @@ tcpdump_show() { tcpdump -e -n -r $capfile 2>&1 } + +# return 0 if the packet wasn't seen on host2_if or 1 if it was +mcast_packet_test() +{ + local mac=$1 + local src_ip=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local seen=0 + local tc_proto="ip" + local mz_v6arg="" + + # basic check to see if we were passed an IPv4 address, if not assume IPv6 + if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + tc_proto="ipv6" + mz_v6arg="-6" + fi + + # Add an ACL on `host2_if` which will tell us whether the packet + # was received by it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \ + flower ip_proto udp dst_mac $mac action drop + + $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -eq 0 ]]; then + seen=1 + fi + + tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $seen +} + +brmcast_check_sg_entries() +{ + local report=$1; shift + local slist=("$@") + local sarg="" + + for src in "${slist[@]}"; do + sarg="${sarg} and .source_list[].address == \"$src\"" + done + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null + check_err $? "Wrong *,G entry source list after $report report" + + for sgent in "${slist[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null + check_err $? 
"Missing S,G entry ($sgent, $TEST_GROUP)" + done +} + +brmcast_check_sg_fwding() +{ + local should_fwd=$1; shift + local sources=("$@") + + for src in "${sources[@]}"; do + local retval=0 + + mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 + retval=$? + if [ $should_fwd -eq 1 ]; then + check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" + else + check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" + fi + done +} + +brmcast_check_sg_state() +{ + local is_blocked=$1; shift + local sources=("$@") + local should_fail=1 + + if [ $is_blocked -eq 1 ]; then + should_fail=0 + fi + + for src in "${sources[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null) | + .source_list[] | + select(.address == \"$src\") | + select(.timer == \"0.00\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has zero timer" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ + .flags[] == \"blocked\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has blocked flag" + done +} diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 741a1c4f4ae8..0faaccd21447 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 08f53d86dedc..0d93b243695f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -13,6 +13,24 @@ capture=0 TEST_COUNT=0 +# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || +# (ip6 && (ip6[74] & 0xf0) == 0x30)'" +CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, + 48 0 0 0, + 84 0 0 240, + 21 0 3 64, + 48 0 0 54, + 84 0 0 240, + 21 6 7 48, + 48 0 0 0, + 84 0 0 240, + 21 0 4 96, + 48 0 0 74, + 84 0 0 240, + 21 0 1 48, + 6 0 0 65535, + 6 0 0 0" + init() { capout=$(mktemp) @@ -82,6 +100,26 @@ reset_with_cookies() done } +reset_with_add_addr_timeout() +{ + local ip="${1:-4}" + local tables + + tables="iptables" + if [ $ip -eq 6 ]; then + tables="ip6tables" + fi + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP +} + for arg in "$@"; do if [ "$arg" = "-c" ]; then capture=1 @@ -94,6 +132,17 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +iptables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without iptables tool" + exit $ksft_skip +fi + +ip6tables -V > /dev/null 2>&1 +if [ $? 
-ne 0 ];then + echo "SKIP: Could not run all tests without ip6tables tool" + exit $ksft_skip +fi check_transfer() { @@ -135,6 +184,7 @@ do_transfer() connect_addr="$5" rm_nr_ns1="$6" rm_nr_ns2="$7" + speed="$8" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -159,7 +209,7 @@ do_transfer() sleep 1 fi - if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + if [ $speed = "fast" ]; then mptcp_connect="./mptcp_connect -j" else mptcp_connect="./mptcp_connect -r" @@ -250,26 +300,13 @@ run_tests() listener_ns="$1" connector_ns="$2" connect_addr="$3" + rm_nr_ns1="${4:-0}" + rm_nr_ns2="${5:-0}" + speed="${6:-fast}" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 - lret=$? - if [ $lret -ne 0 ]; then - ret=$lret - return - fi -} - -run_remove_tests() -{ - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - rm_nr_ns1="$4" - rm_nr_ns2="$5" - lret=0 - - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ + ${rm_nr_ns1} ${rm_nr_ns2} ${speed} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# add_addr timeout +reset_with_add_addr_timeout +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 0 0 slow +chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 +chk_add_nr 4 0 + # single subflow, remove reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +run_tests $ns1 $ns2 10.0.1.1 0 1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +run_tests $ns1 $ns2 10.0.1.1 0 2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -515,7 +561,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +run_tests $ns1 $ns2 10.0.1.1 1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +run_tests $ns1 $ns2 10.0.1.1 1 1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +run_tests $ns1 $ns2 10.0.1.1 1 2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e12..464e31eabc73 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -355,7 
+355,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -366,7 +366,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -380,8 +380,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -713,7 +713,7 @@ setup_routing() { } setup_bridge() { - run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -765,7 +765,7 @@ setup_ovs_vxlan6() { } setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -887,7 +887,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing bridge bridged_${type}4 || return 2 + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing bridge bridged_${type}6 || return 2 + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) 
fi @@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() { } test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? 
-ne 0 ] && err " dummy not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
 	run_cmd ${ns_a} ip link set dummy0 up
 	run_cmd ${ns_a} ip link set dummy1 up
@@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() {
 	encap="vxlan"
 	ll_mtu=4000
 
-	check_command taskset || return 2
+	check_command taskset || return $ksft_skip
 	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
 
-	setup namespaces routing ${encap}${outer} || return 2
+	setup namespaces routing ${encap}${outer} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
 	      "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
@@ -1644,7 +1644,7 @@ run_test() {
 		fi
 		err_flush
 		exit 1
-	elif [ $ret -eq 2 ]; then
+	elif [ $ret -eq $ksft_skip ]; then
 		printf "TEST: %-60s [SKIP]\n" "${tdesc}"
 		err_flush
 	fi
@@ -1652,7 +1652,21 @@ run_test() {
 	return $ret
 	)
 	ret=$?
-	[ $ret -ne 0 ] && exitcode=1
+	# a passing test overrides an earlier skip-only exit code; any
+	# failure makes the whole run fail
+	case $ret in
+		0)
+			all_skipped=false
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
+			;;
+		$ksft_skip)
+			[ $all_skipped = true ] && exitcode=$ksft_skip
+			;;
+		*)
+			all_skipped=false
+			exitcode=1
+			;;
+	esac
 
 	return $ret
 }
@@ -1667,7 +1679,7 @@ run_test_nh() {
 }
 
 test_list_flush_ipv4_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1721,7 +1733,7 @@ test_list_flush_ipv4_exception() {
 }
 
 test_list_flush_ipv6_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1786,6 +1798,7 @@ usage() {
 #
 exitcode=0
 desc=0
+all_skipped=true
 
 while getopts :ptv o
 do
@@ -1840,7 +1853,7 @@ for t in ${tests}; do
 	if [ $run_this -eq 1 ]; then
 		run_test "${name}" "${desc}"
 		# if test was skipped no need to retry with nexthop objects
-		[ $? -eq 2 ] && rerun_nh=0
+		[ $? -eq $ksft_skip ] && rerun_nh=0
 
 		if [ "${rerun_nh}" = "1" ]; then
 			run_test_nh "${name}" "${desc}"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 2c522f7a0aec..db4521335722 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -56,12 +56,15 @@
 
 #define RING_NUM_FRAMES	20
 
+static uint32_t cfg_max_num_members;
+
 /* Open a socket in a given fanout mode. 
* @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { struct sockaddr_ll addr = {0}; - int fd, val; + struct fanout_args args; + int fd, val, err; fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { @@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) exit(1); } - val = (((int) typeflags) << 16) | group_id; - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (cfg_max_num_members) { + args.id = group_id; + args.type_flags = typeflags; + args.max_num_members = cfg_max_num_members; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, + sizeof(args)); + } else { + val = (((int) typeflags) << 16) | group_id; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, + sizeof(val)); + } + if (err) { if (close(fd)) { perror("close packet"); exit(1); @@ -286,6 +299,56 @@ static void test_control_group(void) } } +/* Test illegal max_num_members values */ +static void test_control_group_max_num_members(void) +{ + int fds[3]; + + fprintf(stderr, "test: control multiple sockets, max_num_members\n"); + + /* expected failure on greater than PACKET_FANOUT_MAX */ + cfg_max_num_members = (1 << 16) + 1; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n"); + exit(1); + } + + cfg_max_num_members = 256; + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + + /* expected failure on joining group with different max_num_members */ + cfg_max_num_members = 257; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: set different max_num_members\n"); + exit(1); + } + + /* success on joining group with same max_num_members */ + cfg_max_num_members = 256; + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + /* success on joining group with max_num_members unspecified */ + cfg_max_num_members = 0; + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[2] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + if (close(fds[2]) || close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + /* Test creating a unique fanout group ids */ static void test_unique_fanout_group_ids(void) { @@ -426,8 +489,11 @@ int main(int argc, char **argv) test_control_single(); test_control_group(); + test_control_group_max_num_members(); test_unique_fanout_group_ids(); + /* PACKET_FANOUT_MAX */ + cfg_max_num_members = 1 << 16; /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index f4bb4fef0f39..21091be70688 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -59,7 +59,8 @@ static void usage(const char *error) " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" - " SIOCGSTAMPNS - more accurate socket time stamp\n"); + " SIOCGSTAMPNS - more accurate socket time stamp\n" + " PTPV2 - use PTPv2 messages\n"); exit(1); } @@ -115,13 +116,28 @@ static const unsigned char sync[] = { 0x00, 0x00, 
0x00, 0x00 };
 
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+/* PTPv2 (IEEE 1588) Sync message: messageType 0, versionPTP 2,
+ * messageLength 0x2C (44 octets); received copies are matched by the
+ * HWTSTAMP_FILTER_PTP_V2_L4_SYNC filter selected below. */
+static const unsigned char sync_v2[] = {
+	0x00, 0x02, 0x00, 0x2C,
+	0x00, 0x00, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0xFF,
+	0xFE, 0x00, 0x00, 0x00,
+	0x00, 0x01, 0x00, 0x01,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
 {
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct timeval now;
 	int res;
 
-	res = sendto(sock, sync, sizeof(sync), 0,
-		addr, addr_len);
+	res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
 	gettimeofday(&now, 0);
 	if (res < 0)
 		printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
 
 static void printpacket(struct msghdr *msg, int res,
 			char *data,
 			int sock, int recvmsg_flags,
-			int siocgstamp, int siocgstampns)
+			int siocgstamp, int siocgstampns, int ptpv2)
 {
 	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct cmsghdr *cmsg;
 	struct timeval tv;
 	struct timespec ts;
@@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res,
 			"probably SO_EE_ORIGIN_TIMESTAMPING"
 #endif
 				);
-			if (res < sizeof(sync))
+			if (res < sync_len)
 				printf(" => truncated data?!");
-			else if (!memcmp(sync, data + res - sizeof(sync),
-							sizeof(sync)))
+			else if (!memcmp(sync_p, data + res - sync_len, sync_len))
 				printf(" => GOT OUR DATA BACK (HURRAY!)");
 			break;
 		}
@@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res,
 }
 
 static void recvpacket(int sock, int recvmsg_flags,
-		       int siocgstamp, int siocgstampns)
+		       int siocgstamp, int siocgstampns, int ptpv2)
 {
 	char data[256];
 	struct msghdr msg;
@@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags,
 	} else {
 		printpacket(&msg, res, data,
 			    sock, recvmsg_flags,
-			    siocgstamp, siocgstampns);
+			    siocgstamp, siocgstampns, ptpv2);
 	}
 }
 
@@ -300,6 +317,7 @@ int main(int argc, char **argv)
 	int siocgstamp = 0;
 	int siocgstampns = 0;
 	int ip_multicast_loop = 0;
+	int ptpv2 = 0;
 	char *interface;
 	int i;
 	int enabled = 1;
@@ -335,6 +353,8 @@ int main(int argc, char **argv)
 			siocgstampns = 1;
 		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
 			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "PTPV2"))
+			ptpv2 = 1;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
 			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
@@ -369,6 +389,7 @@ int main(int argc, char **argv)
 		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
 	hwconfig.rx_filter =
 		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
 		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
 	hwconfig_requested = hwconfig;
 	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -496,16 +517,16 @@ int main(int argc, char **argv)
 				printf("has error\n");
 			recvpacket(sock, 0,
 				   siocgstamp,
-				   siocgstampns);
+				   siocgstampns, ptpv2);
 			recvpacket(sock, MSG_ERRQUEUE,
 				   siocgstamp,
-				   siocgstampns);
+				   siocgstampns, ptpv2);
 		}
 	} else {
 		/* write one packet */
 		sendpacket(sock,
 			   (struct sockaddr *)&addr,
-			   sizeof(addr));
+			   sizeof(addr), ptpv2);
 		next.tv_sec += 5;
 		continue;
 	}