diff options
52 files changed, 571 insertions, 226 deletions
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt new file mode 100644 index 000000000000..031ef4a63485 --- /dev/null +++ b/Documentation/networking/vrf.txt @@ -0,0 +1,96 @@ +Virtual Routing and Forwarding (VRF) +==================================== +The VRF device combined with ip rules provides the ability to create virtual +routing and forwarding domains (aka VRFs, VRF-lite to be specific) in the +Linux network stack. One use case is the multi-tenancy problem where each +tenant has their own unique routing tables and in the very least need +different default gateways. + +Processes can be "VRF aware" by binding a socket to the VRF device. Packets +through the socket then use the routing table associated with the VRF +device. An important feature of the VRF device implementation is that it +impacts only Layer 3 and above so L2 tools (e.g., LLDP) are not affected +(ie., they do not need to be run in each VRF). The design also allows +the use of higher priority ip rules (Policy Based Routing, PBR) to take +precedence over the VRF device rules directing specific traffic as desired. + +In addition, VRF devices allow VRFs to be nested within namespaces. For +example network namespaces provide separation of network interfaces at L1 +(Layer 1 separation), VLANs on the interfaces within a namespace provide +L2 separation and then VRF devices provide L3 separation. + +Design +------ +A VRF device is created with an associated route table. Network interfaces +are then enslaved to a VRF device: + + +-----------------------------+ + | vrf-blue | ===> route table 10 + +-----------------------------+ + | | | + +------+ +------+ +-------------+ + | eth1 | | eth2 | ... | bond1 | + +------+ +------+ +-------------+ + | | + +------+ +------+ + | eth8 | | eth9 | + +------+ +------+ + +Packets received on an enslaved device and are switched to the VRF device +using an rx_handler which gives the impression that packets flow through +the VRF device. Similarly on egress routing rules are used to send packets +to the VRF device driver before getting sent out the actual interface. This +allows tcpdump on a VRF device to capture all packets into and out of the +VRF as a whole.[1] Similiarly, netfilter [2] and tc rules can be applied +using the VRF device to specify rules that apply to the VRF domain as a whole. + +[1] Packets in the forwarded state do not flow through the device, so those + packets are not seen by tcpdump. Will revisit this limitation in a + future release. + +[2] Iptables on ingress is limited to NF_INET_PRE_ROUTING only with skb->dev + set to real ingress device and egress is limited to NF_INET_POST_ROUTING. + Will revisit this limitation in a future release. + + +Setup +----- +1. VRF device is created with an association to a FIB table. + e.g, ip link add vrf-blue type vrf table 10 + ip link set dev vrf-blue up + +2. Rules are added that send lookups to the associated FIB table when the + iif or oif is the VRF device. e.g., + ip ru add oif vrf-blue table 10 + ip ru add iif vrf-blue table 10 + + Set the default route for the table (and hence default route for the VRF). + e.g, ip route add table 10 prohibit default + +3. Enslave L3 interfaces to a VRF device. + e.g, ip link set dev eth1 master vrf-blue + + Local and connected routes for enslaved devices are automatically moved to + the table associated with VRF device. Any additional routes depending on + the enslaved device will need to be reinserted following the enslavement. + +4. Additional VRF routes are added to associated table. + e.g., ip route add table 10 ... + + +Applications +------------ +Applications that are to work within a VRF need to bind their socket to the +VRF device: + + setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, dev, strlen(dev)+1); + +or to specify the output device using cmsg and IP_PKTINFO. + + +Limitations +----------- +VRF device currently only works for IPv4. Support for IPv6 is under development. + +Index of original ingress interface is not available via cmsg. Will address +soon. diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 6294b5186ae5..809ab6efcc74 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,13 +54,15 @@ default_qdisc -------------- The default queuing discipline to use for network devices. This allows -overriding the default queue discipline of pfifo_fast with an -alternative. Since the default queuing discipline is created with the -no additional parameters so is best suited to queuing disciplines that -work well without configuration like stochastic fair queue (sfq), -CoDel (codel) or fair queue CoDel (fq_codel). Don't use queuing disciplines -like Hierarchical Token Bucket or Deficit Round Robin which require setting -up classes and bandwidths. +overriding the default of pfifo_fast with an alternative. Since the default +queuing discipline is created without additional parameters so is best suited +to queuing disciplines that work well without configuration like stochastic +fair queue (sfq), CoDel (codel) or fair queue CoDel (fq_codel). Don't use +queuing disciplines like Hierarchical Token Bucket or Deficit Round Robin +which require setting up classes and bandwidths. Note that physical multiqueue +interfaces still use mq as root qdisc, which in turn uses this default for its +leaves. Virtual devices (like e.g. lo or veth) ignore this setting and instead +default to noqueue. Default: pfifo_fast busy_read diff --git a/MAINTAINERS b/MAINTAINERS index 310da4295c70..e1c9fbb8bf92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -808,6 +808,13 @@ S: Maintained F: drivers/video/fbdev/arcfb.c F: drivers/video/fbdev/core/fb_defio.c +ARCNET NETWORK LAYER +M: Michael Grzeschik <m.grzeschik@pengutronix.de> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/arcnet/ +F: include/uapi/linux/if_arcnet.h + ARM MFM AND FLOPPY DRIVERS M: Ian Molton <spyro@f2s.com> S: Maintained @@ -8490,7 +8497,6 @@ F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Shahed Shaikh <shahed.shaikh@qlogic.com> M: Dept-GELinuxNICDev@qlogic.com L: netdev@vger.kernel.org S: Supported @@ -11243,6 +11249,7 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/vrf.c F: include/net/vrf.h +F: Documentation/networking/vrf.txt VT1211 HARDWARE MONITOR DRIVER M: Juerg Haefliger <juergh@gmail.com> diff --git a/drivers/atm/he.c b/drivers/atm/he.c index a8da3a50e374..0f5cb37636bc 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1578,9 +1578,7 @@ he_stop(struct he_dev *he_dev) kfree(he_dev->rbpl_virt); kfree(he_dev->rbpl_table); - - if (he_dev->rbpl_pool) - dma_pool_destroy(he_dev->rbpl_pool); + dma_pool_destroy(he_dev->rbpl_pool); if (he_dev->rbrq_base) dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), @@ -1594,8 +1592,7 @@ he_stop(struct he_dev *he_dev) dma_free_coherent(&he_dev->pci_dev->dev, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), he_dev->tpdrq_base, he_dev->tpdrq_phys); - if (he_dev->tpd_pool) - dma_pool_destroy(he_dev->tpd_pool); + dma_pool_destroy(he_dev->tpd_pool); if (he_dev->pci_dev) { pci_read_config_word(he_dev->pci_dev, PCI_COMMAND, &command); diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 74e18b0a6d89..3d7fb6516f74 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -805,7 +805,12 @@ static void solos_bh(unsigned long card_arg) continue; } - skb = alloc_skb(size + 1, GFP_ATOMIC); + /* Use netdev_alloc_skb() because it adds NET_SKB_PAD of + * headroom, and ensures we can route packets back out an + * Ethernet interface (for example) without having to + * reallocate. Adding NET_IP_ALIGN also ensures that both + * PPPoATM and PPPoEoBR2684 packets end up aligned. */ + skb = netdev_alloc_skb_ip_align(NULL, size + 1); if (!skb) { if (net_ratelimit()) dev_warn(&card->dev->dev, "Failed to allocate sk_buff for RX\n"); @@ -869,7 +874,10 @@ static void solos_bh(unsigned long card_arg) /* Allocate RX skbs for any ports which need them */ if (card->using_dma && card->atmdev[port] && !card->rx_skb[port]) { - struct sk_buff *skb = alloc_skb(RX_DMA_SIZE, GFP_ATOMIC); + /* Unlike the MMIO case (qv) we can't add NET_IP_ALIGN + * here; the FPGA can only DMA to addresses which are + * aligned to 4 bytes. */ + struct sk_buff *skb = dev_alloc_skb(RX_DMA_SIZE); if (skb) { SKB_CB(skb)->dma_addr = dma_map_single(&card->dev->dev, skb->data, diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 10f71c732b59..816d0e94961c 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -326,7 +326,7 @@ static void arcdev_setup(struct net_device *dev) dev->type = ARPHRD_ARCNET; dev->netdev_ops = &arcnet_netdev_ops; dev->header_ops = &arcnet_header_ops; - dev->hard_header_len = sizeof(struct archdr); + dev->hard_header_len = sizeof(struct arc_hardware); dev->mtu = choose_mtu(); dev->addr_len = ARCNET_ALEN; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index ba936635322a..b5e64b02200c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1946,6 +1946,7 @@ struct bnx2x { u16 vlan_cnt; u16 vlan_credit; u16 vxlan_dst_port; + u8 vxlan_dst_port_count; bool accept_any_vlan; }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 89a174fa1300..f1d62d5dbaff 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10108,12 +10108,18 @@ static void __bnx2x_add_vxlan_port(struct bnx2x *bp, u16 port) if (!netif_running(bp->dev)) return; - if (bp->vxlan_dst_port || !IS_PF(bp)) { + if (bp->vxlan_dst_port_count && bp->vxlan_dst_port == port) { + bp->vxlan_dst_port_count++; + return; + } + + if (bp->vxlan_dst_port_count || !IS_PF(bp)) { DP(BNX2X_MSG_SP, "Vxlan destination port limit reached\n"); return; } bp->vxlan_dst_port = port; + bp->vxlan_dst_port_count = 1; bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_ADD_VXLAN_PORT, 0); } @@ -10128,10 +10134,14 @@ static void bnx2x_add_vxlan_port(struct net_device *netdev, static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) { - if (!bp->vxlan_dst_port || bp->vxlan_dst_port != port || !IS_PF(bp)) { + if (!bp->vxlan_dst_port_count || bp->vxlan_dst_port != port || + !IS_PF(bp)) { DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); return; } + bp->vxlan_dst_port--; + if (bp->vxlan_dst_port) + return; if (netif_running(bp->dev)) { bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_DEL_VXLAN_PORT, 0); diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c index 5d0753cc7e73..04b0d16b210e 100644 --- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c +++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c @@ -2400,6 +2400,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad, q0->rcb->id = 0; q0->rx_packets = q0->rx_bytes = 0; q0->rx_packets_with_error = q0->rxbuf_alloc_failed = 0; + q0->rxbuf_map_failed = 0; bna_rxq_qpt_setup(q0, rxp, dpage_count, PAGE_SIZE, &dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[i]); @@ -2428,6 +2429,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad, : rx_cfg->q1_buf_size; q1->rx_packets = q1->rx_bytes = 0; q1->rx_packets_with_error = q1->rxbuf_alloc_failed = 0; + q1->rxbuf_map_failed = 0; bna_rxq_qpt_setup(q1, rxp, hpage_count, PAGE_SIZE, &hqpt_mem[i], &hsqpt_mem[i], diff --git a/drivers/net/ethernet/brocade/bna/bna_types.h b/drivers/net/ethernet/brocade/bna/bna_types.h index e0e797f2ea14..c438d032e8bf 100644 --- a/drivers/net/ethernet/brocade/bna/bna_types.h +++ b/drivers/net/ethernet/brocade/bna/bna_types.h @@ -587,6 +587,7 @@ struct bna_rxq { u64 rx_bytes; u64 rx_packets_with_error; u64 rxbuf_alloc_failed; + u64 rxbuf_map_failed; }; /* RxQ pair */ diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 506047c38607..21a0cfc3e7ec 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -399,7 +399,13 @@ bnad_rxq_refill_page(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc) } dma_addr = dma_map_page(&bnad->pcidev->dev, page, page_offset, - unmap_q->map_size, DMA_FROM_DEVICE); + unmap_q->map_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + put_page(page); + BNAD_UPDATE_CTR(bnad, rxbuf_map_failed); + rcb->rxq->rxbuf_map_failed++; + goto finishing; + } unmap->page = page; unmap->page_offset = page_offset; @@ -454,8 +460,15 @@ bnad_rxq_refill_skb(struct bnad *bnad, struct bna_rcb *rcb, u32 nalloc) rcb->rxq->rxbuf_alloc_failed++; goto finishing; } + dma_addr = dma_map_single(&bnad->pcidev->dev, skb->data, buff_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, rxbuf_map_failed); + rcb->rxq->rxbuf_map_failed++; + goto finishing; + } unmap->skb = skb; dma_unmap_addr_set(&unmap->vector, dma_addr, dma_addr); @@ -3025,6 +3038,11 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) unmap = head_unmap; dma_addr = dma_map_single(&bnad->pcidev->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, tx_skb_map_failed); + return NETDEV_TX_OK; + } BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[0].host_addr); txqent->vector[0].length = htons(len); dma_unmap_addr_set(&unmap->vectors[0], dma_addr, dma_addr); @@ -3056,6 +3074,15 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) dma_addr = skb_frag_dma_map(&bnad->pcidev->dev, frag, 0, size, DMA_TO_DEVICE); + if (dma_mapping_error(&bnad->pcidev->dev, dma_addr)) { + /* Undo the changes starting at tcb->producer_index */ + bnad_tx_buff_unmap(bnad, unmap_q, q_depth, + tcb->producer_index); + dev_kfree_skb_any(skb); + BNAD_UPDATE_CTR(bnad, tx_skb_map_failed); + return NETDEV_TX_OK; + } + dma_unmap_len_set(&unmap->vectors[vect_id], dma_len, size); BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); txqent->vector[vect_id].length = htons(size); diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h index faedbf24777e..f4ed816b93ee 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -175,6 +175,7 @@ struct bnad_drv_stats { u64 tx_skb_headlen_zero; u64 tx_skb_frag_zero; u64 tx_skb_len_mismatch; + u64 tx_skb_map_failed; u64 hw_stats_updates; u64 netif_rx_dropped; @@ -189,6 +190,7 @@ struct bnad_drv_stats { u64 rx_unmap_q_alloc_failed; u64 rxbuf_alloc_failed; + u64 rxbuf_map_failed; }; /* Complete driver stats */ diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 2bdfc5dff4b1..0e4fdc3dd729 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -90,6 +90,7 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_skb_headlen_zero", "tx_skb_frag_zero", "tx_skb_len_mismatch", + "tx_skb_map_failed", "hw_stats_updates", "netif_rx_dropped", @@ -102,6 +103,7 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_unmap_q_alloc_failed", "rx_unmap_q_alloc_failed", "rxbuf_alloc_failed", + "rxbuf_map_failed", "mac_stats_clr_cnt", "mac_frame_64", @@ -807,6 +809,7 @@ bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) rx_packets_with_error; buf[bi++] = rcb->rxq-> rxbuf_alloc_failed; + buf[bi++] = rcb->rxq->rxbuf_map_failed; buf[bi++] = rcb->producer_index; buf[bi++] = rcb->consumer_index; } @@ -821,6 +824,7 @@ bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) rx_packets_with_error; buf[bi++] = rcb->rxq-> rxbuf_alloc_failed; + buf[bi++] = rcb->rxq->rxbuf_map_failed; buf[bi++] = rcb->producer_index; buf[bi++] = rcb->consumer_index; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 8353a6cbfcc2..03ed00c49823 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -157,6 +157,11 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5090), /* Custom T540-CR */ CH_PCI_ID_TABLE_FENTRY(0x5091), /* Custom T522-CR */ CH_PCI_ID_TABLE_FENTRY(0x5092), /* Custom T520-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5093), /* Custom T580-LP-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5094), /* Custom T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5095), /* Custom T540-CR-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5096), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x5097), /* Custom T520-KR */ /* T6 adapters: */ diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 0a27805cbbbd..821540913343 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -582,6 +582,7 @@ struct be_adapter { u16 pvid; __be16 vxlan_port; int vxlan_port_count; + int vxlan_port_aliases; struct phy_info phy; u8 wol_cap; bool wol_en; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 12687bf52b95..7bf51a1a0a77 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5176,6 +5176,11 @@ static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) return; + if (adapter->vxlan_port == port && adapter->vxlan_port_count) { + adapter->vxlan_port_aliases++; + return; + } + if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) { dev_info(dev, "Only one UDP port supported for VxLAN offloads\n"); @@ -5226,6 +5231,11 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, if (adapter->vxlan_port != port) goto done; + if (adapter->vxlan_port_aliases) { + adapter->vxlan_port_aliases--; + return; + } + be_disable_vxlan_offloads(adapter); dev_info(&adapter->pdev->dev, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index fe2299ac4f5c..09ec32e33076 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1479,6 +1479,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); struct sk_buff *skb; unsigned char *data; + dma_addr_t phys_addr; u32 rx_status; int rx_bytes, err; @@ -1486,6 +1487,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, rx_status = rx_desc->status; rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); data = (unsigned char *)rx_desc->buf_cookie; + phys_addr = rx_desc->buf_phys_addr; if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { @@ -1534,7 +1536,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, if (!skb) goto err_drop_frame; - dma_unmap_single(dev->dev.parent, rx_desc->buf_phys_addr, + dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); rcvd_pkts++; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4402a1e48c9b..0ce6ffe73ca8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1268,8 +1268,6 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->hash_fn = MLX4_RSS_HASH_TOP; memcpy(rss_context->rss_key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); - netdev_rss_key_fill(rss_context->rss_key, - MLX4_EN_RSS_KEY_SIZE); } else { en_err(priv, "Unknown RSS hash function requested\n"); err = -EINVAL; diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 66d4ab703f45..60f43ec22175 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1601,6 +1601,7 @@ static const struct of_device_id ks8851_match_table[] = { { .compatible = "micrel,ks8851" }, { } }; +MODULE_DEVICE_TABLE(of, ks8851_match_table); static struct spi_driver ks8851_driver = { .driver = { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 06bcc734fe8d..d6696cfa11d2 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -536,6 +536,7 @@ struct qlcnic_hardware_context { u8 extend_lb_time; u8 phys_port_id[ETH_ALEN]; u8 lb_mode; + u8 vxlan_port_count; u16 vxlan_port; struct device *hwmon_dev; u32 post_mode; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 8b08b20e8b30..d4481454b5f8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -483,11 +483,17 @@ static void qlcnic_add_vxlan_port(struct net_device *netdev, /* Adapter supports only one VXLAN port. Use very first port * for enabling offload */ - if (!qlcnic_encap_rx_offload(adapter) || ahw->vxlan_port) + if (!qlcnic_encap_rx_offload(adapter)) return; + if (!ahw->vxlan_port_count) { + ahw->vxlan_port_count = 1; + ahw->vxlan_port = ntohs(port); + adapter->flags |= QLCNIC_ADD_VXLAN_PORT; + return; + } + if (ahw->vxlan_port == ntohs(port)) + ahw->vxlan_port_count++; - ahw->vxlan_port = ntohs(port); - adapter->flags |= QLCNIC_ADD_VXLAN_PORT; } static void qlcnic_del_vxlan_port(struct net_device *netdev, @@ -496,11 +502,13 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev, struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_hardware_context *ahw = adapter->ahw; - if (!qlcnic_encap_rx_offload(adapter) || !ahw->vxlan_port || + if (!qlcnic_encap_rx_offload(adapter) || !ahw->vxlan_port_count || (ahw->vxlan_port != ntohs(port))) return; - adapter->flags |= QLCNIC_DEL_VXLAN_PORT; + ahw->vxlan_port_count--; + if (!ahw->vxlan_port_count) + adapter->flags |= QLCNIC_DEL_VXLAN_PORT; } static netdev_features_t qlcnic_features_check(struct sk_buff *skb, diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index d79e33b3c191..ba3dab721806 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1151,7 +1151,7 @@ static void cp_clean_rings (struct cp_private *cp) desc = cp->rx_ring + i; dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr), cp->rx_buf_sz, PCI_DMA_FROMDEVICE); - dev_kfree_skb(cp->rx_skb[i]); + dev_kfree_skb_any(cp->rx_skb[i]); } } @@ -1164,7 +1164,7 @@ static void cp_clean_rings (struct cp_private *cp) le32_to_cpu(desc->opts1) & 0xffff, PCI_DMA_TODEVICE); if (le32_to_cpu(desc->opts1) & LastFrag) - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); cp->dev->stats.tx_dropped++; } } @@ -1261,6 +1261,7 @@ static void cp_tx_timeout(struct net_device *dev) cp_clean_rings(cp); rc = cp_init_rings(cp); cp_start_hw(cp); + __cp_set_rx_mode(dev); cp_enable_irq(cp); netif_wake_queue(dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index b735fa22ac95..ebf6abc4853f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -161,11 +161,16 @@ int stmmac_mdio_reset(struct mii_bus *bus) if (!gpio_request(reset_gpio, "mdio-reset")) { gpio_direction_output(reset_gpio, active_low ? 1 : 0); - udelay(data->delays[0]); + if (data->delays[0]) + msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpio_set_value(reset_gpio, active_low ? 0 : 1); - udelay(data->delays[1]); + if (data->delays[1]) + msleep(DIV_ROUND_UP(data->delays[1], 1000)); + gpio_set_value(reset_gpio, active_low ? 1 : 0); - udelay(data->delays[2]); + if (data->delays[2]) + msleep(DIV_ROUND_UP(data->delays[2], 1000)); } } #endif diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index b5f4a78da828..2d3848c9dc35 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -1011,11 +1011,11 @@ static void fjes_hw_update_zone_task(struct work_struct *work) set_bit(epidx, &irq_bit); break; } - } - - hw->ep_shm_info[epidx].es_status = info[epidx].es_status; - hw->ep_shm_info[epidx].zone = info[epidx].zone; + hw->ep_shm_info[epidx].es_status = + info[epidx].es_status; + hw->ep_shm_info[epidx].zone = info[epidx].zone; + } break; } diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 17cad185169d..76cad712ddb2 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -66,7 +66,6 @@ #define PHY_ID_VSC8244 0x000fc6c0 #define PHY_ID_VSC8514 0x00070670 #define PHY_ID_VSC8574 0x000704a0 -#define PHY_ID_VSC8641 0x00070431 #define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 #define PHY_ID_VSC8211 0x000fc4b0 @@ -273,18 +272,6 @@ static struct phy_driver vsc82xx_driver[] = { .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_VSC8641, - .name = "Vitesse VSC8641", - .phy_id_mask = 0x000ffff0, - .features = PHY_GBIT_FEATURES, - .flags = PHY_HAS_INTERRUPT, - .config_init = &vsc824x_config_init, - .config_aneg = &vsc82x4_config_aneg, - .read_status = &genphy_read_status, - .ack_interrupt = &vsc824x_ack_interrupt, - .config_intr = &vsc82xx_config_intr, - .driver = { .owner = THIS_MODULE,}, -}, { .phy_id = PHY_ID_VSC8662, .name = "Vitesse VSC8662", .phy_id_mask = 0x000ffff0, @@ -331,7 +318,6 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = { { PHY_ID_VSC8244, 0x000fffc0 }, { PHY_ID_VSC8514, 0x000ffff0 }, { PHY_ID_VSC8574, 0x000ffff0 }, - { PHY_ID_VSC8641, 0x000ffff0 }, { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, { PHY_ID_VSC8211, 0x000ffff0 }, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e7094fbd7568..488c6f50df73 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -193,7 +193,8 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC | + FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index cf8b7f0473b3..bbac1d35ed4e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2392,10 +2392,6 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); - if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6) - dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM; - else - dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM; dev->netdev_ops = &vxlan_netdev_ops; dev->destructor = free_netdev; @@ -2640,8 +2636,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dst->remote_ip.sa.sa_family = AF_INET; if (dst->remote_ip.sa.sa_family == AF_INET6 || - vxlan->cfg.saddr.sa.sa_family == AF_INET6) + vxlan->cfg.saddr.sa.sa_family == AF_INET6) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; use_ipv6 = true; + } if (conf->remote_ifindex) { struct net_device *lowerdev @@ -2670,8 +2669,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->needed_headroom = lowerdev->hard_header_len + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); - } else if (use_ipv6) + } else if (use_ipv6) { vxlan->flags |= VXLAN_F_IPV6; + dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM; + } else { + dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM; + } memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) diff --git a/include/net/flow.h b/include/net/flow.h index acd6a096250e..9b85db85f13c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -35,6 +35,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_VRFSRC 0x04 +#define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 063d30474cf6..aaf9700fc9e5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -275,7 +275,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index b8529aa1dae7..65c2a9397b3c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -32,6 +32,12 @@ struct __ip6_tnl_parm { __be32 o_key; }; +struct ip6_tnl_dst { + seqlock_t lock; + struct dst_entry __rcu *dst; + u32 cookie; +}; + /* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ @@ -39,8 +45,7 @@ struct ip6_tnl { struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ - struct dst_entry *dst_cache; /* cached dst */ - u32 dst_cookie; + struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */ int err_count; unsigned long err_time; @@ -60,9 +65,11 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t); +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t); +int ip6_tnl_dst_init(struct ip6_tnl *t); +void ip6_tnl_dst_destroy(struct ip6_tnl *t); void ip6_tnl_dst_reset(struct ip6_tnl *t); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst); +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst); int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a37d0432bebd..727d6e9a9685 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -236,8 +236,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) - err = 0; + if (tb) + err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); + + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); @@ -258,7 +261,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; - int err; + int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) @@ -268,15 +271,20 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, res->tclassid = 0; - for (err = 0; !err; err = -ENETUNREACH) { - tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; + tb = rcu_dereference_rtnl(net->ipv4.fib_main); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); + + if (!err) + goto out; + + tb = rcu_dereference_rtnl(net->ipv4.fib_default); + if (tb) + err = fib_table_lookup(tb, flp, res, flags); - tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, flags)) - break; - } +out: + if (err == -EAGAIN) + err = -ENETUNREACH; rcu_read_unlock(); diff --git a/include/net/route.h b/include/net/route.h index cc61cb95f059..f46af256880c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -255,7 +255,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; if (netif_index_is_vrf(sock_net(sk), oif)) - flow_flags |= FLOWI_FLAG_VRFSRC; + flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); diff --git a/net/atm/clip.c b/net/atm/clip.c index 17e55dfecbe2..e07f551a863c 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -317,6 +317,9 @@ static int clip_constructor(struct neighbour *neigh) static int clip_encap(struct atm_vcc *vcc, int mode) { + if (!CLIP_VCC(vcc)) + return -EBADFD; + CLIP_VCC(vcc)->encap = mode; return 0; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index ad82324f710f..0510a577a7b5 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2311,12 +2311,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!conn) return 1; - chan = conn->smp; - if (!chan) { - BT_ERR("SMP security requested but not available"); - return 1; - } - if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; @@ -2330,6 +2324,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; + chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } + l2cap_chan_lock(chan); /* If SMP is already in progress ignore this request */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a466821d1441..0ec48403ed68 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3047,6 +3047,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; u32 filter_mask = 0; + int err; if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) { struct nlattr *extfilt; @@ -3067,20 +3068,25 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - if (idx >= cb->args[0] && - br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev, filter_mask, - NLM_F_MULTI) < 0) - break; + if (idx >= cb->args[0]) { + err = br_dev->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev, + filter_mask, NLM_F_MULTI); + if (err < 0 && err != -EOPNOTSUPP) + break; + } idx++; } if (ops->ndo_bridge_getlink) { - if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev, - filter_mask, - NLM_F_MULTI) < 0) - break; + if (idx >= cb->args[0]) { + err = ops->ndo_bridge_getlink(skb, portid, + seq, dev, + filter_mask, + NLM_F_MULTI); + if (err < 0 && err != -EOPNOTSUPP) + break; + } idx++; } } diff --git a/net/core/sock.c b/net/core/sock.c index ca2984afe16e..3307c02244d3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2740,10 +2740,8 @@ static void req_prot_cleanup(struct request_sock_ops *rsk_prot) return; kfree(rsk_prot->slab_name); rsk_prot->slab_name = NULL; - if (rsk_prot->slab) { - kmem_cache_destroy(rsk_prot->slab); - rsk_prot->slab = NULL; - } + kmem_cache_destroy(rsk_prot->slab); + rsk_prot->slab = NULL; } static int req_prot_init(const struct proto *prot) @@ -2828,10 +2826,8 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); mutex_unlock(&proto_list_mutex); - if (prot->slab != NULL) { - kmem_cache_destroy(prot->slab); - prot->slab = NULL; - } + kmem_cache_destroy(prot->slab); + prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bd9e718c2a20..3de0d0362d7f 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -398,12 +398,8 @@ out_err: void dccp_ackvec_exit(void) { - if (dccp_ackvec_slab != NULL) { - kmem_cache_destroy(dccp_ackvec_slab); - dccp_ackvec_slab = NULL; - } - if (dccp_ackvec_record_slab != NULL) { - kmem_cache_destroy(dccp_ackvec_record_slab); - dccp_ackvec_record_slab = NULL; - } + kmem_cache_destroy(dccp_ackvec_slab); + dccp_ackvec_slab = NULL; + kmem_cache_destroy(dccp_ackvec_record_slab); + dccp_ackvec_record_slab = NULL; } diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 83498975165f..90f77d08cc37 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -95,8 +95,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { - if (slab != NULL) - kmem_cache_destroy(slab); + kmem_cache_destroy(slab); } static int __init ccid_activate(struct ccid_operations *ccid_ops) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 26d6ffb6d23c..6c2af797f2f9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1426,7 +1426,7 @@ found: nh->nh_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; - if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5f4a5565ad8b..c6ad99ad0ffb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2045,6 +2045,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) struct fib_result res; struct rtable *rth; int orig_oif; + int err = -ENETUNREACH; res.tclassid = 0; res.fi = NULL; @@ -2153,7 +2154,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res, 0)) { + err = fib_lookup(net, fl4, &res, 0); + if (err) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) { @@ -2181,7 +2183,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) res.type = RTN_UNICAST; goto make_route; } - rth = ERR_PTR(-ENETUNREACH); + rth = ERR_PTR(err); goto out; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index c6ded6b2a79f..448c2615fece 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -154,14 +154,20 @@ static void bictcp_init(struct sock *sk) static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { - s32 delta = tcp_time_stamp - tcp_sk(sk)->lsndtime; struct bictcp *ca = inet_csk_ca(sk); + u32 now = tcp_time_stamp; + s32 delta; + + delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. * Shift epoch_start to keep cwnd growth to cubic curve. */ - if (ca->epoch_start && delta > 0) + if (ca->epoch_start && delta > 0) { ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; + } return; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c0a15e7f359f..f7d1d5e19e95 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1024,7 +1024,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (netif_index_is_vrf(net, ipc.oif)) { flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - (flow_flags | FLOWI_FLAG_VRFSRC), + (flow_flags | FLOWI_FLAG_VRFSRC | + FLOWI_FLAG_SKIP_NH_OIF), faddr, saddr, dport, inet->inet_sport); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bb919b28619f..c10a9ee68433 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -33,6 +33,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, if (saddr) fl4->saddr = saddr->a4; + fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; + rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 030fefdc9aed..900113376d4e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5127,13 +5127,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); - if (rt && ip6_del_rt(rt)) - dst_free(&rt->dst); + if (rt) + ip6_del_rt(rt); } dst_hold(&ifp->rt->dst); - if (ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->dst); + ip6_del_rt(ifp->rt); rt_genid_bump_ipv6(net); break; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 418d9823692b..7d2e0023c72d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -155,6 +155,11 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_rcu_free(struct rt6_info *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -169,7 +174,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); pcpu_rt = *ppcpu_rt; if (pcpu_rt) { - dst_free(&pcpu_rt->dst); + rt6_rcu_free(pcpu_rt); *ppcpu_rt = NULL; } } @@ -181,7 +186,7 @@ static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) { rt6_free_pcpu(rt); - dst_free(&rt->dst); + rt6_rcu_free(rt); } } @@ -846,7 +851,7 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); + inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { @@ -872,7 +877,7 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); + inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; @@ -933,6 +938,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); + if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) && + !atomic_read(&rt->dst.__refcnt))) + return -EINVAL; + if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1025,6 +1034,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) fib6_prune_clones(info->nl_net, pn); + rt->dst.flags &= ~DST_NOCACHE; } out: @@ -1049,7 +1059,8 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->dst); + if (!(rt->dst.flags & DST_NOCACHE)) + dst_free(&rt->dst); } return err; @@ -1060,7 +1071,8 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->dst); + if (!(rt->dst.flags & DST_NOCACHE)) + dst_free(&rt->dst); return err; #endif } @@ -1410,7 +1422,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fib6_purge_rt(rt, fn, net); - inet6_rt_notify(RTM_DELROUTE, rt, info); + inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4038c694ec03..646512488c28 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -634,20 +634,20 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_check(tunnel); + dst = ip6_tnl_dst_get(tunnel); if (!dst) { - ndst = ip6_route_output(net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); - if (ndst->error) + if (dst->error) goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto tx_err_link_failure; } - dst = ndst; + ndst = dst; } tdev = dst->dev; @@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb = new_skb; } - if (fl6->flowi6_mark) { - skb_dst_set(skb, dst); - ndst = NULL; - } else { - skb_dst_set_noref(skb, dst); - } + if (!fl6->flowi6_mark && ndst) + ip6_tnl_dst_set(tunnel, ndst); + skb_dst_set(skb, dst); proto = NEXTHDR_GRE; if (encap_limit >= 0) { @@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); ip6tunnel_xmit(NULL, skb, dev); - if (ndst) - ip6_tnl_dst_store(tunnel, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = { static void ip6gre_dev_free(struct net_device *dev) { + struct ip6_tnl *t = netdev_priv(dev); + + ip6_tnl_dst_destroy(t); free_percpu(dev->tstats); free_netdev(dev); } @@ -1245,9 +1243,10 @@ static void ip6gre_tunnel_setup(struct net_device *dev) netif_keep_dst(dev); } -static int ip6gre_tunnel_init(struct net_device *dev) +static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; + int ret; tunnel = netdev_priv(dev); @@ -1255,16 +1254,37 @@ static int ip6gre_tunnel_init(struct net_device *dev) tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + ret = ip6_tnl_dst_init(tunnel); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + + return 0; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + int ret; + + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; + + tunnel = netdev_priv(dev); + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - return 0; } @@ -1460,19 +1480,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int ret; - tunnel = netdev_priv(dev); + ret = ip6gre_tunnel_init_common(dev); + if (ret) + return ret; - tunnel->dev = dev; - tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 26ea47930740..92b1aa38f121 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -586,20 +586,22 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); + hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || - skb_cloned(skb)) + skb_cloned(skb) || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag) { /* Correct geometry. */ if (frag->len > mtu || ((frag->len & 7) && frag->next) || - skb_headroom(frag) < hlen) + skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path_clean; /* Partially cloned skb? */ @@ -616,8 +618,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, err = 0; offset = 0; - frag = skb_shinfo(skb)->frag_list; - skb_frag_list_init(skb); /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; @@ -625,8 +625,11 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (!tmp_hdr) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); - return -ENOMEM; + err = -ENOMEM; + goto fail; } + frag = skb_shinfo(skb)->frag_list; + skb_frag_list_init(skb); __skb_pull(skb, hlen); fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); @@ -723,7 +726,6 @@ slow_path: */ *prevhdr = NEXTHDR_FRAGMENT; - hroom = LL_RESERVED_SPACE(rt->dst.dev); troom = rt->dst.dev->needed_tailroom; /* diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b0ab420612bc..983f0d20f96d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,36 +126,92 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, + struct dst_entry *dst) { - struct dst_entry *dst = t->dst_cache; + write_seqlock_bh(&idst->lock); + dst_release(rcu_dereference_protected( + idst->dst, + lockdep_is_held(&idst->lock.lock))); + if (dst) { + dst_hold(dst); + idst->cookie = rt6_get_cookie((struct rt6_info *)dst); + } else { + idst->cookie = 0; + } + rcu_assign_pointer(idst->dst, dst); + write_sequnlock_bh(&idst->lock); +} + +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) +{ + struct ip6_tnl_dst *idst; + struct dst_entry *dst; + unsigned int seq; + u32 cookie; - if (dst && dst->obsolete && - !dst->ops->check(dst, t->dst_cookie)) { - t->dst_cache = NULL; + idst = raw_cpu_ptr(t->dst_cache); + + rcu_read_lock(); + do { + seq = read_seqbegin(&idst->lock); + dst = rcu_dereference(idst->dst); + cookie = idst->cookie; + } while (read_seqretry(&idst->lock, seq)); + + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; + rcu_read_unlock(); + + if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) { + ip6_tnl_per_cpu_dst_set(idst, NULL); dst_release(dst); - return NULL; + dst = NULL; } - return dst; } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); void ip6_tnl_dst_reset(struct ip6_tnl *t) { - dst_release(t->dst_cache); - t->dst_cache = NULL; + int i; + + for_each_possible_cpu(i) + ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) +{ + ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); + +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); + +void ip6_tnl_dst_destroy(struct ip6_tnl *t) +{ + if (!t->dst_cache) + return; + + ip6_tnl_dst_reset(t); + free_percpu(t->dst_cache); +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); + +int ip6_tnl_dst_init(struct ip6_tnl *t) { - struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt6_get_cookie(rt); - dst_release(t->dst_cache); - t->dst_cache = dst; + int i; + + t->dst_cache = alloc_percpu(struct ip6_tnl_dst); + if (!t->dst_cache) + return -ENOMEM; + + for_each_possible_cpu(i) + seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock); + + return 0; } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -271,6 +327,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) static void ip6_dev_free(struct net_device *dev) { + struct ip6_tnl *t = netdev_priv(dev); + + ip6_tnl_dst_destroy(t); free_percpu(dev->tstats); free_netdev(dev); } @@ -1010,23 +1069,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } else if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_check(t); + dst = ip6_tnl_dst_get(t); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) goto tx_err_link_failure; if (!dst) { - ndst = ip6_route_output(net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); - if (ndst->error) + if (dst->error) goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto tx_err_link_failure; } - dst = ndst; + ndst = dst; } tdev = dst->dev; @@ -1072,12 +1131,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, consume_skb(skb); skb = new_skb; } - if (fl6->flowi6_mark) { - skb_dst_set(skb, dst); - ndst = NULL; - } else { - skb_dst_set_noref(skb, dst); - } + + if (!fl6->flowi6_mark && ndst) + ip6_tnl_dst_set(t, ndst); + skb_dst_set(skb, dst); + skb->transport_header = skb->network_header; proto = fl6->flowi6_proto; @@ -1101,14 +1159,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; ip6tunnel_xmit(NULL, skb, dev); - if (ndst) - ip6_tnl_dst_store(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -1573,12 +1629,21 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int ret; t->dev = dev; t->net = dev_net(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + + ret = ip6_tnl_dst_init(t); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 53617d715188..f204089e854c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1322,8 +1322,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { dst_hold(&rt->dst); - if (ip6_del_rt(rt)) - dst_free(&rt->dst); + ip6_del_rt(rt); } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; } @@ -1886,9 +1885,11 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: + case RTN_UNREACHABLE: default: rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN - : -ENETUNREACH; + : (cfg->fc_type == RTN_UNREACHABLE) + ? -EHOSTUNREACH : -ENETUNREACH; rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; break; @@ -2028,7 +2029,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry || + rt->dst.flags & DST_NOCACHE) { err = -ENOENT; goto out; } @@ -2515,6 +2517,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); @@ -3303,7 +3306,8 @@ errout: return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, + unsigned int nlm_flags) { struct sk_buff *skb; struct net *net = info->nl_net; @@ -3318,7 +3322,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e8e524ad8a01..002a755fa07e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -275,13 +275,15 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) case NFPROTO_IPV6: { u8 nexthdr = ipv6_hdr(skb)->nexthdr; __be16 frag_off; + int ofs; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } + protoff = ofs; break; } default: diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c92d6a262bc5..5c030a4d7338 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -57,6 +57,7 @@ struct ovs_len_tbl { }; #define OVS_ATTR_NESTED -1 +#define OVS_ATTR_VARIABLE -2 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -304,6 +305,10 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, +}; + static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, @@ -315,8 +320,9 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, - [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, + .next = ovs_vxlan_ext_key_lens }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -349,6 +355,13 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, }; +static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) +{ + return expected_len == attr_len || + expected_len == OVS_ATTR_NESTED || + expected_len == OVS_ATTR_VARIABLE; +} + static bool is_all_zero(const u8 *fp, size_t size) { int i; @@ -388,7 +401,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, } expected_len = ovs_key_lens[type].len; - if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { + if (!check_attr_len(nla_len(nla), expected_len)) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -473,29 +486,50 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, return 0; } -static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { - [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, -}; - -static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, +static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) { - struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + struct nlattr *a; + int rem; unsigned long opt_key_offset; struct vxlan_metadata opts; - int err; BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); - err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); - if (err < 0) - return err; - memset(&opts, 0, sizeof(opts)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); - if (tb[OVS_VXLAN_EXT_GBP]) - opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + if (type > OVS_VXLAN_EXT_MAX) { + OVS_NLERR(log, "VXLAN extension %d out of range max %d", + type, OVS_VXLAN_EXT_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_vxlan_ext_key_lens[type].len)) { + OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d", + type, nla_len(a), + ovs_vxlan_ext_key_lens[type].len); + return -EINVAL; + } + + switch (type) { + case OVS_VXLAN_EXT_GBP: + opts.gbp = nla_get_u32(a); + break; + default: + OVS_NLERR(log, "Unknown VXLAN extension attribute %d", + type); + return -EINVAL; + } + } + if (rem) { + OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.", + rem); + return -EINVAL; + } if (!is_mask) SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); @@ -528,8 +562,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, return -EINVAL; } - if (ovs_tunnel_key_lens[type].len != nla_len(a) && - ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { + if (!check_attr_len(nla_len(a), + ovs_tunnel_key_lens[type].len)) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; @@ -1052,10 +1086,13 @@ static void nlattr_set(struct nlattr *attr, u8 val, /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) - nlattr_set(nla, val, tbl[nla_type(nla)].next); - else + if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { + if (tbl[nla_type(nla)].next) + tbl = tbl[nla_type(nla)].next; + nlattr_set(nla, val, tbl); + } else { memset(nla_data(nla), val, nla_len(nla)); + } } } @@ -1922,8 +1959,7 @@ static int validate_set(const struct nlattr *a, key_len /= 2; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type].len != key_len && - ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) + !check_attr_len(key_len, ovs_key_lens[key_type].len)) return -EINVAL; if (masked && !validate_masked(nla_data(ovs_key), key_len)) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 562c926a51cc..c5ac436235e0 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -539,6 +539,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; + msg = buf_msg(skb); if (msg_reroute_cnt(msg)) return false; dnode = addr_domain(net, msg_lookup_scope(msg)); |