diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
62 files changed, 5395 insertions, 1340 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3d4e4a5d00d1..bf1f04164885 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1742,13 +1742,18 @@ static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } +static int mlx4_en_get_max_num_rx_rings(struct net_device *dev) +{ + return min_t(int, num_online_cpus(), MAX_RX_RINGS); +} + static void mlx4_en_get_channels(struct net_device *dev, struct ethtool_channels *channel) { struct mlx4_en_priv *priv = netdev_priv(dev); - channel->max_rx = MAX_RX_RINGS; - channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; + channel->max_rx = mlx4_en_get_max_num_rx_rings(dev); + channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up; channel->rx_count = priv->rx_ring_num; channel->tx_count = priv->tx_ring_num[TX] / @@ -1777,7 +1782,7 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; if (channel->tx_count * priv->prof->num_up + xdp_count > - MAX_TX_RINGS) { + priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 686e18de9a97..2c2965497ed3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -153,7 +153,7 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) int i; params->udp_rss = udp_rss; - params->num_tx_rings_p_up = mlx4_low_memory_profile() ? + params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ? MLX4_EN_MIN_TX_RING_P_UP : min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP); @@ -170,8 +170,8 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; - params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up; - params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * + params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up; + params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up * params->prof[i].num_up; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9c218f1cfc6c..d611df2f274d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_arm_cq(priv, cq); } else { + mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring); mlx4_en_init_recycle_ring(priv, i); /* XDP TX CQ should never be armed */ } @@ -3305,7 +3306,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME; priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); - priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; + priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up; priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 5a47f9669621..6883ac75d37f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -53,7 +53,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) - context->params2 |= MLX4_QP_BIT_FPP; + context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP); } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b97a55c827eb..92aec17f4b4d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -254,8 +254,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : - min_t(int, num_of_eqs, - netif_get_num_default_rss_queues()); + min_t(int, num_of_eqs, num_online_cpus()); mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(num_rx_rings); } @@ -762,6 +761,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp.data_hard_start = va - frags[0].page_offset; xdp.data = va; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + length; orig_data = xdp.data; @@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (likely(!mlx4_en_xmit_frame(ring, frags, dev, + if (likely(!mlx4_en_xmit_frame(ring, frags, priv, length, cq_ring, &doorbell_pending))) { frags[0].page = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8a32a8f7f9c0..596445a4a241 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -718,7 +718,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) #else iowrite32be( #endif - ring->doorbell_qpn, + (__force u32)ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); } @@ -1085,13 +1085,35 @@ tx_drop: #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ / 16) & 0x3f) +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + int i; + + for (i = 0; i < ring->size; i++) { + struct mlx4_en_tx_info *tx_info = &ring->tx_info[i]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + + (i << LOG_TXBB_SIZE); + + tx_info->map0_byte_count = PAGE_SIZE; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + tx_desc->data.lkey = ring->mr_key; + tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + } +} + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending) { - struct mlx4_en_priv *priv = netdev_priv(dev); - union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_info *tx_info; struct mlx4_wqe_data_seg *data; @@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_info->page = frame->page; frame->page = NULL; tx_info->map0_dma = dma; - tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); - tx_info->ts_requested = 0; - tx_info->nr_maps = 1; - tx_info->linear = 1; - tx_info->inl = 0; dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, length, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma + frame->page_offset); - data->lkey = ring->mr_key; dma_wmb(); data->byte_count = cpu_to_be32(length); /* tx completion can avoid cache line miss for common cases */ - tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; op_own = cpu_to_be32(MLX4_OPCODE_SEND) | ((ring->prod & ring->size) ? @@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ring->prod += MLX4_EN_XDP_TX_NRTXBB; - qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + ring->xmit_more++; - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, - op_own, false, false); *doorbell_pending = true; return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 16c09949afd5..634f603f941c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -57,12 +57,12 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ - u64 val; \ - switch (sizeof(dest)) { \ + __be64 val; \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ - case 8: val = get_unaligned((u64 *)__p); \ + case 8: val = get_unaligned((__be64 *)__p); \ (dest) = be64_to_cpu(val); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fdb3ad0cbe54..1856e279a7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -399,7 +399,7 @@ struct mlx4_en_profile { u32 active_ports; u32 small_pkt_int; u8 no_reset; - u8 num_tx_rings_p_up; + u8 max_num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; @@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, @@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, int node, int queue_index); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 728a2fb1f5c0..203320923340 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -925,7 +925,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); if (states[i + 1] != MLX4_QP_STATE_RTR) - context->params2 &= ~MLX4_QP_BIT_FPP; + context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index fabb53379727..04304dd894c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3185,7 +3185,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, optpar = be32_to_cpu(*(__be32 *) inbox->buf); if (slave != mlx4_master_func_num(dev)) { - qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); /* setting QP rate-limit is disallowed for VFs */ if (qp_ctx->rate_limit_params) return -EPERM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index fdaef00465d7..25deaa5a534c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -6,6 +6,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on MAY_USE_DEVLINK depends on PCI + imply PTP_1588_CLOCK default n ---help--- Core driver for low level functionality of the ConnectX-4 and @@ -29,7 +30,6 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m - imply PTP_1588_CLOCK default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 87a3099808f3..100fe4ecad9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,7 +4,7 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \ diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o @@ -13,7 +13,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ - en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o vxlan.o \ en_arfs.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o @@ -22,7 +22,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc. mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o -mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 1e3a6c3e4132..80eef4163f52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -139,7 +139,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} TRACE_EVENT(mlx5_fs_set_fte, - TP_PROTO(const struct fs_fte *fte, bool new_fte), + TP_PROTO(const struct fs_fte *fte, int new_fte), TP_ARGS(fte, new_fte), TP_STRUCT__entry( __field(const struct fs_fte *, fte) @@ -149,7 +149,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, action) __field(u32, flow_tag) __field(u8, mask_enable) - __field(bool, new_fte) + __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc13d3dbd366..ca8845b641c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -267,28 +267,6 @@ struct mlx5e_dcbx { }; #endif -#define MAX_PIN_NUM 8 -struct mlx5e_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; -}; - -struct mlx5e_tstamp { - rwlock_t lock; - struct cyclecounter cycles; - struct timecounter clock; - struct hwtstamp_config hwtstamp_config; - u32 nominal_c_mult; - unsigned long overflow_period; - struct delayed_work overflow_work; - struct mlx5_core_dev *mdev; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5e_pps pps_info; -}; - enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_AM, @@ -375,9 +353,10 @@ struct mlx5e_txqsq { u8 min_inline_mode; u16 edge; struct device *pdev; - struct mlx5e_tstamp *tstamp; __be32 mkey_be; unsigned long state; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; /* control path */ struct mlx5_wq_ctrl wq_ctrl; @@ -543,10 +522,11 @@ struct mlx5e_rq { struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; - struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; struct mlx5e_page_cache page_cache; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_post_rx_wqes post_wqes; @@ -588,7 +568,7 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct mlx5e_tstamp *tstamp; + struct hwtstamp_config *tstamp; int ix; }; @@ -789,7 +769,7 @@ struct mlx5e_priv { struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; - struct mlx5e_tstamp tstamp; + struct hwtstamp_config tstamp; u16 q_counter; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; @@ -873,12 +853,6 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, - struct skb_shared_hwtstamps *hwts); -void mlx5e_timestamp_init(struct mlx5e_priv *priv); -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); @@ -889,6 +863,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_timestamp_set(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { bool is_rss; @@ -1081,6 +1056,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 4614ddfa91eb..6a7c8b04447e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } mdata = mlx5e_ipsec_add_metadata(skb); - if (unlikely(IS_ERR(mdata))) { + if (IS_ERR(mdata)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); goto drop; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c deleted file mode 100644 index 84dd63e74041..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/clocksource.h> -#include "en.h" - -enum { - MLX5E_CYCLES_SHIFT = 23 -}; - -enum { - MLX5E_PIN_MODE_IN = 0x0, - MLX5E_PIN_MODE_OUT = 0x1, -}; - -enum { - MLX5E_OUT_PATTERN_PULSE = 0x0, - MLX5E_OUT_PATTERN_PERIODIC = 0x1, -}; - -enum { - MLX5E_EVENT_MODE_DISABLE = 0x0, - MLX5E_EVENT_MODE_REPETETIVE = 0x1, - MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, -}; - -enum { - MLX5E_MTPPS_FS_ENABLE = BIT(0x0), - MLX5E_MTPPS_FS_PATTERN = BIT(0x2), - MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), - MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), - MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), - MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), -}; - -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, - struct skb_shared_hwtstamps *hwts) -{ - u64 nsec; - - read_lock(&tstamp->lock); - nsec = timecounter_cyc2time(&tstamp->clock, timestamp); - read_unlock(&tstamp->lock); - - hwts->hwtstamp = ns_to_ktime(nsec); -} - -static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc) -{ - struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp, - cycles); - - return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; -} - -static void mlx5e_pps_out(struct work_struct *work) -{ - struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps, - out_work); - struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp, - pps_info); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - unsigned long flags; - int i; - - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { - u64 tstart; - - write_lock_irqsave(&tstamp->lock, flags); - tstart = tstamp->pps_info.start[i]; - tstamp->pps_info.start[i] = 0; - write_unlock_irqrestore(&tstamp->lock, flags); - if (!tstart) - continue; - - MLX5_SET(mtpps_reg, in, pin, i); - MLX5_SET64(mtpps_reg, in, time_stamp, tstart); - MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP); - mlx5_set_mtpps(tstamp->mdev, in, sizeof(in)); - } -} - -static void mlx5e_timestamp_overflow(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, - overflow_work); - struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); - queue_delayed_work(priv->wq, &tstamp->overflow_work, - msecs_to_jiffies(tstamp->overflow_period * 1000)); -} - -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) -{ - struct hwtstamp_config config; - int err; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* TX HW timestamp */ - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - mutex_lock(&priv->state_lock); - /* RX HW timestamp */ - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - mutex_unlock(&priv->state_lock); - return -ERANGE; - } - - memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); - mutex_unlock(&priv->state_lock); - - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? -EFAULT : 0; -} - -int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) -{ - struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; -} - -static int mlx5e_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - u64 ns = timespec64_to_ns(ts); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_init(&tstamp->clock, &tstamp->cycles, ns); - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - u64 ns; - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - ns = timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); - - *ts = ns_to_timespec64(ns); - - return 0; -} - -static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_adjtime(&tstamp->clock, delta); - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) -{ - u64 adj; - u32 diff; - unsigned long flags; - int neg_adj = 0; - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - - if (delta < 0) { - neg_adj = 1; - delta = -delta; - } - - adj = tstamp->nominal_c_mult; - adj *= delta; - diff = div_u64(adj, 1000000000ULL); - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff : - tstamp->nominal_c_mult + diff; - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_extts_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - u32 field_select = 0; - u8 pin_mode = 0; - u8 pattern = 0; - int pin = -1; - int err = 0; - - if (!MLX5_PPS_CAP(priv->mdev)) - return -EOPNOTSUPP; - - if (rq->extts.index >= tstamp->ptp_info.n_pins) - return -EINVAL; - - if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); - if (pin < 0) - return -EBUSY; - pin_mode = MLX5E_PIN_MODE_IN; - pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE; - } else { - pin = rq->extts.index; - field_select = MLX5E_MTPPS_FS_ENABLE; - } - - MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); - MLX5_SET(mtpps_reg, in, pattern, pattern); - MLX5_SET(mtpps_reg, in, enable, on); - MLX5_SET(mtpps_reg, in, field_select, field_select); - - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); - if (err) - return err; - - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); -} - -static int mlx5e_perout_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - u64 nsec_now, nsec_delta, time_stamp = 0; - u64 cycles_now, cycles_delta; - struct timespec64 ts; - unsigned long flags; - u32 field_select = 0; - u8 pin_mode = 0; - u8 pattern = 0; - int pin = -1; - int err = 0; - s64 ns; - - if (!MLX5_PPS_CAP(priv->mdev)) - return -EOPNOTSUPP; - - if (rq->perout.index >= tstamp->ptp_info.n_pins) - return -EINVAL; - - if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT, - rq->perout.index); - if (pin < 0) - return -EBUSY; - - pin_mode = MLX5E_PIN_MODE_OUT; - pattern = MLX5E_OUT_PATTERN_PERIODIC; - ts.tv_sec = rq->perout.period.sec; - ts.tv_nsec = rq->perout.period.nsec; - ns = timespec64_to_ns(&ts); - - if ((ns >> 1) != 500000000LL) - return -EINVAL; - - ts.tv_sec = rq->perout.start.sec; - ts.tv_nsec = rq->perout.start.nsec; - ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); - nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - write_unlock_irqrestore(&tstamp->lock, flags); - time_stamp = cycles_now + cycles_delta; - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE | - MLX5E_MTPPS_FS_TIME_STAMP; - } else { - pin = rq->perout.index; - field_select = MLX5E_MTPPS_FS_ENABLE; - } - - MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); - MLX5_SET(mtpps_reg, in, pattern, pattern); - MLX5_SET(mtpps_reg, in, enable, on); - MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); - MLX5_SET(mtpps_reg, in, field_select, field_select); - - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); - if (err) - return err; - - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); -} - -static int mlx5e_pps_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - - tstamp->pps_info.enabled = !!on; - return 0; -} - -static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - switch (rq->type) { - case PTP_CLK_REQ_EXTTS: - return mlx5e_extts_configure(ptp, rq, on); - case PTP_CLK_REQ_PEROUT: - return mlx5e_perout_configure(ptp, rq, on); - case PTP_CLK_REQ_PPS: - return mlx5e_pps_configure(ptp, rq, on); - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, - enum ptp_pin_function func, unsigned int chan) -{ - return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; -} - -static const struct ptp_clock_info mlx5e_ptp_clock_info = { - .owner = THIS_MODULE, - .max_adj = 100000000, - .n_alarm = 0, - .n_ext_ts = 0, - .n_per_out = 0, - .n_pins = 0, - .pps = 0, - .adjfreq = mlx5e_ptp_adjfreq, - .adjtime = mlx5e_ptp_adjtime, - .gettime64 = mlx5e_ptp_gettime, - .settime64 = mlx5e_ptp_settime, - .enable = NULL, - .verify = NULL, -}; - -static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) -{ - tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; - tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; -} - -static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) -{ - int i; - - tstamp->ptp_info.pin_config = - kzalloc(sizeof(*tstamp->ptp_info.pin_config) * - tstamp->ptp_info.n_pins, GFP_KERNEL); - if (!tstamp->ptp_info.pin_config) - return -ENOMEM; - tstamp->ptp_info.enable = mlx5e_ptp_enable; - tstamp->ptp_info.verify = mlx5e_ptp_verify; - tstamp->ptp_info.pps = 1; - - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { - snprintf(tstamp->ptp_info.pin_config[i].name, - sizeof(tstamp->ptp_info.pin_config[i].name), - "mlx5_pps%d", i); - tstamp->ptp_info.pin_config[i].index = i; - tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE; - tstamp->ptp_info.pin_config[i].chan = i; - } - - return 0; -} - -static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, - struct mlx5e_tstamp *tstamp) -{ - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - - mlx5_query_mtpps(priv->mdev, out, sizeof(out)); - - tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, - cap_number_of_pps_pins); - tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_in_pins); - tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_out_pins); - - tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); - tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); - tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); - tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); - tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); - tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); - tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); - tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); -} - -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event) -{ - struct net_device *netdev = priv->netdev; - struct mlx5e_tstamp *tstamp = &priv->tstamp; - struct timespec64 ts; - u64 nsec_now, nsec_delta; - u64 cycles_now, cycles_delta; - int pin = event->index; - s64 ns; - unsigned long flags; - - switch (tstamp->ptp_info.pin_config[pin].func) { - case PTP_PF_EXTTS: - if (tstamp->pps_info.enabled) { - event->type = PTP_CLOCK_PPSUSR; - event->pps_times.ts_real = ns_to_timespec64(event->timestamp); - } else { - event->type = PTP_CLOCK_EXTTS; - } - ptp_clock_event(tstamp->ptp, event); - break; - case PTP_PF_PEROUT: - mlx5e_ptp_gettime(&tstamp->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - ts.tv_sec += 1; - ts.tv_nsec = 0; - ns = timespec64_to_ns(&ts); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); - nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - tstamp->pps_info.start[pin] = cycles_now + cycles_delta; - queue_work(priv->wq, &tstamp->pps_info.out_work); - write_unlock_irqrestore(&tstamp->lock, flags); - break; - default: - netdev_err(netdev, "%s: Unhandled event\n", __func__); - } -} - -void mlx5e_timestamp_init(struct mlx5e_priv *priv) -{ - struct mlx5e_tstamp *tstamp = &priv->tstamp; - u64 ns; - u64 frac = 0; - u32 dev_freq; - - mlx5e_timestamp_init_config(tstamp); - dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz); - if (!dev_freq) { - mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } - rwlock_init(&tstamp->lock); - tstamp->cycles.read = mlx5e_read_internal_timer; - tstamp->cycles.shift = MLX5E_CYCLES_SHIFT; - tstamp->cycles.mult = clocksource_khz2mult(dev_freq, - tstamp->cycles.shift); - tstamp->nominal_c_mult = tstamp->cycles.mult; - tstamp->cycles.mask = CLOCKSOURCE_MASK(41); - tstamp->mdev = priv->mdev; - - timecounter_init(&tstamp->clock, &tstamp->cycles, - ktime_to_ns(ktime_get_real())); - - /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. - */ - ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask, - frac, &frac); - do_div(ns, NSEC_PER_SEC / 2 / HZ); - tstamp->overflow_period = ns; - - INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); - INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); - if (tstamp->overflow_period) - queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); - else - mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); - - /* Configure the PHC */ - tstamp->ptp_info = mlx5e_ptp_clock_info; - snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); - - /* Initialize 1PPS data structures */ - if (MLX5_PPS_CAP(priv->mdev)) - mlx5e_get_pps_caps(priv, tstamp); - if (tstamp->ptp_info.n_pins) - mlx5e_init_pin_config(tstamp); - - tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, - &priv->mdev->pdev->dev); - if (IS_ERR(tstamp->ptp)) { - mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", - PTR_ERR(tstamp->ptp)); - tstamp->ptp = NULL; - } -} - -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) -{ - struct mlx5e_tstamp *tstamp = &priv->tstamp; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return; - - if (priv->tstamp.ptp) { - ptp_clock_unregister(priv->tstamp.ptp); - priv->tstamp.ptp = NULL; - } - - cancel_work_sync(&tstamp->pps_info.out_work); - cancel_delayed_work_sync(&tstamp->overflow_work); - kfree(tstamp->ptp_info.pin_config); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index ece3fb147e3e..157d02917237 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -134,6 +134,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); } int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d12e9fc0d76b..81a112e40fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1417,14 +1417,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { + struct mlx5_core_dev *mdev = priv->mdev; int ret; ret = ethtool_op_get_ts_info(priv->netdev, info); if (ret) return ret; - info->phc_index = priv->tstamp.ptp ? - ptp_clock_index(priv->tstamp.ptp) : -1; + info->phc_index = mdev->clock.ptp ? + ptp_clock_index(mdev->clock.ptp) : -1; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return 0; @@ -1754,7 +1755,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f11fd07ac4dd..850cdc980ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -291,7 +291,7 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_del_any_vid_rules(priv); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) @@ -302,7 +302,7 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_add_any_vid_rules(priv); + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dfc29720ab77..3a1969a6d509 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -184,7 +184,6 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u64 tx_offload_none = 0; int i, j; memset(s, 0, sizeof(*s)); @@ -199,6 +198,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_xdp_tx += rq_stats->xdp_tx; @@ -229,14 +229,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_queue_dropped += sq_stats->dropped; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; - tx_offload_none += sq_stats->csum_none; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; } } - /* Update calculated offload counters */ - s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; - s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; - s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); @@ -376,8 +373,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; - struct ptp_clock_event ptp_event; - struct mlx5_eqe *eqe = NULL; if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; @@ -387,14 +382,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; - case MLX5_DEV_EVENT_PPS: - eqe = (struct mlx5_eqe *)param; - ptp_event.index = eqe->data.pps.pin; - ptp_event.timestamp = - timecounter_cyc2time(&priv->tstamp.clock, - be64_to_cpu(eqe->data.pps.time_stamp)); - mlx5e_pps_event_handler(vpriv, &ptp_event); - break; default: break; } @@ -588,6 +575,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; rq->channel = c; rq->ix = c->ix; rq->mdev = mdev; @@ -1126,6 +1114,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->tstamp = c->tstamp; + sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->channel = c; sq->txq_ix = txq_ix; @@ -2681,6 +2670,12 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +void mlx5e_timestamp_set(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -2696,7 +2691,7 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); - mlx5e_timestamp_init(priv); + mlx5e_timestamp_set(priv); if (priv->profile->update_stats) queue_delayed_work(priv->wq, &priv->update_stats_work, 0); @@ -2734,7 +2729,6 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); @@ -3111,8 +3105,8 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev, } #endif -static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH @@ -3333,8 +3327,8 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { - netdev_err(netdev, "%s feature 0x%llx failed err %d\n", - enable ? "Enable" : "Disable", feature, err); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? "Enable" : "Disable", &feature, err); return err; } @@ -3406,6 +3400,80 @@ out: return err; } +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling cqe compression"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + mutex_unlock(&priv->state_lock); + return -ERANGE; + } + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e03c427faf..765fc74fbb1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,6 +34,7 @@ #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> +#include <net/act_api.h> #include <net/netevent.h> #include <net/arp.h> @@ -667,14 +668,6 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, cls_flower->common.chain_index) return -EOPNOTSUPP; - if (cls_flower->egress_dev) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - dev = mlx5_eswitch_get_uplink_netdev(esw); - return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - cls_flower); - } - switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: return mlx5e_configure_flower(priv, cls_flower); @@ -698,6 +691,14 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct net_device *dev = cb_priv; + + return mlx5e_setup_tc(dev, type, type_data); +} + bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1017,15 +1018,24 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); + if (err) + goto err_neigh_cleanup; + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_egdev_cleanup; } return 0; +err_egdev_cleanup: + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1047,7 +1057,8 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) void *ppriv = priv->ppriv; unregister_netdev(rep->netdev); - + tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb, + mlx5_eswitch_get_uplink_netdev(esw)); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f1dd638384d3..6d7df4750e0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,10 +42,11 @@ #include "en_rep.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" -static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { - return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; + return config->rx_filter == HWTSTAMP_FILTER_ALL; } static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, @@ -627,6 +628,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; + rq->stats.csum_unnecessary++; return; } @@ -644,7 +646,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum_level = 1; skb->encapsulation = 1; rq->stats.csum_unnecessary_inner++; + return; } + rq->stats.csum_unnecessary++; return; } csum_none: @@ -658,7 +662,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; @@ -673,8 +676,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, rq->stats.lro_bytes += cqe_bcnt; } - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); @@ -794,6 +798,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, return false; xdp.data = va + *rx_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; @@ -1159,12 +1164,25 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; + struct net_device *netdev; char *pseudo_header; + u32 qpn; u8 *dgid; u8 g; + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; if ((!g) || dgid[0] != 0xff) @@ -1185,8 +1203,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); @@ -1226,6 +1245,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_free_wqe; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_free_wqe; + } napi_gro_receive(rq->cq.napi, skb); wq_free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6d199ffb1c0b..f8637213afc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -68,6 +68,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_drop; u64 rx_xdp_tx; u64 rx_xdp_tx_full; + u64 tx_csum_none; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -108,6 +109,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -339,6 +341,7 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; u64 lro_packets; @@ -363,6 +366,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, @@ -392,6 +396,7 @@ struct mlx5e_sq_stats { u64 tso_bytes; u64 tso_inner_packets; u64 tso_inner_bytes; + u64 csum_partial; u64 csum_partial_inner; u64 nop; /* less likely accessed in data path */ @@ -408,6 +413,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index da503e6411da..1aa2028ed995 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1317,6 +1317,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda return true; } +static bool modify_header_match_supported(struct mlx5_flow_spec *spec, + struct tcf_exts *exts) +{ + const struct tc_action *a; + bool modify_ip_header; + LIST_HEAD(actions); + u8 htype, ip_proto; + void *headers_v; + u16 ethertype; + int nkeys, i; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!is_tcf_pedit(a)) + continue; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || + htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { + modify_ip_header = true; + break; + } + } + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool actions_match_supported(struct mlx5e_priv *priv, + struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + u32 actions; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + actions = flow->esw_attr->action; + else + actions = flow->nic_attr->action; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return modify_header_match_supported(&parse_attr->spec, exts); + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1378,6 +1441,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return 0; } @@ -1564,7 +1630,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; @@ -1573,7 +1639,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1590,7 +1656,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1630,8 +1696,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1668,7 +1735,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -1678,7 +1745,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1695,7 +1762,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1736,8 +1803,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1791,6 +1859,7 @@ vxlan_encap_offload_err: } } + /* must verify if encap is valid or not */ if (found) goto attach_flow; @@ -1817,6 +1886,8 @@ attach_flow: *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) attr->encap_id = e->encap_id; + else + err = -EAGAIN; return err; @@ -1934,6 +2005,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index fee43e40fa16..a7c208a1ad83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,6 +35,7 @@ #include "en.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -193,6 +194,7 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats.csum_partial++; } } else sq->stats.csum_none++; @@ -451,8 +453,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) SKBTX_HW_TSTAMP)) { struct skb_shared_hwtstamps hwts = {}; - mlx5e_fill_hwstamp(sq->tstamp, - get_cqe_ts(cqe), &hwts); + hwts.hwtstamp = + mlx5_timecounter_cyc2time(sq->clock, + get_cqe_ts(cqe)); skb_tstamp_tx(skb, &hwts); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fc606bfd1d6e..60771865c99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -491,8 +491,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_PPS_EVENT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); + mlx5_pps_event(dev, eqe); break; case MLX5_EVENT_TYPE_FPGA_ERROR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c index e37453d838db..c0fd2212e890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -71,11 +71,11 @@ int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, return 0; } -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps) +int mlx5_fpga_caps(struct mlx5_core_dev *dev) { u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; - return mlx5_core_access_reg(dev, in, sizeof(in), caps, + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, MLX5_ST_SZ_BYTES(fpga_cap), MLX5_REG_FPGA_CAP, 0, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h index 94bdfd47c3f0..d05233c9b4f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -65,7 +65,7 @@ struct mlx5_fpga_qp_counters { u64 rx_total_drop; }; -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps); +int mlx5_fpga_caps(struct mlx5_core_dev *dev); int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 9034e9960a76..dc8970346521 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -139,8 +139,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; - err = mlx5_fpga_caps(fdev->mdev, - fdev->mdev->caps.hca_cur[MLX5_CAP_FPGA]); + err = mlx5_fpga_caps(fdev->mdev); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e0d0efd903bc..881e2e55840c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -40,7 +40,8 @@ #include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, u32 underlay_qpn) + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; @@ -52,7 +53,15 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); - MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + if (disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + MLX5_SET(set_flow_table_root_in, in, table_id, 0); + } else { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + } + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); if (ft->vport) { MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); @@ -293,6 +302,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { @@ -305,12 +317,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, list_size); } err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index c6d7bdf255b6..71e2d0f37ad9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -71,8 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, unsigned int index); int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - u32 underlay_qpn); + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect); int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a7bea688ec8..f77e496f7053 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -145,10 +145,10 @@ static struct init_tree_node { } }; -enum fs_i_mutex_lock_class { - FS_MUTEX_GRANDPARENT, - FS_MUTEX_PARENT, - FS_MUTEX_CHILD +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD }; static const struct rhashtable_params rhash_fte = { @@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = { }; -static void del_rule(struct fs_node *node); -static void del_flow_table(struct fs_node *node); -static void del_flow_group(struct fs_node *node); -static void del_fte(struct fs_node *node); +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); static struct mlx5_flow_rule * @@ -179,14 +185,16 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - unsigned int refcount, - void (*remove_func)(struct fs_node *)) + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) { - atomic_set(&node->refcount, refcount); + atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); - mutex_init(&node->lock); - node->remove_func = remove_func; + init_rwsem(&node->lock); + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; + node->active = false; } static void tree_add_node(struct fs_node *node, struct fs_node *parent) @@ -202,50 +210,70 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent) node->root = parent->root; } -static void tree_get_node(struct fs_node *node) +static int tree_get_node(struct fs_node *node) { - atomic_inc(&node->refcount); + return atomic_add_unless(&node->refcount, 1, 0); } -static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_mutex_lock_class class) +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, class); + down_read_nested(&node->lock, class); atomic_inc(&node->refcount); } } -static void lock_ref_node(struct fs_node *node) +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - mutex_lock(&node->lock); + down_write_nested(&node->lock, class); atomic_inc(&node->refcount); } } -static void unlock_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node) { if (node) { - atomic_dec(&node->refcount); - mutex_unlock(&node->lock); + down_write(&node->lock); + atomic_inc(&node->refcount); } } +static void up_read_ref_node(struct fs_node *node) +{ + atomic_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node) +{ + atomic_dec(&node->refcount); + up_write(&node->lock); +} + static void tree_put_node(struct fs_node *node) { struct fs_node *parent_node = node->parent; - lock_ref_node(parent_node); if (atomic_dec_and_test(&node->refcount)) { - if (parent_node) + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + /* Only root namespace doesn't have parent and we just + * need to free its node. + */ + down_write_ref_node(parent_node); list_del_init(&node->list); - if (node->remove_func) - node->remove_func(node); - kfree(node); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node); + } else { + kfree(node); + } node = NULL; } - unlock_ref_node(parent_node); if (!node && parent_node) tree_put_node(parent_node); } @@ -362,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) return container_of(ns, struct mlx5_flow_root_namespace, ns); } +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + static inline struct mlx5_core_dev *get_dev(struct fs_node *node) { struct mlx5_flow_root_namespace *root = find_root(node); @@ -371,26 +408,36 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } -static void del_flow_table(struct fs_node *node) +static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - struct fs_prio *prio; int err; fs_get_obj(ft, node); dev = get_dev(&ft->node); - err = mlx5_cmd_destroy_flow_table(dev, ft); - if (err) - mlx5_core_warn(dev, "flow steering can't destroy ft\n"); - ida_destroy(&ft->fte_allocator); + if (node->active) { + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; + kfree(ft); } -static void del_rule(struct fs_node *node) +static void del_sw_hw_rule(struct fs_node *node) { struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; @@ -406,7 +453,6 @@ static void del_rule(struct fs_node *node) fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); trace_mlx5_fs_del_rule(rule); - list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); list_del(&rule->next_ft); @@ -434,117 +480,203 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } + kfree(rule); } -static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +static void del_hw_fte(struct fs_node *node) { struct mlx5_flow_table *ft; - int ret; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; - ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - WARN_ON(ret); - fte->status = 0; + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&ft->fte_allocator, fte->index); + + trace_mlx5_fs_del_fte(fte); + dev = get_dev(&ft->node); + if (node->active) { + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + } } -static void del_fte(struct fs_node *node) +static void del_sw_fte(struct fs_node *node) { - struct mlx5_flow_table *ft; + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev; struct fs_fte *fte; int err; fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); - fs_get_obj(ft, fg->node.parent); - trace_mlx5_fs_del_fte(fte); - - dev = get_dev(&ft->node); - err = mlx5_cmd_delete_fte(dev, ft, - fte->index); - if (err) - mlx5_core_warn(dev, - "flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); - destroy_fte(fte, fg); + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); } -static void del_flow_group(struct fs_node *node) +static void del_hw_flow_group(struct fs_node *node) { struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); trace_mlx5_fs_del_fg(fg); - if (ft->autogroup.active) - ft->autogroup.num_groups--; + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active) + ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); WARN_ON(err); - if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", - fg->id, ft->id); + kmem_cache_free(steering->fgs_cache, fg); } -static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, u32 *match_value, - unsigned int index) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct fs_fte *fte; - fte = kzalloc(sizeof(*fte), GFP_KERNEL); + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); if (!fte) return ERR_PTR(-ENOMEM); memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->flow_tag = flow_act->flow_tag; - fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; fte->modify_id = flow_act->modify_id; + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + return fte; } -static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kmem_cache_free(steering->fgs_cache, fg); +} + +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index) { struct mlx5_flow_group *fg; - void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, - create_fg_in, match_criteria); - u8 match_criteria_enable = MLX5_GET(create_flow_group_in, - create_fg_in, - match_criteria_enable); int ret; - fg = kzalloc(sizeof(*fg), GFP_KERNEL); + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); if (ret) { - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); return ERR_PTR(ret); - } +} + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); fg->node.type = FS_TYPE_FLOW_GROUP; - fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, - start_flow_index); - fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, - end_flow_index) - fg->start_index + 1; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(steering, fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); + return fg; } @@ -575,7 +707,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); - ida_init(&ft->fte_allocator); return ft; } @@ -693,8 +824,10 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio *prio) { struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; int min_level = INT_MAX; int err; + u32 qpn; if (root->root_ft) min_level = root->root_ft->level; @@ -702,10 +835,24 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (ft->level >= min_level) return 0; - err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn); + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, + false); + if (err) + break; + } + } + if (err) - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); else root->root_ft = ft; @@ -724,7 +871,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - lock_ref_node(&fte->node); + down_write_ref_node(&fte->node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -733,7 +880,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, ft, fg->id, modify_mask, fte); - unlock_ref_node(&fte->node); + up_write_ref_node(&fte->node); return err; } @@ -870,7 +1017,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, 1, del_flow_table); + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -882,17 +1029,17 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = connect_flow_table(root->dev, ft, fs_prio); if (err) goto destroy_ft; - lock_ref_node(&fs_prio->node); + ft->node.active = true; + down_write_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - unlock_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); return ft; destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: - ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -960,54 +1107,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, } EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); -/* Flow table should be locked */ -static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, - u32 *fg_in, - struct list_head - *prev_fg, - bool is_auto_fg) -{ - struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev = get_dev(&ft->node); - int err; - - if (!dev) - return ERR_PTR(-ENODEV); - - fg = alloc_flow_group(fg_in); - if (IS_ERR(fg)) - return fg; - - err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); - if (err) - goto err_free_fg; - - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) - goto err_remove_fg; - - if (ft->autogroup.active) - ft->autogroup.num_groups++; - /* Add node to tree */ - tree_init_node(&fg->node, !is_auto_fg, del_flow_group); - tree_add_node(&fg->node, &ft->node); - /* Add node to group list */ - list_add(&fg->node.list, prev_fg); - - trace_mlx5_fs_add_fg(fg); - return fg; - -err_remove_fg: - WARN_ON(rhltable_remove(&ft->fgs_hash, - &fg->hash, - rhash_fg)); -err_free_fg: - rhashtable_destroy(&fg->ftes_hash); - kfree(fg); - - return ERR_PTR(err); -} - struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { @@ -1016,7 +1115,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable = MLX5_GET(create_flow_group_in, fg_in, match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_core_dev *dev = get_dev(&ft->node); struct mlx5_flow_group *fg; + int err; if (!check_valid_mask(match_criteria_enable, match_criteria)) return ERR_PTR(-EINVAL); @@ -1024,9 +1129,21 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); - unlock_ref_node(&ft->node); + down_write_ref_node(&ft->node); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); + up_write_ref_node(&ft->node); + if (IS_ERR(fg)) + return fg; + + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + tree_put_node(&fg->node); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; return fg; } @@ -1111,7 +1228,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. */ - tree_init_node(&rule->node, 1, del_rule); + tree_init_node(&rule->node, NULL, del_sw_hw_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1167,7 +1284,9 @@ add_rule_fte(struct fs_fte *fte, if (err) goto free_handle; + fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fte->node.version); out: return handle; @@ -1177,59 +1296,17 @@ free_handle: return ERR_PTR(err); } -static struct fs_fte *create_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) -{ - struct mlx5_flow_table *ft; - struct fs_fte *fte; - int index; - int ret; - - fs_get_obj(ft, fg->node.parent); - index = ida_simple_get(&ft->fte_allocator, fg->start_index, - fg->start_index + fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_alloc; - } - ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) - goto err_hash; - - return fte; - -err_hash: - kfree(fte); -err_alloc: - ida_simple_remove(&ft->fte_allocator, index); - return ERR_PTR(ret); -} - -static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria) +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) { - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct list_head *prev = &ft->node.children; - unsigned int candidate_index = 0; struct mlx5_flow_group *fg; - void *match_criteria_addr; + unsigned int candidate_index = 0; unsigned int group_size = 0; - u32 *in; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return ERR_PTR(-ENOMEM); - if (ft->autogroup.num_groups < ft->autogroup.required_groups) /* We save place for flow groups in addition to max types */ group_size = ft->max_fte / (ft->autogroup.required_groups + 1); @@ -1247,25 +1324,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) { - fg = ERR_PTR(-ENOSPC); + if (candidate_index + group_size > ft->max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) goto out; - } + + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable); - MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); - MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + - group_size - 1); + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - memcpy(match_criteria_addr, match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } - fg = create_flow_group_common(ft, in, prev, true); -out: kvfree(in); - return fg; + return err; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -1340,60 +1447,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, struct fs_fte *fte) { struct mlx5_flow_handle *handle; - struct mlx5_flow_table *ft; + int old_action; int i; + int ret; - if (fte) { - int old_action; - int ret; - - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - ret = check_conflicting_ftes(fte, flow_act); - if (ret) { - handle = ERR_PTR(ret); - goto unlock_fte; - } - - old_action = fte->action; - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - trace_mlx5_fs_set_fte(fte, false); - goto add_rules; - } - } - fs_get_obj(ft, fg->node.parent); + ret = check_conflicting_ftes(fte, flow_act); + if (ret) + return ERR_PTR(ret); - fte = create_fte(fg, match_value, flow_act); - if (IS_ERR(fte)) - return (void *)fte; - tree_init_node(&fte->node, 0, del_fte); - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - handle = add_rule_fte(fte, fg, dest, dest_num, false); + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); if (IS_ERR(handle)) { - unlock_ref_node(&fte->node); - destroy_fte(fte, fg); - kfree(fte); + fte->action = old_action; return handle; } + trace_mlx5_fs_set_fte(fte, false); - tree_add_node(&fte->node, &fg->node); - /* fte list isn't sorted */ - list_add_tail(&fte->node.list, &fg->node.children); - trace_mlx5_fs_set_fte(fte, true); -add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } } -unlock_fte: - unlock_ref_node(&fte->node); return handle; } @@ -1441,93 +1518,197 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } -static struct mlx5_flow_handle * -try_add_to_existing_fg(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest, - int dest_num) -{ +struct match_list { + struct list_head list; struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); +}; + +struct match_list_head { + struct list_head list; + struct match_list first; +}; + +static void free_match_list(struct match_list_head *head) +{ + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); + tree_put_node(&head->first.g->node); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node); + list_del(&iter->list); + kfree(iter); + } + } +} + +static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ struct rhlist_head *tmp, *list; - struct match_list { - struct list_head list; - struct mlx5_flow_group *g; - } match_list, *iter; - LIST_HEAD(match_head); + struct mlx5_flow_group *g; + int err = 0; rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); /* Collect all fgs which has a matching match_criteria */ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head))) { - match_list.g = g; - list_add_tail(&match_list.list, &match_head); + if (likely(list_empty(&match_head->list))) { + if (!tree_get_node(&g->node)) + continue; + match_head->first.g = g; + list_add_tail(&match_head->first.list, + &match_head->list); continue; } - curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - rcu_read_unlock(); - rule = ERR_PTR(-ENOMEM); - goto free_list; + free_match_list(match_head); + err = -ENOMEM; + goto out; + } + if (!tree_get_node(&g->node)) { + kfree(curr_match); + continue; } curr_match->g = g; - list_add_tail(&curr_match->list, &match_head); + list_add_tail(&curr_match->list, &match_head->list); } +out: rcu_read_unlock(); + return err; +} + +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + int ft_version) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list *iter; + bool take_write = false; + struct fs_fte *fte; + u64 version; + int err; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); + + list_for_each_entry(iter, match_head, list) { + nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); + ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL); + } + +search_again_locked: + version = matched_fgs_get_version(match_head); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head, list) { - struct fs_fte *fte; + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); - fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, - rhash_fte); - if (fte) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); - unlock_ref_node(&g->node); - goto free_list; + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) + continue; + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + } else { + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); } - unlock_ref_node(&g->node); + + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte_tmp); + up_write_ref_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node); + kmem_cache_free(steering->ftes_cache, fte); + return rule; } /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head, list) { - g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, NULL); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { - unlock_ref_node(&g->node); - goto free_list; - } - unlock_ref_node(&g->node); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + list_for_each_entry(iter, match_head, list) + nested_down_write_ref_node(&iter->g->node, + FS_LOCK_PARENT); + take_write = true; } -free_list: - if (!list_empty(&match_head)) { - struct match_list *match_tmp; + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } - /* The most common case is having one FG. Since we want to - * optimize this case, we save the first on the stack. - * Therefore, no need to free it. - */ - list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); - list_for_each_entry_safe(iter, match_tmp, &match_head, list) { - list_del(&iter->list); - kfree(iter); + /* Check the fgs version, for case the new FTE with the + * same values was added while the fgs weren't locked + */ + if (version != matched_fgs_get_version(match_head)) + goto search_again_locked; + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + if (!g->node.active) + continue; + err = insert_fte(g, fte); + if (err) { + if (err == -ENOSPC) + continue; + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + kmem_cache_free(steering->ftes_cache, fte); + return ERR_PTR(err); } - } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1539,8 +1720,14 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + struct match_list_head match_head; + bool take_write = false; + struct fs_fte *fte; + int version; + int err; int i; if (!check_valid_spec(spec)) @@ -1550,33 +1737,73 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); - nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule)) - goto unlock; + if (!take_write) + up_read_ref_node(&ft->node); - g = create_autogroup(ft, spec->match_criteria_enable, - spec->match_criteria); + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + return rule; + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; + + g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; - goto unlock; + up_write_ref_node(&ft->node); + return rule; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, NULL); - if (IS_ERR(rule)) { - /* Remove assumes refcount > 0 and autogroup creates a group - * with a refcount = 0. - */ - unlock_ref_node(&ft->node); - tree_get_node(&g->node); - tree_remove_node(&g->node); - return rule; + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node); + + err = create_auto_flow_group(ft, g); + if (err) + goto err_release_fg; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + goto err_release_fg; } -unlock: - unlock_ref_node(&ft->node); + + err = insert_fte(g, fte); + if (err) { + kmem_cache_free(steering->ftes_cache, fte); + goto err_release_fg; + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + tree_put_node(&g->node); return rule; + +err_release_fg: + up_write_ref_node(&g->node); + tree_put_node(&g->node); + return ERR_PTR(err); } static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) @@ -1661,23 +1888,43 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) static int update_root_ft_destroy(struct mlx5_flow_table *ft) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; if (root->root_ft != ft) return 0; new_root_ft = find_next_ft(ft); - if (new_root_ft) { - int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, - root->underlay_qpn); - if (err) { - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); - return err; + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn, + false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, + qpn, false); + if (err) + break; } } - root->root_ft = new_root_ft; + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + return 0; } @@ -1817,7 +2064,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, 1, NULL); + tree_init_node(&fs_prio->node, NULL, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1843,7 +2090,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -1965,10 +2212,12 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering root_ns->dev = steering->dev; root_ns->table_type = table_type; + INIT_LIST_HEAD(&root_ns->underlay_qpns); + ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, NULL); return root_ns; @@ -2066,8 +2315,10 @@ static void clean_tree(struct fs_node *node) struct fs_node *iter; struct fs_node *temp; + tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); + tree_put_node(node); tree_remove_node(node); } } @@ -2091,6 +2342,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); kfree(steering); } @@ -2196,6 +2449,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && @@ -2245,17 +2508,76 @@ err: int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, &root->underlay_qpns); + + mutex_unlock(&root->chain_lock); - root->underlay_qpn = underlay_qpn; return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; } EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); - root->underlay_qpn = 0; return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5509a752f98e..80f6f3c714c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -52,6 +52,7 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, FS_FT_SNIFFER_RX = 0X5, FS_FT_SNIFFER_TX = 0X6, + FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, }; enum fs_flow_table_op_mod { @@ -65,6 +66,8 @@ enum fs_fte_status { struct mlx5_flow_steering { struct mlx5_core_dev *dev; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; @@ -80,9 +83,12 @@ struct fs_node { struct fs_node *parent; struct fs_node *root; /* lock the node for writing and traversing */ - struct mutex lock; + struct rw_semaphore lock; atomic_t refcount; - void (*remove_func)(struct fs_node *); + bool active; + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; }; struct mlx5_flow_rule { @@ -119,7 +125,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - struct ida fte_allocator; struct rhltable fgs_hash; }; @@ -146,6 +151,11 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + #define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. @@ -199,6 +209,7 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; + struct ida fte_allocator; u32 id; struct rhashtable ftes_hash; struct rhlist_head hash; @@ -211,7 +222,7 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_table *root_ft; /* Should be held when chaining flow tables */ struct mutex chain_lock; - u32 underlay_qpn; + struct list_head underlay_qpns; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); @@ -260,4 +271,14 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_dst(pos, fte) \ fs_list_for_each_entry(pos, &(fte)->node.children) +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\ + ) + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 43c126c63955..6f338a9219c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -250,3 +250,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 85298051a3e4..abf270d7f556 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -40,8 +40,6 @@ static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); -static int mlx5i_dev_init(struct net_device *dev); -static void mlx5i_dev_cleanup(struct net_device *dev); static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); @@ -70,10 +68,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -static void mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); @@ -108,11 +106,69 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv) /* Do nothing .. */ } +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_qp *qp = &ipriv->qp; + struct mlx5_qp_context *context; + int ret; + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index); + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto err_qp_modify_to_err; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + kfree(context); + return 0; + +err_qp_modify_to_err: + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp); + kfree(context); + return ret; +} + +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_qp_context context; + int err; + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, + &ipriv->qp); + if (err) + mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err); +} + #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 -static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { - struct mlx5_qp_context *context = NULL; u32 *in = NULL; void *addr_path; int ret = 0; @@ -140,43 +196,12 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core goto out; } - /* QP states */ - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = -ENOMEM; - goto out; - } - - context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); - context->pri_path.port = 1; - context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); - goto out; - } - memset(context, 0, sizeof(*context)); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); - goto out; - } - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); - goto out; - } - out: - kfree(context); kvfree(in); return ret; } -static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { mlx5_core_destroy_qp(mdev, qp); } @@ -195,10 +220,14 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); - return err; + goto err_destroy_underlay_qp; } return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); + return err; } static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) @@ -249,7 +278,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; int err; err = mlx5e_create_indirect_rqt(priv); @@ -268,18 +296,12 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_indirect_tirs; - err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); - if (err) - goto err_destroy_direct_tirs; - err = mlx5i_create_flow_steering(priv); if (err) - goto err_remove_rx_underlay_qpn; + goto err_destroy_direct_tirs; return 0; -err_remove_rx_underlay_qpn: - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: @@ -293,9 +315,6 @@ err_destroy_indirect_rqts: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; - - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); @@ -351,7 +370,7 @@ out: return err; } -static int mlx5i_dev_init(struct net_device *dev) +int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv = priv->ppriv; @@ -361,6 +380,9 @@ static int mlx5i_dev_init(struct net_device *dev) dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn); + return 0; } @@ -378,63 +400,84 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } -static void mlx5i_dev_cleanup(struct net_device *dev) +void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5i_priv *ipriv = priv->ppriv; - struct mlx5_qp_context context; + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_uninit_underlay_qp(priv); - /* detach qp from flow-steering by reset it */ - mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp); + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn); } static int mlx5i_open(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; int err; - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - set_bit(MLX5E_STATE_OPENED, &priv->state); + set_bit(MLX5E_STATE_OPENED, &epriv->state); - err = mlx5e_open_channels(priv, &priv->channels); - if (err) + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); goto err_clear_state_opened_flag; + } - mlx5e_refresh_tirs(priv, false); - mlx5e_activate_priv_channels(priv); - mlx5e_timestamp_init(priv); + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } - mutex_unlock(&priv->state_lock); + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; + + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mlx5e_timestamp_set(epriv); + + mutex_unlock(&epriv->state_lock); return 0; +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); err_clear_state_opened_flag: - clear_bit(MLX5E_STATE_OPENED, &priv->state); - mutex_unlock(&priv->state_lock); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); return err; } static int mlx5i_close(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. */ - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) goto unlock; - clear_bit(MLX5E_STATE_OPENED, &priv->state); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); - mlx5e_timestamp_cleanup(priv); - netif_carrier_off(priv->netdev); - mlx5e_deactivate_priv_channels(priv); - mlx5e_close_channels(&priv->channels); + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(epriv); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels);; unlock: - mutex_unlock(&priv->state_lock); + mutex_unlock(&epriv->state_lock); return 0; } @@ -492,6 +535,13 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); } +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) { if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) @@ -510,12 +560,13 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, const char *name, void (*setup)(struct net_device *)) { - const struct mlx5e_profile *profile = &mlx5i_nic_profile; - int nch = profile->max_nch(mdev); + const struct mlx5e_profile *profile; struct net_device *netdev; struct mlx5i_priv *ipriv; struct mlx5e_priv *epriv; struct rdma_netdev *rn; + bool sub_interface; + int nch; int err; if (mlx5i_check_required_hca_cap(mdev)) { @@ -523,10 +574,15 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, return ERR_PTR(-EOPNOTSUPP); } - /* This function should only be called once per mdev */ - err = mlx5e_create_mdev_resources(mdev); - if (err) - return NULL; + /* TODO: Need to find a better way to check if child device*/ + sub_interface = (mdev->mlx5e_res.pdn != 0); + + if (sub_interface) + profile = mlx5i_pkey_get_profile(); + else + profile = &mlx5i_nic_profile; + + nch = profile->max_nch(mdev); netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), name, NET_NAME_UNKNOWN, @@ -535,7 +591,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, nch); if (!netdev) { mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); - goto free_mdev_resources; + return NULL; } ipriv = netdev_priv(netdev); @@ -545,6 +601,20 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, if (!epriv->wq) goto err_free_netdev; + ipriv->sub_interface = sub_interface; + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + goto destroy_wq; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; + } + profile->init(mdev, netdev, profile, ipriv); mlx5e_attach_netdev(epriv); @@ -556,13 +626,16 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, rn->send = mlx5i_xmit; rn->attach_mcast = mlx5i_attach_mcast; rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; return netdev; +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); +destroy_wq: + destroy_workqueue(epriv->wq); err_free_netdev: free_netdev(netdev); -free_mdev_resources: - mlx5e_destroy_mdev_resources(mdev); return NULL; } @@ -570,14 +643,18 @@ EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); void mlx5_rdma_netdev_free(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); - free_netdev(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } + free_netdev(netdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index a0f405f520f7..49008022c306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -39,6 +39,7 @@ #define MLX5I_MAX_NUM_TC 1 extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 @@ -49,10 +50,45 @@ extern const struct ethtool_ops mlx5i_ethtool_ops; struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ struct mlx5_core_qp qp; + bool sub_interface; u32 qkey; + u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; char *mlx5e_priv[0]; }; +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); + +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + +/* Shared ndo functionts */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); + +/* Parent profile functions */ +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + /* Extract mlx5e_priv from IPoIB netdev */ #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 000000000000..531b02cc979b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} + +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + /* Get QPN to netdevice hash table from parent */ + parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); + parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed to get parent device\n"); + return -EINVAL; + } + + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + dev_put(parent_dev); + + return mlx5i_dev_init(dev); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mlx5i_init(mdev, netdev, profile, ppriv); + + /* Override parent ndo */ + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; + + /* Set child limited ethtool support */ + netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; + + /* Use dummy rqs */ + priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. */ +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_stats = NULL, + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c new file mode 100644 index 000000000000..fa8aed62b231 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/clocksource.h> +#include "en.h" + +enum { + MLX5_CYCLES_SHIFT = 23 +}; + +enum { + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + +enum { + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + +static u64 read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + + return mlx5_read_internal_timer(mdev) & cc->mask; +} + +static void mlx5_pps_out(struct work_struct *work) +{ + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + u64 tstart; + + write_lock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_unlock_irqrestore(&clock->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); + } +} + +static void mlx5_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, + overflow_work); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + schedule_delayed_work(&clock->overflow_work, clock->overflow_period); +} + +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_init(&clock->tc, &clock->cycles, ns); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns; + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + ns = timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_adjtime(&clock->tc, delta); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + u64 adj; + u32 diff; + unsigned long flags; + int neg_adj = 0; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + adj = clock->nominal_c_mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->extts.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + pin_mode = MLX5_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; + } else { + pin = rq->extts.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u64 nsec_now, nsec_delta, time_stamp = 0; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + s64 ns; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->perout.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(mdev); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + write_unlock_irqrestore(&clock->lock, flags); + time_stamp = cycles_now + cycles_delta; + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE | + MLX5_MTPPS_FS_TIME_STAMP; + } else { + pin = rq->perout.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + + clock->pps_info.enabled = !!on; + return 0; +} + +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; +} + +static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_p2p", + .max_adj = 100000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx5_ptp_adjfreq, + .adjtime = mlx5_ptp_adjtime, + .gettime64 = mlx5_ptp_gettime, + .settime64 = mlx5_ptp_settime, + .enable = NULL, + .verify = NULL, +}; + +static int mlx5_init_pin_config(struct mlx5_clock *clock) +{ + int i; + + clock->ptp_info.pin_config = + kzalloc(sizeof(*clock->ptp_info.pin_config) * + clock->ptp_info.n_pins, GFP_KERNEL); + if (!clock->ptp_info.pin_config) + return -ENOMEM; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = PTP_PF_NONE; + clock->ptp_info.pin_config[i].chan = i; + } + + return 0; +} + +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(mdev, out, sizeof(out)); + + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +void mlx5_pps_event(struct mlx5_core_dev *mdev, + struct mlx5_eqe *eqe) +{ + struct mlx5_clock *clock = &mdev->clock; + struct ptp_clock_event ptp_event; + struct timespec64 ts; + u64 nsec_now, nsec_delta; + u64 cycles_now, cycles_delta; + int pin = eqe->data.pps.pin; + s64 ns; + unsigned long flags; + + switch (clock->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp); + } else { + ptp_event.type = PTP_CLOCK_EXTTS; + } + ptp_clock_event(clock->ptp, &ptp_event); + break; + case PTP_PF_PEROUT: + mlx5_ptp_gettime(&clock->ptp_info, &ts); + cycles_now = mlx5_read_internal_timer(mdev); + ts.tv_sec += 1; + ts.tv_nsec = 0; + ns = timespec64_to_ns(&ts); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + clock->pps_info.start[pin] = cycles_now + cycles_delta; + schedule_work(&clock->pps_info.out_work); + write_unlock_irqrestore(&clock->lock, flags); + break; + default: + mlx5_core_err(mdev, " Unhandled event\n"); + } +} + +void mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u64 ns; + u64 frac = 0; + u32 dev_freq; + + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); + if (!dev_freq) { + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + rwlock_init(&clock->lock); + clock->cycles.read = read_internal_timer; + clock->cycles.shift = MLX5_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(dev_freq, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(41); + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least once every wrap around. + */ + ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask, + frac, &frac); + do_div(ns, NSEC_PER_SEC / 2 / HZ); + clock->overflow_period = ns; + + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); + if (clock->overflow_period) + schedule_delayed_work(&clock->overflow_work, 0); + else + mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n"); + + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + /* Initialize 1PPS data structures */ + if (MLX5_PPS_CAP(mdev)) + mlx5_get_pps_caps(mdev); + if (clock->ptp_info.n_pins) + mlx5_init_pin_config(clock); + + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; + } +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; + } + + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock->ptp_info.pin_config); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 index 000000000000..a8eecedd46c2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 nsec; + + read_lock(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + read_unlock(&clock->lock); + + return ns_to_ktime(nsec); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0d2c8dcd6eae..ecbe9fad22d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -59,6 +59,7 @@ #include "lib/mlx5.h" #include "fpga/core.h" #include "accel/ipsec.h" +#include "lib/clock.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -889,6 +890,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_reserved_gids(dev); + mlx5_init_clock(dev); + err = mlx5_init_rl_table(dev); if (err) { dev_err(&pdev->dev, "Failed to init rate limiting\n"); @@ -949,6 +952,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b7c2900b75f9..8f00de2fe283 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -93,6 +93,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, struct mlx5_pagefault *pfault); +void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6c48e9959b65..2a8b529ce6dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -109,7 +109,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", vf, err); - continue; + continue; } } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 4b88158173f3..4816504419fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -17,7 +17,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o spectrum_acl_flex_actions.o + spectrum_ipip.o spectrum_acl_flex_actions.o \ + spectrum_mr.o spectrum_mr_tcam.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index ab3ffe7a8eda..6a979a09ab72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_continue); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_jump); @@ -674,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action { MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -727,6 +730,22 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 501819c790d6..a8d3314c3a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -57,10 +57,12 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 17eba19100de..d44e673a4c4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4549,6 +4549,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_ratr_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8); + +/* reg_ratr_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, @@ -4576,6 +4597,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); } +static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, + bool counter_enable) +{ + enum mlxsw_reg_flow_counter_set_type set_type; + + if (counter_enable) + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES; + else + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT; + + mlxsw_reg_ratr_counter_index_set(payload, counter_index); + mlxsw_reg_ratr_counter_set_type_set(payload, set_type); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -5297,15 +5332,6 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); -enum mlxsw_reg_flow_counter_set_type { - /* No count */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e9b94430afed..e1e11c726c16 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -53,6 +53,7 @@ #include <linux/notifier.h> #include <linux/dcbnl.h> #include <linux/inetdevice.h> +#include <linux/netlink.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> @@ -3312,6 +3313,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) +{ + skb->offload_mr_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -3355,6 +3364,10 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) +#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + #define MLXSW_SP_EVENTL(_func, _trap_id) \ MLXSW_EVENTL(_func, _trap_id, SP_EVENT) @@ -3425,6 +3438,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3653,6 +3667,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3722,6 +3739,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + /* Initialize netdevice notifier after router is initialized, so that + * the event handler can use router structures. + */ + mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; + err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); + goto err_netdev_notifier; + } + err = mlxsw_sp_span_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); @@ -3755,6 +3782,8 @@ err_dpipe_init: err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); +err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_afa_fini(mlxsw_sp); @@ -3781,6 +3810,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_dpipe_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -4006,14 +4036,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, - struct netdev_lag_upper_info *lag_upper_info) + struct netdev_lag_upper_info *lag_upper_info, + struct netlink_ext_ack *extack) { u16 lag_id; - if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { + NL_SET_ERR_MSG(extack, + "spectrum: Exceeded number of supported LAG devices"); return false; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + } + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG(extack, + "spectrum: LAG device using unsupported Tx type"); return false; + } return true; } @@ -4218,6 +4255,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; + struct netlink_ext_ack *extack; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err = 0; @@ -4225,6 +4263,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; + extack = netdev_notifier_info_to_extack(&info->info); switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4232,25 +4271,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && - !netif_is_ovs_master(upper_dev)) + !netif_is_ovs_master(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Unknown upper device type"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, - info->upper_info)) + info->upper_info, extack)) return -EINVAL; - if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is a LAG master and this device has a VLAN"); return -EINVAL; + } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && - !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on a LAG port"); return -EINVAL; - if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + } + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is an OVS master and this device has a VLAN"); return -EINVAL; - if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + } + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on an OVS port"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4258,7 +4315,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lower_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lower_dev, @@ -4349,18 +4407,25 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; struct net_device *upper_dev; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!netif_is_bridge_master(upper_dev)) + if (!netif_is_bridge_master(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4368,7 +4433,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, vlan_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, vlan_dev, @@ -4431,13 +4497,17 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp *mlxsw_sp; int err = 0; - if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr); + else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); @@ -4451,10 +4521,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { - .notifier_call = mlxsw_sp_netdevice_event, -}; - static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_event, .priority = 10, /* Must be called before FIB notifier block */ @@ -4482,7 +4548,6 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); @@ -4503,7 +4568,6 @@ err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; } @@ -4514,7 +4578,6 @@ static void __exit mlxsw_sp_module_exit(void) unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e907ec446a73..28feb745a38a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -139,6 +139,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_sb; struct mlxsw_sp_bridge; struct mlxsw_sp_router; +struct mlxsw_sp_mr; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; @@ -153,12 +154,14 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_sp_mr *mr; struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; + struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -324,7 +327,8 @@ void mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev); + struct net_device *br_dev, + struct netlink_ext_ack *extack); void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); @@ -391,6 +395,13 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +int +mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_changeupper_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); @@ -468,9 +479,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, enum mlxsw_afk_element element, const char *key_value, const char *mask_value, unsigned int len); -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id); +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index eede75fbd585..93dcd315f7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -378,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, key_value, mask_value, len); } -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) { - mlxsw_afa_block_continue(rulei->act_block); + return mlxsw_afa_block_continue(rulei->act_block); } -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id) +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) { - mlxsw_afa_block_jump(rulei->act_block, group_id); + return mlxsw_afa_block_jump(rulei->act_block, group_id); } int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 50b40de1fb91..7e8284b46968 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, goto err_rulei_create; } - mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue; + err = mlxsw_sp_acl_rulei_commit(rulei); if (err) goto err_rulei_commit; @@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, err_rule_insert: err_rulei_commit: +err_rulei_act_continue: mlxsw_sp_acl_rulei_destroy(rulei); err_rulei_create: parman_item_remove(region->parman, parman_prio, parman_item); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..a056f23d3a0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -43,21 +43,36 @@ enum mlxsw_sp_field_metadata_id { MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { - { .name = "erif_port", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, - .bitwidth = 32, - .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + { + .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, }, - { .name = "l3_forward", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, - .bitwidth = 1, + { + .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, }, - { .name = "l3_drop", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, - .bitwidth = 1, + { + .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, + }, + { + .name = "adj_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + .bitwidth = 32, + }, + { + .name = "adj_hash_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, + .bitwidth = 32, }, }; @@ -826,6 +841,359 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST6); } +static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + return devlink_dpipe_match_put(skb, &match); +} + +static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_action_put(skb, &action); +} + +static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop *nh; + u64 size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) + if (mlxsw_sp_nexthop_offload(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) + size++; + return size; +} + +enum mlxsw_sp_dpipe_table_adj_match { + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, +}; + +enum mlxsw_sp_dpipe_table_adj_action { + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *actions) +{ + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int +mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_values, + struct devlink_dpipe_action *actions) +{ struct devlink_dpipe_value *action_value; + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT; + + entry->action_values = action_values; + entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, + u32 adj_index, u32 adj_hash_index, + unsigned char *ha, + struct mlxsw_sp_rif *rif) +{ + struct devlink_dpipe_value *value; + u32 *p_rif_value; + u32 *p_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + p_index = value->value; + *p_index = adj_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + p_index = value->value; + *p_index = adj_hash_index; + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + ether_addr_copy(value->value, ha); + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + p_rif_value = value->value; + *p_rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; +} + +static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct devlink_dpipe_entry *entry) +{ + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + unsigned char *ha = mlxsw_sp_nexthop_ha(nh); + u32 adj_hash_index = 0; + u32 adj_index = 0; + int err; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, + adj_hash_index, ha, rif); + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp_nexthop *nh; + int entry_index = 0; + int nh_count_max; + int nh_count = 0; + int nh_skip; + int j; + int err; + + rtnl_lock(); + nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + nh_skip = nh_count; + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + if (nh_count < nh_skip) + goto skip; + + mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry); + entry->index = entry_index; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + entry_index++; + j++; +skip: + nh_count++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (nh_count != nh_count_max) + goto start_again; + rtnl_unlock(); + + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(match_values[0])); + memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(actions[0])); + memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(action_values[0])); + + mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions); + err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry, + match_values, matches, + action_values, actions); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_nexthop *nh; + u32 adj_hash_index = 0; + u32 adj_index = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + if (enable) + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_update(mlxsw_sp, + adj_index + adj_hash_index, nh); + } + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_adj_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + u64 size; + + rtnl_lock(); + size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); + rtnl_unlock(); + + return size; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { + .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update, + .size_get = mlxsw_sp_dpipe_table_adj_size_get, +}; + +static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + &mlxsw_sp_dpipe_table_adj_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -846,8 +1214,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); if (err) goto err_host6_table_init; - return 0; + err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp); + if (err) + goto err_adj_table_init; + + return 0; +err_adj_table_init: + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); err_host6_table_init: mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); err_host4_table_init: @@ -861,6 +1235,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index 283fde4e6783..815d543cf114 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" +#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d..2f0e57857ea4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (err) + return err; + } else if (is_tcf_gact_shot(a)) { err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) return err; @@ -84,7 +88,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(ruleset); group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); - mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + if (err) + return err; } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 1c2db831d83b..6fb49129ce87 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -47,7 +47,6 @@ struct mlxsw_sp_ipip_entry { enum mlxsw_sp_ipip_type ipipt; struct net_device *ol_dev; /* Overlay. */ struct mlxsw_sp_rif_ipip_lb *ol_lb; - unsigned int ref_count; /* Number of next hops using the tunnel. */ struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c new file mode 100644 index 000000000000..1f84bb8e9135 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -0,0 +1,1011 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/rhashtable.h> + +#include "spectrum_mr.h" +#include "spectrum_router.h" + +struct mlxsw_sp_mr { + const struct mlxsw_sp_mr_ops *mr_ops; + void *catchall_route_priv; + struct delayed_work stats_update_dw; + struct list_head table_list; +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_vif { + struct net_device *dev; + const struct mlxsw_sp_rif *rif; + unsigned long vif_flags; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as one of the egress VIFs + */ + struct list_head route_evif_list; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as an ingress VIF + */ + struct list_head route_ivif_list; +}; + +struct mlxsw_sp_mr_route_vif_entry { + struct list_head vif_node; + struct list_head route_node; + struct mlxsw_sp_mr_vif *mr_vif; + struct mlxsw_sp_mr_route *mr_route; +}; + +struct mlxsw_sp_mr_table { + struct list_head node; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp *mlxsw_sp; + u32 vr_id; + struct mlxsw_sp_mr_vif vifs[MAXVIFS]; + struct list_head route_list; + struct rhashtable route_ht; + char catchall_route_priv[0]; + /* catchall_route_priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_route { + struct list_head node; + struct rhash_head ht_node; + struct mlxsw_sp_mr_route_key key; + enum mlxsw_sp_mr_route_action route_action; + u16 min_mtu; + struct mfc_cache *mfc4; + void *route_priv; + const struct mlxsw_sp_mr_table *mr_table; + /* A list of route_vif_entry structs that point to the egress VIFs */ + struct list_head evif_list; + /* A route_vif_entry struct that point to the ingress VIF */ + struct mlxsw_sp_mr_route_vif_entry ivif; +}; + +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = { + .key_len = sizeof(struct mlxsw_sp_mr_route_key), + .key_offset = offsetof(struct mlxsw_sp_mr_route, key), + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node), + .automatic_shrinking = true, +}; + +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER)); +} + +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; +} + +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) +{ + return vif->dev; +} + +static bool +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) +{ + vifi_t ivif; + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ivif = mr_route->mfc4->mfc_parent; + return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static int +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + int valid_evifs; + + valid_evifs = 0; + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + valid_evifs++; + return valid_evifs; +} + +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_route->key.source_mask.addr4 == INADDR_ANY; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static enum mlxsw_sp_mr_route_action +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* If the ingress port is not regular and resolved, trap the route */ + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* The kernel does not match a (*,G) route that the ingress interface is + * not one of the egress interfaces, so trap these kind of routes. + */ + if (mlxsw_sp_mr_route_starg(mr_route) && + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If the route has no valid eVIFs, trap it. */ + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. + */ + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; +} + +static enum mlxsw_sp_mr_route_prio +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route) +{ + return mlxsw_sp_mr_route_starg(mr_route) ? + MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG; +} + +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + const struct mfc_cache *mfc) +{ + bool starg = (mfc->mfc_origin == INADDR_ANY); + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = mr_table->proto; + key->group.addr4 = mfc->mfc_mcastgrp; + key->group_mask.addr4 = 0xffffffff; + key->source.addr4 = mfc->mfc_origin; + key->source_mask.addr4 = starg ? 0 : 0xffffffff; +} + +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + rve = kzalloc(sizeof(*rve), GFP_KERNEL); + if (!rve) + return -ENOMEM; + rve->mr_route = mr_route; + rve->mr_vif = mr_vif; + list_add_tail(&rve->route_node, &mr_route->evif_list); + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list); + return 0; +} + +static void +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve) +{ + list_del(&rve->route_node); + list_del(&rve->vif_node); + kfree(rve); +} + +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + mr_route->ivif.mr_route = mr_route; + mr_route->ivif.mr_vif = mr_vif; + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list); +} + +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route) +{ + list_del(&mr_route->ivif.vif_node); +} + +static int +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + u16 *erif_indices; + u16 irif_index; + u16 erif = 0; + + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices), + GFP_KERNEL); + if (!erif_indices) + return -ENOMEM; + + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + + erif_indices[erif++] = rifi; + } + } + + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif); + else + irif_index = 0; + + route_info->irif_index = irif_index; + route_info->erif_indices = erif_indices; + route_info->min_mtu = mr_route->min_mtu; + route_info->route_action = mr_route->route_action; + route_info->erif_num = erif; + return 0; +} + +static void +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info) +{ + kfree(route_info->erif_indices); +} + +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + bool replace) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_info route_info; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + int err; + + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info); + if (err) + return err; + + if (!replace) { + struct mlxsw_sp_mr_route_params route_params; + + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_route->route_priv) { + err = -ENOMEM; + goto out; + } + + route_params.key = mr_route->key; + route_params.value = route_info; + route_params.prio = mlxsw_sp_mr_route_prio(mr_route); + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_route->route_priv, + &route_params); + if (err) + kfree(mr_route->route_priv); + } else { + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv, + &route_info); + } +out: + mlxsw_sp_mr_route_info_destroy(&route_info); + return err; +} + +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv); + kfree(mr_route->route_priv); +} + +static struct mlxsw_sp_mr_route * +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + struct mlxsw_sp_mr_route *mr_route; + int err = 0; + int i; + + /* Allocate and init a new route and fill it with parameters */ + mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL); + if (!mr_route) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&mr_route->evif_list); + mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc); + + /* Find min_mtu and link iVIF and eVIFs */ + mr_route->min_mtu = ETH_MAX_MTU; + ipmr_cache_hold(mfc); + mr_route->mfc4 = mfc; + mr_route->mr_table = mr_table; + for (i = 0; i < MAXVIFS; i++) { + if (mfc->mfc_un.res.ttls[i] != 255) { + err = mlxsw_sp_mr_route_evif_link(mr_route, + &mr_table->vifs[i]); + if (err) + goto err; + if (mr_table->vifs[i].dev && + mr_table->vifs[i].dev->mtu < mr_route->min_mtu) + mr_route->min_mtu = mr_table->vifs[i].dev->mtu; + } + } + mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); + + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); + return mr_route; +err: + ipmr_cache_put(mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); + return ERR_PTR(err); +} + +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + + mlxsw_sp_mr_route_ivif_unlink(mr_route); + ipmr_cache_put(mr_route->mfc4); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); +} + +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, + bool offload) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (offload) + mr_route->mfc4->mfc_flags |= MFC_OFFLOAD; + else + mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) +{ + bool offload; + + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_set(mr_route, offload); +} + +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + mlxsw_sp_mr_mfc_offload_set(mr_route, false); + mlxsw_sp_mr_route_erase(mr_table, mr_route); + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_route->node); + mlxsw_sp_mr_route_destroy(mr_table, mr_route); +} + +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace) +{ + struct mlxsw_sp_mr_route *mr_orig_route = NULL; + struct mlxsw_sp_mr_route *mr_route; + int err; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. + */ + if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return -EINVAL; + } + + /* Create a new route */ + mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc); + if (IS_ERR(mr_route)) + return PTR_ERR(mr_route); + + /* Find any route with a matching key */ + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht, + &mr_route->key, + mlxsw_sp_mr_route_ht_params); + if (replace) { + /* On replace case, make the route point to the new route_priv. + */ + if (WARN_ON(!mr_orig_route)) { + err = -ENOENT; + goto err_no_orig_route; + } + mr_route->route_priv = mr_orig_route->route_priv; + } else if (mr_orig_route) { + /* On non replace case, if another route with the same key was + * found, abort, as duplicate routes are used for proxy routes. + */ + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + err = -EINVAL; + goto err_duplicate_route; + } + + /* Put it in the table data-structures */ + list_add_tail(&mr_route->node, &mr_table->route_list); + err = rhashtable_insert_fast(&mr_table->route_ht, + &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + + /* Destroy the original route */ + if (replace) { + rhashtable_remove_fast(&mr_table->route_ht, + &mr_orig_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_orig_route->node); + mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route); + } + + mlxsw_sp_mr_mfc_offload_update(mr_route); + return 0; + +err_mr_route_write: + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); +err_rhashtable_insert: + list_del(&mr_route->node); +err_no_orig_route: +err_duplicate_route: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + return err; +} + +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route *mr_route; + struct mlxsw_sp_mr_route_key key; + + mlxsw_sp_mr_route4_key(mr_table, &key, mfc); + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, + mlxsw_sp_mr_route_ht_params); + if (mr_route) + __mlxsw_sp_mr_route_del(mr_table, mr_route); +} + +/* Should be called after the VIF struct is updated */ +static int +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 irif_index; + int err; + + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return 0; + + /* rve->mr_vif->rif is guaranteed to be valid at this stage */ + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv, + irif_index); + if (err) + return err; + + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + /* No need to rollback here because the iRIF change only takes + * place after the action has been updated. + */ + return err; + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; +} + +static void +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv, + MLXSW_SP_MR_ROUTE_ACTION_TRAP); + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +/* Should be called after the RIF struct is updated */ +static int +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 erif_index = 0; + int err; + + /* Update the route action, as the new eVIF can be a tunnel or a pimreg + * device which will require updating the action. + */ + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) { + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + return err; + } + + /* Add the eRIF */ + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_erif_add(mlxsw_sp, + rve->mr_route->route_priv, + erif_index); + if (err) + goto err_route_erif_add; + } + + /* Update the minimum MTU */ + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu; + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->min_mtu); + if (err) + goto err_route_min_mtu_update; + } + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; + +err_route_min_mtu_update: + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, + erif_index); +err_route_erif_add: + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->route_action); + return err; +} + +/* Should be called before the RIF struct is updated */ +static void +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 rifi; + + /* If the unresolved RIF was not valid, no need to delete it */ + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif)) + return; + + /* Update the route action: if there is only one valid eVIF in the + * route, set the action to trap as the VIF deletion will lead to zero + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to + * determine the route action. + */ + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1) + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + else + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + + /* Delete the erif from the route */ + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi); + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_route_vif_entry *irve, *erve; + int err; + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = rif; + mr_vif->vif_flags = vif_flags; + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) { + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve); + if (err) + goto err_irif_unresolve; + } + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) { + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve); + if (err) + goto err_erif_unresolve; + } + return 0; + +err_erif_unresolve: + list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list, + vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve); +err_irif_unresolve: + list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list, + vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve); + mr_vif->rif = NULL; + return err; +} + +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve); + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve); + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = NULL; +} + +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return -EINVAL; + if (mr_vif->dev) + return -EEXIST; + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif); +} + +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return; + if (WARN_ON(!mr_vif->dev)) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); +} + +struct mlxsw_sp_mr_vif * +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, + const struct net_device *dev) +{ + vifi_t vif_index; + + for (vif_index = 0; vif_index < MAXVIFS; vif_index++) + if (mr_table->vifs[vif_index].dev == dev) + return &mr_table->vifs[vif_index]; + return NULL; +} + +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return 0; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return 0; + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, + mr_vif->vif_flags, rif); +} + +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); +} + +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_vif_entry *rve; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + /* Search for a VIF that use that RIF */ + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + + /* Update all the routes that uses that VIF as eVIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) { + if (mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = mtu; + mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + mtu); + } + } +} + +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 vr_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_mr_route_params catchall_route_params = { + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + .key = { + .vrid = vr_id, + }, + .value = { + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP, + } + }; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_table *mr_table; + int err; + int i; + + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_table) + return ERR_PTR(-ENOMEM); + + mr_table->vr_id = vr_id; + mr_table->mlxsw_sp = mlxsw_sp; + mr_table->proto = proto; + INIT_LIST_HEAD(&mr_table->route_list); + + err = rhashtable_init(&mr_table->route_ht, + &mlxsw_sp_mr_route_ht_params); + if (err) + goto err_route_rhashtable_init; + + for (i = 0; i < MAXVIFS; i++) { + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list); + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list); + } + + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_table->catchall_route_priv, + &catchall_route_params); + if (err) + goto err_ops_route_create; + list_add_tail(&mr_table->node, &mr->table_list); + return mr_table; + +err_ops_route_create: + rhashtable_destroy(&mr_table->route_ht); +err_route_rhashtable_init: + kfree(mr_table); + return ERR_PTR(err); +} + +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + list_del(&mr_table->node); + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, + &mr_table->catchall_route_priv); + rhashtable_destroy(&mr_table->route_ht); + kfree(mr_table); +} + +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp_mr_route *mr_route, *tmp; + int i; + + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) + __mlxsw_sp_mr_route_del(mr_table, mr_route); + + for (i = 0; i < MAXVIFS; i++) { + mr_table->vifs[i].dev = NULL; + mr_table->vifs[i].rif = NULL; + } +} + +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table) +{ + int i; + + for (i = 0; i < MAXVIFS; i++) + if (mr_table->vifs[i].dev) + return false; + return list_empty(&mr_table->route_list); +} + +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u64 packets, bytes; + + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return; + + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, + &bytes); + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (mr_route->mfc4->mfc_un.res.pkt != packets) + mr_route->mfc4->mfc_un.res.lastuse = jiffies; + mr_route->mfc4->mfc_un.res.pkt = packets; + mr_route->mfc4->mfc_un.res.bytes = bytes; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_stats_update(struct work_struct *work) +{ + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr, + stats_update_dw.work); + struct mlxsw_sp_mr_table *mr_table; + struct mlxsw_sp_mr_route *mr_route; + unsigned long interval; + + rtnl_lock(); + list_for_each_entry(mr_table, &mr->table_list, node) + list_for_each_entry(mr_route, &mr_table->route_list, node) + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, + mr_route); + rtnl_unlock(); + + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); +} + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops) +{ + struct mlxsw_sp_mr *mr; + unsigned long interval; + int err; + + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL); + if (!mr) + return -ENOMEM; + mr->mr_ops = mr_ops; + mlxsw_sp->mr = mr; + INIT_LIST_HEAD(&mr->table_list); + + err = mr_ops->init(mlxsw_sp, mr->priv); + if (err) + goto err; + + /* Create the delayed work for counter updates */ + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update); + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); + return 0; +err: + kfree(mr); + return err; +} + +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + cancel_delayed_work_sync(&mr->stats_update_dw); + mr->mr_ops->fini(mr->priv); + kfree(mr); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h new file mode 100644 index 000000000000..5d26a122af49 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -0,0 +1,134 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_H +#define _MLXSW_SPECTRUM_MCROUTER_H + +#include <linux/mroute.h> +#include "spectrum_router.h" +#include "spectrum.h" + +enum mlxsw_sp_mr_route_action { + MLXSW_SP_MR_ROUTE_ACTION_FORWARD, + MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, +}; + +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key { + int vrid; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr group; + union mlxsw_sp_l3addr group_mask; + union mlxsw_sp_l3addr source; + union mlxsw_sp_l3addr source_mask; +}; + +struct mlxsw_sp_mr_route_info { + enum mlxsw_sp_mr_route_action route_action; + u16 irif_index; + u16 *erif_indices; + size_t erif_num; + u16 min_mtu; +}; + +struct mlxsw_sp_mr_route_params { + struct mlxsw_sp_mr_route_key key; + struct mlxsw_sp_mr_route_info value; + enum mlxsw_sp_mr_route_prio prio; +}; + +struct mlxsw_sp_mr_ops { + int priv_size; + int route_priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info); + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u64 *packets, u64 *bytes); + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + enum mlxsw_sp_mr_route_action route_action); + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 min_mtu); + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 irif_index); + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv); + void (*fini)(void *priv); +}; + +struct mlxsw_sp_mr; +struct mlxsw_sp_mr_table; + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops); +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace); +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc); +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index); +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu); +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto); +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table); +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table); +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c new file mode 100644 index 000000000000..39c21c70ac32 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -0,0 +1,838 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/parman.h> + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp_mr_tcam { + struct mlxsw_sp_mr_tcam_region ipv4_tcam_region; +}; + +/* This struct maps to one RIGR2 register entry */ +struct mlxsw_sp_mr_erif_sublist { + struct list_head list; + u32 rigr2_kvdl_index; + int num_erifs; + u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS]; + bool synced; +}; + +struct mlxsw_sp_mr_tcam_erif_list { + struct list_head erif_sublists; + u32 kvdl_index; +}; + +static bool +mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MC_ERIF_LIST_ENTRIES); + + return erif_sublist->num_erifs == erif_list_entries; +} + +static void +mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + INIT_LIST_HEAD(&erif_list->erif_sublists); +} + +#define MLXSW_SP_KVDL_RIGR2_SIZE 1 + +static struct mlxsw_sp_mr_erif_sublist * +mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + int err; + + erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); + if (!erif_sublist) + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE, + &erif_sublist->rigr2_kvdl_index); + if (err) { + kfree(erif_sublist); + return ERR_PTR(err); + } + + list_add_tail(&erif_sublist->list, &erif_list->erif_sublists); + return erif_sublist; +} + +static void +mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + list_del(&erif_sublist->list); + mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index); + kfree(erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + u16 erif_index) +{ + struct mlxsw_sp_mr_erif_sublist *sublist; + + /* If either there is no erif_entry or the last one is full, allocate a + * new one. + */ + if (list_empty(&erif_list->erif_sublists)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + erif_list->kvdl_index = sublist->rigr2_kvdl_index; + } else { + sublist = list_last_entry(&erif_list->erif_sublists, + struct mlxsw_sp_mr_erif_sublist, + list); + sublist->synced = false; + if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, + erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + } + } + + /* Add the eRIF to the last entry's last index */ + sublist->erif_indices[sublist->num_erifs++] = erif_index; + return 0; +} + +static void +mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp; + + list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists, + list) + mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *curr_sublist; + char rigr2_pl[MLXSW_REG_RIGR2_LEN]; + int err; + int i; + + list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) { + if (curr_sublist->synced) + continue; + + /* If the sublist is not the last one, pack the next index */ + if (list_is_last(&curr_sublist->list, + &erif_list->erif_sublists)) { + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + false, 0); + } else { + struct mlxsw_sp_mr_erif_sublist *next_sublist; + + next_sublist = list_next_entry(curr_sublist, list); + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + true, + next_sublist->rigr2_kvdl_index); + } + + /* Pack all the erifs */ + for (i = 0; i < curr_sublist->num_erifs; i++) { + u16 erif_index = curr_sublist->erif_indices[i]; + + mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true, + erif_index); + } + + /* Write the entry */ + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2), + rigr2_pl); + if (err) + /* No need of a rollback here because this + * hardware entry should not be pointed yet. + */ + return err; + curr_sublist->synced = true; + } + return 0; +} + +static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to, + struct mlxsw_sp_mr_tcam_erif_list *from) +{ + list_splice(&from->erif_sublists, &to->erif_sublists); + to->kvdl_index = from->kvdl_index; +} + +struct mlxsw_sp_mr_tcam_route { + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + u32 counter_index; + struct parman_item parman_item; + struct parman_prio *parman_prio; + enum mlxsw_sp_mr_route_action action; + struct mlxsw_sp_mr_route_key key; + u16 irif_index; + u16 min_mtu; +}; + +static struct mlxsw_afa_block * +mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_mr_route_action route_action, + u16 irif_index, u32 counter_index, + u16 min_mtu, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_afa_block *afa_block; + int err; + + afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (!afa_block) + return ERR_PTR(-ENOMEM); + + err = mlxsw_afa_block_append_counter(afa_block, counter_index); + if (err) + goto err; + + switch (route_action) { + case MLXSW_SP_MR_ROUTE_ACTION_TRAP: + err = mlxsw_afa_block_append_trap(afa_block, + MLXSW_TRAP_ID_ACL1); + if (err) + goto err; + break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: + case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: + /* If we are about to append a multicast router action, commit + * the erif_list. + */ + err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list); + if (err) + goto err; + + err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index, + min_mtu, false, + erif_list->kvdl_index); + if (err) + goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } + break; + default: + err = -EINVAL; + goto err; + } + + err = mlxsw_afa_block_commit(afa_block); + if (err) + goto err; + return afa_block; +err: + mlxsw_afa_block_destroy(afa_block); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) +{ + mlxsw_afa_block_destroy(afa_block); +} + +static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid, + struct parman_item *parman_item) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid, + 0, 0, 0, 0, 0, 0, NULL); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int +mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + struct mlxsw_sp_mr_route_info *route_info) +{ + int err; + int i; + + for (i = 0; i < route_info->erif_num; i++) { + u16 erif_index = route_info->erif_indices[i]; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list, + erif_index); + if (err) + return err; + } + return 0; +} + +static int +mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route, + enum mlxsw_sp_mr_route_prio prio) +{ + struct parman_prio *parman_prio = NULL; + int err; + + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio]; + err = parman_item_add(mr_tcam->ipv4_tcam_region.parman, + parman_prio, &route->parman_item); + if (err) + return err; + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + route->parman_prio = parman_prio; + return 0; +} + +static void +mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route) +{ + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_item_remove(mr_tcam->ipv4_tcam_region.parman, + route->parman_prio, &route->parman_item); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } +} + +static int +mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + route->key = route_params->key; + route->irif_index = route_params->value.irif_index; + route->min_mtu = route_params->value.min_mtu; + route->action = route_params->value.route_action; + + /* Create the egress RIFs list */ + mlxsw_sp_mr_erif_list_init(&route->erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list, + &route_params->value); + if (err) + goto err_erif_populate; + + /* Create the flow counter */ + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index); + if (err) + goto err_counter_alloc; + + /* Create the flexible action block */ + route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(route->afa_block)) { + err = PTR_ERR(route->afa_block); + goto err_afa_block_create; + } + + /* Allocate place in the TCAM */ + err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route, + route_params->prio); + if (err) + goto err_parman_item_add; + + /* Write the route to the TCAM */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, route->afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); +err_parman_item_add: + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); +err_afa_block_create: + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); +err_erif_populate: +err_counter_alloc: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + return err; +} + +static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, + void *priv, void *route_priv) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid, + &route->parman_item); + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); +} + +static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u64 *packets, + u64 *bytes) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, + packets, bytes); +} + +static int +mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + enum mlxsw_sp_mr_route_action route_action) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->action = route_action; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 min_mtu) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->min_mtu = min_mtu; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 irif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return -EINVAL; + route->irif_index = irif_index; + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + int err; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list, + erif_index); + if (err) + return err; + + /* Commit the action only if the route action is not TRAP */ + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return mlxsw_sp_mr_erif_list_commit(mlxsw_sp, + &route->erif_list); + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + int i; + + /* Create a copy of the original erif_list without the deleted entry */ + mlxsw_sp_mr_erif_list_init(&erif_list); + list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) { + for (i = 0; i < erif_sublist->num_erifs; i++) { + u16 curr_erif = erif_sublist->erif_indices[i]; + + if (curr_erif == erif_index) + continue; + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list, + curr_erif); + if (err) + goto err_erif_list_add; + } + } + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_list_add: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int +mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new erif_list */ + mlxsw_sp_mr_erif_list_init(&erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info); + if (err) + goto err_erif_populate; + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route_info->route_action, + route_info->irif_index, + route->counter_index, + route_info->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + route->action = route_info->route_action; + route->irif_index = route_info->irif_index; + route->min_mtu = route_info->min_mtu; + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_populate: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void mlxsw_sp_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; + + mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} + +static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_mr_tcam_region_parman_resize, + .move = mlxsw_sp_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +static int +mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; + + mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; + + parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; + + parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) { + err = -ENOMEM; + goto err_parman_prios_alloc; + } + mr_tcam_region->parman_prios = parman_prios; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; + +err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); + return err; +} + +static void +mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + int i; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); +} + +static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; + + return mlxsw_sp_mr_tcam_region_init(mlxsw_sp, + &mr_tcam->ipv4_tcam_region, + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST); +} + +static void mlxsw_sp_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region); +} + +const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_mr_tcam), + .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route), + .init = mlxsw_sp_mr_tcam_init, + .route_create = mlxsw_sp_mr_tcam_route_create, + .route_update = mlxsw_sp_mr_tcam_route_update, + .route_stats = mlxsw_sp_mr_tcam_route_stats, + .route_action_update = mlxsw_sp_mr_tcam_route_action_update, + .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update, + .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update, + .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add, + .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del, + .route_destroy = mlxsw_sp_mr_tcam_route_destroy, + .fini = mlxsw_sp_mr_tcam_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h new file mode 100644 index 000000000000..f9b59ee25406 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H +#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H + +#include "spectrum.h" +#include "spectrum_mr.h" + +extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0bd93dc88ffa..3330120f2f8e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -65,6 +65,8 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_ipip.h" +#include "spectrum_mr.h" +#include "spectrum_mr_tcam.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -78,6 +80,7 @@ struct mlxsw_sp_router { struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; + struct list_head nexthop_list; struct { struct mlxsw_sp_lpm_tree *trees; unsigned int tree_count; @@ -458,6 +461,7 @@ struct mlxsw_sp_vr { unsigned int rif_count; struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; + struct mlxsw_sp_mr_table *mr4_table; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -652,7 +656,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4 || !!vr->fib6; + return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -692,8 +696,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, static u32 mlxsw_sp_fix_tb_id(u32 tb_id) { - /* For our purpose, squash main and local table into one */ - if (tb_id == RT_TABLE_LOCAL) + /* For our purpose, squash main, default and local tables into one */ + if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT) tb_id = RT_TABLE_MAIN; return tb_id; } @@ -743,9 +747,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(vr->fib6); goto err_fib6_create; } + vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->mr4_table)) { + err = PTR_ERR(vr->mr4_table); + goto err_mr_table_create; + } vr->tb_id = tb_id; return vr; +err_mr_table_create: + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; @@ -754,6 +767,8 @@ err_fib6_create: static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_mr_table_destroy(vr->mr4_table); + vr->mr4_table = NULL; mlxsw_sp_fib_destroy(vr->fib6); vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); @@ -774,7 +789,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { if (!vr->rif_count && list_empty(&vr->fib4->node_list) && - list_empty(&vr->fib6->node_list)) + list_empty(&vr->fib6->node_list) && + mlxsw_sp_mr_table_empty(vr->mr4_table)) mlxsw_sp_vr_destroy(vr); } @@ -986,9 +1002,8 @@ err_ol_ipip_lb_create: } static void -mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) { - WARN_ON(ipip_entry->ref_count > 0); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); kfree(ipip_entry); } @@ -1184,26 +1199,22 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) +mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) { u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); struct mlxsw_sp_router *router = mlxsw_sp->router; - struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr saddr; + /* The configuration where several tunnels have the same local address + * in the same underlay table needs special treatment in the HW. That is + * currently not implemented in the driver. + */ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - if (ipip_entry->ol_dev == ol_dev) - goto inc_ref_count; - - /* The configuration where several tunnels have the same local - * address in the same underlay table needs special treatment in - * the HW. That is currently not implemented in the driver. - */ ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, @@ -1215,29 +1226,18 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(ipip_entry)) return ipip_entry; - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); -inc_ref_count: - ++ipip_entry->ref_count; return ipip_entry; } static void -mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { - if (--ipip_entry->ref_count == 0) { - list_del(&ipip_entry->ipip_list_node); - if (ipip_entry->decap_fib_entry) - mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); - mlxsw_sp_ipip_entry_destroy(ipip_entry); - } + list_del(&ipip_entry->ipip_list_node); + mlxsw_sp_ipip_entry_dealloc(ipip_entry); } static bool @@ -1279,6 +1279,168 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, return NULL; } +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (ipip_entry->ol_dev == ol_dev) + return ipip_entry; + + return NULL; +} + +static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_ipip_type ipipt; + + mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); + if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV4) || + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV6)) { + ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry && ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_rif_ipip_lb *lb_rif; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + return 0; + + /* When a tunneling device is moved to a different VRF, we need to + * update the backing loopback. Since RIFs can't be edited, we need to + * destroy and recreate it. That might create a window of opportunity + * where RALUE and RATR registers end up referencing a RIF that's + * already gone. RATRs are handled by the RIF destroy, and to take care + * of RALUE, demote the decap route back. + */ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + + lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt, + ol_dev); + if (IS_ERR(lb_rif)) + return PTR_ERR(lb_rif); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + ipip_entry->ol_lb = lb_rif; + + if (ol_dev->flags & IFF_UP) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + +int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_changeupper_info *info) +{ + switch (event) { + case NETDEV_REGISTER: + return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev); + case NETDEV_UNREGISTER: + mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev); + case NETDEV_DOWN: + mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_CHANGEUPPER: + if (netif_is_l3_master(info->upper_dev)) + return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp, + ol_dev); + return 0; + } + return 0; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -1316,7 +1478,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, typeof(*neigh_entry), rif_list_node); } - if (neigh_entry->rif_list_node.next == &rif->neigh_list) + if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list)) return NULL; return list_next_entry(neigh_entry, rif_list_node); } @@ -1664,7 +1826,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n"); break; } num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); @@ -2028,6 +2190,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ struct list_head rif_list_node; + struct list_head router_list_node; struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group * this belongs to */ @@ -2050,6 +2213,8 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_ipip_entry *ipip_entry; }; + unsigned int counter_index; + bool counter_valid; }; struct mlxsw_sp_nexthop_group { @@ -2066,6 +2231,112 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) + return; + + nh->counter_valid = true; +} + +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); + nh->counter_valid = false; +} + +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter) +{ + if (!nh->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, + p_counter, NULL); +} + +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh) { + if (list_empty(&router->nexthop_list)) + return NULL; + else + return list_first_entry(&router->nexthop_list, + typeof(*nh), router_list_node); + } + if (list_is_last(&nh->router_list_node, &router->nexthop_list)) + return NULL; + return list_next_entry(nh, router_list_node); +} + +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +{ + return nh->offloaded; +} + +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->offloaded) + return NULL; + return nh->neigh_entry->ha; +} + +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + u32 adj_hash_index = 0; + int i; + + if (!nh->offloaded || !nh_grp->adj_index_valid) + return -EINVAL; + + *p_adj_index = nh_grp->adj_index; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter == nh) + break; + if (nh_iter->offloaded) + adj_hash_index++; + } + + *p_adj_hash_index = adj_hash_index; + return 0; +} + +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) +{ + return nh->rif; +} + +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) + return true; + } + return false; +} + static struct fib_info * mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) { @@ -2323,8 +2594,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; @@ -2333,6 +2604,11 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, true, MLXSW_REG_RATR_TYPE_ETHERNET, adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + if (nh->counter_valid) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + else + mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } @@ -2367,7 +2643,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_mac_update + err = mlxsw_sp_nexthop_update (mlxsw_sp, adj_index, nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: @@ -2655,36 +2931,16 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev, - enum mlxsw_sp_ipip_type *p_type) -{ - struct mlxsw_sp_router *router = mlxsw_sp->router; - const struct mlxsw_sp_ipip_ops *ipip_ops; - enum mlxsw_sp_ipip_type ipipt; - - for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { - ipip_ops = router->ipip_ops_arr[ipipt]; - if (dev->type == ipip_ops->dev_type) { - if (p_type) - *p_type = ipipt; - return true; - } - } - return false; -} - static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, struct net_device *ol_dev) { if (!nh->nh_grp->gateway || nh->ipip_entry) return 0; - nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); - if (IS_ERR(nh->ipip_entry)) - return PTR_ERR(nh->ipip_entry); + nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!nh->ipip_entry) + return -ENOENT; __mlxsw_sp_nexthop_neigh_update(nh, false); return 0; @@ -2699,7 +2955,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, return; __mlxsw_sp_nexthop_neigh_update(nh, true); - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); nh->ipip_entry = NULL; } @@ -2723,6 +2978,7 @@ static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_rif_fini(nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); break; } @@ -2742,7 +2998,11 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); + return 0; } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -2784,6 +3044,9 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; @@ -2807,6 +3070,8 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -3116,7 +3381,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, return; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) + else mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: @@ -3500,20 +3765,6 @@ static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; - struct mlxsw_sp_lpm_tree *lpm_tree; - - /* Aggregate prefix lengths across all virtual routers to make - * sure we only have used prefix lengths in the LPM tree. - */ - mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - goto err_tree_get; - mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); - -err_tree_get: if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) return; mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); @@ -4009,7 +4260,11 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV6)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); + return 0; } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -4044,6 +4299,9 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); if (!dev) return 0; @@ -4056,6 +4314,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, @@ -4606,6 +4866,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info, + bool replace) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace); +} + +static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc); + mlxsw_sp_vr_put(vr); +} + +static int +mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev); + return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev, + ven_info->vif_index, + ven_info->vif_flags, rif); +} + +static void +mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index); + mlxsw_sp_vr_put(vr); +} + static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; @@ -4616,6 +4945,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; + /* The multicast router code does not need an abort trap as by default, + * packets that don't match any routes are trapped to the CPU. + */ + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); @@ -4697,6 +5030,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; + + mlxsw_sp_mr_table_flush(vr->mr4_table); mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); /* If virtual router was only used for IPv4, then it's no @@ -4729,6 +5064,8 @@ struct mlxsw_sp_fib_event_work { struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; + struct mfc_entry_notifier_info men_info; + struct vif_entry_notifier_info ven_info; }; struct mlxsw_sp *mlxsw_sp; unsigned long event; @@ -4815,6 +5152,55 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, + &fib_work->ven_info); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_VIF_DEL: + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!ipmr_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { @@ -4860,6 +5246,30 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, } } +static void +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + ipmr_cache_hold(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: /* fall through */ + case FIB_EVENT_VIF_DEL: + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -4869,7 +5279,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6)) + (info->family != AF_INET && info->family != AF_INET6 && + info->family != RTNL_FAMILY_IPMR)) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); @@ -4889,6 +5300,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); mlxsw_sp_router_fib6_event(fib_work, info); break; + case RTNL_FAMILY_IPMR: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); + break; } mlxsw_core_schedule_work(&fib_work->work); @@ -5073,6 +5488,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); if (IS_ERR(vr)) return ERR_CAST(vr); + vr->rif_count++; err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); if (err) @@ -5102,12 +5518,17 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; + err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif); + if (err) + goto err_mr_rif_add; + mlxsw_sp_rif_counters_alloc(rif); mlxsw_sp->router->rifs[rif_index] = rif; - vr->rif_count++; return rif; +err_mr_rif_add: + ops->deconfigure(rif); err_configure: if (fid) mlxsw_sp_fid_put(fid); @@ -5115,6 +5536,7 @@ err_fid_get: kfree(rif); err_rif_alloc: err_rif_index_alloc: + vr->rif_count--; mlxsw_sp_vr_put(vr); return ERR_PTR(err); } @@ -5129,14 +5551,15 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); + mlxsw_sp_mr_rif_del(vr->mr4_table, rif); ops->deconfigure(rif); if (fid) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); kfree(rif); + vr->rif_count--; mlxsw_sp_vr_put(vr); } @@ -5472,6 +5895,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) if (err) goto err_rif_fdb_op; + if (rif->mtu != dev->mtu) { + struct mlxsw_sp_vr *vr; + + /* The RIF is relevant only to its mr_table instance, as unlike + * unicast routing, in multicast routing a RIF cannot be shared + * between several multicast routing tables. + */ + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu); + } + ether_addr_copy(rif->addr, dev->dev_addr); rif->mtu = dev->mtu; @@ -5634,7 +6068,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) { return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } @@ -5990,10 +6424,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list); err = mlxsw_sp_lpm_init(mlxsw_sp); if (err) goto err_lpm_init; + err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops); + if (err) + goto err_mr_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -6015,6 +6454,8 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_mr_fini(mlxsw_sp); +err_mr_init: mlxsw_sp_lpm_fini(mlxsw_sp); err_lpm_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); @@ -6036,6 +6477,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->router->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index ae4c99b3f2fc..3f2d840cb285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -62,6 +62,7 @@ enum mlxsw_sp_rif_counter_dir { }; struct mlxsw_sp_neigh_entry; +struct mlxsw_sp_nexthop; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); @@ -69,6 +70,7 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, @@ -108,5 +110,24 @@ union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev); __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index); +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); +#define mlxsw_sp_nexthop_for_each(nh, router) \ + for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ + nh = mlxsw_sp_nexthop_next(router, nh)) +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter); +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0f9eac5f4ebf..7b8548e25ae7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -46,8 +46,10 @@ #include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/rtnetlink.h> +#include <linux/netlink.h> #include <net/switchdev.h> +#include "spectrum_router.h" #include "spectrum.h" #include "core.h" #include "reg.h" @@ -78,7 +80,8 @@ struct mlxsw_sp_bridge_device { struct list_head ports_list; struct list_head mids_list; u8 vlan_enabled:1, - multicast_enabled:1; + multicast_enabled:1, + mrouter:1; const struct mlxsw_sp_bridge_ops *ops; }; @@ -107,7 +110,8 @@ struct mlxsw_sp_bridge_vlan { struct mlxsw_sp_bridge_ops { int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); @@ -168,6 +172,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, bridge_device->dev = br_dev; bridge_device->vlan_enabled = vlan_enabled; bridge_device->multicast_enabled = br_multicast_enabled(br_dev); + bridge_device->mrouter = br_multicast_router(br_dev); INIT_LIST_HEAD(&bridge_device->ports_list); if (vlan_enabled) { bridge->vlan_enabled_exists = true; @@ -810,6 +815,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp, + u16 mid_idx, bool add) +{ + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, + mlxsw_sp_router_port(mlxsw_sp), add); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static void +mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool add) +{ + struct mlxsw_sp_mid *mid; + + list_for_each_entry(mid, &bridge_device->mids_list, list) + mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add); +} + +static int +mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_mrouter) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. + */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + if (bridge_device->mrouter != is_mrouter) + mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device, + is_mrouter); + bridge_device->mrouter = is_mrouter; + return 0; +} + static int mlxsw_sp_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -847,6 +906,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->orig_dev, attr->u.mc_disabled); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); + break; default: err = -EOPNOTSUPP; break; @@ -1241,7 +1305,8 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, } static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, - long *ports_bitmap) + long *ports_bitmap, + bool set_router_port) { char *smid_pl; int err, i; @@ -1256,9 +1321,15 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + mlxsw_reg_smid_port_mask_set(smid_pl, + mlxsw_sp_router_port(mlxsw_sp), 1); + for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core)) mlxsw_reg_smid_port_set(smid_pl, i, 1); + mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp), + set_router_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1362,7 +1433,8 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, + bridge_device->mrouter); kfree(flood_bitmap); if (err) return false; @@ -1735,12 +1807,15 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { static int mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (is_vlan_dev(bridge_port->dev)) + if (is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; + } mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); if (WARN_ON(!mlxsw_sp_port_vlan)) @@ -1797,13 +1872,16 @@ mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; u16 vid; - if (!is_vlan_dev(bridge_port->dev)) + if (!is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); return -EINVAL; + } vid = vlan_dev_vlan_id(bridge_port->dev); mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); @@ -1811,7 +1889,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, return -EINVAL; if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { - netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n"); + NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); return -EINVAL; } @@ -1854,7 +1932,8 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; @@ -1867,7 +1946,7 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, bridge_device = bridge_port->bridge_device; err = bridge_device->ops->port_join(bridge_device, bridge_port, - mlxsw_sp_port); + mlxsw_sp_port, extack); if (err) goto err_port_join; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index a98103539f6b..ec6cef8267ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -93,6 +93,8 @@ enum { MLXSW_TRAP_ID_ACL0 = 0x1C0, /* Multicast trap used for routes with trap action */ MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, MLXSW_TRAP_ID_MAX = 0x1FF }; |