aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/9p/trans_fd.c60
-rw-r--r--net/9p/trans_xen.c4
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/common.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/tp_meter.c4
-rw-r--r--net/bluetooth/af_bluetooth.c6
-rw-r--r--net/bluetooth/hidp/core.c2
-rw-r--r--net/bluetooth/l2cap_core.c20
-rw-r--r--net/bridge/br_netlink.c11
-rw-r--r--net/caif/caif_dev.c5
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/caif/caif_usb.c4
-rw-r--r--net/caif/cfcnfg.c10
-rw-r--r--net/caif/cfctrl.c4
-rw-r--r--net/can/af_can.c36
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c35
-rw-r--r--net/core/ethtool.c15
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/flow_dissector.c3
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c10
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/dccp/ccids/ccid2.c3
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/ipv4/arp.c7
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/igmp.c44
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/ip_tunnel.c7
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c3
-rw-r--r--net/ipv4/raw.c17
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/xfrm4_input.c12
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/esp6.c3
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/exthdrs.c9
-rw-r--r--net/ipv6/ip6_fib.c83
-rw-r--r--net/ipv6/ip6_gre.c72
-rw-r--r--net/ipv6/ip6_output.c24
-rw-r--r--net/ipv6/ip6_tunnel.c30
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c3
-rw-r--r--net/ipv6/mcast.c25
-rw-r--r--net/ipv6/netfilter/ip6_tables.c8
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/route.c20
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/xfrm6_input.c10
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c1
-rw-r--r--net/iucv/af_iucv.c6
-rw-r--r--net/kcm/kcmsock.c25
-rw-r--r--net/key/af_key.c12
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c9
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c128
-rw-r--r--net/netfilter/nf_conntrack_netlink.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c3
-rw-r--r--net/netfilter/nf_tables_api.c15
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c5
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/x_tables.c9
-rw-r--r--net/netfilter/xt_bpf.c20
-rw-r--r--net/netfilter/xt_osf.c7
-rw-r--r--net/netlink/af_netlink.c6
-rw-r--r--net/nfc/llcp_sock.c6
-rw-r--r--net/nfc/nci/uart.c2
-rw-r--r--net/openvswitch/flow.c15
-rw-r--r--net/openvswitch/flow_netlink.c51
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/rds/af_rds.c4
-rw-r--r--net/rds/ib.c6
-rw-r--r--net/rds/rdma.c4
-rw-r--r--net/rds/send.c3
-rw-r--r--net/rds/tcp.c5
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_send.c4
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rxrpc/af_rxrpc.c4
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_meta_mark.c1
-rw-r--r--net/sched/act_meta_skbtcindex.c1
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_bpf.c100
-rw-r--r--net/sched/cls_u32.c1
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sched/sch_api.c17
-rw-r--r--net/sched/sch_generic.c22
-rw-r--r--net/sched/sch_ingress.c32
-rw-r--r--net/sched/sch_red.c31
-rw-r--r--net/sctp/debug.c3
-rw-r--r--net/sctp/input.c28
-rw-r--r--net/sctp/ipv6.c1
-rw-r--r--net/sctp/offload.c3
-rw-r--r--net/sctp/outqueue.c4
-rw-r--r--net/sctp/socket.c141
-rw-r--r--net/sctp/stream.c22
-rw-r--r--net/sctp/transport.c29
-rw-r--r--net/sctp/ulpqueue.c24
-rw-r--r--net/smc/af_smc.c6
-rw-r--r--net/smc/smc_clc.c18
-rw-r--r--net/socket.c19
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c1
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c16
-rw-r--r--net/sunrpc/rpc_pipe.c4
-rw-r--r--net/sunrpc/sched.c26
-rw-r--r--net/sunrpc/svcauth_unix.c2
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xprt.c30
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c78
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c157
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c329
-rw-r--r--net/sunrpc/xprtrdma/module.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c168
-rw-r--r--net/sunrpc/xprtrdma/transport.c130
-rw-r--r--net/sunrpc/xprtrdma/verbs.c282
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h117
-rw-r--r--net/sunrpc/xprtsock.c36
-rw-r--r--net/tipc/bearer.c5
-rw-r--r--net/tipc/group.c69
-rw-r--r--net/tipc/monitor.c6
-rw-r--r--net/tipc/node.c26
-rw-r--r--net/tipc/server.c4
-rw-r--r--net/tipc/socket.c8
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/tls/tls_sw.c18
-rw-r--r--net/unix/af_unix.c15
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/wireless/Makefile39
-rw-r--r--net/wireless/certs/sforshee.hex86
-rw-r--r--net/wireless/certs/sforshee.x509bin680 -> 0 bytes
-rw-r--r--net/wireless/core.c8
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/nl80211.c20
-rw-r--r--net/wireless/reg.c3
-rw-r--r--net/wireless/wext-compat.c3
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_input.c69
-rw-r--r--net/xfrm/xfrm_policy.c24
-rw-r--r--net/xfrm/xfrm_state.c24
-rw-r--r--net/xfrm/xfrm_user.c44
183 files changed, 2104 insertions, 1536 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8dfdd94e430f..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 80f5c79053a4..d6f7f7cb79c4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -228,32 +228,31 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
}
}
-static int
-p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
+static __poll_t
+p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
- int ret, n;
+ __poll_t ret, n;
struct p9_trans_fd *ts = NULL;
if (client && client->status == Connected)
ts = client->trans;
- if (!ts)
- return -EREMOTEIO;
+ if (!ts) {
+ if (err)
+ *err = -EREMOTEIO;
+ return POLLERR;
+ }
if (!ts->rd->f_op->poll)
- return -EIO;
-
- if (!ts->wr->f_op->poll)
- return -EIO;
-
- ret = ts->rd->f_op->poll(ts->rd, pt);
- if (ret < 0)
- return ret;
+ ret = DEFAULT_POLLMASK;
+ else
+ ret = ts->rd->f_op->poll(ts->rd, pt);
if (ts->rd != ts->wr) {
- n = ts->wr->f_op->poll(ts->wr, pt);
- if (n < 0)
- return n;
+ if (!ts->wr->f_op->poll)
+ n = DEFAULT_POLLMASK;
+ else
+ n = ts->wr->f_op->poll(ts->wr, pt);
ret = (ret & ~POLLOUT) | (n & ~POLLIN);
}
@@ -298,7 +297,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
static void p9_read_work(struct work_struct *work)
{
- int n, err;
+ __poll_t n;
+ int err;
struct p9_conn *m;
int status = REQ_STATUS_ERROR;
@@ -398,7 +398,7 @@ end_clear:
if (test_and_clear_bit(Rpending, &m->wsched))
n = POLLIN;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
@@ -448,7 +448,8 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
static void p9_write_work(struct work_struct *work)
{
- int n, err;
+ __poll_t n;
+ int err;
struct p9_conn *m;
struct p9_req_t *req;
@@ -506,7 +507,7 @@ end_clear:
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if ((n & POLLOUT) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
@@ -581,7 +582,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
static void p9_conn_create(struct p9_client *client)
{
- int n;
+ __poll_t n;
struct p9_trans_fd *ts = client->trans;
struct p9_conn *m = &ts->conn;
@@ -597,7 +598,7 @@ static void p9_conn_create(struct p9_client *client)
INIT_LIST_HEAD(&m->poll_pending_link);
init_poll_funcptr(&m->pt, p9_pollwait);
- n = p9_fd_poll(client, &m->pt);
+ n = p9_fd_poll(client, &m->pt, NULL);
if (n & POLLIN) {
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
set_bit(Rpending, &m->wsched);
@@ -617,17 +618,16 @@ static void p9_conn_create(struct p9_client *client)
static void p9_poll_mux(struct p9_conn *m)
{
- int n;
+ __poll_t n;
+ int err = -ECONNRESET;
if (m->err < 0)
return;
- n = p9_fd_poll(m->client, NULL);
- if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+ n = p9_fd_poll(m->client, NULL, &err);
+ if (n & (POLLERR | POLLHUP | POLLNVAL)) {
p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
- if (n >= 0)
- n = -ECONNRESET;
- p9_conn_cancel(m, n);
+ p9_conn_cancel(m, err);
}
if (n & POLLIN) {
@@ -663,7 +663,7 @@ static void p9_poll_mux(struct p9_conn *m)
static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
- int n;
+ __poll_t n;
struct p9_trans_fd *ts = client->trans;
struct p9_conn *m = &ts->conn;
@@ -680,7 +680,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
- n = p9_fd_poll(m->client, NULL);
+ n = p9_fd_poll(m->client, NULL, NULL);
if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
schedule_work(&m->wq);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 325c56043007..086a4abdfa7c 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
return xenbus_unregister_driver(&xen_9pfs_front_driver);
}
module_exit(p9_trans_xen_exit);
+
+MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
+MODULE_DESCRIPTION("Xen Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/atm/common.c b/net/atm/common.c
index 8a4f99114cd2..8f12f1c6fa14 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,11 +648,11 @@ out:
return error;
}
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/atm/common.h b/net/atm/common.h
index d9d583712a91..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 1b659ab652fb..bbe8414b6ee7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
if_num = if_incoming->if_num;
orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
} else {
neigh_rq_count = 0;
}
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 341ceab8338d..e0e2bfcd6b3e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
}
orig_gw = batadv_gw_node_get(bat_priv, orig_node);
- if (!orig_node)
+ if (!orig_gw)
goto out;
if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a98cf1104a30..ebe6e38934e4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
*/
if (skb->priority >= 256 && skb->priority <= 263)
frag_header.priority = skb->priority - 256;
+ else
+ frag_header.priority = 0;
ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
ether_addr_copy(frag_header.dest, orig_node->orig);
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bded31121d12..a98e0a986cef 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -292,7 +292,7 @@ out:
return len;
}
-static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
{
struct batadv_socket_client *socket_client = file->private_data;
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 4ef4bde2cc2d..76451460c98d 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -176,7 +176,7 @@ static ssize_t batadv_log_read(struct file *file, char __user *buf,
return error;
}
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
{
struct batadv_priv *bat_priv = file->private_data;
struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 15cd2139381e..ebc4e2241c77 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
/**
* batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
*
* If fired it means that there was packet loss.
* Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
/**
* batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
* reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address to timer_list inside tp_vars
*/
static void batadv_tp_receiver_shutdown(struct timer_list *t)
{
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 91e3ba280706..671b907ba678 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -421,7 +421,7 @@ out:
}
EXPORT_SYMBOL(bt_sock_stream_recvmsg);
-static inline unsigned int bt_accept_poll(struct sock *parent)
+static inline __poll_t bt_accept_poll(struct sock *parent)
{
struct bt_sock *s, *n;
struct sock *sk;
@@ -437,11 +437,11 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
return 0;
}
-unsigned int bt_sock_poll(struct file *file, struct socket *sock,
+__poll_t bt_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f2cec70d520c..1036e4fa1ea2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -789,7 +789,7 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->dev.parent = &session->conn->hcon->dev;
hid->ll_driver = &hidp_hid_driver;
- /* True if device is blacklisted in drivers/hid/hid-core.c */
+ /* True if device is blacklisted in drivers/hid/hid-quirks.c */
if (hid_ignore(hid)) {
hid_destroy_device(session->hid);
session->hid = NULL;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 43ba91c440bc..fc6615d59165 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
break;
case L2CAP_CONF_EFS:
- remote_efs = 1;
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
+ remote_efs = 1;
memcpy(&efs, (void *) val, olen);
+ }
break;
case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
memcpy(&efs, (void *)val, olen);
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
+ }
break;
case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index d0ef0a8e8831..015f465c514b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
struct net_bridge *br = netdev_priv(dev);
int err;
+ err = register_netdevice(dev);
+ if (err)
+ return err;
+
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(&br->lock);
br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
spin_unlock_bh(&br->lock);
}
- err = register_netdevice(dev);
- if (err)
- return err;
-
err = br_changelink(dev, tb, data, extack);
if (err)
- unregister_netdevice(dev);
+ br_dev_delete(dev, NULL);
+
return err;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 2d38b6e34203..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strncpy(caifd->layer.name, dev->name,
- sizeof(caifd->layer.name) - 1);
- caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+ strlcpy(caifd->layer.name, dev->name,
+ sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
cfcnfg_add_phy_layer(cfg,
dev,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 632d5a416d97..64048cec41e0 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,11 @@ static int caif_release(struct socket *sock)
}
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static unsigned int caif_poll(struct file *file,
+static __poll_t caif_poll(struct file *file,
struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 5cd44f001f64..1a082a946045 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strncpy(layer->name, dev->name,
- sizeof(layer->name) - 1);
- layer->name[sizeof(layer->name) - 1] = 0;
+ strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 273cb07f57d8..8f00bea093b9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
- sizeof(l->u.rfm.volume)-1);
- l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+ strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strncpy(l->u.utility.name, s->sockaddr.u.util.service,
- sizeof(l->u.utility.name)-1);
- l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+ strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
if (l->u.utility.paramlen > sizeof(l->u.utility.params))
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index f5afda1abc76..655ed7032150 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strncpy(utility_name, param->u.utility.name,
- UTILITY_NAME_LENGTH - 1);
+ strlcpy(utility_name, param->u.utility.name,
+ UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
cfpkt_add_body(pkt, &tmp8, 1);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 003b2d6d655f..4d7f988a3130 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN,
- "PF_CAN: dropped non conform CAN skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+ cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+ cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
/*
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 522873ed120b..b7d9293940b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -72,12 +72,10 @@ static inline int connection_based(struct sock *sk)
static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
void *key)
{
- unsigned long bits = (unsigned long)key;
-
/*
* Avoid a wakeup if event not interesting for us
*/
- if (bits && !(bits & (POLLIN | POLLERR)))
+ if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -833,11 +831,11 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
-unsigned int datagram_poll(struct file *file, struct socket *sock,
+__poll_t datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index f47e96b62308..613fb4066be7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- return dev_alloc_name_ns(net, dev, name);
+ BUG_ON(!net);
+
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (strchr(name, '%'))
+ return dev_alloc_name_ns(net, dev, name);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST;
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
+
+ return 0;
}
EXPORT_SYMBOL(dev_get_valid_name);
@@ -3139,10 +3151,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -3904,7 +3927,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
goto do_drop;
- if (troom > 0 && __skb_linearize(skb))
+ if (skb_linearize(skb))
goto do_drop;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf450a36e..8225416911ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..1c0eb436671f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -458,6 +458,10 @@ do_pass:
convert_bpf_extensions(fp, &insn))
break;
+ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -1054,11 +1058,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
*/
goto out_err_free;
- /* We are guaranteed to never error here with cBPF to eBPF
- * transitions, since there's no issue with type compatibility
- * checks on program arrays.
- */
fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err_free;
kfree(old_prog);
return fp;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15ce30063765..544bddf08e13 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -976,8 +976,8 @@ ip_proto_again:
out_good:
ret = true;
- key_control->thoff = (u16)nhoff;
out:
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
@@ -985,7 +985,6 @@ out:
out_bad:
ret = false;
- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f5fe986edd..7f831711b6e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b797832565d3..60a71be75aea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
spin_lock_bh(&net->nsid_lock);
peer = idr_find(&net->netns_ids, id);
if (peer)
- get_net(peer);
+ peer = maybe_get_net(peer);
spin_unlock_bh(&net->nsid_lock);
rcu_read_unlock();
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 1c4810919a0a..b9057478d69c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..778d7f03404a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
{
struct net *net;
- net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
if (!net)
return ERR_PTR(-EINVAL);
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns.
*/
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
return ERR_PTR(-EACCES);
}
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
ifla_policy, NULL) >= 0) {
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
tgt_net = net;
netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b0ff396fa9d..08f574081315 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
int i, new_frags;
u32 d_off;
- if (!num_frags)
- return 0;
-
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!num_frags)
+ goto release;
+
new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < new_frags; i++) {
page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
skb_shinfo(skb)->nr_frags = new_frags;
+release:
skb_zcopy_clear(skb, false);
return 0;
}
@@ -3654,8 +3655,6 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
- if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
- goto err;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
goto err;
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ goto err;
*nskb_frag = *frag;
__skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412..1211159718ad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2496,7 +2496,7 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
}
EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
+__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
return 0;
}
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 217f4e3b82f6..146b50e30659 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cbc3dde4cfcc..a47ad6cd41c0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
+#else
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 1c75cd1255f6..92d016e87816 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t)
ccid2_pr_debug("RTO_EXPIRE\n");
+ if (sk->sk_state == DCCP_CLOSED)
+ goto out;
+
/* back-off timer */
hc->tx_rto <<= 1;
if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0c55ffb859bf..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
-unsigned int dccp_poll(struct file *file, struct socket *sock,
+__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9d43c1f40274..8b8db3d481bd 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -318,10 +318,10 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
-unsigned int dccp_poll(struct file *file, struct socket *sock,
+__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- unsigned int mask;
+ __poll_t mask;
struct sock *sk = sock->sk;
sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 518cea17b811..9c2dde819817 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1209,11 +1209,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
}
-static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue))
mask |= POLLRDBAND;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6e7a642493b..a95a55f79137 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -16,7 +16,6 @@
#include <linux/of_net.h>
#include <linux/of_mdio.h>
#include <linux/mdio.h>
-#include <linux/list.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a8d7c5a9fb05..6c231b43974d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32 *)neigh->primary_key;
+ __be32 addr;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
+ u32 inaddr_any = INADDR_ANY;
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+ addr = *(__be32 *)neigh->primary_key;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4573bccd6da..7a93359fbc72 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1428,7 +1428,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d57aa64fa7c7..61fe6e4d23fc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -981,6 +981,7 @@ static int esp_init_state(struct xfrm_state *x)
switch (encap->encap_type) {
default:
+ err = -EINVAL;
goto error;
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c766b0..29b333a62ab0 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
__be32 spi;
int err;
- skb_pull(skb, offset);
+ if (!pskb_pull(skb, offset))
+ return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
@@ -121,6 +122,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!xo)
goto out;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ goto out;
+
seq = xo->seq.low;
x = skb->sp->xvec[skb->sp->len - 1];
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f52d27a422c3..08259d078b1c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f04d944f8abe..c586597da20d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
- u32 val;
+ u32 fi_val, val;
if (!type)
continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
val = nla_get_u32(nla);
}
- if (fi->fib_metrics->metrics[type - 1] != val)
+ fi_val = fi->fib_metrics->metrics[type - 1];
+ if (type == RTAX_FEATURES)
+ fi_val &= ~DST_FEATURE_ECN_CA;
+
+ if (fi_val != val)
return false;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1f8f302dbf3..2d49717a7421 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (fl4->saddr == ifa->ifa_local)
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -538,7 +562,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bb6239169b1a..45ffd3d045d2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
len = gre_hdr_len + sizeof(*ershdr);
if (unlikely(!pskb_may_pull(skb, len)))
- return -ENOMEM;
+ return PACKET_REJECT;
iph = ip_hdr(skb);
ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &gre_tap_netdev_ops;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..6d21068f9b55 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
@@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 949f432a5f04..51b1669334fe 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f88221aebc9d..eb8246c39de0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
@@ -373,7 +368,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4cbe5e80f3bf..cc984d0e0c69 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb,
WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
@@ -439,7 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 17b4ca562944..69060e3abe85 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
-#ifdef CONFIG_PROC_FS
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
proc_remove(cn->procdir);
cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
+ WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33b70bfd1122..5e570aa9e43b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -513,11 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
/*
* Check the flags.
*/
@@ -593,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -615,12 +622,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -645,7 +652,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags, &ipc.sockc);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af242e1..4e153b23bcec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
+ fl4.flowi4_iif = LOOPBACK_IFINDEX;
rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f08eebe60446..1b38b4282cc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -493,9 +493,9 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
-unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- unsigned int mask;
+ __poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
int state;
@@ -2298,6 +2298,9 @@ adjudge_to_death:
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
+ } else if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_set_state(sk, TCP_CLOSE);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9550cc42de2d..45f750e85714 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
- if (m == 0)
- m = 1;
-
if (new_sample != 0) {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+ if (!delta_us)
+ delta_us = 1;
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ u32 delta_us;
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
tcp_rcv_rtt_update(tp, delta_us, 0);
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77ea45da0fe9..94e28350f420 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1dcf56..4d58e2ce0b5b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 16df6dd44b98..388158c9d9f6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
+ * Also close if our net namespace is exiting; in that case there is no
+ * hope of ever communicating again since all netns interfaces are already
+ * down (or about to be down), and we need to release our dst references,
+ * which have been moved to the netns loopback interface, so the namespace
+ * can finish exiting. This condition is only possible if we are a kernel
+ * socket, as those do not hold references to the namespace.
+ *
* Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
* 2. If we have strong memory pressure.
+ * 3. If our net namespace is exiting.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
+
+ if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_done(sk);
+ return 1;
+ }
+
return 0;
}
@@ -264,6 +279,7 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
+ tcp_mstamp_refresh(tcp_sk(sk));
tcp_send_ack(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
@@ -632,6 +648,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
+ tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
if (tp->linger2 >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e4ff25c947c5..ef45adfc0edb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2502,9 +2502,9 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
-unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- unsigned int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..ea6e6e7df0ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index e50b7fea57ee..bcfc00e88756 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
iph->tos, skb->dev))
goto drop;
}
- return dst_input(skb);
+
+ if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+ goto drop;
+
+ return 0;
drop:
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e6265e2c274e..20ca486b3cad 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(net);
np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a902ff8f59be..1a7f00cd4803 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -890,13 +890,12 @@ static int esp6_init_state(struct xfrm_state *x)
x->props.header_len += IPV4_BEET_PHMAXLEN +
(sizeof(struct ipv6hdr) - sizeof(struct iphdr));
break;
+ default:
case XFRM_MODE_TRANSPORT:
break;
case XFRM_MODE_TUNNEL:
x->props.header_len += sizeof(struct ipv6hdr);
break;
- default:
- goto error;
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478aa161..f52c314d4c97 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
int nhoff;
int err;
- skb_pull(skb, offset);
+ if (!pskb_pull(skb, offset))
+ return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
goto out;
@@ -148,6 +149,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!xo)
goto out;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ goto out;
+
seq = xo->seq.low;
x = skb->sp->xvec[skb->sp->len - 1];
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 83bd75713535..bc68eb661970 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
sr_phdr->segments[0] = **addr_p;
*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
+ if (sr_ihdr->hdrlen > hops * 2) {
+ int tlvs_offset, tlvs_length;
+
+ tlvs_offset = (1 + hops * 2) << 3;
+ tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+ memcpy((char *)sr_phdr + tlvs_offset,
+ (char *)sr_ihdr + tlvs_offset, tlvs_length);
+ }
+
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
struct net *net = NULL;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..217683d40f12 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
if (!(fn->fn_flags & RTN_RTINFO)) {
RCU_INIT_POINTER(fn->leaf, NULL);
rt6_release(leaf);
+ /* remove null_entry in the root node */
+ } else if (fn->fn_flags & RTN_TL_ROOT &&
+ rcu_access_pointer(fn->leaf) ==
+ net->ipv6.ip6_null_entry) {
+ RCU_INIT_POINTER(fn->leaf, NULL);
}
return fn;
@@ -1221,8 +1226,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
if (!rcu_access_pointer(fn->leaf)) {
- atomic_inc(&rt->rt6i_ref);
- rcu_assign_pointer(fn->leaf, rt);
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ /* put back null_entry for root node */
+ rcu_assign_pointer(fn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
+ } else {
+ atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
+ }
}
fn = sn;
}
@@ -1241,23 +1252,28 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
- lockdep_is_held(&table->tb6_lock));
- if (pn != fn && pn_leaf == rt) {
- pn_leaf = NULL;
- RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
- }
- if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
- pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (pn != fn) {
+ struct rt6_info *pn_leaf =
+ rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ atomic_dec(&rt->rt6i_ref);
}
+ if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table,
+ pn);
+#if RT6_DEBUG >= 2
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf =
+ info->nl_net->ipv6.ip6_null_entry;
+ }
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
- rcu_assign_pointer(pn->leaf, pn_leaf);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
+ }
}
#endif
goto failure;
@@ -1265,13 +1281,17 @@ out:
return err;
failure:
- /* fn->leaf could be NULL if fn is an intermediate node and we
- * failed to add the new route to it in both subtree creation
- * failure and fib6_add_rt2node() failure case.
- * In both cases, fib6_repair_tree() should be called to fix
- * fn->leaf.
+ /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+ * 1. fn is an intermediate node and we failed to add the new
+ * route to it in both subtree creation failure and fib6_add_rt2node()
+ * failure case.
+ * 2. fn is the root node in the table and we fail to add the first
+ * default route to it.
*/
- if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ if (fn &&
+ (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+ (fn->fn_flags & RTN_TL_ROOT &&
+ !rcu_access_pointer(fn->leaf))))
fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
@@ -1526,6 +1546,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
struct fib6_walker *w;
int iter = 0;
+ /* Set fn->leaf to null_entry for root node. */
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+ return fn;
+ }
+
for (;;) {
struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1706,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- /* If it was last route, expunge its radix tree node */
+ /* If it was last route, call fib6_repair_tree() to:
+ * 1. For root node, put back null_entry as how the table was created.
+ * 2. For other nodes, expunge its radix tree node.
+ */
if (!rcu_access_pointer(fn->leaf)) {
- fn->fn_flags &= ~RTN_RTINFO;
- net->ipv6.rt6_stats->fib_route_nodes--;
+ if (!(fn->fn_flags & RTN_TL_ROOT)) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ net->ipv6.rt6_stats->fib_route_nodes--;
+ }
fn = fib6_repair_tree(net, table, fn);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4cfd8e0696fe..873549228ccb 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
goto failed_free;
+ ip6gre_tnl_link_config(nt, 1);
+
/* Can use a lockless transmit, unless we generate output sequences */
if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
@@ -1014,6 +1015,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
eth_random_addr(dev->perm_addr);
}
+#define GRE6_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+ struct ip6_tnl *nt = netdev_priv(dev);
+
+ dev->features |= GRE6_FEATURES;
+ dev->hw_features |= GRE6_FEATURES;
+
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+ nt->encap.type == TUNNEL_ENCAP_NONE) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
+ dev->features |= NETIF_F_LLTX;
+ }
+}
+
static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
@@ -1048,6 +1079,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ ip6gre_tnl_init_features(dev);
+
return 0;
}
@@ -1271,7 +1304,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
- struct ip6_tnl *tunnel;
int ret;
ret = ip6gre_tunnel_init_common(dev);
@@ -1280,10 +1312,6 @@ static int ip6gre_tap_init(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
-
- ip6gre_tnl_link_config(tunnel, 1);
-
return 0;
}
@@ -1298,16 +1326,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
-#define GRE6_FEATURES (NETIF_F_SG | \
- NETIF_F_FRAGLIST | \
- NETIF_F_HIGHDMA | \
- NETIF_F_HW_CSUM)
-
static void ip6gre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6gre_tap_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
@@ -1380,32 +1404,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
-
- dev->features |= GRE6_FEATURES;
- dev->hw_features |= GRE6_FEATURES;
-
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- (nt->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
-
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
err = register_netdevice(dev);
if (err)
goto out;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ if (tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..3763dc01e374 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1198,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < IPV6_MIN_MTU)
+ return -EINVAL;
cork->base.fragsize = mtu;
if (dst_allfrag(rt->dst.path))
cork->base.flags |= IPCORK_ALLFRAG;
@@ -1626,7 +1636,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1725,11 +1735,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.flags = 0;
cork.base.addr = 0;
cork.base.opt = NULL;
+ cork.base.dst = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
-
+ }
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index db84f523656d..1ee5584c3555 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
- rel_info);
+ skb_dst_update_pmtu(skb2, rel_info);
}
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -1074,10 +1073,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
@@ -1123,10 +1123,14 @@ route_lookup:
max_headroom += 8;
mtu -= 8;
}
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- if (skb_dst(skb) && !t->parms.collect_md)
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ } else if (mtu < 576) {
+ mtu = 576;
+ }
+
+ skb_dst_update_pmtu(skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
@@ -1671,11 +1675,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
- if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < ETH_MIN_MTU)
+ if (tnl->parms.proto == IPPROTO_IPV6) {
+ if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
} else {
- if (new_mtu < IPV6_MIN_MTU)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index dbb74f3c57a7..8c184f84f353 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst);
if (!skb->ignore_df && skb->len > mtu) {
- skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
+ skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..e8ffb5b5d84e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
case IPV6_RECVFRAGSIZE:
@@ -1335,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fc6d7d143f2c..844642682b83 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f06e25065a34..66a8c69a3db4 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = table->private;
- /*
- * Ensure we load private-> members after we've fetched the base
- * pointer.
- */
- smp_read_barrier_depends();
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -458,7 +453,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (!xt_find_jump_offset(offsets, newpos,
newinfo->number))
return 0;
- e = entry0 + newpos;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 2b1a15846f9a..92c0047e7e33 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_target masquerade_tg6_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV6,
.checkentry = masquerade_tg6_checkentry,
+ .destroy = masquerade_tg6_destroy,
.target = masquerade_tg6,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7a8d1500d374..0458b761f3c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
}
rt->dst.flags |= DST_HOST;
+ rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- if (!fibmatch)
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_input_lookup(net, dev, &fl6, flags);
rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
- if (!fibmatch)
- dst = ip6_route_output(net, NULL, &fl6);
- else
- dst = ip6_route_lookup(net, &fl6, 0);
+ dst = ip6_route_output(net, NULL, &fl6);
}
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (fibmatch && rt->dst.from) {
+ struct rt6_info *ort = container_of(rt->dst.from,
+ struct rt6_info, dst);
+
+ dst_hold(&ort->dst);
+ ip6_rt_put(rt);
+ rt = ort;
+ }
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
ip6_rt_put(rt);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d7dc23c1b2ca..3873d3877135 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
df = 0;
}
- if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (tunnel->parms.iph.daddr)
+ skb_dst_update_pmtu(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1f04ec0e4a7a..7178476b3d2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..278e49cd67d4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad76f9d..2a04dc9c781b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..841f4a07438e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
}
EXPORT_SYMBOL(xfrm6_rcv_spi);
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (xfrm_trans_queue(skb, ip6_rcv_finish))
+ __kfree_skb(skb);
+ return -1;
+}
+
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_rcv_finish);
+ xfrm6_transport_finish2);
return -1;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 02556e356f87..dc93002ff9d1 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 148533169b1d..64331158d693 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1474,7 +1474,7 @@ done:
return copied;
}
-static inline unsigned int iucv_accept_poll(struct sock *parent)
+static inline __poll_t iucv_accept_poll(struct sock *parent)
{
struct iucv_sock *isk, *n;
struct sock *sk;
@@ -1489,11 +1489,11 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
return 0;
}
-unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
+__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4e98f20fc2a..4a8d407f8902 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
- /* We must prevent loops or risk deadlock ! */
- if (csk->sk_family == PF_KCM)
+ /* Only allow TCP sockets to be attached for now */
+ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
+ csk->sk_protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ /* Don't allow listeners or closed sockets */
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
return -EOPNOTSUPP;
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
return err;
}
- sock_hold(csk);
-
write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is aready by KCM or someone else.
+ * Must be done under lock to prevent race conditions.
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ return -EALREADY;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
csk->sk_data_ready = psock_data_ready;
csk->sk_write_space = psock_write_space;
csk->sk_state_change = psock_state_change;
+
write_unlock_bh(&csk->sk_callback_lock);
+ sock_hold(csk);
+
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh(&mux->lock);
head = &mux->psocks;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3dffb892d52c..7e2e7188e7f4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
#endif
int len;
+ if (sp->sadb_address_len <
+ DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+ sizeof(uint64_t)))
+ return -EINVAL;
+
switch (addr->sa_family) {
case AF_INET:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
uint16_t ext_type;
int ext_len;
+ if (len < sizeof(*ehdr))
+ return -EINVAL;
+
ext_len = ehdr->sadb_ext_len;
ext_len *= sizeof(uint64_t);
ext_type = ehdr->sadb_ext_type;
@@ -2194,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
return PTR_ERR(out_skb);
err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(out_skb);
return err;
+ }
out_hdr = (struct sadb_msg *) out_skb->data;
out_hdr->sadb_msg_version = PF_KEY_V2;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 167f83b853e6..1621b6ab17ba 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -291,16 +291,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
int i;
mutex_lock(&sta->ampdu_mlme.mtx);
- for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
WLAN_REASON_QSTA_LEAVE_QBSS,
reason != AGG_STOP_DESTROY_STA &&
reason != AGG_STOP_PEER_REQUEST);
- }
- mutex_unlock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
___ieee80211_stop_tx_ba_session(sta, i, reason);
+ mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 70e9d2ca8bbe..4daafb07602f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
}
return true;
case NL80211_IFTYPE_MESH_POINT:
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+ return false;
if (multicast)
return true;
return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9ee71cb276d7..fbaf3bd05b2e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1636,17 +1636,14 @@ static int
ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
{
struct msghdr msg = {NULL,};
- struct kvec iov;
+ struct kvec iov = {buffer, buflen};
int len;
EnterFunction(7);
/* Receive a packet */
- iov.iov_base = buffer;
- iov.iov_len = (size_t)buflen;
-
- len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
-
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+ len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
if (len < 0)
return len;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85f643c1e227..4efaa3066c78 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1044,7 +1044,7 @@ static void gc_worker(struct work_struct *work)
* we will just continue with next hash slot.
*/
rcu_read_unlock();
- cond_resched_rcu_qs();
+ cond_resched();
} while (++buckets < goal);
if (gc_work->exiting)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index cf1bf2605c10..dc6347342e34 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -103,7 +103,6 @@ struct bitstr {
#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
-#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
static unsigned int get_len(struct bitstr *bs);
static unsigned int get_bit(struct bitstr *bs);
static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
return v;
}
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+ bits += bs->bit;
+ bytes += bits / BITS_PER_BYTE;
+ if (bits % BITS_PER_BYTE > 0)
+ bytes++;
+
+ if (*bs->cur + bytes > *bs->end)
+ return 1;
+
+ return 0;
+}
+
/****************************************************************************/
static unsigned int get_bit(struct bitstr *bs)
{
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
INC_BIT(bs);
-
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
+
len = *bs->cur++;
bs->cur += len;
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
- CHECK_BOUND(bs, 0);
return H323_ERROR_NONE;
}
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
bs->cur += 2;
break;
case CONS: /* 64K < Range < 4G */
+ if (nf_h323_error_boundary(bs, 0, 2))
+ return H323_ERROR_BOUND;
len = get_bits(bs, 2) + 1;
BYTE_ALIGN(bs);
if (base && (f->attr & DECODE)) { /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
break;
case UNCO:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
bs->cur += len;
break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
INC_BITS(bs, f->sz);
}
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
len = f->lb;
break;
case WORD: /* 2-byte length */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) << 8;
len += (*bs->cur++) + f->lb;
break;
case SEMI:
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
break;
default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len >> 3;
bs->bit = len & 7;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
/* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
INC_BITS(bs, (len << 2));
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
break;
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
PRINT("\n");
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE: /* Range == 256 */
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
len = (*bs->cur++) + f->lb;
break;
default: /* 2 <= Range <= 255 */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
len = get_bits(bs, f->sz) + f->lb;
BYTE_ALIGN(bs);
break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
bs->cur += len << 1;
- CHECK_BOUND(bs, 0);
+ if (nf_h323_error_boundary(bs, 0, 0))
+ return H323_ERROR_BOUND;
return H323_ERROR_NONE;
}
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Extensible? */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
ext = (f->attr & EXT) ? get_bit(bs) : 0;
/* Get fields bitmap */
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Decode */
if (son->attr & OPEN) { /* Open field */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
/* Get the extension bitmap */
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
bmp2_len = get_bits(bs, 7) + 1;
- CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+ if (nf_h323_error_boundary(bs, 0, bmp2_len))
+ return H323_ERROR_BOUND;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
/* Check Range */
if (i >= f->ub) { /* Newer Version? */
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
continue;
}
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
if (!((0x80000000 >> opt) & bmp2)) /* Not present */
continue;
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
switch (f->sz) {
case BYTE:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 1);
+ if (nf_h323_error_boundary(bs, 1, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
break;
case WORD:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = *bs->cur++;
count <<= 8;
count += *bs->cur++;
break;
case SEMI:
BYTE_ALIGN(bs);
- CHECK_BOUND(bs, 2);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
count = get_len(bs);
break;
default:
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
count = get_bits(bs, f->sz);
break;
}
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
for (i = 0; i < count; i++) {
if (son->attr & OPEN) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
" ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
/* Decode the choice index number */
+ if (nf_h323_error_boundary(bs, 0, 1))
+ return H323_ERROR_BOUND;
if ((f->attr & EXT) && get_bit(bs)) {
ext = 1;
+ if (nf_h323_error_boundary(bs, 0, 7))
+ return H323_ERROR_BOUND;
type = get_bits(bs, 7) + f->lb;
} else {
ext = 0;
+ if (nf_h323_error_boundary(bs, 0, f->sz))
+ return H323_ERROR_BOUND;
type = get_bits(bs, f->sz);
if (type >= f->lb)
return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
/* Check Range */
if (type >= f->ub) { /* Newer version? */
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, 2, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
bs->cur += len;
return H323_ERROR_NONE;
}
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
if (ext || (son->attr & OPEN)) {
BYTE_ALIGN(bs);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
len = get_len(bs);
- CHECK_BOUND(bs, len);
+ if (nf_h323_error_boundary(bs, len, 0))
+ return H323_ERROR_BOUND;
if (!base || !(son->attr & DECODE)) {
PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
son->name);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59c08997bfdf..382d49792f42 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
static int ctnetlink_change_timeout(struct nf_conn *ct,
const struct nlattr * const cda[])
{
- u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+ u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
- ct->timeout = nfct_time_stamp + timeout * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = nfct_time_stamp + (u32)timeout;
if (test_bit(IPS_DYING_BIT, &ct->status))
return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
int err = -EINVAL;
struct nf_conntrack_helper *helper;
struct nf_conn_tstamp *tstamp;
+ u64 timeout;
ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
if (!cda[CTA_TIMEOUT])
goto err1;
- ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ ct->timeout = (u32)timeout + nfct_time_stamp;
rcu_read_lock();
if (cda[CTA_HELP]) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b12fc07111d0..37ef35b861f2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
timeout = timeouts[TCP_CONNTRACK_UNACK];
+ else if (ct->proto.tcp.last_win == 0 &&
+ timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+ timeout = timeouts[TCP_CONNTRACK_RETRANS];
else
timeout = timeouts[new_state];
spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d8327b43e4dc..07bd4138c84e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain[0] &&
+ if (ctx && ctx->chain &&
strcmp(ctx->chain, chain->name) != 0)
continue;
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_filter *filter = cb->data;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}
@@ -5847,6 +5849,12 @@ static int __net_init nf_tables_init_net(struct net *net)
return 0;
}
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
@@ -5917,6 +5925,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 41628b393673..d33ce6d5ebce 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
+#include <linux/capability.h>
#include <net/netlink.h>
#include <net/sock.h>
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
int ret = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth;
bool tuple_set = false;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
struct nfnl_cthelper *nlcth, *n;
int j = 0, ret;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5afab86381c..e955bec0acc6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
static void __net_exit nfnl_log_net_exit(struct net *net)
{
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ unsigned int i;
+
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
#endif
nf_log_unset(net, &nfulnl_logger);
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
}
static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index a16356cacec3..c09b36755ed7 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
static void __net_exit nfnl_queue_net_exit(struct net *net)
{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int i;
+
nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
}
static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a0a93d987a3b..47ec1046ad11 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
[NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
[NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
};
static int nft_exthdr_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a77dd514297c..55802e97f906 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
return 0;
}
+static void __net_exit xt_net_exit(struct net *net)
+{
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
+ .exit = xt_net_exit,
};
static int __init xt_init(void)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 041da0d9c06f..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
{
struct sock_fprog_kern program;
+ if (len > XT_BPF_MAX_NUM_INSTR)
+ return -EINVAL;
+
program.len = len;
program.filter = insns;
@@ -52,18 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
{
- mm_segment_t oldfs = get_fs();
- int retval, fd;
-
- set_fs(KERNEL_DS);
- fd = bpf_obj_get_user(path, 0);
- set_fs(oldfs);
- if (fd < 0)
- return fd;
-
- retval = __bpf_mt_check_fd(fd, ret);
- sys_close(fd);
- return retval;
+ if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+ return -EINVAL;
+
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
}
static int bpf_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 36e14b1f061d..a34f314a8c23 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/capability.h>
#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *kf = NULL, *sf;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
struct xt_osf_finger *sf;
int err = -ENOENT;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..84a4e4c3be4b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
@@ -2381,13 +2384,14 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *,
struct netlink_ext_ack *))
{
- struct netlink_ext_ack extack = {};
+ struct netlink_ext_ack extack;
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
int msglen;
+ memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index fb7afcaa3004..985909f105eb 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -531,7 +531,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
return 0;
}
-static inline unsigned int llcp_accept_poll(struct sock *parent)
+static inline __poll_t llcp_accept_poll(struct sock *parent)
{
struct nfc_llcp_sock *llcp_sock, *parent_sock;
struct sock *sk;
@@ -549,11 +549,11 @@ static inline unsigned int llcp_accept_poll(struct sock *parent)
return 0;
}
-static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
+static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
pr_debug("%p\n", sk);
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 8d104c1db628..a66f102c6c01 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -305,7 +305,7 @@ static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
return 0;
}
-static unsigned int nci_uart_tty_poll(struct tty_struct *tty,
+static __poll_t nci_uart_tty_poll(struct tty_struct *tty,
struct file *filp, poll_table *wait)
{
return 0;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -EINVAL;
skb_reset_network_header(skb);
+ key->eth.type = skb->protocol;
} else {
eth = eth_hdr(skb);
ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
- skb->protocol = parse_ethertype(skb);
- if (unlikely(skb->protocol == htons(0)))
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
+ /* Multiple tagged packets need to retain TPID to satisfy
+ * skb_vlan_pop(), which will later shift the ethertype into
+ * skb->protocol.
+ */
+ if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+ skb->protocol = key->eth.cvlan.tpid;
+ else
+ skb->protocol = key->eth.type;
+
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
}
skb_reset_mac_len(skb);
- key->eth.type = skb->protocol;
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 624ea74353dd..f143908b651d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,7 +49,6 @@
#include <net/mpls.h>
#include <net/vxlan.h>
#include <net/tun_proto.h>
-#include <net/erspan.h>
#include "flow_netlink.h"
@@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void)
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
- + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
}
static size_t ovs_nsh_key_attr_size(void)
@@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
- [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
};
static const struct ovs_len_tbl
@@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
-static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
-{
- unsigned long opt_key_offset;
- struct erspan_metadata opts;
-
- BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
-
- memset(&opts, 0, sizeof(opts));
- opts.index = nla_get_be32(attr);
-
- /* Index has only 20-bit */
- if (ntohl(opts.index) & ~INDEX_MASK) {
- OVS_NLERR(log, "ERSPAN index number %x too large.",
- ntohl(opts.index));
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
- opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
- is_mask);
-
- return 0;
-}
-
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
- case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- if (opts_type) {
- OVS_NLERR(log, "Multiple metadata blocks provided");
- return -EINVAL;
- }
-
- err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
- if (err)
- return err;
-
- tun_flags |= TUNNEL_ERSPAN_OPT;
- opts_type = type;
- break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
- else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
- ((struct erspan_metadata *)tun_opts)->index))
- return -EMSGSIZE;
}
return 0;
@@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
- case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
- break;
}
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da215e5c1399..3b4d6a3cf190 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4073,12 +4073,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
return 0;
}
-static unsigned int packet_poll(struct file *file, struct socket *sock,
+static __poll_t packet_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- unsigned int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1b050dd17393..44417480dab7 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -341,12 +341,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
return 0;
}
-static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
+static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
- unsigned int mask = 0;
+ __poll_t mask = 0;
poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b405f77d664c..88aa8ad0f5b6 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -152,12 +152,12 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* to send to a congested destination, the system call may still fail (and
* return ENOBUFS).
*/
-static unsigned int rds_poll(struct file *file, struct socket *sock,
+static __poll_t rds_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
- unsigned int mask = 0;
+ __poll_t mask = 0;
unsigned long flags;
poll_wait(file, sk_sleep(sk), wait);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..b2a5067b4afe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
if (rds_conn_state(conn) == RDS_CONN_UP) {
struct rds_ib_device *rds_ibdev;
- struct rdma_dev_addr *dev_addr;
ic = conn->c_transport_data;
- dev_addr = &ic->i_cm_id->route.addr.dev_addr;
- rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
- rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+ rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
+ (union ib_gid *)&iinfo->dst_gid);
rds_ibdev = ic->rds_ibdev;
iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index bc2f1e0977d6..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index b52cdc8ae428..f72466c63f0c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
continue;
if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ if (cmsg->cmsg_len <
+ CMSG_LEN(sizeof(struct rds_rdma_args)))
+ return -EINVAL;
args = CMSG_DATA(cmsg);
*rdma_bytes += args->remote_vec.bytes;
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 6b7ee71f40c6..ab7356e0ba83 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
sizeof(val));
}
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
- return tcp_sk(tc->t_sock->sk)->snd_nxt;
+ /* seq# of the last byte of data in tcp send buffer */
+ return tcp_sk(tc->t_sock->sk)->write_seq;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 1aafbf7c3011..864ca7d8f019 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -54,7 +54,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index dc860d1bb608..9b76e0fa1722 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 2064c3a35ef8..124c77e9d058 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1139,10 +1139,10 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
return -ENOMEM;
}
-static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
+static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
{
struct rfkill_data *data = file->private_data;
- unsigned int res = POLLOUT | POLLWRNORM;
+ __poll_t res = POLLOUT | POLLWRNORM;
poll_wait(file, &data->read_wait, wait);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index dcd818fa837e..21ad6a3a465c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -729,12 +729,12 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
/*
* permit an RxRPC socket to be polled
*/
-static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48ef7fc3..a0ac42b3ed06 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 1e3f10e5da99..6445184b2759 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbmark_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 2ea1f26c9e96..7221437ca3a6 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -22,7 +22,6 @@
#include <net/pkt_sched.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
struct tcf_meta_info *e)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8b3e59388480..08b61849c2a2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ddcf04b4ab43..b9d63d2246e6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,7 +23,6 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
-#include <linux/err.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -352,6 +351,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
{
struct tcf_chain *chain;
+ if (!block)
+ return;
/* Hold a refcnt for all chains, except 0, so that they don't disappear
* while we are iterating.
*/
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..a62586e2dbdb 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
- bool offloaded;
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
@@ -148,73 +147,63 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- enum tc_clsbpf_command cmd)
+ struct cls_bpf_prog *oldprog)
{
- bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
struct tcf_block *block = tp->chain->block;
- bool skip_sw = tc_skip_sw(prog->gen_flags);
struct tc_cls_bpf_offload cls_bpf = {};
+ struct cls_bpf_prog *obj;
+ bool skip_sw;
int err;
+ skip_sw = prog && tc_skip_sw(prog->gen_flags);
+ obj = prog ?: oldprog;
+
tc_cls_common_offload_init(&cls_bpf.common, tp);
- cls_bpf.command = cmd;
- cls_bpf.exts = &prog->exts;
- cls_bpf.prog = prog->filter;
- cls_bpf.name = prog->bpf_name;
- cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
+ cls_bpf.command = TC_CLSBPF_OFFLOAD;
+ cls_bpf.exts = &obj->exts;
+ cls_bpf.prog = prog ? prog->filter : NULL;
+ cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+ cls_bpf.name = obj->bpf_name;
+ cls_bpf.exts_integrated = obj->exts_integrated;
+ cls_bpf.gen_flags = obj->gen_flags;
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
- if (addorrep) {
+ if (prog) {
if (err < 0) {
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ cls_bpf_offload_cmd(tp, oldprog, prog);
return err;
} else if (err > 0) {
prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
}
}
- if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+ if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
return -EINVAL;
return 0;
}
+static u32 cls_bpf_flags(u32 flags)
+{
+ return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
+}
+
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- struct cls_bpf_prog *obj = prog;
- enum tc_clsbpf_command cmd;
- bool skip_sw;
- int ret;
-
- skip_sw = tc_skip_sw(prog->gen_flags) ||
- (oldprog && tc_skip_sw(oldprog->gen_flags));
-
- if (oldprog && oldprog->offloaded) {
- if (!tc_skip_hw(prog->gen_flags)) {
- cmd = TC_CLSBPF_REPLACE;
- } else if (!tc_skip_sw(prog->gen_flags)) {
- obj = oldprog;
- cmd = TC_CLSBPF_DESTROY;
- } else {
- return -EINVAL;
- }
- } else {
- if (tc_skip_hw(prog->gen_flags))
- return skip_sw ? -EINVAL : 0;
- cmd = TC_CLSBPF_ADD;
- }
-
- ret = cls_bpf_offload_cmd(tp, obj, cmd);
- if (ret)
- return ret;
+ if (prog && oldprog &&
+ cls_bpf_flags(prog->gen_flags) !=
+ cls_bpf_flags(oldprog->gen_flags))
+ return -EINVAL;
- obj->offloaded = true;
- if (oldprog)
- oldprog->offloaded = false;
+ if (prog && tc_skip_hw(prog->gen_flags))
+ prog = NULL;
+ if (oldprog && tc_skip_hw(oldprog->gen_flags))
+ oldprog = NULL;
+ if (!prog && !oldprog)
+ return 0;
- return 0;
+ return cls_bpf_offload_cmd(tp, prog, oldprog);
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +211,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
{
int err;
- if (!prog->offloaded)
- return;
-
- err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
- if (err) {
+ err = cls_bpf_offload_cmd(tp, NULL, prog);
+ if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
- return;
- }
-
- prog->offloaded = false;
}
static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
- if (!prog->offloaded)
- return;
+ struct tcf_block *block = tp->chain->block;
+ struct tc_cls_bpf_offload cls_bpf = {};
+
+ tc_cls_common_offload_init(&cls_bpf.common, tp);
+ cls_bpf.command = TC_CLSBPF_STATS;
+ cls_bpf.exts = &prog->exts;
+ cls_bpf.prog = prog->filter;
+ cls_bpf.name = prog->bpf_name;
+ cls_bpf.exts_integrated = prog->exts_integrated;
+ cls_bpf.gen_flags = prog->gen_flags;
- cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4f4247..507859cdd1cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -45,7 +45,6 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#include <linux/netdevice.h>
#include <linux/idr.h>
struct tc_u_knode {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d69585..07c10bac06a0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
return 0;
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
}
static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b6c4f536876b..52529b7f8d96 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
tcm->tcm_info = refcount_read(&q->refcnt);
if (nla_put_string(skb, TCA_KIND, q->ops->id))
goto nla_put_failure;
+ if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+ goto nla_put_failure;
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
qlen = q->q.qlen;
@@ -1061,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
- if (qdisc_is_percpu_stats(sch)) {
- sch->cpu_bstats =
- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
- if (!sch->cpu_bstats)
- goto err_out4;
-
- sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
- if (!sch->cpu_qstats)
- goto err_out4;
- }
-
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB]);
if (IS_ERR(stab)) {
@@ -1113,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
ops->destroy(sch);
err_out3:
dev_put(dev);
- kfree((char *) sch - sch->padded);
+ qdisc_free(sch);
err_out2:
module_put(ops->owner);
err_out:
@@ -1121,8 +1112,6 @@ err_out:
return NULL;
err_out4:
- free_percpu(sch->cpu_bstats);
- free_percpu(sch->cpu_qstats);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cd1b200acae7..cac003fddf3e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -633,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
qdisc_skb_head_init(&sch->q);
spin_lock_init(&sch->q.lock);
+ if (ops->static_flags & TCQ_F_CPUSTATS) {
+ sch->cpu_bstats =
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!sch->cpu_bstats)
+ goto errout1;
+
+ sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!sch->cpu_qstats) {
+ free_percpu(sch->cpu_bstats);
+ goto errout1;
+ }
+ }
+
spin_lock_init(&sch->busylock);
lockdep_set_class(&sch->busylock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -642,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
+ sch->flags = ops->static_flags;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
@@ -649,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
refcount_set(&sch->refcnt, 1);
return sch;
+errout1:
+ kfree(p);
errout:
return ERR_PTR(err);
}
@@ -698,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_reset);
-static void qdisc_free(struct Qdisc *qdisc)
+void qdisc_free(struct Qdisc *qdisc)
{
if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
@@ -1040,6 +1056,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ /* Wait for flying RCU callback before it is freed. */
+ rcu_barrier_bh();
return;
}
@@ -1055,7 +1073,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
rcu_assign_pointer(*miniqp->p_miniq, miniq);
if (miniq_old)
- /* This is counterpart of the rcu barrier above. We need to
+ /* This is counterpart of the rcu barriers above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 5ecc38f35d47..003e1b063447 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -66,7 +66,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- int err;
+
+ net_inc_ingress_queue();
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
@@ -74,14 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
q->block_info.chain_head_change = clsact_chain_head_change;
q->block_info.chain_head_change_priv = &q->miniqp;
- err = tcf_block_get_ext(&q->block, sch, &q->block_info);
- if (err)
- return err;
-
- net_inc_ingress_queue();
- sch->flags |= TCQ_F_CPUSTATS;
-
- return 0;
+ return tcf_block_get_ext(&q->block, sch, &q->block_info);
}
static void ingress_destroy(struct Qdisc *sch)
@@ -120,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
.priv_size = sizeof(struct ingress_sched_data),
+ .static_flags = TCQ_F_CPUSTATS,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -172,6 +167,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
+ net_inc_ingress_queue();
+ net_inc_egress_queue();
+
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -188,20 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
q->egress_block_info.chain_head_change = clsact_chain_head_change;
q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
- err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
- if (err)
- goto err_egress_block_get;
-
- net_inc_ingress_queue();
- net_inc_egress_queue();
-
- sch->flags |= TCQ_F_CPUSTATS;
-
- return 0;
-
-err_egress_block_get:
- tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
- return err;
+ return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
}
static void clsact_destroy(struct Qdisc *sch)
@@ -228,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
.priv_size = sizeof(struct clsact_sched_data),
+ .static_flags = TCQ_F_CPUSTATS,
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9d874e60e032..f0747eb87dc4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
.handle = sch->handle,
.parent = sch->parent,
};
+ int err;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.command = TC_RED_DESTROY;
}
- return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+
+ if (!err && enable)
+ sch->flags |= TCQ_F_OFFLOADED;
+ else
+ sch->flags &= ~TCQ_F_OFFLOADED;
+
+ return err;
}
static void red_destroy(struct Qdisc *sch)
@@ -274,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
return red_change(sch, opt);
}
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_red_qopt_offload hw_stats = {
@@ -286,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
.stats.qstats = &sch->qstats,
},
};
- int err;
- opt->flags &= ~TC_RED_OFFLOADED;
- if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
- return 0;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
- &hw_stats);
- if (err == -EOPNOTSUPP)
+ if (!(sch->flags & TCQ_F_OFFLOADED))
return 0;
- if (!err)
- opt->flags |= TC_RED_OFFLOADED;
-
- return err;
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
}
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -319,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
- err = red_dump_offload(sch, &opt);
+ err = red_dump_offload_stats(sch, &opt);
if (err)
goto nla_put_failure;
@@ -347,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
- if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ if (sch->flags & TCQ_F_OFFLOADED) {
struct red_stats hw_stats = {0};
struct tc_red_qopt_offload hw_stats_request = {
.command = TC_RED_XSTATS,
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 3f619fdcbf0a..291c97b07058 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
case SCTP_CID_AUTH:
return "AUTH";
+ case SCTP_CID_RECONF:
+ return "RECONF";
+
default:
break;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
return;
}
- if (t->param_flags & SPP_PMTUD_ENABLE) {
- /* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
-
- /* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
- }
+ if (!(t->param_flags & SPP_PMTUD_ENABLE))
+ /* We can't allow retransmitting in such case, as the
+ * retransmission would be sized just as before, and thus we
+ * would get another icmp, and retransmit again.
+ */
+ return;
- /* Retransmit with the new pmtu setting.
- * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
- * Needed will never be sent, but if a message was sent before
- * PMTU discovery was disabled that was larger than the PMTU, it
- * would not be fragmented, so it must be re-transmitted fragmented.
+ /* Update transports view of the MTU. Return if no update was needed.
+ * If an update wasn't needed/possible, it also doesn't make sense to
+ * try to retransmit now.
*/
+ if (!sctp_transport_update_pmtu(t, pmtu))
+ return;
+
+ /* Update association pmtu. */
+ sctp_assoc_sync_pmtu(asoc);
+
+ /* Retransmit with the new pmtu setting. */
sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 3b18085e3b10..5d4c15bf66d2 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
case AF_INET:
if (!__ipv6_only_sock(sctp_opt2sk(sp)))
return 1;
+ /* fallthru */
default:
return 0;
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b93b29..35bc7106d182 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7d67feeeffc1..c4ec99b20150 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -918,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
break;
case SCTP_CID_ABORT:
- if (sctp_test_T_bit(chunk)) {
+ if (sctp_test_T_bit(chunk))
packet->vtag = asoc->c.my_vtag;
- }
+ /* fallthru */
/* The following chunks are "response" chunks, i.e.
* they are generated in response to something we
* received. If we are sending these, then we can
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index eb17a911aa29..737e551fbf67 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -85,7 +85,7 @@
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk);
+ size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
if (len < sizeof (struct sockaddr))
return NULL;
+ if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+ return NULL;
+
/* V4 mapped address are really of AF_INET family */
if (addr->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
- if (!opt->pf->af_supported(AF_INET, opt))
- return NULL;
- } else {
- /* Does this PF support this AF? */
- if (!opt->pf->af_supported(addr->sa.sa_family, opt))
- return NULL;
- }
+ ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+ !opt->pf->af_supported(AF_INET, opt))
+ return NULL;
/* If we get this far, af is valid. */
af = sctp_get_af_specific(addr->sa.sa_family);
@@ -970,13 +968,6 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
* This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
* from userspace.
*
- * We don't use copy_from_user() for optimization: we first do the
- * sanity checks (buffer size -fast- and access check-healthy
- * pointer); if all of those succeed, then we can alloc the memory
- * (expensive operation) needed to copy the data to kernel. Then we do
- * the copying without checking the user space area
- * (__copy_from_user()).
- *
* On exit there is no need to do sockfd_put(), sys_setsockopt() does
* it.
*
@@ -1006,25 +997,15 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- /* Check the user passed a healthy pointer. */
- if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
- return -EFAULT;
-
- /* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
- if (unlikely(!kaddrs))
- return -ENOMEM;
-
- if (__copy_from_user(kaddrs, addrs, addrs_size)) {
- kfree(kaddrs);
- return -EFAULT;
- }
+ kaddrs = vmemdup_user(addrs, addrs_size);
+ if (unlikely(IS_ERR(kaddrs)))
+ return PTR_ERR(kaddrs);
/* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kfree(kaddrs);
+ kvfree(kaddrs);
return -EINVAL;
}
@@ -1035,7 +1016,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
* causes the address buffer to overflow return EINVAL.
*/
if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kfree(kaddrs);
+ kvfree(kaddrs);
return -EINVAL;
}
addrcnt++;
@@ -1065,7 +1046,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
}
out:
- kfree(kaddrs);
+ kvfree(kaddrs);
return err;
}
@@ -1323,13 +1304,6 @@ out_free:
* land and invoking either sctp_connectx(). This is used for tunneling
* the sctp_connectx() request through sctp_setsockopt() from userspace.
*
- * We don't use copy_from_user() for optimization: we first do the
- * sanity checks (buffer size -fast- and access check-healthy
- * pointer); if all of those succeed, then we can alloc the memory
- * (expensive operation) needed to copy the data to kernel. Then we do
- * the copying without checking the user space area
- * (__copy_from_user()).
- *
* On exit there is no need to do sockfd_put(), sys_setsockopt() does
* it.
*
@@ -1345,7 +1319,6 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
sctp_assoc_t *assoc_id)
{
struct sockaddr *kaddrs;
- gfp_t gfp = GFP_KERNEL;
int err = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
@@ -1354,24 +1327,12 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- /* Check the user passed a healthy pointer. */
- if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
- return -EFAULT;
+ kaddrs = vmemdup_user(addrs, addrs_size);
+ if (unlikely(IS_ERR(kaddrs)))
+ return PTR_ERR(kaddrs);
- /* Alloc space for the address array in kernel memory. */
- if (sk->sk_socket->file)
- gfp = GFP_USER | __GFP_NOWARN;
- kaddrs = kmalloc(addrs_size, gfp);
- if (unlikely(!kaddrs))
- return -ENOMEM;
-
- if (__copy_from_user(kaddrs, addrs, addrs_size)) {
- err = -EFAULT;
- } else {
- err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
- }
-
- kfree(kaddrs);
+ err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+ kvfree(kaddrs);
return err;
}
@@ -1883,8 +1844,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
*/
if (sinit) {
if (sinit->sinit_num_ostreams) {
- asoc->c.sinit_num_ostreams =
- sinit->sinit_num_ostreams;
+ __u16 outcnt = sinit->sinit_num_ostreams;
+
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, so re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0,
+ GFP_KERNEL);
+ if (err)
+ goto out_free;
}
if (sinit->sinit_max_instreams) {
asoc->c.sinit_max_instreams =
@@ -1971,7 +1938,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
/* sk can be changed by peel off when waiting for buf. */
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err) {
if (err == -ESRCH) {
/* asoc is already dead. */
@@ -2277,7 +2244,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
- GFP_ATOMIC);
+ GFP_USER | __GFP_NOWARN);
if (!event)
return -ENOMEM;
@@ -3498,6 +3465,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
if (optlen < sizeof(struct sctp_hmacalgo))
return -EINVAL;
+ optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+ SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen);
if (IS_ERR(hmacs))
@@ -3536,6 +3505,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
+ /* authkey->sca_keylength is u16, so optlen can't be bigger than
+ * this.
+ */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen);
if (IS_ERR(authkey))
@@ -3891,13 +3865,20 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen < sizeof(struct sctp_reset_streams))
+ if (optlen < sizeof(*params))
return -EINVAL;
+ /* srs_number_streams is u16, so optlen can't be bigger than this. */
+ optlen = min_t(unsigned int, optlen, USHRT_MAX +
+ sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen);
if (IS_ERR(params))
return PTR_ERR(params);
+ if (params->srs_number_streams * sizeof(__u16) >
+ optlen - sizeof(*params))
+ goto out;
+
asoc = sctp_id2assoc(sk, params->srs_assoc_id);
if (!asoc)
goto out;
@@ -4494,7 +4475,7 @@ static int sctp_init_sock(struct sock *sk)
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4538,7 +4519,7 @@ static void sctp_destroy_sock(struct sock *sk)
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
@@ -5011,7 +4992,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
len = sizeof(int);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+ if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
return -EFAULT;
return 0;
}
@@ -5641,6 +5622,9 @@ copy_getaddrs:
err = -EFAULT;
goto out;
}
+ /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+ * but we can't change it anymore.
+ */
if (put_user(bytes_copied, optlen))
err = -EFAULT;
out:
@@ -6077,7 +6061,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
params.assoc_id = 0;
} else if (len >= sizeof(struct sctp_assoc_value)) {
len = sizeof(struct sctp_assoc_value);
- if (copy_from_user(&params, optval, sizeof(params)))
+ if (copy_from_user(&params, optval, len))
return -EFAULT;
} else
return -EINVAL;
@@ -6247,7 +6231,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+ len = sizeof(struct sctp_authkeyid);
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6259,7 +6245,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
else
val.scact_keynumber = ep->active_key_id;
- len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, &val, len))
@@ -6285,7 +6270,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
@@ -6330,7 +6315,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
to = p->gauth_chunks;
@@ -7496,11 +7481,11 @@ out:
* here, again, by modeling the current TCP/UDP code. We don't have
* a good way to test with it yet.
*/
-unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct sctp_sock *sp = sctp_sk(sk);
- unsigned int mask;
+ __poll_t mask;
poll_wait(file, sk_sleep(sk), wait);
@@ -7998,12 +7983,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len, struct sock **orig_sk)
+ size_t msg_len)
{
struct sock *sk = asoc->base.sk;
- int err = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
+ int err = 0;
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
@@ -8032,17 +8017,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
- if (sk != asoc->base.sk) {
- release_sock(sk);
- sk = asoc->base.sk;
- lock_sock(sk);
- }
+ if (sk != asoc->base.sk)
+ goto do_error;
*timeo_p = current_timeo;
}
out:
- *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 76ea66be0bbe..524dfeb94c41 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sctp_stream_outq_migrate(stream, NULL, outcnt);
sched->sched_all(stream);
- i = sctp_stream_alloc_out(stream, outcnt, gfp);
- if (i)
- return i;
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+ goto out;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ in:
if (!incnt)
goto out;
- i = sctp_stream_alloc_in(stream, incnt, gfp);
- if (i) {
- ret = -ENOMEM;
- goto free;
+ ret = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (ret) {
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+ stream->outcnt = 0;
+ goto out;
}
stream->incnt = incnt;
- goto out;
-free:
- sched->free(stream);
- kfree(stream->out);
- stream->out = NULL;
out:
return ret;
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst = sctp_transport_dst_check(t);
+ bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
- pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
- /* Use default minimum segment size and disable
- * pmtu discovery on this transport.
- */
- t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- } else {
- t->pathmtu = pmtu;
+ pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+ /* Use default minimum segment instead */
+ pmtu = SCTP_DEFAULT_MINSEGMENT;
}
+ pmtu = SCTP_TRUNC4(pmtu);
if (dst) {
dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
}
- if (!dst)
+ if (!dst) {
t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+ dst = t->dst;
+ }
+
+ if (dst) {
+ /* Re-fetch, as under layers may have a higher minimum size */
+ pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ change = t->pathmtu != pmtu;
+ }
+ t->pathmtu = pmtu;
+
+ return change;
}
/* Caches the dst entry and source address for a transport's destination
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index a71be33f3afe..e36ec5dd64c6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
gfp_t gfp)
{
- struct sctp_association *asoc;
- __u16 needed, freed;
-
- asoc = ulpq->asoc;
+ struct sctp_association *asoc = ulpq->asoc;
+ __u32 freed = 0;
+ __u16 needed;
- if (chunk) {
- needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(struct sctp_data_chunk);
- } else
- needed = SCTP_DEFAULT_MAXWINDOW;
-
- freed = 0;
+ needed = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_data_chunk);
if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
freed = sctp_ulpq_renege_order(ulpq, needed);
- if (freed < needed) {
+ if (freed < needed)
freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
- }
}
/* If able to free enough room, accept this chunk. */
- if (chunk && (freed >= needed)) {
- int retval;
- retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+ if (freed >= needed) {
+ int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
/*
* Enter partial delivery if chunk has not been
* delivered; otherwise, drain the reassembly queue.
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6451c5013e06..449f62e1e270 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1107,7 +1107,7 @@ out:
return rc;
}
-static unsigned int smc_accept_poll(struct sock *parent)
+static __poll_t smc_accept_poll(struct sock *parent)
{
struct smc_sock *isk;
struct sock *sk;
@@ -1126,11 +1126,11 @@ static unsigned int smc_accept_poll(struct sock *parent)
return 0;
}
-static unsigned int smc_poll(struct file *file, struct socket *sock,
+static __poll_t smc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask = 0;
+ __poll_t mask = 0;
struct smc_sock *smc;
int rc;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 1800e16b2a02..511548085d16 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -35,7 +35,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
int reason_code = 0;
- struct kvec vec;
+ struct kvec vec = {buf, buflen};
int len, datlen;
int krflags;
@@ -43,12 +43,15 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
* so we don't consume any subsequent CLC message or payload data
* in the TCP byte stream
*/
- vec.iov_base = buf;
- vec.iov_len = buflen;
+ /*
+ * Caller must make sure that buflen is no less than
+ * sizeof(struct smc_clc_msg_hdr)
+ */
krflags = MSG_PEEK | MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
- sizeof(struct smc_clc_msg_hdr), krflags);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+ sizeof(struct smc_clc_msg_hdr));
+ len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (signal_pending(current)) {
reason_code = -EINTR;
clc_sk->sk_err = EINTR;
@@ -83,12 +86,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
/* receive the complete CLC message */
- vec.iov_base = buf;
- vec.iov_len = buflen;
memset(&msg, 0, sizeof(struct msghdr));
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
krflags = MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
+ len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen) {
smc->sk.sk_err = EPROTO;
reason_code = -EPROTO;
diff --git a/net/socket.c b/net/socket.c
index 05f361faec45..2f378449bc1b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -118,7 +118,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static unsigned int sock_poll(struct file *file,
+static __poll_t sock_poll(struct file *file,
struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
@@ -436,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
- if (unlikely(fd < 0))
+ if (unlikely(fd < 0)) {
+ sock_release(sock);
return fd;
+ }
newfile = sock_alloc_file(sock, flags, NULL);
if (likely(!IS_ERR(newfile))) {
@@ -1095,9 +1097,9 @@ out_release:
EXPORT_SYMBOL(sock_create_lite);
/* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table *wait)
+static __poll_t sock_poll(struct file *file, poll_table *wait)
{
- unsigned int busy_flag = 0;
+ __poll_t busy_flag = 0;
struct socket *sock;
/*
@@ -2619,6 +2621,15 @@ out_fs:
core_initcall(sock_init); /* early initcall */
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ bpf_jit_enable = 1;
+#endif
+ return 0;
+}
+pure_initcall(jit_init);
+
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
- if (sock_owned_by_user(strp->sk)) {
+ if (sock_owned_by_user_nocheck(strp->sk)) {
queue_work(strp_wq, &strp->work);
return;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index c4778cae58ef..444380f968f1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
goto out_free_groups;
creds->cr_group_info->gid[i] = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5dd4e6c9fef2..26531193fce4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
goto out;
rsci.cred.cr_group_info->gid[i] = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e68943895be4..aa36dad32db1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -930,10 +930,10 @@ out:
static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
-static unsigned int cache_poll(struct file *filp, poll_table *wait,
+static __poll_t cache_poll(struct file *filp, poll_table *wait,
struct cache_detail *cd)
{
- unsigned int mask;
+ __poll_t mask;
struct cache_reader *rp = filp->private_data;
struct cache_queue *cq;
@@ -1501,7 +1501,7 @@ static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
return cache_write(filp, buf, count, ppos, cd);
}
-static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
+static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
struct cache_detail *cd = PDE_DATA(file_inode(filp));
@@ -1720,7 +1720,7 @@ static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
return cache_write(filp, buf, count, ppos, cd);
}
-static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
+static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
{
struct cache_detail *cd = RPC_I(file_inode(filp))->private;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e2a4184f3c5d..6e432ecd7f99 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
- * rpc_protocol - Get transport protocol number for an RPC client
- * @clnt: RPC client to query
- *
- */
-int rpc_protocol(struct rpc_clnt *clnt)
-{
- int protocol;
-
- rcu_read_lock();
- protocol = rcu_dereference(clnt->cl_xprt)->prot;
- rcu_read_unlock();
- return protocol;
-}
-EXPORT_SYMBOL_GPL(rpc_protocol);
-
-/**
* rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query
*
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7803f3b6aa53..5c4330325787 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -340,12 +340,12 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
return res;
}
-static unsigned int
+static __poll_t
rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
{
struct inode *inode = file_inode(filp);
struct rpc_inode *rpci = RPC_I(inode);
- unsigned int mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = POLLOUT | POLLWRNORM;
poll_wait(filp, &rpci->waitq, wait);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..896691afbb1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
void (*do_action)(struct rpc_task *);
/*
- * Execute any pending callback first.
+ * Perform the next FSM step or a pending callback.
+ *
+ * tk_action may be NULL if the task has been killed.
+ * In particular, note that rpc_killall_tasks may
+ * do this at any time, so beware when dereferencing.
*/
- do_action = task->tk_callback;
- task->tk_callback = NULL;
- if (do_action == NULL) {
- /*
- * Perform the next FSM step.
- * tk_action may be NULL if the task has been killed.
- * In particular, note that rpc_killall_tasks may
- * do this at any time, so beware when dereferencing.
- */
- do_action = task->tk_action;
- if (do_action == NULL)
- break;
+ do_action = task->tk_action;
+ if (task->tk_callback) {
+ do_action = task->tk_callback;
+ task->tk_callback = NULL;
}
- trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+ if (!do_action)
+ break;
+ trace_rpc_task_run_action(task->tk_client, task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 740b67d5a733..af7f28fb8102 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
ug.gi->gid[i] = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
cred->cr_group_info->gid[i] = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ff8e06cd067e..5570719e4787 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,8 +338,8 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
- msg.msg_flags);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+ len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 333b9d697ae5..2436fd1125fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
if (task->tk_status != -ETIMEDOUT)
return;
- dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
+ trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer)
xprt->ops->timer(xprt, task);
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ unsigned int connect_cookie;
int status, numreqs;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
} else if (!req->rq_bytes_sent)
return;
+ connect_cookie = xprt->connect_cookie;
req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->stat.sending_u += xprt->sending.qlen;
xprt->stat.pending_u += xprt->pending.qlen;
+ spin_unlock_bh(&xprt->transport_lock);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else {
+ req->rq_connect_cookie = connect_cookie;
+ if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
/*
- * Sleep on the pending queue since
- * we're expecting a reply.
+ * Sleep on the pending queue if we're expecting a reply.
+ * The spinlock ensures atomicity between the test of
+ * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
*/
- if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task))
+ spin_lock(&xprt->recv_lock);
+ if (!req->rq_reply_bytes_recvd) {
rpc_sleep_on(&xprt->pending, task, xprt_timer);
- req->rq_connect_cookie = xprt->connect_cookie;
+ /*
+ * Send an extra queue wakeup call if the
+ * connection was dropped in case the call to
+ * rpc_sleep_on() raced.
+ */
+ if (!xprt_connected(xprt))
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ }
+ spin_unlock(&xprt->recv_lock);
}
- spin_unlock_bh(&xprt->transport_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_rep *rep;
int rc = 0;
while (count--) {
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- pr_err("RPC: %s: reply buffer alloc failed\n",
- __func__);
- rc = PTR_ERR(rep);
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
break;
- }
-
- rpcrdma_recv_buffer_put(rep);
}
-
return rc;
}
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
-
+ trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
+
+ trace_xprtrdma_cb_reply(rqst);
+ return 0;
+}
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ int rc;
+
+ if (!xprt_connected(rqst->rq_xprt))
+ goto drop_connection;
+
+ rc = rpcrdma_bc_marshal_reply(rqst);
+ if (rc < 0)
+ goto failed_marshal;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
return 0;
+
+failed_marshal:
+ if (rc != -ENOTCONN)
+ return rc;
+drop_connection:
+ xprt_disconnect_done(rqst->rq_xprt);
+ return -ENOTCONN;
}
/**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
- smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
- clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- smp_mb__after_atomic();
-
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
* Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
- dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
- set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
- dprintk("RPC: %s: attaching rep %p to req %p\n",
- __func__, rep, req);
req->rl_reply = rep;
-
- /* Defeat the retransmit detection logic in send_request */
- req->rl_connect_cookie = 0;
+ trace_xprtrdma_cb_call(rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
}
static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT
};
- mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL);
- if (!mw->fmr.fm_physaddrs)
+ if (!mr->fmr.fm_physaddrs)
goto out_free;
- mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mw->mw_sg), GFP_KERNEL);
- if (!mw->mw_sg)
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_free;
- sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
- mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr);
- if (IS_ERR(mw->fmr.fm_mr))
+ if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
return 0;
out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mw->fmr.fm_mr));
+ PTR_ERR(mr->fmr.fm_mr));
out_free:
- kfree(mw->mw_sg);
- kfree(mw->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
return -ENOMEM;
}
static int
-__fmr_unmap(struct rpcrdma_mw *mw)
+__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
int rc;
- list_add(&mw->fmr.fm_mr->list, &l);
+ list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
- list_del(&mw->fmr.fm_mr->list);
+ list_del(&mr->fmr.fm_mr->list);
return rc;
}
static void
-fmr_op_release_mr(struct rpcrdma_mw *r)
+fmr_op_release_mr(struct rpcrdma_mr *mr)
{
LIST_HEAD(unmap_list);
int rc;
/* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- kfree(r->fmr.fm_physaddrs);
- kfree(r->mw_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(r);
+ rc = __fmr_unmap(mr);
if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- r, rc);
+ mr, rc);
- rc = ib_dealloc_fmr(r->fmr.fm_mr);
+ rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
- r, rc);
+ mr, rc);
- kfree(r);
+ kfree(mr);
}
/* Reset of a single FMR.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mw *mw)
+fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc;
/* ORDER: invalidate first */
- rc = __fmr_unmap(mw);
-
- /* ORDER: then DMA unmap */
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rc = __fmr_unmap(mr);
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ /* ORDER: then DMA unmap */
+ rpcrdma_mr_unmap_and_put(mr);
+
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
+ pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- fmr_op_release_mr(mw);
+ fmr_op_release_mr(mr);
}
static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
u64 *dma_pages;
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
len += seg->mr_len;
++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
- dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
- rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
+ for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
+ dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+ rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]);
if (rc)
goto out_maperr;
- mw->mw_handle = mw->fmr.fm_mr->rkey;
- mw->mw_length = len;
- mw->mw_offset = dma_pages[0] + pageoff;
+ mr->mr_handle = mr->fmr.fm_mr->rkey;
+ mr->mr_length = len;
+ mr->mr_offset = dma_pages[0] + pageoff;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
- pageoff, mw->mw_nents, rc);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ pageoff, mr->mr_nents, rc);
+ rpcrdma_mr_unmap_and_put(mr);
return ERR_PTR(-EIO);
}
@@ -256,13 +256,13 @@ out_maperr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list);
int rc;
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
- list_for_each_entry(mw, mws, mw_list) {
+ list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ __func__, &mr->fmr);
+ trace_xprtrdma_localinv(mr);
+ list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_del(&mw->fmr.fm_mr->list);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- list_del(&mw->fmr.fm_mr->list);
- fmr_op_recover_mr(mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ fmr_op_recover_mr(mr);
}
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mws list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
@@ -96,26 +96,26 @@ out_not_supported:
}
static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- unsigned int depth = ia->ri_max_frmr_depth;
- struct rpcrdma_frmr *f = &r->frmr;
+ unsigned int depth = ia->ri_max_frwr_depth;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(f->fr_mr))
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
- r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
- if (!r->mw_sg)
+ mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_list_err;
- sg_init_table(r->mw_sg, depth);
- init_completion(&f->fr_linv_done);
+ sg_init_table(mr->mr_sg, depth);
+ init_completion(&frwr->fr_linv_done);
return 0;
out_mr_err:
- rc = PTR_ERR(f->fr_mr);
+ rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
@@ -124,83 +124,85 @@ out_list_err:
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(f->fr_mr);
+ ib_dereg_mr(frwr->fr_mr);
return rc;
}
static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ /* Ensure MR is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- rc = ib_dereg_mr(r->frmr.fr_mr);
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- r, rc);
- kfree(r->mw_sg);
- kfree(r);
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- struct rpcrdma_frmr *f = &r->frmr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- rc = ib_dereg_mr(f->fr_mr);
+ rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, r);
+ rc, mr);
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frmr_depth);
- if (IS_ERR(f->fr_mr)) {
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+ ia->ri_max_frwr_depth);
+ if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(f->fr_mr), r);
- return PTR_ERR(f->fr_mr);
+ PTR_ERR(frwr->fr_mr), mr);
+ return PTR_ERR(frwr->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
- f->fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
+ frwr->fr_state = FRWR_IS_INVALID;
return 0;
}
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
- enum rpcrdma_frmr_state state = mw->frmr.fr_state;
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
- rc = __frwr_reset_mr(ia, mw);
- if (state != FRMR_FLUSHED_LI)
+ rc = __frwr_mr_reset(ia, mr);
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mw);
+ frwr_op_release_mr(mr);
}
static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frmr_depth =
+ ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
- __func__, ia->ri_max_frmr_depth);
-
- /* Add room for frmr register and invalidate WRs.
- * 1. FRMR reg WR for head
- * 2. FRMR invalidate WR for head
- * 3. N FRMR reg WRs for pagelist
- * 4. N FRMR invalidate WRs for pagelist
- * 5. FRMR reg WR for tail
- * 6. FRMR invalidate WR for tail
+ __func__, ia->ri_max_frwr_depth);
+
+ /* Add room for frwr register and invalidate WRs.
+ * 1. FRWR reg WR for head
+ * 2. FRWR invalidate WR for head
+ * 3. N FRWR reg WRs for pagelist
+ * 4. N FRWR invalidate WRs for pagelist
+ * 5. FRWR reg WR for tail
+ * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
- /* Calculate N if the device max FRMR depth is smaller than
+ /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
- if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
- delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+ if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do {
- depth += 2; /* FRMR reg + invalidate */
- delta -= ia->ri_max_frmr_depth;
+ depth += 2; /* FRWR reg + invalidate */
+ delta -= ia->ri_max_frwr_depth;
} while (delta > 0);
}
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth);
+ ia->ri_max_frwr_depth);
return 0;
}
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+ RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}
static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_FR;
+ frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
+ trace_xprtrdma_wc_fastreg(wc, frwr);
}
/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
+ trace_xprtrdma_wc_li(wc, frwr);
}
/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
- complete(&frmr->fr_linv_done);
+ complete(&frwr->fr_linv_done);
+ trace_xprtrdma_wc_li_wake(wc, frwr);
}
/* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_mw *mw;
- struct rpcrdma_frmr *frmr;
- struct ib_mr *mr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n;
u8 key;
- mw = NULL;
+ mr = NULL;
do {
- if (mw)
- rpcrdma_defer_mr_recovery(mw);
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ if (mr)
+ rpcrdma_mr_defer_recovery(mr);
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
- } while (mw->frmr.fr_state != FRMR_IS_INVALID);
- frmr = &mw->frmr;
- frmr->fr_state = FRMR_IS_VALID;
- mr = frmr->fr_mr;
- reg_wr = &frmr->fr_regwr;
-
- if (nsegs > ia->ri_max_frmr_depth)
- nsegs = ia->ri_max_frmr_depth;
+ } while (mr->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mr->frwr;
+ frwr->fr_state = FRWR_IS_VALID;
+
+ if (nsegs > ia->ri_max_frwr_depth)
+ nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mw->mw_nents))
+ ibmr = frwr->fr_mr;
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+ if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
- __func__, frmr, mw->mw_nents, mr->length);
-
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
+ key = (u8)(ibmr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(ibmr, ++key);
+ reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
- frmr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
- reg_wr->mr = mr;
- reg_wr->key = mr->rkey;
+ reg_wr->mr = ibmr;
+ reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc)
goto out_senderr;
- mw->mw_handle = mr->rkey;
- mw->mw_length = mr->length;
- mw->mw_offset = mr->iova;
+ mr->mr_handle = ibmr->rkey;
+ mr->mr_length = ibmr->length;
+ mr->mr_offset = ibmr->iova;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- frmr->fr_state = FRMR_IS_INVALID;
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ frwr->fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frmr->fr_mr, n, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ frwr->fr_mr, n, mr->mr_nents);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
out_senderr:
- pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN);
}
+/* Handle a remotely invalidated mr on the @mrs list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+{
+ struct rpcrdma_mr *mr;
+
+ list_for_each_entry(mr, mrs, mr_list)
+ if (mr->mr_handle == rep->rr_inv_rkey) {
+ list_del(&mr->mr_list);
+ trace_xprtrdma_remoteinv(mr);
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_unmap_and_put(mr);
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_frmr *f;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
int count, rc;
/* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- f = NULL;
+ frwr = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, mws, mw_list) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ list_for_each_entry(mr, mrs, mr_list) {
+ mr->frwr.fr_state = FRWR_IS_INVALID;
- if (mw->mw_flags & RPCRDMA_MW_F_RI)
- continue;
+ frwr = &mr->frwr;
+ trace_xprtrdma_localinv(mr);
- f = &mw->frmr;
- dprintk("RPC: %s: invalidating frmr %p\n",
- __func__, f);
-
- f->fr_cqe.done = frwr_wc_localinv;
- last = &f->fr_invwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ last = &frwr->fr_invwr;
memset(last, 0, sizeof(*last));
- last->wr_cqe = &f->fr_cqe;
+ last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
- last->ex.invalidate_rkey = mw->mw_handle;
+ last->ex.invalidate_rkey = mr->mr_handle;
count++;
*prev = last;
prev = &last->next;
}
- if (!f)
+ if (!frwr)
goto unmap;
/* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete.
*/
last->send_flags = IB_SEND_SIGNALED;
- f->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&f->fr_linv_done);
+ frwr->fr_cqe.done = frwr_wc_localinv_wake;
+ reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
- wait_for_completion(&f->fr_linv_done);
+ wait_for_completion(&frwr->fr_linv_done);
if (rc)
goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MW list.
+ * them to the free MR list.
*/
unmap:
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping frmr %p\n",
- __func__, &mw->frmr);
- ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
reset_mrs:
- pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
- f = container_of(bad_wr, struct rpcrdma_frmr,
- fr_invwr);
- mw = container_of(f, struct rpcrdma_mw, frmr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
- __frwr_reset_mr(ia, mw);
+ __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next;
}
@@ -553,6 +561,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
/* rpcrdma.ko module initialization
*/
+#include <linux/types.h>
+#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
-#include "xprt_rdma.h"
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
+#include <asm/swab.h>
+
+#define CREATE_TRACE_POINTS
+#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ed34dc0f144c..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
}
static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
- *iptr++ = cpu_to_be32(mw->mw_handle);
- *iptr++ = cpu_to_be32(mw->mw_length);
- xdr_encode_hyper(iptr, mw->mw_offset);
+ *iptr++ = cpu_to_be32(mr->mr_handle);
+ *iptr++ = cpu_to_be32(mr->mr_length);
+ xdr_encode_hyper(iptr, mr->mr_offset);
}
static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
__be32 *p;
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position)
{
__be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int pos;
int nsegs;
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
+ false, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_read_segment(xdr, mw, pos) < 0)
+ if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__, pos,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
r_xprt->rx_stats.read_chunk_count++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.write_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.reply_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge;
unsigned int count;
- dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
- __func__, sc->sc_unmap_count, sc);
-
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
- return rpcrdma_bc_marshal_reply(rqst);
-#endif
-
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch;
}
+ /* If this is a retransmit, discard previously registered
+ * chunks. Very likely the connection has been replaced,
+ * so these registrations are invalid and unusable.
+ */
+ while (unlikely(!list_empty(&req->rl_registered))) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_defer_recovery(mr);
+ }
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret)
goto out_err;
- dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
- rqst->rq_task->tk_pid, __func__,
- transfertypes[rtype], transfertypes[wtype],
- xdr_stream_pos(xdr));
+ trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len)
curlen = copy_len;
- dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
- __func__, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup(rqst, copy_len, curlen);
srcp += curlen;
copy_len -= curlen;
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len)
curlen = pagelist_len;
- dprintk("RPC: %s: page %d"
- " srcp 0x%p len %d curlen %d\n",
- __func__, i, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup_pg(rqst, i, srcp,
+ copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
-/* Caller must guarantee @rep remains stable during this call.
- */
-static void
-rpcrdma_mark_remote_invalidation(struct list_head *mws,
- struct rpcrdma_rep *rep)
-{
- struct rpcrdma_mw *mw;
-
- if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
- return;
-
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- mw->mw_flags = RPCRDMA_MW_F_RI;
- break; /* only one invalidated MR per RPC */
- }
-}
-
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
+ u32 handle;
+ u64 offset;
__be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p))
return -EIO;
- ifdebug(FACILITY) {
- u64 offset;
- u32 handle;
-
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
- dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
- __func__, *length, (unsigned long long)offset,
- handle);
- } else {
- *length = be32_to_cpup(p + 1);
- }
+ handle = be32_to_cpup(p++);
+ *length = be32_to_cpup(p++);
+ xdr_decode_hyper(p, &offset);
+ trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength;
}
- dprintk("RPC: %s: segcount=%u, %u bytes\n",
- __func__, be32_to_cpup(p), *length);
return 0;
}
@@ -1296,8 +1255,7 @@ out:
* being marshaled.
*/
out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- rpcrdma_release_rqst(rep->rr_rxprt, req);
+ trace_xprtrdma_defer_cmp(rep);
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
- dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
-
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
@@ -1405,14 +1364,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
- dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- if (list_empty(&req->rl_registered) &&
- !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
- rpcrdma_complete_rqst(rep);
- else
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
@@ -1424,8 +1378,7 @@ out_badstatus:
return;
out_badversion:
- dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(rep->rr_vers));
+ trace_xprtrdma_reply_vers(rep);
goto repost;
/* The RPC transaction has already been terminated, or the header
@@ -1433,12 +1386,11 @@ out_badversion:
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply_rqst(rep);
goto repost;
out_shortreply:
- dprintk("RPC: %s: short/invalid reply\n", __func__);
+ trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 646c24494ea7..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,6 +52,7 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
+#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -66,8 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_inline_write_padding;
-unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -80,6 +80,7 @@ static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
+static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header;
@@ -113,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
},
{
.procname = "rdma_inline_write_padding",
- .data = &xprt_rdma_inline_write_padding,
+ .data = &dummy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -258,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connected(xprt);
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc)
xprt_wake_pending_tasks(xprt, rc);
- dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
}
@@ -274,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt);
- pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
+ trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
@@ -294,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- dprintk("RPC: %s: called\n", __func__);
+ trace_xprtrdma_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
@@ -305,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
-
xprt_free(xprt);
- dprintk("RPC: %s: returning\n", __func__);
-
module_put(THIS_MODULE);
}
@@ -360,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
/*
* Set up RDMA-specific connect data.
*/
-
- sap = (struct sockaddr *)&cdata.addr;
- memcpy(sap, args->dstaddr, args->addrlen);
+ sap = args->dstaddr;
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
@@ -372,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
if (rpc_get_port(sap))
xprt_set_bound(xprt);
+ xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs;
@@ -386,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
- cdata.padding = xprt_rdma_inline_write_padding;
-
/*
* Create new transport instance, which includes initialized
* o ia
@@ -397,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap);
+ rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
@@ -406,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
- new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
if (rc)
goto out2;
- /*
- * Allocate pre-registered send and receive buffers for headers and
- * any inline data. Also specify any padding which will be provided
- * from a preregistered zero buffer.
- */
rc = rpcrdma_buffer_create(new_xprt);
if (rc)
goto out3;
- /*
- * Register a callback for connection events. This is necessary because
- * connection loss notification is async. We also catch connection loss
- * when reaping receives.
- */
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
- xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0)
goto out4;
@@ -444,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]);
+ trace_xprtrdma_create(new_xprt);
return xprt;
out4:
- xprt_rdma_free_addresses(xprt);
- rc = -EINVAL;
+ rpcrdma_buffer_destroy(&new_xprt->rx_buf);
+ rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
+ trace_xprtrdma_destroy(new_xprt);
+ xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
}
@@ -487,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
rpcrdma_ep_disconnect(ep, ia);
}
+/**
+ * xprt_rdma_set_port - update server port with rpcbind result
+ * @xprt: controlling RPC transport
+ * @port: new port value
+ *
+ * Transport connect status is unchanged.
+ */
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
- struct sockaddr_in *sap;
+ struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
+ char buf[8];
- sap = (struct sockaddr_in *)&xprt->addr;
- sap->sin_port = htons(port);
- sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
- sap->sin_port = htons(port);
- dprintk("RPC: %s: %u\n", __func__, port);
+ dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
+ __func__, xprt,
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ port);
+
+ rpc_set_port(sap, port);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ snprintf(buf, sizeof(buf), "%u", port);
+ xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ snprintf(buf, sizeof(buf), "%4hx", port);
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
}
/**
@@ -515,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
- dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
-
xprt_force_disconnect(xprt);
}
@@ -639,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
- return -ENOMEM;
+ goto out_get;
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
@@ -652,18 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
- task->tk_pid, __func__, rqst->rq_callsize,
- rqst->rq_rcvsize, req);
-
+ req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ trace_xprtrdma_allocate(task, req);
return 0;
out_fail:
rpcrdma_buffer_put(req);
+out_get:
+ trace_xprtrdma_allocate(task, NULL);
return -ENOMEM;
}
@@ -680,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
- return;
-
- dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
-
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
+ trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req);
}
@@ -696,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
*
* Caller holds the transport's write lock.
*
- * Return values:
- * 0: The request has been sent
- * ENOTCONN: Caller needs to invoke connect logic then call again
- * ENOBUFS: Call again later to send the request
- * EIO: A permanent error occurred. The request was not sent,
- * and don't try it again
- *
- * send_request invokes the meat of RPC RDMA. It must do the following:
- *
- * 1. Marshal the RPC request into an RPC RDMA request, which means
- * putting a header in front of data, and creating IOVs for RDMA
- * from those in the request.
- * 2. In marshaling, detect opportunities for RDMA, and use them.
- * 3. Post a recv message to set up asynch completion, then send
- * the request (rpcrdma_ep_post).
- * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-ENOBUFS if the caller should call again later
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
@@ -722,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (unlikely(!rqst->rq_buffer))
+ return xprt_rdma_bc_send_reply(rqst);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
if (!xprt_connected(xprt))
goto drop_connection;
- /* On retransmit, remove any previously registered chunks */
- if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
-
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
@@ -742,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
- set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
+ __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -902,8 +887,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk("\tPadding %d\n\tMemreg %d\n",
- xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+ dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 710b3f77db82..f4eb63e8e689 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
/*
* internal functions
*/
-static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
struct workqueue_struct *recv_wq;
recv_wq = alloc_workqueue("xprtrdma_receive",
- WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+ WQ_MEM_RECLAIM | WQ_HIGHPRI,
0);
if (!recv_wq)
return -ENOMEM;
@@ -108,7 +108,10 @@ static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
+ struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+ rx_ep);
+ trace_xprtrdma_qp_error(r_xprt, event);
pr_err("rpcrdma: %s on device %s ep %p\n",
ib_event_msg(event->event), event->device->name, context);
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_send(sc, wc);
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */
+ trace_xprtrdma_wc_receive(rep, wc);
if (wc->status != IB_WC_SUCCESS)
goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */
- dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
- __func__, rep, wc->byte_len);
-
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
- r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
- r_xprt->rx_ia.ri_reminv_expected = true;
r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
-#endif
int connstate = 0;
+ trace_xprtrdma_conn_upcall(xprt, event);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EHOSTUNREACH;
- dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH;
- dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+ pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- sap, rpc_get_port(sap));
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
- sap, rpc_get_port(sap),
+ dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
rdma_reject_msg(id, event->status));
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
- dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
- __func__, sap, rpc_get_port(sap),
+ dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
+ __func__,
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event));
break;
@@ -298,13 +293,14 @@ connected:
}
static struct rdma_cm_id *
-rpcrdma_create_id(struct rpcrdma_xprt *xprt,
- struct rpcrdma_ia *ia, struct sockaddr *addr)
+rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id;
int rc;
+ trace_xprtrdma_conn_start(xprt);
+
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
ia->ri_async_rc = -ETIMEDOUT;
- rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ rc = rdma_resolve_addr(id, NULL,
+ (struct sockaddr *)&xprt->rx_xprt.addr,
+ RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
rc = ia->ri_async_rc;
@@ -365,19 +361,18 @@ out:
/**
* rpcrdma_ia_open - Open and initialize an Interface Adapter.
- * @xprt: controlling transport
- * @addr: IP address of remote peer
+ * @xprt: transport with IA to (re)initialize
*
* Returns 0 on success, negative errno if an appropriate
* Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
- ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ ia->ri_id = rpcrdma_create_id(xprt, ia);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
}
switch (xprt_rdma_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRWR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
break;
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
+
+ trace_xprtrdma_remove(r_xprt);
}
/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering\n", __func__);
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
@@ -630,9 +626,6 @@ out1:
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering, connected is %d\n",
- __func__, ep->rep_connected);
-
cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
int rc, err;
- pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+ trace_xprtrdma_reinsert(r_xprt);
rc = -EHOSTUNREACH;
- if (rpcrdma_ia_open(r_xprt, sap))
+ if (rpcrdma_ia_open(r_xprt))
goto out1;
rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out3;
}
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
return 0;
out3:
@@ -691,16 +683,15 @@ static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
struct rdma_cm_id *id, *old;
int err, rc;
- dprintk("RPC: %s: reconnecting...\n", __func__);
+ trace_xprtrdma_reconnect(r_xprt);
rpcrdma_ep_disconnect(ep, ia);
rc = -EHOSTUNREACH;
- id = rpcrdma_create_id(r_xprt, ia, sap);
+ id = rpcrdma_create_id(r_xprt, ia);
if (IS_ERR(id))
goto out;
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int rc;
rc = rdma_disconnect(ia->ri_id);
- if (!rc) {
+ if (!rc)
/* returns without wait if not connected */
wait_event_interruptible(ep->rep_connect_wait,
ep->rep_connected != 1);
- dprintk("RPC: %s: after wait, %sconnected\n", __func__,
- (ep->rep_connected == 1) ? "still " : "dis");
- } else {
- dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
+ else
ep->rep_connected = rc;
- }
+ trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
+ rx_ep), rc);
ib_drain_qp(ia->ri_id->qp);
}
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
{
struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
rb_recovery_worker.work);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) {
- mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
+ mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
- dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
- mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);
+ trace_xprtrdma_recover_mr(mr);
+ mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
spin_lock(&buf->rb_recovery_lock);
}
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
}
void
-rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
+rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock);
- rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
+ rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0);
}
static void
-rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
+rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(all);
for (count = 0; count < 32; count++) {
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int rc;
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
break;
- rc = ia->ri_ops->ro_init_mr(ia, mw);
+ rc = ia->ri_ops->ro_init_mr(ia, mr);
if (rc) {
- kfree(mw);
+ kfree(mr);
break;
}
- mw->mw_xprt = r_xprt;
+ mr->mr_xprt = r_xprt;
- list_add(&mw->mw_list, &free);
- list_add(&mw->mw_all, &all);
+ list_add(&mr->mr_list, &free);
+ list_add(&mr->mr_all, &all);
}
- spin_lock(&buf->rb_mwlock);
- list_splice(&free, &buf->rb_mws);
+ spin_lock(&buf->rb_mrlock);
+ list_splice(&free, &buf->rb_mrs);
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
- dprintk("RPC: %s: created %u MRs\n", __func__, count);
+ trace_xprtrdma_createmrs(r_xprt, count);
}
static void
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
}
struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req;
}
-struct rpcrdma_rep *
+/**
+ * rpcrdma_create_rep - Allocate an rpcrdma_rep object
+ * @r_xprt: controlling transport
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
int rc;
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- return rep;
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ spin_unlock(&buf->rb_lock);
+ return 0;
out_free:
kfree(rep);
out:
- return ERR_PTR(rc);
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, rc);
+ return rc;
}
int
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- spin_lock_init(&buf->rb_mwlock);
+ spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
- INIT_LIST_HEAD(&buf->rb_mws);
+ INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_DELAYED_WORK(&buf->rb_recovery_worker,
rpcrdma_mr_recovery_worker);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
- struct rpcrdma_rep *rep;
-
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, i);
- rc = PTR_ERR(rep);
+ for (i = 0; i <= buf->rb_max_requests; i++) {
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
goto out;
- }
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
}
static void
-rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
+rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int count;
count = 0;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
while (!list_empty(&buf->rb_all)) {
- mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
- list_del(&mw->mw_all);
+ mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+ list_del(&mr->mr_all);
- spin_unlock(&buf->rb_mwlock);
- ia->ri_ops->ro_release_mr(mw);
+ spin_unlock(&buf->rb_mrlock);
+ ia->ri_ops->ro_release_mr(mr);
count++;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
}
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
r_xprt->rx_stats.mrs_allocated = 0;
dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0;
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
}
-struct rpcrdma_mw *
-rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_mr_get - Allocate an rpcrdma_mr object
+ * @r_xprt: controlling transport
+ *
+ * Returns an initialized rpcrdma_mr or NULL if no free
+ * rpcrdma_mr objects are available.
+ */
+struct rpcrdma_mr *
+rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *mw = NULL;
+ struct rpcrdma_mr *mr = NULL;
- spin_lock(&buf->rb_mwlock);
- if (!list_empty(&buf->rb_mws))
- mw = rpcrdma_pop_mw(&buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
+ if (!list_empty(&buf->rb_mrs))
+ mr = rpcrdma_mr_pop(&buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
- if (!mw)
- goto out_nomws;
- mw->mw_flags = 0;
- return mw;
+ if (!mr)
+ goto out_nomrs;
+ return mr;
-out_nomws:
- dprintk("RPC: %s: no MWs available\n", __func__);
+out_nomrs:
+ trace_xprtrdma_nomrs(r_xprt);
if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_delayed_work(&buf->rb_refresh_worker, 0);
@@ -1315,14 +1316,39 @@ out_nomws:
return NULL;
}
+static void
+__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
+{
+ spin_lock(&buf->rb_mrlock);
+ rpcrdma_mr_push(mr, &buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
+}
+
+/**
+ * rpcrdma_mr_put - Release an rpcrdma_mr object
+ * @mr: object to release
+ *
+ */
void
-rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
+rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
+}
+
+/**
+ * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
+ * @mr: object to release
+ *
+ */
+void
+rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+{
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- spin_lock(&buf->rb_mwlock);
- rpcrdma_push_mw(mw, &buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
static struct rpcrdma_rep *
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
req = rpcrdma_buffer_get_req_locked(buffers);
req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock);
+
return req;
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("RPC: %s: out of request buffers\n", __func__);
return NULL;
}
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
req->rl_reply = NULL;
}
- dprintk("RPC: %s: posting %d s/g entries\n",
- __func__, send_wr->num_sge);
-
if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr->send_flags &= ~IB_SEND_SIGNALED;
--ep->rep_send_count;
}
+
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ trace_xprtrdma_post_send(req, rc);
if (rc)
- goto out_postsend_err;
+ return -ENOTCONN;
return 0;
-
-out_postsend_err:
- pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
- return -ENOTCONN;
}
int
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
goto out_map;
rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
+ trace_xprtrdma_post_recv(rep, rc);
if (rc)
- goto out_postrecv;
+ return -ENOTCONN;
return 0;
out_map:
pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
return -EIO;
-
-out_postrecv:
- pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
- return -ENOTCONN;
}
/**
* rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
* @r_xprt: transport associated with these backchannel resources
- * @min_reqs: minimum number of incoming requests expected
+ * @count: minimum number of incoming requests expected
*
* Returns zero if all requested buffers were posted, or a negative errno.
*/
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("%s: no extra receive buffers\n", __func__);
+ trace_xprtrdma_noreps(r_xprt);
return -ENOMEM;
out_rc:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 51686d9eac5f..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
- unsigned int ri_max_frmr_depth;
+ unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
- bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
};
@@ -232,29 +230,29 @@ enum {
};
/*
- * struct rpcrdma_mw - external memory region metadata
+ * struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
-enum rpcrdma_frmr_state {
- FRMR_IS_INVALID, /* ready to be used */
- FRMR_IS_VALID, /* in use */
- FRMR_FLUSHED_FR, /* flushed FASTREG WR */
- FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+enum rpcrdma_frwr_state {
+ FRWR_IS_INVALID, /* ready to be used */
+ FRWR_IS_VALID, /* in use */
+ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
+ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
};
-struct rpcrdma_frmr {
+struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frmr_state fr_state;
+ enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs;
};
-struct rpcrdma_mw {
- struct list_head mw_list;
- struct scatterlist *mw_sg;
- int mw_nents;
- enum dma_data_direction mw_dir;
- unsigned long mw_flags;
+struct rpcrdma_mr {
+ struct list_head mr_list;
+ struct scatterlist *mr_sg;
+ int mr_nents;
+ enum dma_data_direction mr_dir;
union {
struct rpcrdma_fmr fmr;
- struct rpcrdma_frmr frmr;
+ struct rpcrdma_frwr frwr;
};
- struct rpcrdma_xprt *mw_xprt;
- u32 mw_handle;
- u32 mw_length;
- u64 mw_offset;
- struct list_head mw_all;
-};
-
-/* mw_flags */
-enum {
- RPCRDMA_MW_F_RI = 1,
+ struct rpcrdma_xprt *mr_xprt;
+ u32 mr_handle;
+ u32 mr_length;
+ u64 mr_offset;
+ struct list_head mr_all;
};
/*
@@ -342,6 +334,7 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
+ int rl_cpu;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
@@ -361,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */
enum {
- RPCRDMA_REQ_F_BACKCHANNEL = 0,
- RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_TX_RESOURCES,
};
@@ -373,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
}
static inline struct rpcrdma_req *
-rpcr_to_rdmar(struct rpc_rqst *rqst)
+rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
return rqst->rq_xprtdata;
}
static inline void
-rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
- list_add_tail(&mw->mw_list, list);
+ list_add_tail(&mr->mr_list, list);
}
-static inline struct rpcrdma_mw *
-rpcrdma_pop_mw(struct list_head *list)
+static inline struct rpcrdma_mr *
+rpcrdma_mr_pop(struct list_head *list)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
- list_del(&mw->mw_list);
- return mw;
+ mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
+ list_del(&mr->mr_list);
+ return mr;
}
/*
@@ -401,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
- spinlock_t rb_mwlock; /* protect rb_mws list */
- struct list_head rb_mws;
+ spinlock_t rb_mrlock; /* protect rb_mrs list */
+ struct list_head rb_mrs;
struct list_head rb_all;
unsigned long rb_sc_head;
@@ -437,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
- struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
- unsigned int padding; /* non-rdma write header padding */
};
/*
@@ -483,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
- struct rpcrdma_mw **);
+ struct rpcrdma_mr **);
+ void (*ro_reminv)(struct rpcrdma_rep *rep,
+ struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mw *);
+ void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *,
- struct rpcrdma_mw *);
- void (*ro_release_mr)(struct rpcrdma_mw *);
+ struct rpcrdma_mr *);
+ void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname;
const int ro_send_w_inv_ok;
};
@@ -524,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+static inline const char *
+rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
+}
+
+static inline const char *
+rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
+}
+
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -537,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
@@ -563,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
-struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
-void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
+struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_mr_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
-void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -662,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -670,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
+
+#include <trace/events/rpcrdma.h>
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6d0cc3b8f932..18803021f242 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
#include "sunrpc.h"
+#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
+
static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
.offset = offset,
.count = len,
};
+ size_t ret;
dprintk("RPC: xs_tcp_data_recv started\n");
do {
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/* Skip over any trailing bytes on short reads */
xs_tcp_read_discard(transport, &desc);
} while (desc.count);
+ ret = len - desc.count;
+ if (ret < rd_desc->count)
+ rd_desc->count -= ret;
+ else
+ rd_desc->count = 0;
trace_xs_tcp_data_recv(transport);
dprintk("RPC: xs_tcp_data_recv done\n");
- return len - desc.count;
+ return ret;
}
static void xs_tcp_data_receive(struct sock_xprt *transport)
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk;
read_descriptor_t rd_desc = {
- .count = 2*1024*1024,
.arg.data = xprt,
};
unsigned long total = 0;
- int loop;
int read = 0;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (loop = 0; loop < 64; loop++) {
+ for (;;) {
+ rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (read <= 0) {
+ if (rd_desc.count != 0 || read < 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk);
break;
}
release_sock(sk);
total += read;
- rd_desc.count = 65536;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 47ec121574ce..c8001471da6c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -324,6 +324,7 @@ restart:
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
+ kfree(b);
return -EINVAL;
}
@@ -347,8 +348,10 @@ restart:
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
- if (tipc_mon_create(net, bearer_id))
+ if (tipc_mon_create(net, bearer_id)) {
+ bearer_disable(net, b);
return -ENOMEM;
+ }
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 95fec2c057d6..5f4ffae807ee 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m)
{
- if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+ m->state == MBR_REMITTED)
grp->active_cnt--;
}
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
if (m->window >= ADV_IDLE)
return;
- if (!list_empty(&m->congested))
- return;
+ list_del_init(&m->congested);
/* Sort member into congested members' list */
list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
u16 prev = grp->bc_snd_nxt - 1;
struct tipc_member *m;
struct rb_node *n;
+ u16 ackers = 0;
for (n = rb_first(&grp->members); n; n = rb_next(n)) {
m = container_of(n, struct tipc_member, tree_node);
if (tipc_group_is_enabled(m)) {
tipc_group_update_member(m, len);
m->bc_acked = prev;
+ ackers++;
}
}
/* Mark number of acknowledges to expect, if any */
if (ack)
- grp->bc_ackers = grp->member_cnt;
+ grp->bc_ackers = ackers;
grp->bc_snd_nxt++;
}
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
int max_active = grp->max_active;
int reclaim_limit = max_active * 3 / 4;
int active_cnt = grp->active_cnt;
- struct tipc_member *m, *rm;
+ struct tipc_member *m, *rm, *pm;
m = tipc_group_find_member(grp, node, port);
if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
}
+ grp->active_cnt--;
+ list_del_init(&m->list);
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
break;
case MBR_RECLAIMING:
case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
} else if (mtyp == GRP_REMIT_MSG) {
msg_set_grp_remitted(hdr, m->window);
}
+ msg_set_dest_droppable(hdr, true);
__skb_queue_tail(xmitq, skb);
}
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
__skb_queue_tail(inputq, m->event_msg);
}
- if (m->window < ADV_IDLE)
- tipc_group_update_member(m, 0);
- else
- list_del_init(&m->congested);
+ list_del_init(&m->congested);
+ tipc_group_update_member(m, 0);
return;
case GRP_LEAVE_MSG:
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+ list_del_init(&m->list);
+ list_del_init(&m->congested);
+ *usr_wakeup = true;
/* Wait until WITHDRAW event is received */
if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
ehdr = buf_msg(m->event_msg);
msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
__skb_queue_tail(inputq, m->event_msg);
- *usr_wakeup = true;
- list_del_init(&m->congested);
return;
case GRP_ADV_MSG:
if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
if (!m || m->state != MBR_RECLAIMING)
return;
- list_del_init(&m->list);
- grp->active_cnt--;
remitted = msg_grp_remitted(hdr);
/* Messages preceding the REMIT still in receive queue */
if (m->advertised > remitted) {
m->state = MBR_REMITTED;
in_flight = m->advertised - remitted;
+ m->advertised = ADV_IDLE + in_flight;
+ return;
}
/* All messages preceding the REMIT have been read */
if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
m->advertised = ADV_IDLE + in_flight;
+ grp->active_cnt--;
+ list_del_init(&m->list);
/* Set oldest pending member to active and advertise */
if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
*usr_wakeup = true;
m->usr_pending = false;
node_up = tipc_node_is_up(net, node);
-
- /* Hold back event if more messages might be expected */
- if (m->state != MBR_LEAVING && node_up) {
- m->event_msg = skb;
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- } else {
- if (node_up)
+ m->event_msg = NULL;
+
+ if (node_up) {
+ /* Hold back event if a LEAVE msg should be expected */
+ if (m->state != MBR_LEAVING) {
+ m->event_msg = skb;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ } else {
msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- else
+ __skb_queue_tail(inputq, skb);
+ }
+ } else {
+ if (m->state != MBR_LEAVING) {
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+ } else {
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ }
__skb_queue_tail(inputq, skb);
}
+ list_del_init(&m->list);
list_del_init(&m->congested);
}
*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 8e884ed06d4b..32dc33a94bc7 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *tmp;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
tn->monitors[bearer_id] = NULL;
list_for_each_entry_safe(peer, tmp, &self->list, list) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 507017fe0f1b..9036d8756e73 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
if (strcmp(name, tipc_bclink_name) == 0) {
err = tipc_nl_add_bc_link(net, &msg);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
} else {
int bearer_id;
struct tipc_node *node;
struct tipc_link *link;
node = tipc_node_find_by_name(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
+ if (!node) {
+ err = -EINVAL;
+ goto err_free;
+ }
tipc_node_read_lock(node);
link = node->links[bearer_id].link;
if (!link) {
tipc_node_read_unlock(node);
- nlmsg_free(msg.skb);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
err = __tipc_nl_add_link(net, &msg, link, 0);
tipc_node_read_unlock(node);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
}
return genlmsg_reply(msg.skb, info);
+
+err_free:
+ nlmsg_free(msg.skb);
+ return err;
}
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index d60c30342327..78a292a84afc 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -264,8 +264,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
iov.iov_base = buf;
iov.iov_len = s->max_rcvbuf_size;
msg.msg_name = &addr;
- ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
- MSG_DONTWAIT);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
if (ret <= 0) {
kmem_cache_free(s->rcvbuf_cache, buf);
goto out_close;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..2aa46e8cd8fe 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -710,13 +710,13 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static unsigned int tipc_poll(struct file *file, struct socket *sock,
+static __poll_t tipc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_group *grp = tsk->group;
- u32 revents = 0;
+ __poll_t revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
+ case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
- case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
revents |= POLLIN | POLLRDNORM;
break;
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
__skb_dequeue(arrvq);
__skb_queue_tail(inputq, skb);
}
- refcount_dec(&skb->users);
+ kfree_skb(skb);
spin_unlock_bh(&inputq->lock);
continue;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e07ee3ae0023..736719c8314e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
crypto_info = &ctx->crypto_send;
/* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info))
+ if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+ rc = -EBUSY;
goto out;
+ }
rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
case TLS_CIPHER_AES_GCM_128: {
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
default:
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
/* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
struct tls_context *ctx;
int rc = 0;
+ /* The TLS ulp is currently supported only for TCP sockets
+ * in ESTABLISHED state.
+ * Supporting sockets in LISTEN state will require us
+ * to modify the accept implementation to clone rather then
+ * share the ulp context.
+ */
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTSUPP;
+
/* allocate tls context */
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 73d19210dd49..0a9b72fbd761 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
while (msg_data_left(msg)) {
if (sk->sk_err) {
- ret = sk->sk_err;
+ ret = -sk->sk_err;
goto send_end;
}
@@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
size_t copy, required_size;
if (sk->sk_err) {
- ret = sk->sk_err;
+ ret = -sk->sk_err;
goto sendpage_end;
}
@@ -577,6 +577,8 @@ alloc_payload:
get_page(page);
sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
+ sg_unmark_end(sg);
+
ctx->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
@@ -681,18 +683,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
}
default:
rc = -EINVAL;
- goto out;
+ goto free_priv;
}
ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
ctx->tag_size = tag_size;
ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
ctx->iv_size = iv_size;
- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- GFP_KERNEL);
+ ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
if (!ctx->iv) {
rc = -ENOMEM;
- goto out;
+ goto free_priv;
}
memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +741,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
if (!rc)
- goto out;
+ return 0;
free_aead:
crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +752,9 @@ free_rec_seq:
free_iv:
kfree(ctx->iv);
ctx->iv = NULL;
+free_priv:
+ kfree(ctx->priv_ctx);
+ ctx->priv_ctx = NULL;
out:
return rc;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a9ee634f3c42..6b7678df41e5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -367,7 +367,7 @@ static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int
/* relaying can only happen while the wq still exists */
u_sleep = sk_sleep(&u->sk);
if (u_sleep)
- wake_up_interruptible_poll(u_sleep, key);
+ wake_up_interruptible_poll(u_sleep, key_to_poll(key));
return 0;
}
@@ -638,8 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
-static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
-static unsigned int unix_dgram_poll(struct file *, struct socket *,
+static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
+static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
@@ -2640,10 +2640,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- unsigned int mask;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
@@ -2675,11 +2675,12 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
return mask;
}
-static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
+static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk, *other;
- unsigned int mask, writable;
+ unsigned int writable;
+ __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5d28abf87fbf..9d95e773f4c8 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
-static unsigned int vsock_poll(struct file *file, struct socket *sock,
+static __poll_t vsock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk;
- unsigned int mask;
+ __poll_t mask;
struct vsock_sock *vsk;
sk = sock->sk;
@@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == TCP_CLOSE) {
+ if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 278d979c211a..1d84f91bbfb0 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 shipped_regdb_certs[] = {' >> $@
- @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
- @echo '};' >> $@
- @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+ @(echo '#include "reg.h"'; \
+ echo 'const u8 shipped_regdb_certs[] = {'; \
+ cat $^ ; \
+ echo '};'; \
+ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+ ) > $@
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
$(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@$(kecho) " GEN $@"
- @echo '#include "reg.h"' > $@
- @echo 'const u8 extra_regdb_certs[] = {' >> $@
- @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
- @echo '};' >> $@
- @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
+ @(set -e; \
+ allf=""; \
+ for f in $^ ; do \
+ # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
+ thisf=$$(od -An -v -tx1 < $$f | \
+ sed -e 's/ /\n/g' | \
+ sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
+ sed -e 's/^/0x/;s/$$/,/'); \
+ # file should not be empty - maybe command substitution failed? \
+ test ! -z "$$thisf";\
+ allf=$$allf$$thisf;\
+ done; \
+ ( \
+ echo '#include "reg.h"'; \
+ echo 'const u8 extra_regdb_certs[] = {'; \
+ echo "$$allf"; \
+ echo '};'; \
+ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
+ ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644
index 000000000000..14ea66643ffa
--- /dev/null
+++ b/net/wireless/certs/sforshee.hex
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644
index c6f8f9d6b988..000000000000
--- a/net/wireless/certs/sforshee.x509
+++ /dev/null
Binary files differ
diff --git a/net/wireless/core.c b/net/wireless/core.c
index fdde0d98fde1..a6f3cac8c640 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
if (rv)
goto use_default_name;
} else {
+ int rv;
+
use_default_name:
/* NOTE: This is *probably* safe w/out holding rtnl because of
* the restrictions on phy names. Probably this call could
@@ -446,7 +448,11 @@ use_default_name:
* phyX. But, might should add some locking and check return
* value, and use a different name if this one exists?
*/
- dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+ rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+ if (rv < 0) {
+ kfree(rdev);
+ return NULL;
+ }
}
INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d2f7e8b8a097..eaff636169c2 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
-
#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
#define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond)
#else
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b1ac23ca20c8..542a4fc0a8d7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
case NL80211_IFTYPE_AP:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
- goto nla_put_failure;
+ goto nla_put_failure_locked;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
const u8 *ssid_ie;
if (!wdev->current_bss)
break;
+ rcu_read_lock();
ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
WLAN_EID_SSID);
- if (!ssid_ie)
- break;
- if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
- goto nla_put_failure;
+ if (ssid_ie &&
+ nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+ goto nla_put_failure_rcu_locked;
+ rcu_read_unlock();
break;
}
default:
@@ -2635,6 +2636,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
genlmsg_end(msg, hdr);
return 0;
+ nla_put_failure_rcu_locked:
+ rcu_read_unlock();
+ nla_put_failure_locked:
+ wdev_unlock(wdev);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -9804,7 +9809,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
*/
if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
rdev->ops->get_station) {
- struct station_info sinfo;
+ struct station_info sinfo = {};
u8 *mac_addr;
mac_addr = wdev->current_bss->pub.bssid;
@@ -11359,7 +11364,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
break;
case NL80211_NAN_FUNC_FOLLOW_UP:
if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
- !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
err = -EINVAL;
goto out;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 78e71b0390be..7b42f0bacfd8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS)
return;
- chan_before.center_freq = chan->center_freq;
- chan_before.flags = chan->flags;
+ chan_before = *chan;
if (chan->flags & IEEE80211_CHAN_NO_IR) {
chan->flags &= ~IEEE80211_CHAN_NO_IR;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7ca04a7de85a..05186a47878f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- /* we are under RTNL - globally locked - so can use a static struct */
- static struct station_info sinfo;
+ struct station_info sinfo = {};
u8 addr[ETH_ALEN];
int err;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 30e5746085b8..ac9477189d1c 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -102,6 +102,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->dev = NULL;
dev_put(dev);
return err;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 347ab31574d5..5b2409746ae0 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -8,15 +8,29 @@
*
*/
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
+struct xfrm_trans_tasklet {
+ struct tasklet_struct tasklet;
+ struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+ int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
static struct kmem_cache *secpath_cachep __read_mostly;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
u32 mark = skb->mark;
- unsigned int family;
+ unsigned int family = AF_UNSPEC;
int decaps = 0;
int async = 0;
bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (encap_type < 0) {
x = xfrm_input_state(skb);
+
+ if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+ if (x->km.state == XFRM_STATE_ACQ)
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+ else
+ XFRM_INC_STATS(net,
+ LINUX_MIB_XFRMINSTATEINVALID);
+ goto drop;
+ }
+
family = x->outer_mode->afinfo->family;
/* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
}
EXPORT_SYMBOL(xfrm_input_resume);
+static void xfrm_trans_reinject(unsigned long data)
+{
+ struct xfrm_trans_tasklet *trans = (void *)data;
+ struct sk_buff_head queue;
+ struct sk_buff *skb;
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&trans->queue, &queue);
+
+ while ((skb = __skb_dequeue(&queue)))
+ XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+ int (*finish)(struct net *, struct sock *,
+ struct sk_buff *))
+{
+ struct xfrm_trans_tasklet *trans;
+
+ trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+ if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ return -ENOBUFS;
+
+ XFRM_TRANS_SKB_CB(skb)->finish = finish;
+ __skb_queue_tail(&trans->queue, skb);
+ tasklet_schedule(&trans->tasklet);
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
void __init xfrm_input_init(void)
{
int err;
+ int i;
init_dummy_netdev(&xfrm_napi_dev);
err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
sizeof(struct sec_path),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+
+ for_each_possible_cpu(i) {
+ struct xfrm_trans_tasklet *trans;
+
+ trans = &per_cpu(xfrm_trans_tasklet, i);
+ __skb_queue_head_init(&trans->queue);
+ tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+ (unsigned long)trans);
+ }
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9542975eb2f9..bd6b0e7a0ee4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+ if (policy->walk.dead ||
+ xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
/* skip socket policies */
continue;
}
@@ -974,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
}
if (!cnt)
err = -ESRCH;
- else
- xfrm_policy_cache_flush();
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err;
@@ -1168,9 +1167,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl, family);
+ bool match;
int err = 0;
+ if (pol->family != family) {
+ pol = NULL;
+ goto out;
+ }
+
+ match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
pol = NULL;
@@ -1737,6 +1742,8 @@ void xfrm_policy_cache_flush(void)
bool found = 0;
int cpu;
+ might_sleep();
+
local_bh_disable();
rcu_read_lock();
for_each_possible_cpu(cpu) {
@@ -1833,6 +1840,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
sizeof(struct xfrm_policy *) * num_pols) == 0 &&
xfrm_xdst_can_reuse(xdst, xfrm, err)) {
dst_hold(&xdst->u.dst);
+ xfrm_pols_put(pols, num_pols);
while (err > 0)
xfrm_state_put(xfrm[--err]);
return xdst;
@@ -2055,8 +2063,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
if (num_xfrms <= 0)
goto make_dummy_bundle;
+ local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
- xflo->dst_orig);
+ xflo->dst_orig);
+ local_bh_enable();
+
if (IS_ERR(xdst)) {
err = PTR_ERR(xdst);
if (err != -EAGAIN)
@@ -2143,9 +2154,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
+ local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(
pols, num_pols, fl,
family, dst_orig);
+ local_bh_enable();
+
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 065d89606888..a3785f538018 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -313,13 +313,14 @@ retry:
if ((type && !try_module_get(type->owner)))
type = NULL;
+ rcu_read_unlock();
+
if (!type && try_load) {
request_module("xfrm-offload-%d-%d", family, proto);
- try_load = 0;
+ try_load = false;
goto retry;
}
- rcu_read_unlock();
return type;
}
@@ -1343,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
if (orig->aead) {
x->aead = xfrm_algo_aead_clone(orig->aead);
+ x->geniv = orig->geniv;
if (!x->aead)
goto error;
}
@@ -1533,8 +1535,12 @@ out:
err = -EINVAL;
spin_lock_bh(&x1->lock);
if (likely(x1->km.state == XFRM_STATE_VALID)) {
- if (x->encap && x1->encap)
+ if (x->encap && x1->encap &&
+ x->encap->encap_type == x1->encap->encap_type)
memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+ else if (x->encap || x1->encap)
+ goto fail;
+
if (x->coaddr && x1->coaddr) {
memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
}
@@ -1551,6 +1557,8 @@ out:
x->km.state = XFRM_STATE_DEAD;
__xfrm_state_put(x);
}
+
+fail:
spin_unlock_bh(&x1->lock);
xfrm_state_put(x1);
@@ -2264,8 +2272,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
}
- x->km.state = XFRM_STATE_VALID;
-
error:
return err;
}
@@ -2274,7 +2280,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true, false);
+ int err;
+
+ err = __xfrm_init_state(x, true, false);
+ if (!err)
+ x->km.state = XFRM_STATE_VALID;
+
+ return err;
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 983b0233767b..7f52b8eb177d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
- if (attrs[XFRMA_OFFLOAD_DEV]) {
- err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
- if (err)
- goto error;
- }
-
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (x->km.state == XFRM_STATE_VOID)
+ x->km.state = XFRM_STATE_VALID;
+
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
@@ -1419,11 +1423,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
{
+ u16 prev_family;
int i;
if (nr > XFRM_MAX_DEPTH)
return -EINVAL;
+ prev_family = family;
+
for (i = 0; i < nr; i++) {
/* We never validated the ut->family value, so many
* applications simply leave it at zero. The check was
@@ -1435,6 +1442,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
if (!ut[i].family)
ut[i].family = family;
+ if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+ (ut[i].family != prev_family))
+ return -EINVAL;
+
+ prev_family = ut[i].family;
+
switch (ut[i].family) {
case AF_INET:
break;
@@ -1445,6 +1458,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
default:
return -EINVAL;
}
+
+ switch (ut[i].id.proto) {
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+#endif
+ case IPSEC_PROTO_ANY:
+ break;
+ default:
+ return -EINVAL;
+ }
+
}
return 0;
@@ -2470,7 +2498,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
- [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 },
+ [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {