aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-09-28 15:19:03 -0700
committerDavid S. Miller <davem@davemloft.net>2020-09-28 15:19:03 -0700
commitbe589d04281495e00fd2fa5733076aed459953b7 (patch)
treebb88fd67f378832ae5026075bd9eb51c1a22cc50
parent414698f6aa31d69fe7fe0fda22ac41cb7777eab5 (diff)
parente8d726c8e869d132a4728d8b82f58213084278f9 (diff)
downloadlinux-be589d04281495e00fd2fa5733076aed459953b7.tar.gz
Merge branch 'net-smc-introduce-SMC-Dv2-support'
Karsten Graul says: ==================== net/smc: introduce SMC-Dv2 support SMC-Dv2 support (see https://www.ibm.com/support/pages/node/6326337) provides multi-subnet support for SMC-D, eliminating the current same-subnet restriction. The new version detects if any of the virtual ISM devices are on the same system and can therefore be used for an SMC-Dv2 connection. Furthermore, SMC-Dv2 eliminates the need for PNET IDs on s390. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/s390/net/ism.h7
-rw-r--r--drivers/s390/net/ism_drv.c47
-rw-r--r--include/net/smc.h4
-rw-r--r--net/smc/af_smc.c613
-rw-r--r--net/smc/smc.h12
-rw-r--r--net/smc/smc_clc.c337
-rw-r--r--net/smc/smc_clc.h179
-rw-r--r--net/smc/smc_core.c25
-rw-r--r--net/smc/smc_core.h15
-rw-r--r--net/smc/smc_ism.c32
-rw-r--r--net/smc/smc_ism.h8
-rw-r--r--net/smc/smc_netns.h1
-rw-r--r--net/smc/smc_pnet.c173
-rw-r--r--net/smc/smc_pnet.h15
14 files changed, 1206 insertions, 262 deletions
diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h
index 1901e9c80ed8..38fe90c2597d 100644
--- a/drivers/s390/net/ism.h
+++ b/drivers/s390/net/ism.h
@@ -16,6 +16,7 @@
#define ISM_DMB_WORD_OFFSET 1
#define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32)
#define ISM_NR_DMBS 1920
+#define ISM_IDENT_MASK 0x00FFFF
#define ISM_REG_SBA 0x1
#define ISM_REG_IEQ 0x2
@@ -206,6 +207,12 @@ struct ism_dev {
#define ISM_CREATE_REQ(dmb, idx, sf, offset) \
((dmb) | (idx) << 24 | (sf) << 23 | (offset))
+struct ism_systemeid {
+ u8 seid_string[24];
+ u8 serial_number[4];
+ u8 type[4];
+};
+
static inline void __ism_read_cmd(struct ism_dev *ism, void *data,
unsigned long offset, unsigned long len)
{
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 5fbe9eae84d1..fe96ca3c88a5 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -13,6 +13,8 @@
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/err.h>
+#include <linux/ctype.h>
+#include <linux/processor.h>
#include <net/smc.h>
#include <asm/debug.h>
@@ -387,6 +389,42 @@ static int ism_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
return 0;
}
+static struct ism_systemeid SYSTEM_EID = {
+ .seid_string = "IBM-SYSZ-IBMSEID00000000",
+ .serial_number = "0000",
+ .type = "0000",
+};
+
+static void ism_create_system_eid(void)
+{
+ struct cpuid id;
+ u16 ident_tail;
+ char tmp[5];
+
+ get_cpu_id(&id);
+ ident_tail = (u16)(id.ident & ISM_IDENT_MASK);
+ snprintf(tmp, 5, "%04X", ident_tail);
+ memcpy(&SYSTEM_EID.serial_number, tmp, 4);
+ snprintf(tmp, 5, "%04X", id.machine);
+ memcpy(&SYSTEM_EID.type, tmp, 4);
+}
+
+static void ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
+{
+ *eid = &SYSTEM_EID.seid_string[0];
+}
+
+static u16 ism_get_chid(struct smcd_dev *smcd)
+{
+ struct ism_dev *ismdev;
+
+ ismdev = (struct ism_dev *)smcd->priv;
+ if (!ismdev || !ismdev->pdev)
+ return 0;
+
+ return to_zpci(ismdev->pdev)->pchid;
+}
+
static void ism_handle_event(struct ism_dev *ism)
{
struct smcd_event *entry;
@@ -443,6 +481,8 @@ static const struct smcd_ops ism_ops = {
.reset_vlan_required = ism_reset_vlan_required,
.signal_event = ism_signal_ieq,
.move_data = ism_move,
+ .get_system_eid = ism_get_system_eid,
+ .get_chid = ism_get_chid,
};
static int ism_dev_init(struct ism_dev *ism)
@@ -471,6 +511,10 @@ static int ism_dev_init(struct ism_dev *ism)
if (ret)
goto unreg_ieq;
+ if (!ism_add_vlan_id(ism->smcd, ISM_RESERVED_VLANID))
+ /* hardware is V2 capable */
+ ism_create_system_eid();
+
ret = smcd_register_dev(ism->smcd);
if (ret)
goto unreg_ieq;
@@ -550,6 +594,9 @@ static void ism_dev_exit(struct ism_dev *ism)
struct pci_dev *pdev = ism->pdev;
smcd_unregister_dev(ism->smcd);
+ if (SYSTEM_EID.serial_number[0] != '0' ||
+ SYSTEM_EID.type[0] != '0')
+ ism_del_vlan_id(ism->smcd, ISM_RESERVED_VLANID);
unregister_ieq(ism);
unregister_sba(ism);
free_irq(pci_irq_vector(pdev, 0), ism);
diff --git a/include/net/smc.h b/include/net/smc.h
index 646feb4bc75f..e441aa97ad61 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -37,6 +37,8 @@ struct smcd_dmb {
#define ISM_EVENT_GID 1
#define ISM_EVENT_SWR 2
+#define ISM_RESERVED_VLANID 0x1FFF
+
#define ISM_ERROR 0xFFFF
struct smcd_event {
@@ -63,6 +65,8 @@ struct smcd_ops {
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
+ void (*get_system_eid)(struct smcd_dev *dev, u8 **eid);
+ u16 (*get_chid)(struct smcd_dev *dev);
};
struct smcd_dev {
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index ed8f97166be9..e874d0e6267f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -26,6 +26,7 @@
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
+#include <linux/ctype.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -448,6 +449,16 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
+static bool smc_isascii(char *hostname)
+{
+ int i;
+
+ for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
+ if (!isascii(hostname[i]))
+ return false;
+ return true;
+}
+
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -459,6 +470,22 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
+ if (clc->hdr.version > SMC_V1 &&
+ (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)clc;
+ struct smc_clc_first_contact_ext *fce =
+ (struct smc_clc_first_contact_ext *)
+ (((u8 *)clc_v2) + sizeof(*clc_v2));
+
+ memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
+ SMC_MAX_EID_LEN);
+ smc->conn.lgr->peer_os = fce->os_type;
+ smc->conn.lgr->peer_smc_release = fce->release;
+ if (smc_isascii(fce->hostname))
+ memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
+ SMC_MAX_HOSTNAME_LEN);
+ }
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -504,7 +531,8 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
}
/* decline and fall back during connect */
-static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
+static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
+ u8 version)
{
int rc;
@@ -514,7 +542,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
return reason_code;
}
if (reason_code != SMC_CLC_DECL_PEERDECL) {
- rc = smc_clc_send_decline(smc, reason_code);
+ rc = smc_clc_send_decline(smc, reason_code, version);
if (rc < 0) {
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
@@ -564,47 +592,121 @@ static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
/* Find ISM device with same PNETID as connecting interface */
smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
- if (!ini->ism_dev)
+ if (!ini->ism_dev[0])
return SMC_CLC_DECL_NOSMCDDEV;
+ else
+ ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
return 0;
}
+/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
+ * PNETID matching net_device)
+ */
+static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
+ struct smc_init_info *ini)
+{
+ int rc = SMC_CLC_DECL_NOSMCDDEV;
+ struct smcd_dev *smcd;
+ int i = 1;
+
+ if (smcd_indicated(ini->smc_type_v1))
+ rc = 0; /* already initialized for V1 */
+ mutex_lock(&smcd_dev_list.mutex);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ if (smcd->going_away || smcd == ini->ism_dev[0])
+ continue;
+ if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
+ smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
+ ini->ism_dev[i] = smcd;
+ ini->ism_chid[i] = smc_ism_get_chid(ini->ism_dev[i]);
+ ini->is_smcd = true;
+ rc = 0;
+ i++;
+ if (i > SMC_MAX_ISM_DEVS)
+ break;
+ }
+ }
+ mutex_unlock(&smcd_dev_list.mutex);
+ ini->ism_offered_cnt = i - 1;
+ if (!ini->ism_dev[0] && !ini->ism_dev[1])
+ ini->smcd_version = 0;
+
+ return rc;
+}
+
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
struct smc_init_info *ini)
{
- if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
+ if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_ISMVLANERR;
return 0;
}
+static int smc_find_proposal_devices(struct smc_sock *smc,
+ struct smc_init_info *ini)
+{
+ int rc = 0;
+
+ /* check if there is an ism device available */
+ if (ini->smcd_version & SMC_V1) {
+ if (smc_find_ism_device(smc, ini) ||
+ smc_connect_ism_vlan_setup(smc, ini)) {
+ if (ini->smc_type_v1 == SMC_TYPE_B)
+ ini->smc_type_v1 = SMC_TYPE_R;
+ else
+ ini->smc_type_v1 = SMC_TYPE_N;
+ } /* else ISM V1 is supported for this connection */
+ if (smc_find_rdma_device(smc, ini)) {
+ if (ini->smc_type_v1 == SMC_TYPE_B)
+ ini->smc_type_v1 = SMC_TYPE_D;
+ else
+ ini->smc_type_v1 = SMC_TYPE_N;
+ } /* else RDMA is supported for this connection */
+ }
+ if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini))
+ ini->smc_type_v2 = SMC_TYPE_N;
+
+ /* if neither ISM nor RDMA are supported, fallback */
+ if (!smcr_indicated(ini->smc_type_v1) &&
+ ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
+ rc = SMC_CLC_DECL_NOSMCDEV;
+
+ return rc;
+}
+
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
* used, the VLAN ID will be registered again during the connection setup.
*/
-static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
struct smc_init_info *ini)
{
- if (!is_smcd)
+ if (!smcd_indicated(ini->smc_type_v1))
return 0;
- if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
+ if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_CNFERR;
return 0;
}
+#define SMC_CLC_MAX_ACCEPT_LEN \
+ (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
+ sizeof(struct smc_clc_first_contact_ext) + \
+ sizeof(struct smc_clc_msg_trail))
+
/* CLC handshake during connect */
-static int smc_connect_clc(struct smc_sock *smc, int smc_type,
- struct smc_clc_msg_accept_confirm *aclc,
+static int smc_connect_clc(struct smc_sock *smc,
+ struct smc_clc_msg_accept_confirm_v2 *aclc2,
struct smc_init_info *ini)
{
int rc = 0;
/* do inband token exchange */
- rc = smc_clc_send_proposal(smc, smc_type, ini);
+ rc = smc_clc_send_proposal(smc, ini);
if (rc)
return rc;
/* receive SMC Accept CLC message */
- return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
- CLC_WAIT_TIME);
+ return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
+ SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
/* setup for RDMA connection of client */
@@ -618,7 +720,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
ini->is_smcd = false;
ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
- ini->first_contact_peer = aclc->hdr.flag;
+ ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
@@ -678,7 +780,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
smc_rmb_sync_sg_for_device(&smc->conn);
- reason_code = smc_clc_send_confirm(smc);
+ reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
+ SMC_V1);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->first_contact_local);
@@ -704,6 +807,25 @@ static int smc_connect_rdma(struct smc_sock *smc,
return 0;
}
+/* The server has chosen one of the proposed ISM devices for the communication.
+ * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
+ */
+static int
+smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
+ struct smc_init_info *ini)
+{
+ int i;
+
+ for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
+ if (ini->ism_chid[i] == ntohs(aclc->chid)) {
+ ini->ism_selected = i;
+ return 0;
+ }
+ }
+
+ return -EPROTO;
+}
+
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
@@ -712,8 +834,17 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
ini->is_smcd = true;
- ini->ism_peer_gid = aclc->d0.gid;
- ini->first_contact_peer = aclc->hdr.flag;
+ ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
+
+ if (aclc->hdr.version == SMC_V2) {
+ struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)aclc;
+
+ rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
+ if (rc)
+ return rc;
+ }
+ ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
@@ -736,7 +867,8 @@ static int smc_connect_ism(struct smc_sock *smc,
smc_rx_init(smc);
smc_tx_init(smc);
- rc = smc_clc_send_confirm(smc);
+ rc = smc_clc_send_confirm(smc, ini->first_contact_local,
+ aclc->hdr.version);
if (rc)
return smc_connect_abort(smc, rc, ini->first_contact_local);
mutex_unlock(&smc_server_lgr_pending);
@@ -749,13 +881,32 @@ static int smc_connect_ism(struct smc_sock *smc,
return 0;
}
+/* check if received accept type and version matches a proposed one */
+static int smc_connect_check_aclc(struct smc_init_info *ini,
+ struct smc_clc_msg_accept_confirm *aclc)
+{
+ if ((aclc->hdr.typev1 == SMC_TYPE_R &&
+ !smcr_indicated(ini->smc_type_v1)) ||
+ (aclc->hdr.typev1 == SMC_TYPE_D &&
+ ((!smcd_indicated(ini->smc_type_v1) &&
+ !smcd_indicated(ini->smc_type_v2)) ||
+ (aclc->hdr.version == SMC_V1 &&
+ !smcd_indicated(ini->smc_type_v1)) ||
+ (aclc->hdr.version == SMC_V2 &&
+ !smcd_indicated(ini->smc_type_v2)))))
+ return SMC_CLC_DECL_MODEUNSUPP;
+
+ return 0;
+}
+
/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
- bool ism_supported = false, rdma_supported = false;
- struct smc_clc_msg_accept_confirm aclc;
- struct smc_init_info ini = {0};
- int smc_type;
+ u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
+ struct smc_clc_msg_accept_confirm_v2 *aclc2;
+ struct smc_clc_msg_accept_confirm *aclc;
+ struct smc_init_info *ini = NULL;
+ u8 *buf = NULL;
int rc = 0;
if (smc->use_fallback)
@@ -765,58 +916,73 @@ static int __smc_connect(struct smc_sock *smc)
if (!tcp_sk(smc->clcsock->sk)->syn_smc)
return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
- /* IPSec connections opt out of SMC-R optimizations */
+ /* IPSec connections opt out of SMC optimizations */
if (using_ipsec(smc))
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
+ return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
+ version);
- /* get vlan id from IP device */
- if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
- return smc_connect_decline_fallback(smc,
- SMC_CLC_DECL_GETVLANERR);
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini)
+ return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
+ version);
- /* check if there is an ism device available */
- if (!smc_find_ism_device(smc, &ini) &&
- !smc_connect_ism_vlan_setup(smc, &ini)) {
- /* ISM is supported for this connection */
- ism_supported = true;
- smc_type = SMC_TYPE_D;
- }
-
- /* check if there is a rdma device available */
- if (!smc_find_rdma_device(smc, &ini)) {
- /* RDMA is supported for this connection */
- rdma_supported = true;
- if (ism_supported)
- smc_type = SMC_TYPE_B; /* both */
- else
- smc_type = SMC_TYPE_R; /* only RDMA */
+ ini->smcd_version = SMC_V1;
+ ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0;
+ ini->smc_type_v1 = SMC_TYPE_B;
+ ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N;
+
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
+ ini->smcd_version &= ~SMC_V1;
+ ini->smc_type_v1 = SMC_TYPE_N;
+ if (!ini->smcd_version) {
+ rc = SMC_CLC_DECL_GETVLANERR;
+ goto fallback;
+ }
}
- /* if neither ISM nor RDMA are supported, fallback */
- if (!rdma_supported && !ism_supported)
- return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
+ rc = smc_find_proposal_devices(smc, ini);
+ if (rc)
+ goto fallback;
- /* perform CLC handshake */
- rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
- if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
- return smc_connect_decline_fallback(smc, rc);
+ buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
+ if (!buf) {
+ rc = SMC_CLC_DECL_MEM;
+ goto fallback;
}
+ aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
+ aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
+
+ /* perform CLC handshake */
+ rc = smc_connect_clc(smc, aclc2, ini);
+ if (rc)
+ goto vlan_cleanup;
+
+ /* check if smc modes and versions of CLC proposal and accept match */
+ rc = smc_connect_check_aclc(ini, aclc);
+ version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+ if (rc)
+ goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */
- if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
- rc = smc_connect_rdma(smc, &aclc, &ini);
- else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
- rc = smc_connect_ism(smc, &aclc, &ini);
- else
- rc = SMC_CLC_DECL_MODEUNSUPP;
- if (rc) {
- smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
- return smc_connect_decline_fallback(smc, rc);
- }
+ if (aclc->hdr.typev1 == SMC_TYPE_R)
+ rc = smc_connect_rdma(smc, aclc, ini);
+ else if (aclc->hdr.typev1 == SMC_TYPE_D)
+ rc = smc_connect_ism(smc, aclc, ini);
+ if (rc)
+ goto vlan_cleanup;
- smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
+ smc_connect_ism_vlan_cleanup(smc, ini);
+ kfree(buf);
+ kfree(ini);
return 0;
+
+vlan_cleanup:
+ smc_connect_ism_vlan_cleanup(smc, ini);
+ kfree(buf);
+fallback:
+ kfree(ini);
+ return smc_connect_decline_fallback(smc, rc, version);
}
static void smc_connect_work(struct work_struct *work)
@@ -1132,10 +1298,10 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
- bool local_first)
+ struct smc_init_info *ini, u8 version)
{
/* RDMA setup failed, switch back to TCP */
- if (local_first)
+ if (ini->first_contact_local)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
@@ -1146,7 +1312,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
- if (smc_clc_send_decline(new_smc, reason_code) < 0) {
+ if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
return;
}
@@ -1154,6 +1320,47 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
smc_listen_out_connected(new_smc);
}
+/* listen worker: version checking */
+static int smc_listen_v2_check(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
+ struct smc_clc_v2_extension *pclc_v2_ext;
+
+ ini->smc_type_v1 = pclc->hdr.typev1;
+ ini->smc_type_v2 = pclc->hdr.typev2;
+ ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
+ if (pclc->hdr.version > SMC_V1)
+ ini->smcd_version |=
+ ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
+ if (!smc_ism_v2_capable) {
+ ini->smcd_version &= ~SMC_V2;
+ goto out;
+ }
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!pclc_v2_ext) {
+ ini->smcd_version &= ~SMC_V2;
+ goto out;
+ }
+ pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
+ if (!pclc_smcd_v2_ext)
+ ini->smcd_version &= ~SMC_V2;
+
+out:
+ if (!ini->smcd_version) {
+ if (pclc->hdr.typev1 == SMC_TYPE_B ||
+ pclc->hdr.typev2 == SMC_TYPE_B)
+ return SMC_CLC_DECL_NOSMCDEV;
+ if (pclc->hdr.typev1 == SMC_TYPE_D ||
+ pclc->hdr.typev2 == SMC_TYPE_D)
+ return SMC_CLC_DECL_NOSMCDDEV;
+ return SMC_CLC_DECL_NOSMCRDEV;
+ }
+
+ return 0;
+}
+
/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
@@ -1161,6 +1368,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
+ if (pclc->hdr.typev1 == SMC_TYPE_N)
+ return 0;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
return SMC_CLC_DECL_DIFFPREFIX;
@@ -1188,7 +1397,6 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
- struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
int rc;
@@ -1211,6 +1419,125 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
return 0;
}
+static bool smc_is_already_selected(struct smcd_dev *smcd,
+ struct smc_init_info *ini,
+ int matches)
+{
+ int i;
+
+ for (i = 0; i < matches; i++)
+ if (smcd == ini->ism_dev[i])
+ return true;
+
+ return false;
+}
+
+/* check for ISM devices matching proposed ISM devices */
+static void smc_check_ism_v2_match(struct smc_init_info *ini,
+ u16 proposed_chid, u64 proposed_gid,
+ unsigned int *matches)
+{
+ struct smcd_dev *smcd;
+
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ if (smcd->going_away)
+ continue;
+ if (smc_is_already_selected(smcd, ini, *matches))
+ continue;
+ if (smc_ism_get_chid(smcd) == proposed_chid &&
+ !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
+ ini->ism_peer_gid[*matches] = proposed_gid;
+ ini->ism_dev[*matches] = smcd;
+ (*matches)++;
+ break;
+ }
+ }
+}
+
+static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_smcd_v2_extension *smcd_v2_ext;
+ struct smc_clc_v2_extension *smc_v2_ext;
+ struct smc_clc_msg_smcd *pclc_smcd;
+ unsigned int matches = 0;
+ u8 *eid = NULL;
+ int i;
+
+ if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
+ return;
+
+ pclc_smcd = smc_get_clc_msg_smcd(pclc);
+ smc_v2_ext = smc_get_clc_v2_ext(pclc);
+ smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
+ if (!smcd_v2_ext ||
+ !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */
+ goto not_found;
+
+ mutex_lock(&smcd_dev_list.mutex);
+ if (pclc_smcd->ism.chid)
+ /* check for ISM device matching proposed native ISM device */
+ smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
+ ntohll(pclc_smcd->ism.gid), &matches);
+ for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
+ /* check for ISM devices matching proposed non-native ISM
+ * devices
+ */
+ smc_check_ism_v2_match(ini,
+ ntohs(smcd_v2_ext->gidchid[i - 1].chid),
+ ntohll(smcd_v2_ext->gidchid[i - 1].gid),
+ &matches);
+ }
+ mutex_unlock(&smcd_dev_list.mutex);
+
+ if (ini->ism_dev[0]) {
+ smc_ism_get_system_eid(ini->ism_dev[0], &eid);
+ if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN))
+ goto not_found;
+ } else {
+ goto not_found;
+ }
+
+ /* separate - outside the smcd_dev_list.lock */
+ for (i = 0; i < matches; i++) {
+ ini->smcd_version = SMC_V2;
+ ini->is_smcd = true;
+ ini->ism_selected = i;
+ if (smc_listen_ism_init(new_smc, ini))
+ /* try next active ISM device */
+ continue;
+ return; /* matching and usable V2 ISM device found */
+ }
+
+not_found:
+ ini->smcd_version &= ~SMC_V2;
+ ini->ism_dev[0] = NULL;
+ ini->is_smcd = false;
+}
+
+static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
+
+ /* check if ISM V1 is available */
+ if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
+ goto not_found;
+ ini->is_smcd = true; /* prepare ISM check */
+ ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
+ if (smc_find_ism_device(new_smc, ini))
+ goto not_found;
+ ini->ism_selected = 0;
+ if (!smc_listen_ism_init(new_smc, ini))
+ return; /* V1 ISM device found */
+
+not_found:
+ ini->ism_dev[0] = NULL;
+ ini->is_smcd = false;
+}
+
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
{
@@ -1225,6 +1552,67 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
return 0;
}
+static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ int rc;
+
+ if (!smcr_indicated(ini->smc_type_v1))
+ return SMC_CLC_DECL_NOSMCDEV;
+
+ /* prepare RDMA check */
+ ini->ib_lcl = &pclc->lcl;
+ rc = smc_find_rdma_device(new_smc, ini);
+ if (rc) {
+ /* no RDMA device found */
+ if (ini->smc_type_v1 == SMC_TYPE_B)
+ /* neither ISM nor RDMA device found */
+ rc = SMC_CLC_DECL_NOSMCDEV;
+ return rc;
+ }
+ rc = smc_listen_rdma_init(new_smc, ini);
+ if (rc)
+ return rc;
+ return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+}
+
+/* determine the local device matching to proposal */
+static int smc_listen_find_device(struct smc_sock *new_smc,
+ struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ int rc;
+
+ /* check for ISM device matching V2 proposed device */
+ smc_find_ism_v2_device_serv(new_smc, pclc, ini);
+ if (ini->ism_dev[0])
+ return 0;
+
+ if (!(ini->smcd_version & SMC_V1))
+ return SMC_CLC_DECL_NOSMCDEV;
+
+ /* check for matching IP prefix and subnet length */
+ rc = smc_listen_prfx_check(new_smc, pclc);
+ if (rc)
+ return rc;
+
+ /* get vlan id from IP device */
+ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
+ return SMC_CLC_DECL_GETVLANERR;
+
+ /* check for ISM device matching V1 proposed device */
+ smc_find_ism_v1_device_serv(new_smc, pclc, ini);
+ if (ini->ism_dev[0])
+ return 0;
+
+ if (pclc->hdr.typev1 == SMC_TYPE_D)
+ return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */
+
+ /* check if RDMA is available */
+ return smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
+}
+
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
@@ -1250,17 +1638,18 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
return reason_code;
}
-/* setup for RDMA connection of server */
+/* setup for connection of server */
static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
+ u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock;
- struct smc_clc_msg_accept_confirm cclc;
+ struct smc_clc_msg_accept_confirm_v2 *cclc2;
+ struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc;
- struct smc_init_info ini = {0};
- bool ism_supported = false;
+ struct smc_init_info *ini = NULL;
int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@@ -1292,101 +1681,76 @@ static void smc_listen_work(struct work_struct *work)
SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc)
goto out_decl;
+ version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
- /* IPSec connections opt out of SMC-R optimizations */
+ /* IPSec connections opt out of SMC optimizations */
if (using_ipsec(new_smc)) {
rc = SMC_CLC_DECL_IPSEC;
goto out_decl;
}
- /* check for matching IP prefix and subnet length */
- rc = smc_listen_prfx_check(new_smc, pclc);
- if (rc)
+ ini = kzalloc(sizeof(*ini), GFP_KERNEL);
+ if (!ini) {
+ rc = SMC_CLC_DECL_MEM;
goto out_decl;
+ }
- /* get vlan id from IP device */
- if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
- rc = SMC_CLC_DECL_GETVLANERR;
+ /* initial version checking */
+ rc = smc_listen_v2_check(new_smc, pclc, ini);
+ if (rc)
goto out_decl;
- }
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
- /* check if ISM is available */
- if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
- struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
-
- ini.is_smcd = true; /* prepare ISM check */
- ini.ism_peer_gid = pclc_smcd->gid;
- rc = smc_find_ism_device(new_smc, &ini);
- if (!rc)
- rc = smc_listen_ism_init(new_smc, pclc, &ini);
- if (!rc)
- ism_supported = true;
- else if (pclc->hdr.path == SMC_TYPE_D)
- goto out_unlock; /* skip RDMA and decline */
- }
-
- /* check if RDMA is available */
- if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
- /* prepare RDMA check */
- ini.is_smcd = false;
- ini.ism_dev = NULL;
- ini.ib_lcl = &pclc->lcl;
- rc = smc_find_rdma_device(new_smc, &ini);
- if (rc) {
- /* no RDMA device found */
- if (pclc->hdr.path == SMC_TYPE_B)
- /* neither ISM nor RDMA device found */
- rc = SMC_CLC_DECL_NOSMCDEV;
- goto out_unlock;
- }
- rc = smc_listen_rdma_init(new_smc, &ini);
- if (rc)
- goto out_unlock;
- rc = smc_listen_rdma_reg(new_smc, ini.first_contact_local);
- if (rc)
- goto out_unlock;
- }
+ /* determine ISM or RoCE device used for connection */
+ rc = smc_listen_find_device(new_smc, pclc, ini);
+ if (rc)
+ goto out_unlock;
/* send SMC Accept CLC message */
- rc = smc_clc_send_accept(new_smc, ini.first_contact_local);
+ rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
+ ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1);
if (rc)
goto out_unlock;
/* SMC-D does not need this lock any more */
- if (ism_supported)
+ if (ini->is_smcd)
mutex_unlock(&smc_server_lgr_pending);
/* receive SMC Confirm CLC message */
- rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
+ cclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
+ cclc = (struct smc_clc_msg_accept_confirm *)cclc2;
+ memset(buf, 0, sizeof(struct smc_clc_msg_proposal_area));
+ rc = smc_clc_wait_msg(new_smc, cclc2,
+ sizeof(struct smc_clc_msg_proposal_area),
SMC_CLC_CONFIRM, CLC_WAIT_TIME);
if (rc) {
- if (!ism_supported)
+ if (!ini->is_smcd)
goto out_unlock;
goto out_decl;
}
/* finish worker */
- if (!ism_supported) {
- rc = smc_listen_rdma_finish(new_smc, &cclc,
- ini.first_contact_local);
+ if (!ini->is_smcd) {
+ rc = smc_listen_rdma_finish(new_smc, cclc,
+ ini->first_contact_local);
if (rc)
goto out_unlock;
mutex_unlock(&smc_server_lgr_pending);
}
- smc_conn_save_peer_info(new_smc, &cclc);
+ smc_conn_save_peer_info(new_smc, cclc);
smc_listen_out_connected(new_smc);
goto out_free;
out_unlock:
mutex_unlock(&smc_server_lgr_pending);
out_decl:
- smc_listen_decline(new_smc, rc, ini.first_contact_local);
+ smc_listen_decline(new_smc, rc, ini, version);
out_free:
+ kfree(ini);
kfree(buf);
}
@@ -2092,6 +2456,9 @@ static int __init smc_init(void)
if (rc)
return rc;
+ smc_ism_init();
+ smc_clc_init();
+
rc = smc_pnet_init();
if (rc)
goto out_pernet_subsys;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2bd57e57b7e7..d65e15f0c944 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -19,10 +19,19 @@
#include "smc_ib.h"
#define SMC_V1 1 /* SMC version V1 */
+#define SMC_V2 2 /* SMC version V2 */
+#define SMC_RELEASE 0
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
+#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
+ * devices
+ */
+
+#define SMC_MAX_HOSTNAME_LEN 32
+#define SMC_MAX_EID_LEN 32
+
extern struct proto smc_proto;
extern struct proto smc_proto6;
@@ -246,6 +255,9 @@ extern struct workqueue_struct *smc_close_wq; /* wq for close work */
extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
+#define ntohll(x) be64_to_cpu(x)
+#define htonll(x) cpu_to_be64(x)
+
/* convert an u32 value into network byte order, store it into a 3 byte field */
static inline void hton24(u8 *net, u32 host)
{
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index bd116e1949b9..696d89c2dce4 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -14,6 +14,8 @@
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
+#include <linux/utsname.h>
+#include <linux/ctype.h>
#include <net/addrconf.h>
#include <net/sock.h>
@@ -27,6 +29,7 @@
#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
+#define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
#define SMC_CLC_RECV_BUF_LEN 100
/* eye catcher "SMCR" EBCDIC for CLC messages */
@@ -34,13 +37,88 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
/* eye catcher "SMCD" EBCDIC for CLC messages */
static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
+static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
+
+/* check arriving CLC proposal */
+static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
+{
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ struct smc_clc_smcd_v2_extension *smcd_v2_ext;
+ struct smc_clc_msg_hdr *hdr = &pclc->hdr;
+ struct smc_clc_v2_extension *v2_ext;
+
+ v2_ext = smc_get_clc_v2_ext(pclc);
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (hdr->version == SMC_V1) {
+ if (hdr->typev1 == SMC_TYPE_N)
+ return false;
+ if (ntohs(hdr->length) !=
+ sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(struct smc_clc_ipv6_prefix) +
+ sizeof(struct smc_clc_msg_trail))
+ return false;
+ } else {
+ if (ntohs(hdr->length) !=
+ sizeof(*pclc) +
+ sizeof(struct smc_clc_msg_smcd) +
+ (hdr->typev1 != SMC_TYPE_N ?
+ sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(struct smc_clc_ipv6_prefix) : 0) +
+ (hdr->typev2 != SMC_TYPE_N ?
+ sizeof(*v2_ext) +
+ v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
+ (smcd_indicated(hdr->typev2) ?
+ sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
+ sizeof(struct smc_clc_smcd_gid_chid) :
+ 0) +
+ sizeof(struct smc_clc_msg_trail))
+ return false;
+ }
+ return true;
+}
+
+/* check arriving CLC accept or confirm */
+static bool
+smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
+{
+ struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
+
+ if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
+ return false;
+ if (hdr->version == SMC_V1) {
+ if ((hdr->typev1 == SMC_TYPE_R &&
+ ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
+ (hdr->typev1 == SMC_TYPE_D &&
+ ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
+ return false;
+ } else {
+ if (hdr->typev1 == SMC_TYPE_D &&
+ ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
+ (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
+ sizeof(struct smc_clc_first_contact_ext)))
+ return false;
+ }
+ return true;
+}
+
+static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
+{
+ memset(fce, 0, sizeof(*fce));
+ fce->os_type = SMC_CLC_OS_LINUX;
+ fce->release = SMC_RELEASE;
+ memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
+ (*len) += sizeof(*fce);
+}
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
{
- struct smc_clc_msg_proposal_prefix *pclc_prfx;
- struct smc_clc_msg_accept_confirm *clc;
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2;
struct smc_clc_msg_proposal *pclc;
struct smc_clc_msg_decline *dclc;
struct smc_clc_msg_trail *trl;
@@ -51,29 +129,19 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
switch (clcm->type) {
case SMC_CLC_PROPOSAL:
pclc = (struct smc_clc_msg_proposal *)clcm;
- pclc_prfx = smc_clc_proposal_get_prefix(pclc);
- if (ntohs(pclc->hdr.length) <
- sizeof(*pclc) + ntohs(pclc->iparea_offset) +
- sizeof(*pclc_prfx) +
- pclc_prfx->ipv6_prefixes_cnt *
- sizeof(struct smc_clc_ipv6_prefix) +
- sizeof(*trl))
+ if (!smc_clc_msg_prop_valid(pclc))
return false;
trl = (struct smc_clc_msg_trail *)
((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
break;
case SMC_CLC_ACCEPT:
case SMC_CLC_CONFIRM:
- if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
- return false;
- clc = (struct smc_clc_msg_accept_confirm *)clcm;
- if ((clcm->path == SMC_TYPE_R &&
- ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
- (clcm->path == SMC_TYPE_D &&
- ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
+ clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
+ if (!smc_clc_msg_acc_conf_valid(clc_v2))
return false;
trl = (struct smc_clc_msg_trail *)
- ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
+ ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
+ sizeof(*trl));
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
@@ -327,9 +395,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
goto out;
}
- if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N)
- reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */
-
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
if (datlen > buflen) {
@@ -365,7 +430,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
dclc = (struct smc_clc_msg_decline *)clcm;
reason_code = SMC_CLC_DECL_PEERDECL;
smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
- if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
+ if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
+ SMC_FIRST_CONTACT_MASK) {
smc->conn.lgr->sync_err = 1;
smc_lgr_terminate_sched(smc->conn.lgr);
}
@@ -377,7 +443,7 @@ out:
}
/* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
{
struct smc_clc_msg_decline dclc;
struct msghdr msg;
@@ -388,8 +454,10 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
- dclc.hdr.version = SMC_V1;
- dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
+ dclc.hdr.version = version;
+ dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
+ dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
+ SMC_FIRST_CONTACT_MASK : 0;
if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
memcpy(dclc.id_for_peer, local_systemid,
@@ -408,18 +476,20 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
}
/* send CLC PROPOSAL message across internal TCP socket */
-int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_init_info *ini)
+int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
{
+ struct smc_clc_smcd_v2_extension *smcd_v2_ext;
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_msg_proposal *pclc_base;
+ struct smc_clc_smcd_gid_chid *gidchids;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
int len, i, plen, rc;
int reason_code = 0;
- struct kvec vec[5];
+ struct kvec vec[8];
struct msghdr msg;
pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
@@ -430,56 +500,121 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
pclc_smcd = &pclc->pclc_smcd;
pclc_prfx = &pclc->pclc_prfx;
ipv6_prfx = pclc->pclc_prfx_ipv6;
+ v2_ext = &pclc->pclc_v2_ext;
+ smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
+ gidchids = pclc->pclc_gidchids;
trl = &pclc->pclc_trl;
+ pclc_base->hdr.version = SMC_V2;
+ pclc_base->hdr.typev1 = ini->smc_type_v1;
+ pclc_base->hdr.typev2 = ini->smc_type_v2;
+ plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
+
/* retrieve ip prefixes for CLC proposal msg */
- rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
- if (rc) {
- kfree(pclc);
- return SMC_CLC_DECL_CNFERR; /* configuration error */
+ if (ini->smc_type_v1 != SMC_TYPE_N) {
+ rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
+ if (rc) {
+ if (ini->smc_type_v2 == SMC_TYPE_N) {
+ kfree(pclc);
+ return SMC_CLC_DECL_CNFERR;
+ }
+ pclc_base->hdr.typev1 = SMC_TYPE_N;
+ } else {
+ pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
+ plen += sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
}
- /* send SMC Proposal CLC message */
- plen = sizeof(*pclc_base) + sizeof(*pclc_prfx) +
- (pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
- sizeof(*trl);
+ /* build SMC Proposal CLC message */
memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
pclc_base->hdr.type = SMC_CLC_PROPOSAL;
- pclc_base->hdr.version = SMC_V1; /* SMC version */
- pclc_base->hdr.path = smc_type;
- if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
+ if (smcr_indicated(ini->smc_type_v1)) {
/* add SMC-R specifics */
memcpy(pclc_base->lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
ETH_ALEN);
- pclc_base->iparea_offset = htons(0);
}
- if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
+ if (smcd_indicated(ini->smc_type_v1)) {
/* add SMC-D specifics */
- plen += sizeof(*pclc_smcd);
- pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
- pclc_smcd->gid = ini->ism_dev->local_gid;
+ if (ini->ism_dev[0]) {
+ pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
+ pclc_smcd->ism.chid =
+ htons(smc_ism_get_chid(ini->ism_dev[0]));
+ }
+ }
+ if (ini->smc_type_v2 == SMC_TYPE_N) {
+ pclc_smcd->v2_ext_offset = 0;
+ } else {
+ u16 v2_ext_offset;
+ u8 *eid = NULL;
+
+ v2_ext_offset = sizeof(*pclc_smcd) -
+ offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
+ if (ini->smc_type_v1 != SMC_TYPE_N)
+ v2_ext_offset += sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
+ v2_ext->hdr.eid_cnt = 0;
+ v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
+ v2_ext->hdr.flag.release = SMC_RELEASE;
+ v2_ext->hdr.flag.seid = 1;
+ v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
+ offsetofend(struct smc_clnt_opts_area_hdr,
+ smcd_v2_ext_offset) +
+ v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
+ if (ini->ism_dev[0])
+ smc_ism_get_system_eid(ini->ism_dev[0], &eid);
+ else
+ smc_ism_get_system_eid(ini->ism_dev[1], &eid);
+ if (eid)
+ memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
+ plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
+ if (ini->ism_offered_cnt) {
+ for (i = 1; i <= ini->ism_offered_cnt; i++) {
+ gidchids[i - 1].gid =
+ htonll(ini->ism_dev[i]->local_gid);
+ gidchids[i - 1].chid =
+ htons(smc_ism_get_chid(ini->ism_dev[i]));
+ }
+ plen += ini->ism_offered_cnt *
+ sizeof(struct smc_clc_smcd_gid_chid);
+ }
}
pclc_base->hdr.length = htons(plen);
-
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+
+ /* send SMC Proposal CLC message */
memset(&msg, 0, sizeof(msg));
i = 0;
vec[i].iov_base = pclc_base;
vec[i++].iov_len = sizeof(*pclc_base);
- if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
- vec[i].iov_base = pclc_smcd;
- vec[i++].iov_len = sizeof(*pclc_smcd);
+ vec[i].iov_base = pclc_smcd;
+ vec[i++].iov_len = sizeof(*pclc_smcd);
+ if (ini->smc_type_v1 != SMC_TYPE_N) {
+ vec[i].iov_base = pclc_prfx;
+ vec[i++].iov_len = sizeof(*pclc_prfx);
+ if (pclc_prfx->ipv6_prefixes_cnt > 0) {
+ vec[i].iov_base = ipv6_prfx;
+ vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(ipv6_prfx[0]);
+ }
}
- vec[i].iov_base = pclc_prfx;
- vec[i++].iov_len = sizeof(*pclc_prfx);
- if (pclc_prfx->ipv6_prefixes_cnt > 0) {
- vec[i].iov_base = ipv6_prfx;
- vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
- sizeof(ipv6_prfx[0]);
+ if (ini->smc_type_v2 != SMC_TYPE_N) {
+ vec[i].iov_base = v2_ext;
+ vec[i++].iov_len = sizeof(*v2_ext);
+ vec[i].iov_base = smcd_v2_ext;
+ vec[i++].iov_len = sizeof(*smcd_v2_ext);
+ if (ini->ism_offered_cnt) {
+ vec[i].iov_base = gidchids;
+ vec[i++].iov_len = ini->ism_offered_cnt *
+ sizeof(struct smc_clc_smcd_gid_chid);
+ }
}
vec[i].iov_base = trl;
vec[i++].iov_len = sizeof(*trl);
@@ -499,29 +634,47 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
- struct smc_clc_msg_accept_confirm *clc,
- int first_contact)
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ int first_contact, u8 version)
{
struct smc_connection *conn = &smc->conn;
+ struct smc_clc_msg_accept_confirm *clc;
+ struct smc_clc_first_contact_ext fce;
+ struct smc_clc_msg_trail trl;
+ struct kvec vec[3];
struct msghdr msg;
- struct kvec vec;
+ int i, len;
/* send SMC Confirm CLC msg */
- clc->hdr.version = SMC_V1; /* SMC version */
+ clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
+ clc->hdr.version = version; /* SMC version */
if (first_contact)
- clc->hdr.flag = 1;
+ clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
if (conn->lgr->is_smcd) {
/* SMC-D specific settings */
memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
- clc->hdr.path = SMC_TYPE_D;
- clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+ clc->hdr.typev1 = SMC_TYPE_D;
clc->d0.gid = conn->lgr->smcd->local_gid;
clc->d0.token = conn->rmb_desc->token;
clc->d0.dmbe_size = conn->rmbe_size_short;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
- memcpy(clc->d0.smcd_trl.eyecatcher, SMCD_EYECATCHER,
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ u8 *eid = NULL;
+
+ clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
+ smc_ism_get_system_eid(conn->lgr->smcd, &eid);
+ if (eid)
+ memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
+ len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact)
+ smc_clc_fill_fce(&fce, &len);
+ clc_v2->hdr.length = htons(len);
+ }
+ memcpy(trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
struct smc_link *link = conn->lnk;
@@ -530,7 +683,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
link = conn->lnk;
memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
- clc->hdr.path = SMC_TYPE_R;
+ clc->hdr.typev1 = SMC_TYPE_R;
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(clc->r0.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
@@ -554,29 +707,43 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
- memcpy(clc->r0.smcr_trl.eyecatcher, SMC_EYECATCHER,
- sizeof(SMC_EYECATCHER));
+ memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
- vec.iov_base = clc;
- vec.iov_len = ntohs(clc->hdr.length);
- return kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
+ i = 0;
+ vec[i].iov_base = clc_v2;
+ if (version > SMC_V1)
+ vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
+ else
+ vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
+ SMCD_CLC_ACCEPT_CONFIRM_LEN :
+ SMCR_CLC_ACCEPT_CONFIRM_LEN) -
+ sizeof(trl);
+ if (version > SMC_V1 && first_contact) {
+ vec[i].iov_base = &fce;
+ vec[i++].iov_len = sizeof(fce);
+ }
+ vec[i].iov_base = &trl;
+ vec[i++].iov_len = sizeof(trl);
+ return kernel_sendmsg(smc->clcsock, &msg, vec, 1,
ntohs(clc->hdr.length));
}
/* send CLC CONFIRM message across internal TCP socket */
-int smc_clc_send_confirm(struct smc_sock *smc)
+int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
+ u8 version)
{
- struct smc_clc_msg_accept_confirm cclc;
+ struct smc_clc_msg_accept_confirm_v2 cclc_v2;
int reason_code = 0;
int len;
/* send SMC Confirm CLC msg */
- memset(&cclc, 0, sizeof(cclc));
- cclc.hdr.type = SMC_CLC_CONFIRM;
- len = smc_clc_send_confirm_accept(smc, &cclc, 0);
- if (len < ntohs(cclc.hdr.length)) {
+ memset(&cclc_v2, 0, sizeof(cclc_v2));
+ cclc_v2.hdr.type = SMC_CLC_CONFIRM;
+ len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
+ version);
+ if (len < ntohs(cclc_v2.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
@@ -589,16 +756,28 @@ int smc_clc_send_confirm(struct smc_sock *smc)
}
/* send CLC ACCEPT message across internal TCP socket */
-int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact)
+int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
+ u8 version)
{
- struct smc_clc_msg_accept_confirm aclc;
+ struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
- memset(&aclc, 0, sizeof(aclc));
- aclc.hdr.type = SMC_CLC_ACCEPT;
- len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact);
- if (len < ntohs(aclc.hdr.length))
+ memset(&aclc_v2, 0, sizeof(aclc_v2));
+ aclc_v2.hdr.type = SMC_CLC_ACCEPT;
+ len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
+ version);
+ if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
+
+void __init smc_clc_init(void)
+{
+ struct new_utsname *u;
+
+ memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
+ u = utsname();
+ memcpy(smc_hostname, u->nodename,
+ min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
+}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index fcd8521c7737..b3f46ab79e47 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -54,19 +54,19 @@
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
+#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */
+
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
u8 type; /* proposal / accept / confirm / decline */
__be16 length;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
- flag : 1,
- rsvd : 1,
- path : 2;
+ typev2 : 2,
+ typev1 : 2;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u8 path : 2,
- rsvd : 1,
- flag : 1,
+ u8 typev1 : 2,
+ typev2 : 2,
version : 4;
#endif
} __packed; /* format defined in RFC7609 */
@@ -81,8 +81,6 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
-#define SMC_CLC_MAX_V6_PREFIX 8
-
/* Struct would be 4 byte aligned, but it is used in an array that is sent
* to peers and must conform to RFC7609, hence we need to use packed here.
*/
@@ -91,6 +89,44 @@ struct smc_clc_ipv6_prefix {
u8 prefix_len;
} __packed; /* format defined in RFC7609 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+struct smc_clc_v2_flag {
+ u8 release : 4,
+ rsvd : 3,
+ seid : 1;
+};
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+struct smc_clc_v2_flag {
+ u8 seid : 1,
+ rsvd : 3,
+ release : 4;
+};
+#endif
+
+struct smc_clnt_opts_area_hdr {
+ u8 eid_cnt; /* number of user defined EIDs */
+ u8 ism_gid_cnt; /* number of ISMv2 GIDs */
+ u8 reserved1;
+ struct smc_clc_v2_flag flag;
+ u8 reserved2[2];
+ __be16 smcd_v2_ext_offset; /* SMC-Dv2 Extension Offset */
+};
+
+struct smc_clc_smcd_gid_chid {
+ __be64 gid; /* ISM GID */
+ __be16 chid; /* ISMv2 CHID */
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2
+ * (https://www.ibm.com/support/pages/node/6326337)
+ */
+
+struct smc_clc_v2_extension {
+ struct smc_clnt_opts_area_hdr hdr;
+ u8 roce[16]; /* RoCEv2 GID */
+ u8 reserved[16];
+ u8 user_eids[0][SMC_MAX_EID_LEN];
+};
+
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
@@ -99,8 +135,15 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
} __aligned(4);
struct smc_clc_msg_smcd { /* SMC-D GID information */
- u64 gid; /* ISM GID of requestor */
- u8 res[32];
+ struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
+ __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
+ u8 reserved[28];
+};
+
+struct smc_clc_smcd_v2_extension {
+ u8 system_eid[SMC_MAX_EID_LEN];
+ u8 reserved[16];
+ struct smc_clc_smcd_gid_chid gidchid[0];
};
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
@@ -109,11 +152,16 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
__be16 iparea_offset; /* offset to IP address information area */
} __aligned(4);
+#define SMC_CLC_MAX_V6_PREFIX 8
+
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
+ struct smc_clc_v2_extension pclc_v2_ext;
+ struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
+ struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS];
struct smc_clc_msg_trail pclc_trl;
};
@@ -134,11 +182,9 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
- struct smc_clc_msg_trail smcr_trl;
- /* eye catcher "SMCR" EBCDIC */
} __packed;
-struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */
+struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
@@ -150,26 +196,63 @@ struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */
dmbe_size : 4;
#endif
u16 reserved4;
- u32 linkid; /* Link identifier */
- u32 reserved5[3];
- struct smc_clc_msg_trail smcd_trl;
- /* eye catcher "SMCD" EBCDIC */
+ __be32 linkid; /* Link identifier */
} __packed;
+#define SMC_CLC_OS_ZOS 1
+#define SMC_CLC_OS_LINUX 2
+#define SMC_CLC_OS_AIX 3
+
+struct smc_clc_first_contact_ext {
+ u8 reserved1;
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 os_type : 4,
+ release : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 release : 4,
+ os_type : 4;
+#endif
+ u8 reserved2[2];
+ u8 hostname[SMC_MAX_HOSTNAME_LEN];
+};
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
- struct smcd_clc_msg_accept_confirm d0; /* SMC-D */
+ struct { /* SMC-D */
+ struct smcd_clc_msg_accept_confirm_common d0;
+ u32 reserved5[3];
+ };
};
} __packed; /* format defined in RFC7609 */
+struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
+ struct smc_clc_msg_hdr hdr;
+ union {
+ struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
+ struct { /* SMC-D */
+ struct smcd_clc_msg_accept_confirm_common d0;
+ __be16 chid;
+ u8 eid[SMC_MAX_EID_LEN];
+ u8 reserved5[8];
+ };
+ };
+};
+
struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_hdr hdr;
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
__be32 peer_diagnosis; /* diagnosis information */
- u8 reserved2[4];
- struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 os_type : 4,
+ reserved : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 4,
+ os_type : 4;
+#endif
+ u8 reserved2[3];
+ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);
/* determine start of the prefix area within the proposal message */
@@ -180,16 +263,58 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
+static inline bool smcr_indicated(int smc_type)
+{
+ return smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B;
+}
+
+static inline bool smcd_indicated(int smc_type)
+{
+ return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
+}
+
/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
{
- if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
+ if (smcd_indicated(prop->hdr.typev1) &&
+ ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
return NULL;
return (struct smc_clc_msg_smcd *)(prop + 1);
}
+static inline struct smc_clc_v2_extension *
+smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop)
+{
+ struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop);
+
+ if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset))
+ return NULL;
+
+ return (struct smc_clc_v2_extension *)
+ ((u8 *)prop_smcd +
+ offsetof(struct smc_clc_msg_smcd, v2_ext_offset) +
+ sizeof(prop_smcd->v2_ext_offset) +
+ ntohs(prop_smcd->v2_ext_offset));
+}
+
+static inline struct smc_clc_smcd_v2_extension *
+smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
+{
+ if (!prop_v2ext)
+ return NULL;
+ if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset))
+ return NULL;
+
+ return (struct smc_clc_smcd_v2_extension *)
+ ((u8 *)prop_v2ext +
+ offsetof(struct smc_clc_v2_extension, hdr) +
+ offsetof(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) +
+ sizeof(prop_v2ext->hdr.smcd_v2_ext_offset) +
+ ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
+}
+
struct smcd_dev;
struct smc_init_info;
@@ -197,10 +322,12 @@ int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type, unsigned long timeout);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
-int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
- struct smc_init_info *ini);
-int smc_clc_send_confirm(struct smc_sock *smc);
-int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
+int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
+int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
+ u8 version);
+int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
+ u8 version);
+void smc_clc_init(void) __init;
#endif
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c811ae1a8add..f1dbb5025c0b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -375,7 +375,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
int i;
if (ini->is_smcd && ini->vlan_id) {
- if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+ if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
+ ini->vlan_id)) {
rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
}
@@ -412,13 +413,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
- get_device(&ini->ism_dev->dev);
- lgr->peer_gid = ini->ism_peer_gid;
- lgr->smcd = ini->ism_dev;
- lgr_list = &ini->ism_dev->lgr_list;
+ get_device(&ini->ism_dev[ini->ism_selected]->dev);
+ lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
+ lgr->smcd = ini->ism_dev[ini->ism_selected];
+ lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
+ lgr->smc_version = ini->smcd_version;
lgr->peer_shutdown = 0;
- atomic_inc(&ini->ism_dev->lgr_cnt);
+ atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
@@ -449,7 +451,7 @@ free_lgr:
kfree(lgr);
ism_put_vlan:
if (ini->is_smcd && ini->vlan_id)
- smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
+ smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
if (rc < 0) {
if (rc == -ENOMEM)
@@ -1288,8 +1290,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
spinlock_t *lgr_lock;
int rc = 0;
- lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
- lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
+ lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
+ &smc_lgr_list.list;
+ lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
+ &smc_lgr_list.lock;
ini->first_contact_local = 1;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->first_contact_peer)
@@ -1301,7 +1305,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
- smcd_lgr_match(lgr, ini->ism_dev, ini->ism_peer_gid) :
+ smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
+ ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == ini->vlan_id &&
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 37a5903789b0..f1e867ce2e63 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -231,6 +231,11 @@ struct smc_link_group {
u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
+ u8 smc_version;
+ u8 negotiated_eid[SMC_MAX_EID_LEN];
+ u8 peer_os; /* peer operating system */
+ u8 peer_smc_release;
+ u8 peer_hostname[SMC_MAX_HOSTNAME_LEN];
union {
struct { /* SMC-R */
enum smc_lgr_role role;
@@ -291,6 +296,8 @@ struct smc_clc_msg_local;
struct smc_init_info {
u8 is_smcd;
+ u8 smc_type_v1;
+ u8 smc_type_v2;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@@ -301,8 +308,12 @@ struct smc_init_info {
u8 ib_port;
u32 ib_clcqpn;
/* SMC-D */
- u64 ism_peer_gid;
- struct smcd_dev *ism_dev;
+ u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
+ struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
+ u16 ism_chid[SMC_MAX_ISM_DEVS + 1];
+ u8 ism_offered_cnt; /* # of ISM devices offered */
+ u8 ism_selected; /* index of selected ISM dev*/
+ u8 smcd_version;
};
/* Find the connection associated with the given alert token in the link group.
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 998c525de785..e9a6487a42cb 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -21,7 +21,9 @@ struct smcd_dev_list smcd_dev_list = {
.mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex)
};
-/* Test if an ISM communication is possible. */
+bool smc_ism_v2_capable;
+
+/* Test if an ISM communication is possible - same CPC */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
@@ -39,6 +41,16 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
return rc < 0 ? rc : 0;
}
+void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid)
+{
+ smcd->ops->get_system_eid(smcd, eid);
+}
+
+u16 smc_ism_get_chid(struct smcd_dev *smcd)
+{
+ return smcd->ops->get_chid(smcd);
+}
+
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
@@ -319,7 +331,18 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev);
int smcd_register_dev(struct smcd_dev *smcd)
{
mutex_lock(&smcd_dev_list.mutex);
- list_add_tail(&smcd->list, &smcd_dev_list.list);
+ if (list_empty(&smcd_dev_list.list)) {
+ u8 *system_eid = NULL;
+
+ smc_ism_get_system_eid(smcd, &system_eid);
+ if ((*system_eid) + 24 != '0' || (*system_eid) + 28 != '0')
+ smc_ism_v2_capable = true;
+ }
+ /* sort list: devices without pnetid before devices with pnetid */
+ if (smcd->pnetid[0])
+ list_add_tail(&smcd->list, &smcd_dev_list.list);
+ else
+ list_add(&smcd->list, &smcd_dev_list.list);
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
@@ -399,3 +422,8 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
spin_unlock_irqrestore(&smcd->lock, flags);
}
EXPORT_SYMBOL_GPL(smcd_handle_irq);
+
+void __init smc_ism_init(void)
+{
+ smc_ism_v2_capable = false;
+}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 81cc4537efd3..8048e09ddcf8 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -19,7 +19,10 @@ struct smcd_dev_list { /* List of SMCD devices */
struct mutex mutex; /* Protects list of devices */
};
-extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
+extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
+extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus
+ * System EID is defined
+ */
struct smc_ism_vlanid { /* VLAN id set on ISM device */
struct list_head list;
@@ -47,4 +50,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
+void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
+u16 smc_ism_get_chid(struct smcd_dev *dev);
+void smc_ism_init(void);
#endif
diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h
index e7a8fc4ae02f..0f4f35aa43ad 100644
--- a/net/smc/smc_netns.h
+++ b/net/smc/smc_netns.h
@@ -16,5 +16,6 @@ extern unsigned int smc_net_id;
/* per-network namespace private data */
struct smc_net {
struct smc_pnettable pnettable;
+ struct smc_pnetids_ndev pnetids_ndev;
};
#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 70684c49510e..f3c18b991d35 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -29,8 +29,7 @@
#include "smc_ism.h"
#include "smc_core.h"
-#define SMC_ASCII_BLANK 32
-
+static struct net_device *__pnet_find_base_ndev(struct net_device *ndev);
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
@@ -73,14 +72,22 @@ struct smc_pnetentry {
};
};
+/* Check if the pnetid is set */
+bool smc_pnet_is_pnetid_set(u8 *pnetid)
+{
+ if (pnetid[0] == 0 || pnetid[0] == _S)
+ return false;
+ return true;
+}
+
/* Check if two given pnetids match */
static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
{
int i;
for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
- if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
- (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
+ if ((pnetid1[i] == 0 || pnetid1[i] == _S) &&
+ (pnetid2[i] == 0 || pnetid2[i] == _S))
break;
if (pnetid1[i] != pnetid2[i])
return false;
@@ -238,11 +245,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
char *pnet_name)
{
- u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
mutex_lock(&smc_ib_devices.mutex);
- if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+ if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) {
memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
SMC_MAX_PNETID_LEN);
ib_dev->pnetid_by_user[ib_port - 1] = true;
@@ -256,11 +262,10 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
*/
static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
{
- u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
bool applied = false;
mutex_lock(&smcd_dev_list.mutex);
- if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+ if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) {
memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
smcd_dev->pnetid_by_user = true;
applied = true;
@@ -708,10 +713,115 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = {
.n_ops = ARRAY_SIZE(smc_pnet_ops)
};
+bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid)
+{
+ struct smc_net *sn = net_generic(net, smc_net_id);
+ struct smc_pnetids_ndev_entry *pe;
+ bool rc = false;
+
+ read_lock(&sn->pnetids_ndev.lock);
+ list_for_each_entry(pe, &sn->pnetids_ndev.list, list) {
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
+ rc = true;
+ goto unlock;
+ }
+ }
+
+unlock:
+ read_unlock(&sn->pnetids_ndev.lock);
+ return rc;
+}
+
+static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid)
+{
+ struct smc_net *sn = net_generic(net, smc_net_id);
+ struct smc_pnetids_ndev_entry *pe, *pi;
+
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return -ENOMEM;
+
+ write_lock(&sn->pnetids_ndev.lock);
+ list_for_each_entry(pi, &sn->pnetids_ndev.list, list) {
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
+ refcount_inc(&pi->refcnt);
+ kfree(pe);
+ goto unlock;
+ }
+ }
+ refcount_set(&pe->refcnt, 1);
+ memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN);
+ list_add_tail(&pe->list, &sn->pnetids_ndev.list);
+
+unlock:
+ write_unlock(&sn->pnetids_ndev.lock);
+ return 0;
+}
+
+static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid)
+{
+ struct smc_net *sn = net_generic(net, smc_net_id);
+ struct smc_pnetids_ndev_entry *pe, *pe2;
+
+ write_lock(&sn->pnetids_ndev.lock);
+ list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) {
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
+ if (refcount_dec_and_test(&pe->refcnt)) {
+ list_del(&pe->list);
+ kfree(pe);
+ }
+ break;
+ }
+ }
+ write_unlock(&sn->pnetids_ndev.lock);
+}
+
+static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev,
+ u8 *ndev_pnetid)
+{
+ struct net_device *base_dev;
+
+ base_dev = __pnet_find_base_ndev(dev);
+ if (base_dev->flags & IFF_UP &&
+ !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port,
+ ndev_pnetid)) {
+ /* add to PNETIDs list */
+ smc_pnet_add_pnetid(net, ndev_pnetid);
+ }
+}
+
+/* create initial list of netdevice pnetids */
+static void smc_pnet_create_pnetids_list(struct net *net)
+{
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ struct net_device *dev;
+
+ rtnl_lock();
+ for_each_netdev(net, dev)
+ smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
+ rtnl_unlock();
+}
+
+/* clean up list of netdevice pnetids */
+static void smc_pnet_destroy_pnetids_list(struct net *net)
+{
+ struct smc_net *sn = net_generic(net, smc_net_id);
+ struct smc_pnetids_ndev_entry *pe, *temp_pe;
+
+ write_lock(&sn->pnetids_ndev.lock);
+ list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) {
+ list_del(&pe->list);
+ kfree(pe);
+ }
+ write_unlock(&sn->pnetids_ndev.lock);
+}
+
static int smc_pnet_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(event_dev);
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
switch (event) {
case NETDEV_REBOOT:
@@ -721,6 +831,17 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
smc_pnet_add_by_ndev(event_dev);
return NOTIFY_OK;
+ case NETDEV_UP:
+ smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid);
+ return NOTIFY_OK;
+ case NETDEV_DOWN:
+ event_dev = __pnet_find_base_ndev(event_dev);
+ if (!smc_pnetid_by_dev_port(event_dev->dev.parent,
+ event_dev->dev_port, ndev_pnetid)) {
+ /* remove from PNETIDs list */
+ smc_pnet_remove_pnetid(net, ndev_pnetid);
+ }
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
@@ -735,9 +856,14 @@ int smc_pnet_net_init(struct net *net)
{
struct smc_net *sn = net_generic(net, smc_net_id);
struct smc_pnettable *pnettable = &sn->pnettable;
+ struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
rwlock_init(&pnettable->lock);
+ INIT_LIST_HEAD(&pnetids_ndev->list);
+ rwlock_init(&pnetids_ndev->lock);
+
+ smc_pnet_create_pnetids_list(net);
return 0;
}
@@ -752,6 +878,7 @@ int __init smc_pnet_init(void)
rc = register_netdevice_notifier(&smc_netdev_notifier);
if (rc)
genl_unregister_family(&smc_pnet_nl_family);
+
return rc;
}
@@ -760,6 +887,7 @@ void smc_pnet_net_exit(struct net *net)
{
/* flush pnet table */
smc_pnet_remove_by_pnetid(net, NULL);
+ smc_pnet_destroy_pnetids_list(net);
}
void smc_pnet_exit(void)
@@ -768,16 +896,11 @@ void smc_pnet_exit(void)
genl_unregister_family(&smc_pnet_nl_family);
}
-/* Determine one base device for stacked net devices.
- * If the lower device level contains more than one devices
- * (for instance with bonding slaves), just the first device
- * is used to reach a base device.
- */
-static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
+static struct net_device *__pnet_find_base_ndev(struct net_device *ndev)
{
int i, nest_lvl;
- rtnl_lock();
+ ASSERT_RTNL();
nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
@@ -787,6 +910,18 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
lower = lower->next;
ndev = netdev_lower_get_next(ndev, &lower);
}
+ return ndev;
+}
+
+/* Determine one base device for stacked net devices.
+ * If the lower device level contains more than one devices
+ * (for instance with bonding slaves), just the first device
+ * is used to reach a base device.
+ */
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
+{
+ rtnl_lock();
+ ndev = __pnet_find_base_ndev(ndev);
rtnl_unlock();
return ndev;
}
@@ -929,10 +1064,10 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away &&
- (!ini->ism_peer_gid ||
- !smc_ism_cantalk(ini->ism_peer_gid, ini->vlan_id,
+ (!ini->ism_peer_gid[0] ||
+ !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
ismdev))) {
- ini->ism_dev = ismdev;
+ ini->ism_dev[0] = ismdev;
break;
}
}
@@ -966,7 +1101,7 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(sk);
- ini->ism_dev = NULL;
+ ini->ism_dev[0] = NULL;
if (!dst)
goto out;
if (!dst->dev)
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 811a65986691..14039272f7e4 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -12,6 +12,8 @@
#ifndef _SMC_PNET_H
#define _SMC_PNET_H
+#include <net/smc.h>
+
#if IS_ENABLED(CONFIG_HAVE_PNETID)
#include <asm/pnet.h>
#endif
@@ -31,6 +33,17 @@ struct smc_pnettable {
struct list_head pnetlist;
};
+struct smc_pnetids_ndev { /* list of pnetids for net devices in UP state*/
+ struct list_head list;
+ rwlock_t lock;
+};
+
+struct smc_pnetids_ndev_entry {
+ struct list_head list;
+ u8 pnetid[SMC_MAX_PNETID_LEN];
+ refcount_t refcnt;
+};
+
static inline int smc_pnetid_by_dev_port(struct device *dev,
unsigned short port, u8 *pnetid)
{
@@ -52,4 +65,6 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
struct smc_init_info *ini,
struct smc_ib_device *known_dev);
+bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid);
+bool smc_pnet_is_pnetid_set(u8 *pnetid);
#endif