net: dsa: make dsa_tree_change_tag_proto actually unwind the tag proto change
[linux.git] / net / smc / af_smc.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
6  *  applies to SOCK_STREAM sockets only
7  *  offers an alternative communication option for TCP-protocol sockets
8  *  applicable with RoCE-cards only
9  *
10  *  Initial restrictions:
11  *    - support for alternate links postponed
12  *
13  *  Copyright IBM Corp. 2016, 2018
14  *
15  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
16  *              based on prototype from Frank Blaschka
17  */
18
19 #define KMSG_COMPONENT "smc"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21
22 #include <linux/module.h>
23 #include <linux/socket.h>
24 #include <linux/workqueue.h>
25 #include <linux/in.h>
26 #include <linux/sched/signal.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rcupdate_wait.h>
29 #include <linux/ctype.h>
30
31 #include <net/sock.h>
32 #include <net/tcp.h>
33 #include <net/smc.h>
34 #include <asm/ioctls.h>
35
36 #include <net/net_namespace.h>
37 #include <net/netns/generic.h>
38 #include "smc_netns.h"
39
40 #include "smc.h"
41 #include "smc_clc.h"
42 #include "smc_llc.h"
43 #include "smc_cdc.h"
44 #include "smc_core.h"
45 #include "smc_ib.h"
46 #include "smc_ism.h"
47 #include "smc_pnet.h"
48 #include "smc_netlink.h"
49 #include "smc_tx.h"
50 #include "smc_rx.h"
51 #include "smc_close.h"
52 #include "smc_stats.h"
53 #include "smc_tracepoint.h"
54
/* serialize link group creation on server */
static DEFINE_MUTEX(smc_server_lgr_pending);
/* serialize link group creation on client */
static DEFINE_MUTEX(smc_client_lgr_pending);

/* wq for handshake work */
struct workqueue_struct *smc_hs_wq;
/* wq for close work */
struct workqueue_struct *smc_close_wq;

/* work handlers hooked up via INIT_WORK() in smc_sock_alloc() */
static void smc_tcp_listen_work(struct work_struct *work);
static void smc_connect_work(struct work_struct *work);
67
68 static void smc_set_keepalive(struct sock *sk, int val)
69 {
70         struct smc_sock *smc = smc_sk(sk);
71
72         smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
73 }
74
75 static struct smc_hashinfo smc_v4_hashinfo = {
76         .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
77 };
78
79 static struct smc_hashinfo smc_v6_hashinfo = {
80         .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
81 };
82
83 int smc_hash_sk(struct sock *sk)
84 {
85         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
86         struct hlist_head *head;
87
88         head = &h->ht;
89
90         write_lock_bh(&h->lock);
91         sk_add_node(sk, head);
92         write_unlock_bh(&h->lock);
93         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
94
95         return 0;
96 }
97 EXPORT_SYMBOL_GPL(smc_hash_sk);
98
99 void smc_unhash_sk(struct sock *sk)
100 {
101         struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
102
103         write_lock_bh(&h->lock);
104         if (sk_del_node_init(sk))
105                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
106         write_unlock_bh(&h->lock);
107 }
108 EXPORT_SYMBOL_GPL(smc_unhash_sk);
109
110 struct proto smc_proto = {
111         .name           = "SMC",
112         .owner          = THIS_MODULE,
113         .keepalive      = smc_set_keepalive,
114         .hash           = smc_hash_sk,
115         .unhash         = smc_unhash_sk,
116         .obj_size       = sizeof(struct smc_sock),
117         .h.smc_hash     = &smc_v4_hashinfo,
118         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
119 };
120 EXPORT_SYMBOL_GPL(smc_proto);
121
122 struct proto smc_proto6 = {
123         .name           = "SMC6",
124         .owner          = THIS_MODULE,
125         .keepalive      = smc_set_keepalive,
126         .hash           = smc_hash_sk,
127         .unhash         = smc_unhash_sk,
128         .obj_size       = sizeof(struct smc_sock),
129         .h.smc_hash     = &smc_v6_hashinfo,
130         .slab_flags     = SLAB_TYPESAFE_BY_RCU,
131 };
132 EXPORT_SYMBOL_GPL(smc_proto6);
133
134 static void smc_restore_fallback_changes(struct smc_sock *smc)
135 {
136         if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
137                 smc->clcsock->file->private_data = smc->sk.sk_socket;
138                 smc->clcsock->file = NULL;
139         }
140 }
141
142 static int __smc_release(struct smc_sock *smc)
143 {
144         struct sock *sk = &smc->sk;
145         int rc = 0;
146
147         if (!smc->use_fallback) {
148                 rc = smc_close_active(smc);
149                 sock_set_flag(sk, SOCK_DEAD);
150                 sk->sk_shutdown |= SHUTDOWN_MASK;
151         } else {
152                 if (sk->sk_state != SMC_CLOSED) {
153                         if (sk->sk_state != SMC_LISTEN &&
154                             sk->sk_state != SMC_INIT)
155                                 sock_put(sk); /* passive closing */
156                         if (sk->sk_state == SMC_LISTEN) {
157                                 /* wake up clcsock accept */
158                                 rc = kernel_sock_shutdown(smc->clcsock,
159                                                           SHUT_RDWR);
160                         }
161                         sk->sk_state = SMC_CLOSED;
162                         sk->sk_state_change(sk);
163                 }
164                 smc_restore_fallback_changes(smc);
165         }
166
167         sk->sk_prot->unhash(sk);
168
169         if (sk->sk_state == SMC_CLOSED) {
170                 if (smc->clcsock) {
171                         release_sock(sk);
172                         smc_clcsock_release(smc);
173                         lock_sock(sk);
174                 }
175                 if (!smc->use_fallback)
176                         smc_conn_free(&smc->conn);
177         }
178
179         return rc;
180 }
181
182 static int smc_release(struct socket *sock)
183 {
184         struct sock *sk = sock->sk;
185         struct smc_sock *smc;
186         int old_state, rc = 0;
187
188         if (!sk)
189                 goto out;
190
191         sock_hold(sk); /* sock_put below */
192         smc = smc_sk(sk);
193
194         old_state = sk->sk_state;
195
196         /* cleanup for a dangling non-blocking connect */
197         if (smc->connect_nonblock && old_state == SMC_INIT)
198                 tcp_abort(smc->clcsock->sk, ECONNABORTED);
199
200         if (cancel_work_sync(&smc->connect_work))
201                 sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
202
203         if (sk->sk_state == SMC_LISTEN)
204                 /* smc_close_non_accepted() is called and acquires
205                  * sock lock for child sockets again
206                  */
207                 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
208         else
209                 lock_sock(sk);
210
211         if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
212             !smc->use_fallback)
213                 smc_close_active_abort(smc);
214
215         rc = __smc_release(smc);
216
217         /* detach socket */
218         sock_orphan(sk);
219         sock->sk = NULL;
220         release_sock(sk);
221
222         sock_put(sk); /* sock_hold above */
223         sock_put(sk); /* final sock_put */
224 out:
225         return rc;
226 }
227
228 static void smc_destruct(struct sock *sk)
229 {
230         if (sk->sk_state != SMC_CLOSED)
231                 return;
232         if (!sock_flag(sk, SOCK_DEAD))
233                 return;
234
235         sk_refcnt_debug_dec(sk);
236 }
237
238 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
239                                    int protocol)
240 {
241         struct smc_sock *smc;
242         struct proto *prot;
243         struct sock *sk;
244
245         prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
246         sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
247         if (!sk)
248                 return NULL;
249
250         sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
251         sk->sk_state = SMC_INIT;
252         sk->sk_destruct = smc_destruct;
253         sk->sk_protocol = protocol;
254         smc = smc_sk(sk);
255         INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
256         INIT_WORK(&smc->connect_work, smc_connect_work);
257         INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
258         INIT_LIST_HEAD(&smc->accept_q);
259         spin_lock_init(&smc->accept_q_lock);
260         spin_lock_init(&smc->conn.send_lock);
261         sk->sk_prot->hash(sk);
262         sk_refcnt_debug_inc(sk);
263         mutex_init(&smc->clcsock_release_lock);
264
265         return sk;
266 }
267
268 static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
269                     int addr_len)
270 {
271         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
272         struct sock *sk = sock->sk;
273         struct smc_sock *smc;
274         int rc;
275
276         smc = smc_sk(sk);
277
278         /* replicate tests from inet_bind(), to be safe wrt. future changes */
279         rc = -EINVAL;
280         if (addr_len < sizeof(struct sockaddr_in))
281                 goto out;
282
283         rc = -EAFNOSUPPORT;
284         if (addr->sin_family != AF_INET &&
285             addr->sin_family != AF_INET6 &&
286             addr->sin_family != AF_UNSPEC)
287                 goto out;
288         /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
289         if (addr->sin_family == AF_UNSPEC &&
290             addr->sin_addr.s_addr != htonl(INADDR_ANY))
291                 goto out;
292
293         lock_sock(sk);
294
295         /* Check if socket is already active */
296         rc = -EINVAL;
297         if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
298                 goto out_rel;
299
300         smc->clcsock->sk->sk_reuse = sk->sk_reuse;
301         rc = kernel_bind(smc->clcsock, uaddr, addr_len);
302
303 out_rel:
304         release_sock(sk);
305 out:
306         return rc;
307 }
308
309 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
310                                    unsigned long mask)
311 {
312         /* options we don't get control via setsockopt for */
313         nsk->sk_type = osk->sk_type;
314         nsk->sk_sndbuf = osk->sk_sndbuf;
315         nsk->sk_rcvbuf = osk->sk_rcvbuf;
316         nsk->sk_sndtimeo = osk->sk_sndtimeo;
317         nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
318         nsk->sk_mark = osk->sk_mark;
319         nsk->sk_priority = osk->sk_priority;
320         nsk->sk_rcvlowat = osk->sk_rcvlowat;
321         nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
322         nsk->sk_err = osk->sk_err;
323
324         nsk->sk_flags &= ~mask;
325         nsk->sk_flags |= osk->sk_flags & mask;
326 }
327
328 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
329                              (1UL << SOCK_KEEPOPEN) | \
330                              (1UL << SOCK_LINGER) | \
331                              (1UL << SOCK_BROADCAST) | \
332                              (1UL << SOCK_TIMESTAMP) | \
333                              (1UL << SOCK_DBG) | \
334                              (1UL << SOCK_RCVTSTAMP) | \
335                              (1UL << SOCK_RCVTSTAMPNS) | \
336                              (1UL << SOCK_LOCALROUTE) | \
337                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
338                              (1UL << SOCK_RXQ_OVFL) | \
339                              (1UL << SOCK_WIFI_STATUS) | \
340                              (1UL << SOCK_NOFCS) | \
341                              (1UL << SOCK_FILTER_LOCKED) | \
342                              (1UL << SOCK_TSTAMP_NEW))
343 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
344  * clc socket (since smc is not called for these options from net/core)
345  */
346 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
347 {
348         smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
349 }
350
351 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
352                              (1UL << SOCK_KEEPOPEN) | \
353                              (1UL << SOCK_LINGER) | \
354                              (1UL << SOCK_DBG))
355 /* copy only settings and flags relevant for smc from clc to smc socket */
356 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
357 {
358         smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
359 }
360
361 /* register the new rmb on all links */
362 static int smcr_lgr_reg_rmbs(struct smc_link *link,
363                              struct smc_buf_desc *rmb_desc)
364 {
365         struct smc_link_group *lgr = link->lgr;
366         int i, rc = 0;
367
368         rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
369         if (rc)
370                 return rc;
371         /* protect against parallel smc_llc_cli_rkey_exchange() and
372          * parallel smcr_link_reg_rmb()
373          */
374         mutex_lock(&lgr->llc_conf_mutex);
375         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
376                 if (!smc_link_active(&lgr->lnk[i]))
377                         continue;
378                 rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
379                 if (rc)
380                         goto out;
381         }
382
383         /* exchange confirm_rkey msg with peer */
384         rc = smc_llc_do_confirm_rkey(link, rmb_desc);
385         if (rc) {
386                 rc = -EFAULT;
387                 goto out;
388         }
389         rmb_desc->is_conf_rkey = true;
390 out:
391         mutex_unlock(&lgr->llc_conf_mutex);
392         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
393         return rc;
394 }
395
396 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
397 {
398         struct smc_link *link = smc->conn.lnk;
399         struct smc_llc_qentry *qentry;
400         int rc;
401
402         /* receive CONFIRM LINK request from server over RoCE fabric */
403         qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
404                               SMC_LLC_CONFIRM_LINK);
405         if (!qentry) {
406                 struct smc_clc_msg_decline dclc;
407
408                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
409                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
410                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
411         }
412         smc_llc_save_peer_uid(qentry);
413         rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
414         smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
415         if (rc)
416                 return SMC_CLC_DECL_RMBE_EC;
417
418         rc = smc_ib_modify_qp_rts(link);
419         if (rc)
420                 return SMC_CLC_DECL_ERR_RDYLNK;
421
422         smc_wr_remember_qp_attr(link);
423
424         if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
425                 return SMC_CLC_DECL_ERR_REGRMB;
426
427         /* confirm_rkey is implicit on 1st contact */
428         smc->conn.rmb_desc->is_conf_rkey = true;
429
430         /* send CONFIRM LINK response over RoCE fabric */
431         rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
432         if (rc < 0)
433                 return SMC_CLC_DECL_TIMEOUT_CL;
434
435         smc_llc_link_active(link);
436         smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
437
438         /* optional 2nd link, receive ADD LINK request from server */
439         qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
440                               SMC_LLC_ADD_LINK);
441         if (!qentry) {
442                 struct smc_clc_msg_decline dclc;
443
444                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
445                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
446                 if (rc == -EAGAIN)
447                         rc = 0; /* no DECLINE received, go with one link */
448                 return rc;
449         }
450         smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
451         smc_llc_cli_add_link(link, qentry);
452         return 0;
453 }
454
455 static bool smc_isascii(char *hostname)
456 {
457         int i;
458
459         for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
460                 if (!isascii(hostname[i]))
461                         return false;
462         return true;
463 }
464
465 static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
466                                         struct smc_clc_msg_accept_confirm *clc)
467 {
468         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
469                 (struct smc_clc_msg_accept_confirm_v2 *)clc;
470         struct smc_clc_first_contact_ext *fce;
471         int clc_v2_len;
472
473         if (clc->hdr.version == SMC_V1 ||
474             !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
475                 return;
476
477         if (smc->conn.lgr->is_smcd) {
478                 memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
479                        SMC_MAX_EID_LEN);
480                 clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
481                                          d1);
482         } else {
483                 memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
484                        SMC_MAX_EID_LEN);
485                 clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
486                                          r1);
487         }
488         fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
489         smc->conn.lgr->peer_os = fce->os_type;
490         smc->conn.lgr->peer_smc_release = fce->release;
491         if (smc_isascii(fce->hostname))
492                 memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
493                        SMC_MAX_HOSTNAME_LEN);
494 }
495
496 static void smcr_conn_save_peer_info(struct smc_sock *smc,
497                                      struct smc_clc_msg_accept_confirm *clc)
498 {
499         int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
500
501         smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
502         smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
503         smc->conn.peer_rmbe_size = bufsize;
504         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
505         smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
506 }
507
508 static void smcd_conn_save_peer_info(struct smc_sock *smc,
509                                      struct smc_clc_msg_accept_confirm *clc)
510 {
511         int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
512
513         smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
514         smc->conn.peer_token = clc->d0.token;
515         /* msg header takes up space in the buffer */
516         smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
517         atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
518         smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
519 }
520
521 static void smc_conn_save_peer_info(struct smc_sock *smc,
522                                     struct smc_clc_msg_accept_confirm *clc)
523 {
524         if (smc->conn.lgr->is_smcd)
525                 smcd_conn_save_peer_info(smc, clc);
526         else
527                 smcr_conn_save_peer_info(smc, clc);
528         smc_conn_save_peer_info_fce(smc, clc);
529 }
530
531 static void smc_link_save_peer_info(struct smc_link *link,
532                                     struct smc_clc_msg_accept_confirm *clc,
533                                     struct smc_init_info *ini)
534 {
535         link->peer_qpn = ntoh24(clc->r0.qpn);
536         memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
537         memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
538         link->peer_psn = ntoh24(clc->r0.psn);
539         link->peer_mtu = clc->r0.qp_mtu;
540 }
541
542 static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
543                                        struct smc_stats_fback *fback_arr)
544 {
545         int cnt;
546
547         for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
548                 if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
549                         fback_arr[cnt].count++;
550                         break;
551                 }
552                 if (!fback_arr[cnt].fback_code) {
553                         fback_arr[cnt].fback_code = smc->fallback_rsn;
554                         fback_arr[cnt].count++;
555                         break;
556                 }
557         }
558 }
559
560 static void smc_stat_fallback(struct smc_sock *smc)
561 {
562         struct net *net = sock_net(&smc->sk);
563
564         mutex_lock(&net->smc.mutex_fback_rsn);
565         if (smc->listen_smc) {
566                 smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
567                 net->smc.fback_rsn->srv_fback_cnt++;
568         } else {
569                 smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
570                 net->smc.fback_rsn->clnt_fback_cnt++;
571         }
572         mutex_unlock(&net->smc.mutex_fback_rsn);
573 }
574
575 /* must be called under rcu read lock */
576 static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
577 {
578         struct socket_wq *wq;
579         __poll_t flags;
580
581         wq = rcu_dereference(smc->sk.sk_wq);
582         if (!skwq_has_sleeper(wq))
583                 return;
584
585         /* wake up smc sk->sk_wq */
586         if (!key) {
587                 /* sk_state_change */
588                 wake_up_interruptible_all(&wq->wait);
589         } else {
590                 flags = key_to_poll(key);
591                 if (flags & (EPOLLIN | EPOLLOUT))
592                         /* sk_data_ready or sk_write_space */
593                         wake_up_interruptible_sync_poll(&wq->wait, flags);
594                 else if (flags & EPOLLERR)
595                         /* sk_error_report */
596                         wake_up_interruptible_poll(&wq->wait, flags);
597         }
598 }
599
600 static int smc_fback_mark_woken(wait_queue_entry_t *wait,
601                                 unsigned int mode, int sync, void *key)
602 {
603         struct smc_mark_woken *mark =
604                 container_of(wait, struct smc_mark_woken, wait_entry);
605
606         mark->woken = true;
607         mark->key = key;
608         return 0;
609 }
610
611 static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
612                                      void (*clcsock_callback)(struct sock *sk))
613 {
614         struct smc_mark_woken mark = { .woken = false };
615         struct socket_wq *wq;
616
617         init_waitqueue_func_entry(&mark.wait_entry,
618                                   smc_fback_mark_woken);
619         rcu_read_lock();
620         wq = rcu_dereference(clcsk->sk_wq);
621         if (!wq)
622                 goto out;
623         add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
624         clcsock_callback(clcsk);
625         remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
626
627         if (mark.woken)
628                 smc_fback_wakeup_waitqueue(smc, mark.key);
629 out:
630         rcu_read_unlock();
631 }
632
633 static void smc_fback_state_change(struct sock *clcsk)
634 {
635         struct smc_sock *smc =
636                 smc_clcsock_user_data(clcsk);
637
638         if (!smc)
639                 return;
640         smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
641 }
642
643 static void smc_fback_data_ready(struct sock *clcsk)
644 {
645         struct smc_sock *smc =
646                 smc_clcsock_user_data(clcsk);
647
648         if (!smc)
649                 return;
650         smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
651 }
652
653 static void smc_fback_write_space(struct sock *clcsk)
654 {
655         struct smc_sock *smc =
656                 smc_clcsock_user_data(clcsk);
657
658         if (!smc)
659                 return;
660         smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
661 }
662
663 static void smc_fback_error_report(struct sock *clcsk)
664 {
665         struct smc_sock *smc =
666                 smc_clcsock_user_data(clcsk);
667
668         if (!smc)
669                 return;
670         smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
671 }
672
673 static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
674 {
675         struct sock *clcsk;
676         int rc = 0;
677
678         mutex_lock(&smc->clcsock_release_lock);
679         if (!smc->clcsock) {
680                 rc = -EBADF;
681                 goto out;
682         }
683         clcsk = smc->clcsock->sk;
684
685         if (smc->use_fallback)
686                 goto out;
687         smc->use_fallback = true;
688         smc->fallback_rsn = reason_code;
689         smc_stat_fallback(smc);
690         trace_smc_switch_to_fallback(smc, reason_code);
691         if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
692                 smc->clcsock->file = smc->sk.sk_socket->file;
693                 smc->clcsock->file->private_data = smc->clcsock;
694                 smc->clcsock->wq.fasync_list =
695                         smc->sk.sk_socket->wq.fasync_list;
696
697                 /* There might be some wait entries remaining
698                  * in smc sk->sk_wq and they should be woken up
699                  * as clcsock's wait queue is woken up.
700                  */
701                 smc->clcsk_state_change = clcsk->sk_state_change;
702                 smc->clcsk_data_ready = clcsk->sk_data_ready;
703                 smc->clcsk_write_space = clcsk->sk_write_space;
704                 smc->clcsk_error_report = clcsk->sk_error_report;
705
706                 clcsk->sk_state_change = smc_fback_state_change;
707                 clcsk->sk_data_ready = smc_fback_data_ready;
708                 clcsk->sk_write_space = smc_fback_write_space;
709                 clcsk->sk_error_report = smc_fback_error_report;
710
711                 smc->clcsock->sk->sk_user_data =
712                         (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
713         }
714 out:
715         mutex_unlock(&smc->clcsock_release_lock);
716         return rc;
717 }
718
719 /* fall back during connect */
720 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
721 {
722         struct net *net = sock_net(&smc->sk);
723         int rc = 0;
724
725         rc = smc_switch_to_fallback(smc, reason_code);
726         if (rc) { /* fallback fails */
727                 this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
728                 if (smc->sk.sk_state == SMC_INIT)
729                         sock_put(&smc->sk); /* passive closing */
730                 return rc;
731         }
732         smc_copy_sock_settings_to_clc(smc);
733         smc->connect_nonblock = 0;
734         if (smc->sk.sk_state == SMC_INIT)
735                 smc->sk.sk_state = SMC_ACTIVE;
736         return 0;
737 }
738
739 /* decline and fall back during connect */
740 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
741                                         u8 version)
742 {
743         struct net *net = sock_net(&smc->sk);
744         int rc;
745
746         if (reason_code < 0) { /* error, fallback is not possible */
747                 this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
748                 if (smc->sk.sk_state == SMC_INIT)
749                         sock_put(&smc->sk); /* passive closing */
750                 return reason_code;
751         }
752         if (reason_code != SMC_CLC_DECL_PEERDECL) {
753                 rc = smc_clc_send_decline(smc, reason_code, version);
754                 if (rc < 0) {
755                         this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
756                         if (smc->sk.sk_state == SMC_INIT)
757                                 sock_put(&smc->sk); /* passive closing */
758                         return rc;
759                 }
760         }
761         return smc_connect_fallback(smc, reason_code);
762 }
763
764 static void smc_conn_abort(struct smc_sock *smc, int local_first)
765 {
766         struct smc_connection *conn = &smc->conn;
767         struct smc_link_group *lgr = conn->lgr;
768         bool lgr_valid = false;
769
770         if (smc_conn_lgr_valid(conn))
771                 lgr_valid = true;
772
773         smc_conn_free(conn);
774         if (local_first && lgr_valid)
775                 smc_lgr_cleanup_early(lgr);
776 }
777
778 /* check if there is a rdma device available for this connection. */
779 /* called for connect and listen */
780 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
781 {
782         /* PNET table look up: search active ib_device and port
783          * within same PNETID that also contains the ethernet device
784          * used for the internal TCP socket
785          */
786         smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
787         if (!ini->check_smcrv2 && !ini->ib_dev)
788                 return SMC_CLC_DECL_NOSMCRDEV;
789         if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
790                 return SMC_CLC_DECL_NOSMCRDEV;
791         return 0;
792 }
793
794 /* check if there is an ISM device available for this connection. */
795 /* called for connect and listen */
796 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
797 {
798         /* Find ISM device with same PNETID as connecting interface  */
799         smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
800         if (!ini->ism_dev[0])
801                 return SMC_CLC_DECL_NOSMCDDEV;
802         else
803                 ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
804         return 0;
805 }
806
807 /* is chid unique for the ism devices that are already determined? */
808 static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
809                                            int cnt)
810 {
811         int i = (!ini->ism_dev[0]) ? 1 : 0;
812
813         for (; i < cnt; i++)
814                 if (ini->ism_chid[i] == chid)
815                         return false;
816         return true;
817 }
818
819 /* determine possible V2 ISM devices (either without PNETID or with PNETID plus
820  * PNETID matching net_device)
821  */
822 static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
823                                        struct smc_init_info *ini)
824 {
825         int rc = SMC_CLC_DECL_NOSMCDDEV;
826         struct smcd_dev *smcd;
827         int i = 1;
828         u16 chid;
829
830         if (smcd_indicated(ini->smc_type_v1))
831                 rc = 0;         /* already initialized for V1 */
832         mutex_lock(&smcd_dev_list.mutex);
833         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
834                 if (smcd->going_away || smcd == ini->ism_dev[0])
835                         continue;
836                 chid = smc_ism_get_chid(smcd);
837                 if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
838                         continue;
839                 if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
840                     smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
841                         ini->ism_dev[i] = smcd;
842                         ini->ism_chid[i] = chid;
843                         ini->is_smcd = true;
844                         rc = 0;
845                         i++;
846                         if (i > SMC_MAX_ISM_DEVS)
847                                 break;
848                 }
849         }
850         mutex_unlock(&smcd_dev_list.mutex);
851         ini->ism_offered_cnt = i - 1;
852         if (!ini->ism_dev[0] && !ini->ism_dev[1])
853                 ini->smcd_version = 0;
854
855         return rc;
856 }
857
858 /* Check for VLAN ID and register it on ISM device just for CLC handshake */
859 static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
860                                       struct smc_init_info *ini)
861 {
862         if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
863                 return SMC_CLC_DECL_ISMVLANERR;
864         return 0;
865 }
866
867 static int smc_find_proposal_devices(struct smc_sock *smc,
868                                      struct smc_init_info *ini)
869 {
870         int rc = 0;
871
872         /* check if there is an ism device available */
873         if (!(ini->smcd_version & SMC_V1) ||
874             smc_find_ism_device(smc, ini) ||
875             smc_connect_ism_vlan_setup(smc, ini))
876                 ini->smcd_version &= ~SMC_V1;
877         /* else ISM V1 is supported for this connection */
878
879         /* check if there is an rdma device available */
880         if (!(ini->smcr_version & SMC_V1) ||
881             smc_find_rdma_device(smc, ini))
882                 ini->smcr_version &= ~SMC_V1;
883         /* else RDMA is supported for this connection */
884
885         ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
886                                               ini->smcr_version & SMC_V1);
887
888         /* check if there is an ism v2 device available */
889         if (!(ini->smcd_version & SMC_V2) ||
890             !smc_ism_is_v2_capable() ||
891             smc_find_ism_v2_device_clnt(smc, ini))
892                 ini->smcd_version &= ~SMC_V2;
893
894         /* check if there is an rdma v2 device available */
895         ini->check_smcrv2 = true;
896         ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
897         if (!(ini->smcr_version & SMC_V2) ||
898             smc->clcsock->sk->sk_family != AF_INET ||
899             !smc_clc_ueid_count() ||
900             smc_find_rdma_device(smc, ini))
901                 ini->smcr_version &= ~SMC_V2;
902         ini->check_smcrv2 = false;
903
904         ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
905                                               ini->smcr_version & SMC_V2);
906
907         /* if neither ISM nor RDMA are supported, fallback */
908         if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
909                 rc = SMC_CLC_DECL_NOSMCDEV;
910
911         return rc;
912 }
913
914 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
915  * used, the VLAN ID will be registered again during the connection setup.
916  */
917 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
918                                         struct smc_init_info *ini)
919 {
920         if (!smcd_indicated(ini->smc_type_v1))
921                 return 0;
922         if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
923                 return SMC_CLC_DECL_CNFERR;
924         return 0;
925 }
926
/* Size of the buffer used to receive a CLC Accept message: the V2
 * accept/confirm message plus the first contact extension and the message
 * trailer.
 */
#define SMC_CLC_MAX_ACCEPT_LEN \
        (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
         sizeof(struct smc_clc_first_contact_ext) + \
         sizeof(struct smc_clc_msg_trail))
931
932 /* CLC handshake during connect */
933 static int smc_connect_clc(struct smc_sock *smc,
934                            struct smc_clc_msg_accept_confirm_v2 *aclc2,
935                            struct smc_init_info *ini)
936 {
937         int rc = 0;
938
939         /* do inband token exchange */
940         rc = smc_clc_send_proposal(smc, ini);
941         if (rc)
942                 return rc;
943         /* receive SMC Accept CLC message */
944         return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
945                                 SMC_CLC_ACCEPT, CLC_WAIT_TIME);
946 }
947
948 void smc_fill_gid_list(struct smc_link_group *lgr,
949                        struct smc_gidlist *gidlist,
950                        struct smc_ib_device *known_dev, u8 *known_gid)
951 {
952         struct smc_init_info *alt_ini = NULL;
953
954         memset(gidlist, 0, sizeof(*gidlist));
955         memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);
956
957         alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
958         if (!alt_ini)
959                 goto out;
960
961         alt_ini->vlan_id = lgr->vlan_id;
962         alt_ini->check_smcrv2 = true;
963         alt_ini->smcrv2.saddr = lgr->saddr;
964         smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);
965
966         if (!alt_ini->smcrv2.ib_dev_v2)
967                 goto out;
968
969         memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
970                SMC_GID_SIZE);
971
972 out:
973         kfree(alt_ini);
974 }
975
976 static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
977                                        struct smc_clc_msg_accept_confirm *aclc,
978                                        struct smc_init_info *ini)
979 {
980         struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
981                 (struct smc_clc_msg_accept_confirm_v2 *)aclc;
982         struct smc_clc_first_contact_ext *fce =
983                 (struct smc_clc_first_contact_ext *)
984                         (((u8 *)clc_v2) + sizeof(*clc_v2));
985
986         if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
987                 return 0;
988
989         if (fce->v2_direct) {
990                 memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
991                 ini->smcrv2.uses_gateway = false;
992         } else {
993                 if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
994                                       smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
995                                       ini->smcrv2.nexthop_mac,
996                                       &ini->smcrv2.uses_gateway))
997                         return SMC_CLC_DECL_NOROUTE;
998                 if (!ini->smcrv2.uses_gateway) {
999                         /* mismatch: peer claims indirect, but its direct */
1000                         return SMC_CLC_DECL_NOINDIRECT;
1001                 }
1002         }
1003         return 0;
1004 }
1005
1006 /* setup for RDMA connection of client */
1007 static int smc_connect_rdma(struct smc_sock *smc,
1008                             struct smc_clc_msg_accept_confirm *aclc,
1009                             struct smc_init_info *ini)
1010 {
1011         int i, reason_code = 0;
1012         struct smc_link *link;
1013         u8 *eid = NULL;
1014
1015         ini->is_smcd = false;
1016         ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
1017         ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1018         memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
1019         memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
1020         memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
1021
1022         reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
1023         if (reason_code)
1024                 return reason_code;
1025
1026         mutex_lock(&smc_client_lgr_pending);
1027         reason_code = smc_conn_create(smc, ini);
1028         if (reason_code) {
1029                 mutex_unlock(&smc_client_lgr_pending);
1030                 return reason_code;
1031         }
1032
1033         smc_conn_save_peer_info(smc, aclc);
1034
1035         if (ini->first_contact_local) {
1036                 link = smc->conn.lnk;
1037         } else {
1038                 /* set link that was assigned by server */
1039                 link = NULL;
1040                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1041                         struct smc_link *l = &smc->conn.lgr->lnk[i];
1042
1043                         if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
1044                             !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
1045                                     SMC_GID_SIZE) &&
1046                             (aclc->hdr.version > SMC_V1 ||
1047                              !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
1048                                      sizeof(l->peer_mac)))) {
1049                                 link = l;
1050                                 break;
1051                         }
1052                 }
1053                 if (!link) {
1054                         reason_code = SMC_CLC_DECL_NOSRVLINK;
1055                         goto connect_abort;
1056                 }
1057                 smc_switch_link_and_count(&smc->conn, link);
1058         }
1059
1060         /* create send buffer and rmb */
1061         if (smc_buf_create(smc, false)) {
1062                 reason_code = SMC_CLC_DECL_MEM;
1063                 goto connect_abort;
1064         }
1065
1066         if (ini->first_contact_local)
1067                 smc_link_save_peer_info(link, aclc, ini);
1068
1069         if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
1070                 reason_code = SMC_CLC_DECL_ERR_RTOK;
1071                 goto connect_abort;
1072         }
1073
1074         smc_close_init(smc);
1075         smc_rx_init(smc);
1076
1077         if (ini->first_contact_local) {
1078                 if (smc_ib_ready_link(link)) {
1079                         reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1080                         goto connect_abort;
1081                 }
1082         } else {
1083                 if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
1084                         reason_code = SMC_CLC_DECL_ERR_REGRMB;
1085                         goto connect_abort;
1086                 }
1087         }
1088         smc_rmb_sync_sg_for_device(&smc->conn);
1089
1090         if (aclc->hdr.version > SMC_V1) {
1091                 struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1092                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1093
1094                 eid = clc_v2->r1.eid;
1095                 if (ini->first_contact_local)
1096                         smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
1097                                           link->smcibdev, link->gid);
1098         }
1099
1100         reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
1101                                            aclc->hdr.version, eid, ini);
1102         if (reason_code)
1103                 goto connect_abort;
1104
1105         smc_tx_init(smc);
1106
1107         if (ini->first_contact_local) {
1108                 /* QP confirmation over RoCE fabric */
1109                 smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
1110                 reason_code = smcr_clnt_conf_first_link(smc);
1111                 smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
1112                 if (reason_code)
1113                         goto connect_abort;
1114         }
1115         mutex_unlock(&smc_client_lgr_pending);
1116
1117         smc_copy_sock_settings_to_clc(smc);
1118         smc->connect_nonblock = 0;
1119         if (smc->sk.sk_state == SMC_INIT)
1120                 smc->sk.sk_state = SMC_ACTIVE;
1121
1122         return 0;
1123 connect_abort:
1124         smc_conn_abort(smc, ini->first_contact_local);
1125         mutex_unlock(&smc_client_lgr_pending);
1126         smc->connect_nonblock = 0;
1127
1128         return reason_code;
1129 }
1130
1131 /* The server has chosen one of the proposed ISM devices for the communication.
1132  * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
1133  */
1134 static int
1135 smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
1136                                struct smc_init_info *ini)
1137 {
1138         int i;
1139
1140         for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
1141                 if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
1142                         ini->ism_selected = i;
1143                         return 0;
1144                 }
1145         }
1146
1147         return -EPROTO;
1148 }
1149
1150 /* setup for ISM connection of client */
1151 static int smc_connect_ism(struct smc_sock *smc,
1152                            struct smc_clc_msg_accept_confirm *aclc,
1153                            struct smc_init_info *ini)
1154 {
1155         u8 *eid = NULL;
1156         int rc = 0;
1157
1158         ini->is_smcd = true;
1159         ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
1160
1161         if (aclc->hdr.version == SMC_V2) {
1162                 struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
1163                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1164
1165                 rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
1166                 if (rc)
1167                         return rc;
1168         }
1169         ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
1170
1171         /* there is only one lgr role for SMC-D; use server lock */
1172         mutex_lock(&smc_server_lgr_pending);
1173         rc = smc_conn_create(smc, ini);
1174         if (rc) {
1175                 mutex_unlock(&smc_server_lgr_pending);
1176                 return rc;
1177         }
1178
1179         /* Create send and receive buffers */
1180         rc = smc_buf_create(smc, true);
1181         if (rc) {
1182                 rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
1183                 goto connect_abort;
1184         }
1185
1186         smc_conn_save_peer_info(smc, aclc);
1187         smc_close_init(smc);
1188         smc_rx_init(smc);
1189         smc_tx_init(smc);
1190
1191         if (aclc->hdr.version > SMC_V1) {
1192                 struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
1193                         (struct smc_clc_msg_accept_confirm_v2 *)aclc;
1194
1195                 eid = clc_v2->d1.eid;
1196         }
1197
1198         rc = smc_clc_send_confirm(smc, ini->first_contact_local,
1199                                   aclc->hdr.version, eid, NULL);
1200         if (rc)
1201                 goto connect_abort;
1202         mutex_unlock(&smc_server_lgr_pending);
1203
1204         smc_copy_sock_settings_to_clc(smc);
1205         smc->connect_nonblock = 0;
1206         if (smc->sk.sk_state == SMC_INIT)
1207                 smc->sk.sk_state = SMC_ACTIVE;
1208
1209         return 0;
1210 connect_abort:
1211         smc_conn_abort(smc, ini->first_contact_local);
1212         mutex_unlock(&smc_server_lgr_pending);
1213         smc->connect_nonblock = 0;
1214
1215         return rc;
1216 }
1217
1218 /* check if received accept type and version matches a proposed one */
1219 static int smc_connect_check_aclc(struct smc_init_info *ini,
1220                                   struct smc_clc_msg_accept_confirm *aclc)
1221 {
1222         if (aclc->hdr.typev1 != SMC_TYPE_R &&
1223             aclc->hdr.typev1 != SMC_TYPE_D)
1224                 return SMC_CLC_DECL_MODEUNSUPP;
1225
1226         if (aclc->hdr.version >= SMC_V2) {
1227                 if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1228                      !smcr_indicated(ini->smc_type_v2)) ||
1229                     (aclc->hdr.typev1 == SMC_TYPE_D &&
1230                      !smcd_indicated(ini->smc_type_v2)))
1231                         return SMC_CLC_DECL_MODEUNSUPP;
1232         } else {
1233                 if ((aclc->hdr.typev1 == SMC_TYPE_R &&
1234                      !smcr_indicated(ini->smc_type_v1)) ||
1235                     (aclc->hdr.typev1 == SMC_TYPE_D &&
1236                      !smcd_indicated(ini->smc_type_v1)))
1237                         return SMC_CLC_DECL_MODEUNSUPP;
1238         }
1239
1240         return 0;
1241 }
1242
1243 /* perform steps before actually connecting */
1244 static int __smc_connect(struct smc_sock *smc)
1245 {
1246         u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
1247         struct smc_clc_msg_accept_confirm_v2 *aclc2;
1248         struct smc_clc_msg_accept_confirm *aclc;
1249         struct smc_init_info *ini = NULL;
1250         u8 *buf = NULL;
1251         int rc = 0;
1252
1253         if (smc->use_fallback)
1254                 return smc_connect_fallback(smc, smc->fallback_rsn);
1255
1256         /* if peer has not signalled SMC-capability, fall back */
1257         if (!tcp_sk(smc->clcsock->sk)->syn_smc)
1258                 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
1259
1260         /* IPSec connections opt out of SMC optimizations */
1261         if (using_ipsec(smc))
1262                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
1263                                                     version);
1264
1265         ini = kzalloc(sizeof(*ini), GFP_KERNEL);
1266         if (!ini)
1267                 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
1268                                                     version);
1269
1270         ini->smcd_version = SMC_V1 | SMC_V2;
1271         ini->smcr_version = SMC_V1 | SMC_V2;
1272         ini->smc_type_v1 = SMC_TYPE_B;
1273         ini->smc_type_v2 = SMC_TYPE_B;
1274
1275         /* get vlan id from IP device */
1276         if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
1277                 ini->smcd_version &= ~SMC_V1;
1278                 ini->smcr_version = 0;
1279                 ini->smc_type_v1 = SMC_TYPE_N;
1280                 if (!ini->smcd_version) {
1281                         rc = SMC_CLC_DECL_GETVLANERR;
1282                         goto fallback;
1283                 }
1284         }
1285
1286         rc = smc_find_proposal_devices(smc, ini);
1287         if (rc)
1288                 goto fallback;
1289
1290         buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
1291         if (!buf) {
1292                 rc = SMC_CLC_DECL_MEM;
1293                 goto fallback;
1294         }
1295         aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
1296         aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
1297
1298         /* perform CLC handshake */
1299         rc = smc_connect_clc(smc, aclc2, ini);
1300         if (rc)
1301                 goto vlan_cleanup;
1302
1303         /* check if smc modes and versions of CLC proposal and accept match */
1304         rc = smc_connect_check_aclc(ini, aclc);
1305         version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
1306         if (rc)
1307                 goto vlan_cleanup;
1308
1309         /* depending on previous steps, connect using rdma or ism */
1310         if (aclc->hdr.typev1 == SMC_TYPE_R) {
1311                 ini->smcr_version = version;
1312                 rc = smc_connect_rdma(smc, aclc, ini);
1313         } else if (aclc->hdr.typev1 == SMC_TYPE_D) {
1314                 ini->smcd_version = version;
1315                 rc = smc_connect_ism(smc, aclc, ini);
1316         }
1317         if (rc)
1318                 goto vlan_cleanup;
1319
1320         SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
1321         smc_connect_ism_vlan_cleanup(smc, ini);
1322         kfree(buf);
1323         kfree(ini);
1324         return 0;
1325
1326 vlan_cleanup:
1327         smc_connect_ism_vlan_cleanup(smc, ini);
1328         kfree(buf);
1329 fallback:
1330         kfree(ini);
1331         return smc_connect_decline_fallback(smc, rc, version);
1332 }
1333
1334 static void smc_connect_work(struct work_struct *work)
1335 {
1336         struct smc_sock *smc = container_of(work, struct smc_sock,
1337                                             connect_work);
1338         long timeo = smc->sk.sk_sndtimeo;
1339         int rc = 0;
1340
1341         if (!timeo)
1342                 timeo = MAX_SCHEDULE_TIMEOUT;
1343         lock_sock(smc->clcsock->sk);
1344         if (smc->clcsock->sk->sk_err) {
1345                 smc->sk.sk_err = smc->clcsock->sk->sk_err;
1346         } else if ((1 << smc->clcsock->sk->sk_state) &
1347                                         (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
1348                 rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
1349                 if ((rc == -EPIPE) &&
1350                     ((1 << smc->clcsock->sk->sk_state) &
1351                                         (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
1352                         rc = 0;
1353         }
1354         release_sock(smc->clcsock->sk);
1355         lock_sock(&smc->sk);
1356         if (rc != 0 || smc->sk.sk_err) {
1357                 smc->sk.sk_state = SMC_CLOSED;
1358                 if (rc == -EPIPE || rc == -EAGAIN)
1359                         smc->sk.sk_err = EPIPE;
1360                 else if (signal_pending(current))
1361                         smc->sk.sk_err = -sock_intr_errno(timeo);
1362                 sock_put(&smc->sk); /* passive closing */
1363                 goto out;
1364         }
1365
1366         rc = __smc_connect(smc);
1367         if (rc < 0)
1368                 smc->sk.sk_err = -rc;
1369
1370 out:
1371         if (!sock_flag(&smc->sk, SOCK_DEAD)) {
1372                 if (smc->sk.sk_err) {
1373                         smc->sk.sk_state_change(&smc->sk);
1374                 } else { /* allow polling before and after fallback decision */
1375                         smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
1376                         smc->sk.sk_write_space(&smc->sk);
1377                 }
1378         }
1379         release_sock(&smc->sk);
1380 }
1381
1382 static int smc_connect(struct socket *sock, struct sockaddr *addr,
1383                        int alen, int flags)
1384 {
1385         struct sock *sk = sock->sk;
1386         struct smc_sock *smc;
1387         int rc = -EINVAL;
1388
1389         smc = smc_sk(sk);
1390
1391         /* separate smc parameter checking to be safe */
1392         if (alen < sizeof(addr->sa_family))
1393                 goto out_err;
1394         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
1395                 goto out_err;
1396
1397         lock_sock(sk);
1398         switch (sk->sk_state) {
1399         default:
1400                 goto out;
1401         case SMC_ACTIVE:
1402                 rc = -EISCONN;
1403                 goto out;
1404         case SMC_INIT:
1405                 break;
1406         }
1407
1408         smc_copy_sock_settings_to_clc(smc);
1409         tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1410         if (smc->connect_nonblock) {
1411                 rc = -EALREADY;
1412                 goto out;
1413         }
1414         rc = kernel_connect(smc->clcsock, addr, alen, flags);
1415         if (rc && rc != -EINPROGRESS)
1416                 goto out;
1417
1418         sock_hold(&smc->sk); /* sock put in passive closing */
1419         if (smc->use_fallback)
1420                 goto out;
1421         if (flags & O_NONBLOCK) {
1422                 if (queue_work(smc_hs_wq, &smc->connect_work))
1423                         smc->connect_nonblock = 1;
1424                 rc = -EINPROGRESS;
1425         } else {
1426                 rc = __smc_connect(smc);
1427                 if (rc < 0)
1428                         goto out;
1429                 else
1430                         rc = 0; /* success cases including fallback */
1431         }
1432
1433 out:
1434         release_sock(sk);
1435 out_err:
1436         return rc;
1437 }
1438
1439 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
1440 {
1441         struct socket *new_clcsock = NULL;
1442         struct sock *lsk = &lsmc->sk;
1443         struct sock *new_sk;
1444         int rc = -EINVAL;
1445
1446         release_sock(lsk);
1447         new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
1448         if (!new_sk) {
1449                 rc = -ENOMEM;
1450                 lsk->sk_err = ENOMEM;
1451                 *new_smc = NULL;
1452                 lock_sock(lsk);
1453                 goto out;
1454         }
1455         *new_smc = smc_sk(new_sk);
1456
1457         mutex_lock(&lsmc->clcsock_release_lock);
1458         if (lsmc->clcsock)
1459                 rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
1460         mutex_unlock(&lsmc->clcsock_release_lock);
1461         lock_sock(lsk);
1462         if  (rc < 0 && rc != -EAGAIN)
1463                 lsk->sk_err = -rc;
1464         if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
1465                 new_sk->sk_prot->unhash(new_sk);
1466                 if (new_clcsock)
1467                         sock_release(new_clcsock);
1468                 new_sk->sk_state = SMC_CLOSED;
1469                 sock_set_flag(new_sk, SOCK_DEAD);
1470                 sock_put(new_sk); /* final */
1471                 *new_smc = NULL;
1472                 goto out;
1473         }
1474
1475         /* new clcsock has inherited the smc listen-specific sk_data_ready
1476          * function; switch it back to the original sk_data_ready function
1477          */
1478         new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
1479         (*new_smc)->clcsock = new_clcsock;
1480 out:
1481         return rc;
1482 }
1483
1484 /* add a just created sock to the accept queue of the listen sock as
1485  * candidate for a following socket accept call from user space
1486  */
1487 static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
1488 {
1489         struct smc_sock *par = smc_sk(parent);
1490
1491         sock_hold(sk); /* sock_put in smc_accept_unlink () */
1492         spin_lock(&par->accept_q_lock);
1493         list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
1494         spin_unlock(&par->accept_q_lock);
1495         sk_acceptq_added(parent);
1496 }
1497
1498 /* remove a socket from the accept queue of its parental listening socket */
1499 static void smc_accept_unlink(struct sock *sk)
1500 {
1501         struct smc_sock *par = smc_sk(sk)->listen_smc;
1502
1503         spin_lock(&par->accept_q_lock);
1504         list_del_init(&smc_sk(sk)->accept_q);
1505         spin_unlock(&par->accept_q_lock);
1506         sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
1507         sock_put(sk); /* sock_hold in smc_accept_enqueue */
1508 }
1509
1510 /* remove a sock from the accept queue to bind it to a new socket created
1511  * for a socket accept call from user space
1512  */
1513 struct sock *smc_accept_dequeue(struct sock *parent,
1514                                 struct socket *new_sock)
1515 {
1516         struct smc_sock *isk, *n;
1517         struct sock *new_sk;
1518
1519         list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
1520                 new_sk = (struct sock *)isk;
1521
1522                 smc_accept_unlink(new_sk);
1523                 if (new_sk->sk_state == SMC_CLOSED) {
1524                         new_sk->sk_prot->unhash(new_sk);
1525                         if (isk->clcsock) {
1526                                 sock_release(isk->clcsock);
1527                                 isk->clcsock = NULL;
1528                         }
1529                         sock_put(new_sk); /* final */
1530                         continue;
1531                 }
1532                 if (new_sock) {
1533                         sock_graft(new_sk, new_sock);
1534                         if (isk->use_fallback) {
1535                                 smc_sk(new_sk)->clcsock->file = new_sock->file;
1536                                 isk->clcsock->file->private_data = isk->clcsock;
1537                         }
1538                 }
1539                 return new_sk;
1540         }
1541         return NULL;
1542 }
1543
1544 /* clean up for a created but never accepted sock */
1545 void smc_close_non_accepted(struct sock *sk)
1546 {
1547         struct smc_sock *smc = smc_sk(sk);
1548
1549         sock_hold(sk); /* sock_put below */
1550         lock_sock(sk);
1551         if (!sk->sk_lingertime)
1552                 /* wait for peer closing */
1553                 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
1554         __smc_release(smc);
1555         release_sock(sk);
1556         sock_put(sk); /* sock_hold above */
1557         sock_put(sk); /* final sock_put */
1558 }
1559
1560 static int smcr_serv_conf_first_link(struct smc_sock *smc)
1561 {
1562         struct smc_link *link = smc->conn.lnk;
1563         struct smc_llc_qentry *qentry;
1564         int rc;
1565
1566         if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
1567                 return SMC_CLC_DECL_ERR_REGRMB;
1568
1569         /* send CONFIRM LINK request to client over the RoCE fabric */
1570         rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
1571         if (rc < 0)
1572                 return SMC_CLC_DECL_TIMEOUT_CL;
1573
1574         /* receive CONFIRM LINK response from client over the RoCE fabric */
1575         qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
1576                               SMC_LLC_CONFIRM_LINK);
1577         if (!qentry) {
1578                 struct smc_clc_msg_decline dclc;
1579
1580                 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1581                                       SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1582                 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
1583         }
1584         smc_llc_save_peer_uid(qentry);
1585         rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
1586         smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
1587         if (rc)
1588                 return SMC_CLC_DECL_RMBE_EC;
1589
1590         /* confirm_rkey is implicit on 1st contact */
1591         smc->conn.rmb_desc->is_conf_rkey = true;
1592
1593         smc_llc_link_active(link);
1594         smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
1595
1596         /* initial contact - try to establish second link */
1597         smc_llc_srv_add_link(link, NULL);
1598         return 0;
1599 }
1600
1601 /* listen worker: finish */
1602 static void smc_listen_out(struct smc_sock *new_smc)
1603 {
1604         struct smc_sock *lsmc = new_smc->listen_smc;
1605         struct sock *newsmcsk = &new_smc->sk;
1606
1607         if (lsmc->sk.sk_state == SMC_LISTEN) {
1608                 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1609                 smc_accept_enqueue(&lsmc->sk, newsmcsk);
1610                 release_sock(&lsmc->sk);
1611         } else { /* no longer listening */
1612                 smc_close_non_accepted(newsmcsk);
1613         }
1614
1615         /* Wake up accept */
1616         lsmc->sk.sk_data_ready(&lsmc->sk);
1617         sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1618 }
1619
1620 /* listen worker: finish in state connected */
1621 static void smc_listen_out_connected(struct smc_sock *new_smc)
1622 {
1623         struct sock *newsmcsk = &new_smc->sk;
1624
1625         sk_refcnt_debug_inc(newsmcsk);
1626         if (newsmcsk->sk_state == SMC_INIT)
1627                 newsmcsk->sk_state = SMC_ACTIVE;
1628
1629         smc_listen_out(new_smc);
1630 }
1631
1632 /* listen worker: finish in error state */
1633 static void smc_listen_out_err(struct smc_sock *new_smc)
1634 {
1635         struct sock *newsmcsk = &new_smc->sk;
1636         struct net *net = sock_net(newsmcsk);
1637
1638         this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
1639         if (newsmcsk->sk_state == SMC_INIT)
1640                 sock_put(&new_smc->sk); /* passive closing */
1641         newsmcsk->sk_state = SMC_CLOSED;
1642
1643         smc_listen_out(new_smc);
1644 }
1645
1646 /* listen worker: decline and fall back if possible */
1647 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1648                                int local_first, u8 version)
1649 {
1650         /* RDMA setup failed, switch back to TCP */
1651         smc_conn_abort(new_smc, local_first);
1652         if (reason_code < 0 ||
1653             smc_switch_to_fallback(new_smc, reason_code)) {
1654                 /* error, no fallback possible */
1655                 smc_listen_out_err(new_smc);
1656                 return;
1657         }
1658         if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1659                 if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
1660                         smc_listen_out_err(new_smc);
1661                         return;
1662                 }
1663         }
1664         smc_listen_out_connected(new_smc);
1665 }
1666
1667 /* listen worker: version checking */
1668 static int smc_listen_v2_check(struct smc_sock *new_smc,
1669                                struct smc_clc_msg_proposal *pclc,
1670                                struct smc_init_info *ini)
1671 {
1672         struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
1673         struct smc_clc_v2_extension *pclc_v2_ext;
1674         int rc = SMC_CLC_DECL_PEERNOSMC;
1675
1676         ini->smc_type_v1 = pclc->hdr.typev1;
1677         ini->smc_type_v2 = pclc->hdr.typev2;
1678         ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1679         ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
1680         if (pclc->hdr.version > SMC_V1) {
1681                 if (smcd_indicated(ini->smc_type_v2))
1682                         ini->smcd_version |= SMC_V2;
1683                 if (smcr_indicated(ini->smc_type_v2))
1684                         ini->smcr_version |= SMC_V2;
1685         }
1686         if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
1687                 rc = SMC_CLC_DECL_PEERNOSMC;
1688                 goto out;
1689         }
1690         pclc_v2_ext = smc_get_clc_v2_ext(pclc);
1691         if (!pclc_v2_ext) {
1692                 ini->smcd_version &= ~SMC_V2;
1693                 ini->smcr_version &= ~SMC_V2;
1694                 rc = SMC_CLC_DECL_NOV2EXT;
1695                 goto out;
1696         }
1697         pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
1698         if (ini->smcd_version & SMC_V2) {
1699                 if (!smc_ism_is_v2_capable()) {
1700                         ini->smcd_version &= ~SMC_V2;
1701                         rc = SMC_CLC_DECL_NOISM2SUPP;
1702                 } else if (!pclc_smcd_v2_ext) {
1703                         ini->smcd_version &= ~SMC_V2;
1704                         rc = SMC_CLC_DECL_NOV2DEXT;
1705                 } else if (!pclc_v2_ext->hdr.eid_cnt &&
1706                            !pclc_v2_ext->hdr.flag.seid) {
1707                         ini->smcd_version &= ~SMC_V2;
1708                         rc = SMC_CLC_DECL_NOUEID;
1709                 }
1710         }
1711         if (ini->smcr_version & SMC_V2) {
1712                 if (!pclc_v2_ext->hdr.eid_cnt) {
1713                         ini->smcr_version &= ~SMC_V2;
1714                         rc = SMC_CLC_DECL_NOUEID;
1715                 }
1716         }
1717
1718 out:
1719         if (!ini->smcd_version && !ini->smcr_version)
1720                 return rc;
1721
1722         return 0;
1723 }
1724
1725 /* listen worker: check prefixes */
1726 static int smc_listen_prfx_check(struct smc_sock *new_smc,
1727                                  struct smc_clc_msg_proposal *pclc)
1728 {
1729         struct smc_clc_msg_proposal_prefix *pclc_prfx;
1730         struct socket *newclcsock = new_smc->clcsock;
1731
1732         if (pclc->hdr.typev1 == SMC_TYPE_N)
1733                 return 0;
1734         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1735         if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1736                 return SMC_CLC_DECL_DIFFPREFIX;
1737
1738         return 0;
1739 }
1740
1741 /* listen worker: initialize connection and buffers */
1742 static int smc_listen_rdma_init(struct smc_sock *new_smc,
1743                                 struct smc_init_info *ini)
1744 {
1745         int rc;
1746
1747         /* allocate connection / link group */
1748         rc = smc_conn_create(new_smc, ini);
1749         if (rc)
1750                 return rc;
1751
1752         /* create send buffer and rmb */
1753         if (smc_buf_create(new_smc, false))
1754                 return SMC_CLC_DECL_MEM;
1755
1756         return 0;
1757 }
1758
1759 /* listen worker: initialize connection and buffers for SMC-D */
1760 static int smc_listen_ism_init(struct smc_sock *new_smc,
1761                                struct smc_init_info *ini)
1762 {
1763         int rc;
1764
1765         rc = smc_conn_create(new_smc, ini);
1766         if (rc)
1767                 return rc;
1768
1769         /* Create send and receive buffers */
1770         rc = smc_buf_create(new_smc, true);
1771         if (rc) {
1772                 smc_conn_abort(new_smc, ini->first_contact_local);
1773                 return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
1774                                          SMC_CLC_DECL_MEM;
1775         }
1776
1777         return 0;
1778 }
1779
1780 static bool smc_is_already_selected(struct smcd_dev *smcd,
1781                                     struct smc_init_info *ini,
1782                                     int matches)
1783 {
1784         int i;
1785
1786         for (i = 0; i < matches; i++)
1787                 if (smcd == ini->ism_dev[i])
1788                         return true;
1789
1790         return false;
1791 }
1792
1793 /* check for ISM devices matching proposed ISM devices */
1794 static void smc_check_ism_v2_match(struct smc_init_info *ini,
1795                                    u16 proposed_chid, u64 proposed_gid,
1796                                    unsigned int *matches)
1797 {
1798         struct smcd_dev *smcd;
1799
1800         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1801                 if (smcd->going_away)
1802                         continue;
1803                 if (smc_is_already_selected(smcd, ini, *matches))
1804                         continue;
1805                 if (smc_ism_get_chid(smcd) == proposed_chid &&
1806                     !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
1807                         ini->ism_peer_gid[*matches] = proposed_gid;
1808                         ini->ism_dev[*matches] = smcd;
1809                         (*matches)++;
1810                         break;
1811                 }
1812         }
1813 }
1814
1815 static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
1816 {
1817         if (!ini->rc)
1818                 ini->rc = rc;
1819 }
1820
1821 static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
1822                                         struct smc_clc_msg_proposal *pclc,
1823                                         struct smc_init_info *ini)
1824 {
1825         struct smc_clc_smcd_v2_extension *smcd_v2_ext;
1826         struct smc_clc_v2_extension *smc_v2_ext;
1827         struct smc_clc_msg_smcd *pclc_smcd;
1828         unsigned int matches = 0;
1829         u8 smcd_version;
1830         u8 *eid = NULL;
1831         int i, rc;
1832
1833         if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
1834                 goto not_found;
1835
1836         pclc_smcd = smc_get_clc_msg_smcd(pclc);
1837         smc_v2_ext = smc_get_clc_v2_ext(pclc);
1838         smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
1839
1840         mutex_lock(&smcd_dev_list.mutex);
1841         if (pclc_smcd->ism.chid)
1842                 /* check for ISM device matching proposed native ISM device */
1843                 smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
1844                                        ntohll(pclc_smcd->ism.gid), &matches);
1845         for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
1846                 /* check for ISM devices matching proposed non-native ISM
1847                  * devices
1848                  */
1849                 smc_check_ism_v2_match(ini,
1850                                        ntohs(smcd_v2_ext->gidchid[i - 1].chid),
1851                                        ntohll(smcd_v2_ext->gidchid[i - 1].gid),
1852                                        &matches);
1853         }
1854         mutex_unlock(&smcd_dev_list.mutex);
1855
1856         if (!ini->ism_dev[0]) {
1857                 smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
1858                 goto not_found;
1859         }
1860
1861         smc_ism_get_system_eid(&eid);
1862         if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
1863                                smcd_v2_ext->system_eid, eid))
1864                 goto not_found;
1865
1866         /* separate - outside the smcd_dev_list.lock */
1867         smcd_version = ini->smcd_version;
1868         for (i = 0; i < matches; i++) {
1869                 ini->smcd_version = SMC_V2;
1870                 ini->is_smcd = true;
1871                 ini->ism_selected = i;
1872                 rc = smc_listen_ism_init(new_smc, ini);
1873                 if (rc) {
1874                         smc_find_ism_store_rc(rc, ini);
1875                         /* try next active ISM device */
1876                         continue;
1877                 }
1878                 return; /* matching and usable V2 ISM device found */
1879         }
1880         /* no V2 ISM device could be initialized */
1881         ini->smcd_version = smcd_version;       /* restore original value */
1882         ini->negotiated_eid[0] = 0;
1883
1884 not_found:
1885         ini->smcd_version &= ~SMC_V2;
1886         ini->ism_dev[0] = NULL;
1887         ini->is_smcd = false;
1888 }
1889
1890 static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
1891                                         struct smc_clc_msg_proposal *pclc,
1892                                         struct smc_init_info *ini)
1893 {
1894         struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
1895         int rc = 0;
1896
1897         /* check if ISM V1 is available */
1898         if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
1899                 goto not_found;
1900         ini->is_smcd = true; /* prepare ISM check */
1901         ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
1902         rc = smc_find_ism_device(new_smc, ini);
1903         if (rc)
1904                 goto not_found;
1905         ini->ism_selected = 0;
1906         rc = smc_listen_ism_init(new_smc, ini);
1907         if (!rc)
1908                 return;         /* V1 ISM device found */
1909
1910 not_found:
1911         smc_find_ism_store_rc(rc, ini);
1912         ini->smcd_version &= ~SMC_V1;
1913         ini->ism_dev[0] = NULL;
1914         ini->is_smcd = false;
1915 }
1916
1917 /* listen worker: register buffers */
1918 static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
1919 {
1920         struct smc_connection *conn = &new_smc->conn;
1921
1922         if (!local_first) {
1923                 if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
1924                         return SMC_CLC_DECL_ERR_REGRMB;
1925         }
1926         smc_rmb_sync_sg_for_device(&new_smc->conn);
1927
1928         return 0;
1929 }
1930
1931 static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
1932                                          struct smc_clc_msg_proposal *pclc,
1933                                          struct smc_init_info *ini)
1934 {
1935         struct smc_clc_v2_extension *smc_v2_ext;
1936         u8 smcr_version;
1937         int rc;
1938
1939         if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
1940                 goto not_found;
1941
1942         smc_v2_ext = smc_get_clc_v2_ext(pclc);
1943         if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
1944                 goto not_found;
1945
1946         /* prepare RDMA check */
1947         memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
1948         memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
1949         memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
1950         ini->check_smcrv2 = true;
1951         ini->smcrv2.clc_sk = new_smc->clcsock->sk;
1952         ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
1953         ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
1954         rc = smc_find_rdma_device(new_smc, ini);
1955         if (rc) {
1956                 smc_find_ism_store_rc(rc, ini);
1957                 goto not_found;
1958         }
1959         if (!ini->smcrv2.uses_gateway)
1960                 memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
1961
1962         smcr_version = ini->smcr_version;
1963         ini->smcr_version = SMC_V2;
1964         rc = smc_listen_rdma_init(new_smc, ini);
1965         if (!rc)
1966                 rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
1967         if (!rc)
1968                 return;
1969         ini->smcr_version = smcr_version;
1970         smc_find_ism_store_rc(rc, ini);
1971
1972 not_found:
1973         ini->smcr_version &= ~SMC_V2;
1974         ini->check_smcrv2 = false;
1975 }
1976
1977 static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
1978                                         struct smc_clc_msg_proposal *pclc,
1979                                         struct smc_init_info *ini)
1980 {
1981         int rc;
1982
1983         if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
1984                 return SMC_CLC_DECL_NOSMCDEV;
1985
1986         /* prepare RDMA check */
1987         memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
1988         memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
1989         memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
1990         rc = smc_find_rdma_device(new_smc, ini);
1991         if (rc) {
1992                 /* no RDMA device found */
1993                 return SMC_CLC_DECL_NOSMCDEV;
1994         }
1995         rc = smc_listen_rdma_init(new_smc, ini);
1996         if (rc)
1997                 return rc;
1998         return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
1999 }
2000
2001 /* determine the local device matching to proposal */
2002 static int smc_listen_find_device(struct smc_sock *new_smc,
2003                                   struct smc_clc_msg_proposal *pclc,
2004                                   struct smc_init_info *ini)
2005 {
2006         int prfx_rc;
2007
2008         /* check for ISM device matching V2 proposed device */
2009         smc_find_ism_v2_device_serv(new_smc, pclc, ini);
2010         if (ini->ism_dev[0])
2011                 return 0;
2012
2013         /* check for matching IP prefix and subnet length (V1) */
2014         prfx_rc = smc_listen_prfx_check(new_smc, pclc);
2015         if (prfx_rc)
2016                 smc_find_ism_store_rc(prfx_rc, ini);
2017
2018         /* get vlan id from IP device */
2019         if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
2020                 return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
2021
2022         /* check for ISM device matching V1 proposed device */
2023         if (!prfx_rc)
2024                 smc_find_ism_v1_device_serv(new_smc, pclc, ini);
2025         if (ini->ism_dev[0])
2026                 return 0;
2027
2028         if (!smcr_indicated(pclc->hdr.typev1) &&
2029             !smcr_indicated(pclc->hdr.typev2))
2030                 /* skip RDMA and decline */
2031                 return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
2032
2033         /* check if RDMA V2 is available */
2034         smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
2035         if (ini->smcrv2.ib_dev_v2)
2036                 return 0;
2037
2038         /* check if RDMA V1 is available */
2039         if (!prfx_rc) {
2040                 int rc;
2041
2042                 rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
2043                 smc_find_ism_store_rc(rc, ini);
2044                 return (!rc) ? 0 : ini->rc;
2045         }
2046         return SMC_CLC_DECL_NOSMCDEV;
2047 }
2048
2049 /* listen worker: finish RDMA setup */
2050 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
2051                                   struct smc_clc_msg_accept_confirm *cclc,
2052                                   bool local_first,
2053                                   struct smc_init_info *ini)
2054 {
2055         struct smc_link *link = new_smc->conn.lnk;
2056         int reason_code = 0;
2057
2058         if (local_first)
2059                 smc_link_save_peer_info(link, cclc, ini);
2060
2061         if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
2062                 return SMC_CLC_DECL_ERR_RTOK;
2063
2064         if (local_first) {
2065                 if (smc_ib_ready_link(link))
2066                         return SMC_CLC_DECL_ERR_RDYLNK;
2067                 /* QP confirmation over RoCE fabric */
2068                 smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
2069                 reason_code = smcr_serv_conf_first_link(new_smc);
2070                 smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
2071         }
2072         return reason_code;
2073 }
2074
2075 /* setup for connection of server */
2076 static void smc_listen_work(struct work_struct *work)
2077 {
2078         struct smc_sock *new_smc = container_of(work, struct smc_sock,
2079                                                 smc_listen_work);
2080         struct socket *newclcsock = new_smc->clcsock;
2081         struct smc_clc_msg_accept_confirm *cclc;
2082         struct smc_clc_msg_proposal_area *buf;
2083         struct smc_clc_msg_proposal *pclc;
2084         struct smc_init_info *ini = NULL;
2085         u8 proposal_version = SMC_V1;
2086         u8 accept_version;
2087         int rc = 0;
2088
2089         if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
2090                 return smc_listen_out_err(new_smc);
2091
2092         if (new_smc->use_fallback) {
2093                 smc_listen_out_connected(new_smc);
2094                 return;
2095         }
2096
2097         /* check if peer is smc capable */
2098         if (!tcp_sk(newclcsock->sk)->syn_smc) {
2099                 rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
2100                 if (rc)
2101                         smc_listen_out_err(new_smc);
2102                 else
2103                         smc_listen_out_connected(new_smc);
2104                 return;
2105         }
2106
2107         /* do inband token exchange -
2108          * wait for and receive SMC Proposal CLC message
2109          */
2110         buf = kzalloc(sizeof(*buf), GFP_KERNEL);
2111         if (!buf) {
2112                 rc = SMC_CLC_DECL_MEM;
2113                 goto out_decl;
2114         }
2115         pclc = (struct smc_clc_msg_proposal *)buf;
2116         rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
2117                               SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
2118         if (rc)
2119                 goto out_decl;
2120
2121         if (pclc->hdr.version > SMC_V1)
2122                 proposal_version = SMC_V2;
2123
2124         /* IPSec connections opt out of SMC optimizations */
2125         if (using_ipsec(new_smc)) {
2126                 rc = SMC_CLC_DECL_IPSEC;
2127                 goto out_decl;
2128         }
2129
2130         ini = kzalloc(sizeof(*ini), GFP_KERNEL);
2131         if (!ini) {
2132                 rc = SMC_CLC_DECL_MEM;
2133                 goto out_decl;
2134         }
2135
2136         /* initial version checking */
2137         rc = smc_listen_v2_check(new_smc, pclc, ini);
2138         if (rc)
2139                 goto out_decl;
2140
2141         mutex_lock(&smc_server_lgr_pending);
2142         smc_close_init(new_smc);
2143         smc_rx_init(new_smc);
2144         smc_tx_init(new_smc);
2145
2146         /* determine ISM or RoCE device used for connection */
2147         rc = smc_listen_find_device(new_smc, pclc, ini);
2148         if (rc)
2149                 goto out_unlock;
2150
2151         /* send SMC Accept CLC message */
2152         accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
2153         rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
2154                                  accept_version, ini->negotiated_eid);
2155         if (rc)
2156                 goto out_unlock;
2157
2158         /* SMC-D does not need this lock any more */
2159         if (ini->is_smcd)
2160                 mutex_unlock(&smc_server_lgr_pending);
2161
2162         /* receive SMC Confirm CLC message */
2163         memset(buf, 0, sizeof(*buf));
2164         cclc = (struct smc_clc_msg_accept_confirm *)buf;
2165         rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
2166                               SMC_CLC_CONFIRM, CLC_WAIT_TIME);
2167         if (rc) {
2168                 if (!ini->is_smcd)
2169                         goto out_unlock;
2170                 goto out_decl;
2171         }
2172
2173         /* finish worker */
2174         if (!ini->is_smcd) {
2175                 rc = smc_listen_rdma_finish(new_smc, cclc,
2176                                             ini->first_contact_local, ini);
2177                 if (rc)
2178                         goto out_unlock;
2179                 mutex_unlock(&smc_server_lgr_pending);
2180         }
2181         smc_conn_save_peer_info(new_smc, cclc);
2182         smc_listen_out_connected(new_smc);
2183         SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
2184         goto out_free;
2185
2186 out_unlock:
2187         mutex_unlock(&smc_server_lgr_pending);
2188 out_decl:
2189         smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
2190                            proposal_version);
2191 out_free:
2192         kfree(ini);
2193         kfree(buf);
2194 }
2195
2196 static void smc_tcp_listen_work(struct work_struct *work)
2197 {
2198         struct smc_sock *lsmc = container_of(work, struct smc_sock,
2199                                              tcp_listen_work);
2200         struct sock *lsk = &lsmc->sk;
2201         struct smc_sock *new_smc;
2202         int rc = 0;
2203
2204         lock_sock(lsk);
2205         while (lsk->sk_state == SMC_LISTEN) {
2206                 rc = smc_clcsock_accept(lsmc, &new_smc);
2207                 if (rc) /* clcsock accept queue empty or error */
2208                         goto out;
2209                 if (!new_smc)
2210                         continue;
2211
2212                 new_smc->listen_smc = lsmc;
2213                 new_smc->use_fallback = lsmc->use_fallback;
2214                 new_smc->fallback_rsn = lsmc->fallback_rsn;
2215                 sock_hold(lsk); /* sock_put in smc_listen_work */
2216                 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
2217                 smc_copy_sock_settings_to_smc(new_smc);
2218                 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
2219                 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
2220                 sock_hold(&new_smc->sk); /* sock_put in passive closing */
2221                 if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
2222                         sock_put(&new_smc->sk);
2223         }
2224
2225 out:
2226         release_sock(lsk);
2227         sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
2228 }
2229
2230 static void smc_clcsock_data_ready(struct sock *listen_clcsock)
2231 {
2232         struct smc_sock *lsmc =
2233                 smc_clcsock_user_data(listen_clcsock);
2234
2235         if (!lsmc)
2236                 return;
2237         lsmc->clcsk_data_ready(listen_clcsock);
2238         if (lsmc->sk.sk_state == SMC_LISTEN) {
2239                 sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
2240                 if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work))
2241                         sock_put(&lsmc->sk);
2242         }
2243 }
2244
2245 static int smc_listen(struct socket *sock, int backlog)
2246 {
2247         struct sock *sk = sock->sk;
2248         struct smc_sock *smc;
2249         int rc;
2250
2251         smc = smc_sk(sk);
2252         lock_sock(sk);
2253
2254         rc = -EINVAL;
2255         if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
2256             smc->connect_nonblock)
2257                 goto out;
2258
2259         rc = 0;
2260         if (sk->sk_state == SMC_LISTEN) {
2261                 sk->sk_max_ack_backlog = backlog;
2262                 goto out;
2263         }
2264         /* some socket options are handled in core, so we could not apply
2265          * them to the clc socket -- copy smc socket options to clc socket
2266          */
2267         smc_copy_sock_settings_to_clc(smc);
2268         if (!smc->use_fallback)
2269                 tcp_sk(smc->clcsock->sk)->syn_smc = 1;
2270
2271         /* save original sk_data_ready function and establish
2272          * smc-specific sk_data_ready function
2273          */
2274         smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
2275         smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
2276         smc->clcsock->sk->sk_user_data =
2277                 (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
2278         rc = kernel_listen(smc->clcsock, backlog);
2279         if (rc) {
2280                 smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
2281                 goto out;
2282         }
2283         sk->sk_max_ack_backlog = backlog;
2284         sk->sk_ack_backlog = 0;
2285         sk->sk_state = SMC_LISTEN;
2286
2287 out:
2288         release_sock(sk);
2289         return rc;
2290 }
2291
2292 static int smc_accept(struct socket *sock, struct socket *new_sock,
2293                       int flags, bool kern)
2294 {
2295         struct sock *sk = sock->sk, *nsk;
2296         DECLARE_WAITQUEUE(wait, current);
2297         struct smc_sock *lsmc;
2298         long timeo;
2299         int rc = 0;
2300
2301         lsmc = smc_sk(sk);
2302         sock_hold(sk); /* sock_put below */
2303         lock_sock(sk);
2304
2305         if (lsmc->sk.sk_state != SMC_LISTEN) {
2306                 rc = -EINVAL;
2307                 release_sock(sk);
2308                 goto out;
2309         }
2310
2311         /* Wait for an incoming connection */
2312         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
2313         add_wait_queue_exclusive(sk_sleep(sk), &wait);
2314         while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
2315                 set_current_state(TASK_INTERRUPTIBLE);
2316                 if (!timeo) {
2317                         rc = -EAGAIN;
2318                         break;
2319                 }
2320                 release_sock(sk);
2321                 timeo = schedule_timeout(timeo);
2322                 /* wakeup by sk_data_ready in smc_listen_work() */
2323                 sched_annotate_sleep();
2324                 lock_sock(sk);
2325                 if (signal_pending(current)) {
2326                         rc = sock_intr_errno(timeo);
2327                         break;
2328                 }
2329         }
2330         set_current_state(TASK_RUNNING);
2331         remove_wait_queue(sk_sleep(sk), &wait);
2332
2333         if (!rc)
2334                 rc = sock_error(nsk);
2335         release_sock(sk);
2336         if (rc)
2337                 goto out;
2338
2339         if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
2340                 /* wait till data arrives on the socket */
2341                 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
2342                                                                 MSEC_PER_SEC);
2343                 if (smc_sk(nsk)->use_fallback) {
2344                         struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
2345
2346                         lock_sock(clcsk);
2347                         if (skb_queue_empty(&clcsk->sk_receive_queue))
2348                                 sk_wait_data(clcsk, &timeo, NULL);
2349                         release_sock(clcsk);
2350                 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
2351                         lock_sock(nsk);
2352                         smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
2353                         release_sock(nsk);
2354                 }
2355         }
2356
2357 out:
2358         sock_put(sk); /* sock_hold above */
2359         return rc;
2360 }
2361
2362 static int smc_getname(struct socket *sock, struct sockaddr *addr,
2363                        int peer)
2364 {
2365         struct smc_sock *smc;
2366
2367         if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
2368             (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
2369                 return -ENOTCONN;
2370
2371         smc = smc_sk(sock->sk);
2372
2373         return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
2374 }
2375
2376 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2377 {
2378         struct sock *sk = sock->sk;
2379         struct smc_sock *smc;
2380         int rc = -EPIPE;
2381
2382         smc = smc_sk(sk);
2383         lock_sock(sk);
2384         if ((sk->sk_state != SMC_ACTIVE) &&
2385             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2386             (sk->sk_state != SMC_INIT))
2387                 goto out;
2388
2389         if (msg->msg_flags & MSG_FASTOPEN) {
2390                 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2391                         rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2392                         if (rc)
2393                                 goto out;
2394                 } else {
2395                         rc = -EINVAL;
2396                         goto out;
2397                 }
2398         }
2399
2400         if (smc->use_fallback) {
2401                 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
2402         } else {
2403                 rc = smc_tx_sendmsg(smc, msg, len);
2404                 SMC_STAT_TX_PAYLOAD(smc, len, rc);
2405         }
2406 out:
2407         release_sock(sk);
2408         return rc;
2409 }
2410
2411 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
2412                        int flags)
2413 {
2414         struct sock *sk = sock->sk;
2415         struct smc_sock *smc;
2416         int rc = -ENOTCONN;
2417
2418         smc = smc_sk(sk);
2419         lock_sock(sk);
2420         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2421                 /* socket was connected before, no more data to read */
2422                 rc = 0;
2423                 goto out;
2424         }
2425         if ((sk->sk_state == SMC_INIT) ||
2426             (sk->sk_state == SMC_LISTEN) ||
2427             (sk->sk_state == SMC_CLOSED))
2428                 goto out;
2429
2430         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
2431                 rc = 0;
2432                 goto out;
2433         }
2434
2435         if (smc->use_fallback) {
2436                 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
2437         } else {
2438                 msg->msg_namelen = 0;
2439                 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
2440                 SMC_STAT_RX_PAYLOAD(smc, rc, rc);
2441         }
2442
2443 out:
2444         release_sock(sk);
2445         return rc;
2446 }
2447
2448 static __poll_t smc_accept_poll(struct sock *parent)
2449 {
2450         struct smc_sock *isk = smc_sk(parent);
2451         __poll_t mask = 0;
2452
2453         spin_lock(&isk->accept_q_lock);
2454         if (!list_empty(&isk->accept_q))
2455                 mask = EPOLLIN | EPOLLRDNORM;
2456         spin_unlock(&isk->accept_q_lock);
2457
2458         return mask;
2459 }
2460
2461 static __poll_t smc_poll(struct file *file, struct socket *sock,
2462                              poll_table *wait)
2463 {
2464         struct sock *sk = sock->sk;
2465         struct smc_sock *smc;
2466         __poll_t mask = 0;
2467
2468         if (!sk)
2469                 return EPOLLNVAL;
2470
2471         smc = smc_sk(sock->sk);
2472         if (smc->use_fallback) {
2473                 /* delegate to CLC child sock */
2474                 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
2475                 sk->sk_err = smc->clcsock->sk->sk_err;
2476         } else {
2477                 if (sk->sk_state != SMC_CLOSED)
2478                         sock_poll_wait(file, sock, wait);
2479                 if (sk->sk_err)
2480                         mask |= EPOLLERR;
2481                 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
2482                     (sk->sk_state == SMC_CLOSED))
2483                         mask |= EPOLLHUP;
2484                 if (sk->sk_state == SMC_LISTEN) {
2485                         /* woken up by sk_data_ready in smc_listen_work() */
2486                         mask |= smc_accept_poll(sk);
2487                 } else if (smc->use_fallback) { /* as result of connect_work()*/
2488                         mask |= smc->clcsock->ops->poll(file, smc->clcsock,
2489                                                            wait);
2490                         sk->sk_err = smc->clcsock->sk->sk_err;
2491                 } else {
2492                         if ((sk->sk_state != SMC_INIT &&
2493                              atomic_read(&smc->conn.sndbuf_space)) ||
2494                             sk->sk_shutdown & SEND_SHUTDOWN) {
2495                                 mask |= EPOLLOUT | EPOLLWRNORM;
2496                         } else {
2497                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2498                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2499                         }
2500                         if (atomic_read(&smc->conn.bytes_to_rcv))
2501                                 mask |= EPOLLIN | EPOLLRDNORM;
2502                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2503                                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2504                         if (sk->sk_state == SMC_APPCLOSEWAIT1)
2505                                 mask |= EPOLLIN;
2506                         if (smc->conn.urg_state == SMC_URG_VALID)
2507                                 mask |= EPOLLPRI;
2508                 }
2509         }
2510
2511         return mask;
2512 }
2513
2514 static int smc_shutdown(struct socket *sock, int how)
2515 {
2516         struct sock *sk = sock->sk;
2517         bool do_shutdown = true;
2518         struct smc_sock *smc;
2519         int rc = -EINVAL;
2520         int old_state;
2521         int rc1 = 0;
2522
2523         smc = smc_sk(sk);
2524
2525         if ((how < SHUT_RD) || (how > SHUT_RDWR))
2526                 return rc;
2527
2528         lock_sock(sk);
2529
2530         rc = -ENOTCONN;
2531         if ((sk->sk_state != SMC_ACTIVE) &&
2532             (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
2533             (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
2534             (sk->sk_state != SMC_APPCLOSEWAIT1) &&
2535             (sk->sk_state != SMC_APPCLOSEWAIT2) &&
2536             (sk->sk_state != SMC_APPFINCLOSEWAIT))
2537                 goto out;
2538         if (smc->use_fallback) {
2539                 rc = kernel_sock_shutdown(smc->clcsock, how);
2540                 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
2541                 if (sk->sk_shutdown == SHUTDOWN_MASK)
2542                         sk->sk_state = SMC_CLOSED;
2543                 goto out;
2544         }
2545         switch (how) {
2546         case SHUT_RDWR:         /* shutdown in both directions */
2547                 old_state = sk->sk_state;
2548                 rc = smc_close_active(smc);
2549                 if (old_state == SMC_ACTIVE &&
2550                     sk->sk_state == SMC_PEERCLOSEWAIT1)
2551                         do_shutdown = false;
2552                 break;
2553         case SHUT_WR:
2554                 rc = smc_close_shutdown_write(smc);
2555                 break;
2556         case SHUT_RD:
2557                 rc = 0;
2558                 /* nothing more to do because peer is not involved */
2559                 break;
2560         }
2561         if (do_shutdown && smc->clcsock)
2562                 rc1 = kernel_sock_shutdown(smc->clcsock, how);
2563         /* map sock_shutdown_cmd constants to sk_shutdown value range */
2564         sk->sk_shutdown |= how + 1;
2565
2566 out:
2567         release_sock(sk);
2568         return rc ? rc : rc1;
2569 }
2570
2571 static int smc_setsockopt(struct socket *sock, int level, int optname,
2572                           sockptr_t optval, unsigned int optlen)
2573 {
2574         struct sock *sk = sock->sk;
2575         struct smc_sock *smc;
2576         int val, rc;
2577
2578         if (level == SOL_TCP && optname == TCP_ULP)
2579                 return -EOPNOTSUPP;
2580
2581         smc = smc_sk(sk);
2582
2583         /* generic setsockopts reaching us here always apply to the
2584          * CLC socket
2585          */
2586         mutex_lock(&smc->clcsock_release_lock);
2587         if (!smc->clcsock) {
2588                 mutex_unlock(&smc->clcsock_release_lock);
2589                 return -EBADF;
2590         }
2591         if (unlikely(!smc->clcsock->ops->setsockopt))
2592                 rc = -EOPNOTSUPP;
2593         else
2594                 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
2595                                                    optval, optlen);
2596         if (smc->clcsock->sk->sk_err) {
2597                 sk->sk_err = smc->clcsock->sk->sk_err;
2598                 sk_error_report(sk);
2599         }
2600         mutex_unlock(&smc->clcsock_release_lock);
2601
2602         if (optlen < sizeof(int))
2603                 return -EINVAL;
2604         if (copy_from_sockptr(&val, optval, sizeof(int)))
2605                 return -EFAULT;
2606
2607         lock_sock(sk);
2608         if (rc || smc->use_fallback)
2609                 goto out;
2610         switch (optname) {
2611         case TCP_FASTOPEN:
2612         case TCP_FASTOPEN_CONNECT:
2613         case TCP_FASTOPEN_KEY:
2614         case TCP_FASTOPEN_NO_COOKIE:
2615                 /* option not supported by SMC */
2616                 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
2617                         rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
2618                 } else {
2619                         rc = -EINVAL;
2620                 }
2621                 break;
2622         case TCP_NODELAY:
2623                 if (sk->sk_state != SMC_INIT &&
2624                     sk->sk_state != SMC_LISTEN &&
2625                     sk->sk_state != SMC_CLOSED) {
2626                         if (val) {
2627                                 SMC_STAT_INC(smc, ndly_cnt);
2628                                 mod_delayed_work(smc->conn.lgr->tx_wq,
2629                                                  &smc->conn.tx_work, 0);
2630                         }
2631                 }
2632                 break;
2633         case TCP_CORK:
2634                 if (sk->sk_state != SMC_INIT &&
2635                     sk->sk_state != SMC_LISTEN &&
2636                     sk->sk_state != SMC_CLOSED) {
2637                         if (!val) {
2638                                 SMC_STAT_INC(smc, cork_cnt);
2639                                 mod_delayed_work(smc->conn.lgr->tx_wq,
2640                                                  &smc->conn.tx_work, 0);
2641                         }
2642                 }
2643                 break;
2644         case TCP_DEFER_ACCEPT:
2645                 smc->sockopt_defer_accept = val;
2646                 break;
2647         default:
2648                 break;
2649         }
2650 out:
2651         release_sock(sk);
2652
2653         return rc;
2654 }
2655
2656 static int smc_getsockopt(struct socket *sock, int level, int optname,
2657                           char __user *optval, int __user *optlen)
2658 {
2659         struct smc_sock *smc;
2660         int rc;
2661
2662         smc = smc_sk(sock->sk);
2663         mutex_lock(&smc->clcsock_release_lock);
2664         if (!smc->clcsock) {
2665                 mutex_unlock(&smc->clcsock_release_lock);
2666                 return -EBADF;
2667         }
2668         /* socket options apply to the CLC socket */
2669         if (unlikely(!smc->clcsock->ops->getsockopt)) {
2670                 mutex_unlock(&smc->clcsock_release_lock);
2671                 return -EOPNOTSUPP;
2672         }
2673         rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
2674                                            optval, optlen);
2675         mutex_unlock(&smc->clcsock_release_lock);
2676         return rc;
2677 }
2678
2679 static int smc_ioctl(struct socket *sock, unsigned int cmd,
2680                      unsigned long arg)
2681 {
2682         union smc_host_cursor cons, urg;
2683         struct smc_connection *conn;
2684         struct smc_sock *smc;
2685         int answ;
2686
2687         smc = smc_sk(sock->sk);
2688         conn = &smc->conn;
2689         lock_sock(&smc->sk);
2690         if (smc->use_fallback) {
2691                 if (!smc->clcsock) {
2692                         release_sock(&smc->sk);
2693                         return -EBADF;
2694                 }
2695                 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
2696                 release_sock(&smc->sk);
2697                 return answ;
2698         }
2699         switch (cmd) {
2700         case SIOCINQ: /* same as FIONREAD */
2701                 if (smc->sk.sk_state == SMC_LISTEN) {
2702                         release_sock(&smc->sk);
2703                         return -EINVAL;
2704                 }
2705                 if (smc->sk.sk_state == SMC_INIT ||
2706                     smc->sk.sk_state == SMC_CLOSED)
2707                         answ = 0;
2708                 else
2709                         answ = atomic_read(&smc->conn.bytes_to_rcv);
2710                 break;
2711         case SIOCOUTQ:
2712                 /* output queue size (not send + not acked) */
2713                 if (smc->sk.sk_state == SMC_LISTEN) {
2714                         release_sock(&smc->sk);
2715                         return -EINVAL;
2716                 }
2717                 if (smc->sk.sk_state == SMC_INIT ||
2718                     smc->sk.sk_state == SMC_CLOSED)
2719                         answ = 0;
2720                 else
2721                         answ = smc->conn.sndbuf_desc->len -
2722                                         atomic_read(&smc->conn.sndbuf_space);
2723                 break;
2724         case SIOCOUTQNSD:
2725                 /* output queue size (not send only) */
2726                 if (smc->sk.sk_state == SMC_LISTEN) {
2727                         release_sock(&smc->sk);
2728                         return -EINVAL;
2729                 }
2730                 if (smc->sk.sk_state == SMC_INIT ||
2731                     smc->sk.sk_state == SMC_CLOSED)
2732                         answ = 0;
2733                 else
2734                         answ = smc_tx_prepared_sends(&smc->conn);
2735                 break;
2736         case SIOCATMARK:
2737                 if (smc->sk.sk_state == SMC_LISTEN) {
2738                         release_sock(&smc->sk);
2739                         return -EINVAL;
2740                 }
2741                 if (smc->sk.sk_state == SMC_INIT ||
2742                     smc->sk.sk_state == SMC_CLOSED) {
2743                         answ = 0;
2744                 } else {
2745                         smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
2746                         smc_curs_copy(&urg, &conn->urg_curs, conn);
2747                         answ = smc_curs_diff(conn->rmb_desc->len,
2748                                              &cons, &urg) == 1;
2749                 }
2750                 break;
2751         default:
2752                 release_sock(&smc->sk);
2753                 return -ENOIOCTLCMD;
2754         }
2755         release_sock(&smc->sk);
2756
2757         return put_user(answ, (int __user *)arg);
2758 }
2759
2760 static ssize_t smc_sendpage(struct socket *sock, struct page *page,
2761                             int offset, size_t size, int flags)
2762 {
2763         struct sock *sk = sock->sk;
2764         struct smc_sock *smc;
2765         int rc = -EPIPE;
2766
2767         smc = smc_sk(sk);
2768         lock_sock(sk);
2769         if (sk->sk_state != SMC_ACTIVE) {
2770                 release_sock(sk);
2771                 goto out;
2772         }
2773         release_sock(sk);
2774         if (smc->use_fallback) {
2775                 rc = kernel_sendpage(smc->clcsock, page, offset,
2776                                      size, flags);
2777         } else {
2778                 SMC_STAT_INC(smc, sendpage_cnt);
2779                 rc = sock_no_sendpage(sock, page, offset, size, flags);
2780         }
2781
2782 out:
2783         return rc;
2784 }
2785
2786 /* Map the affected portions of the rmbe into an spd, note the number of bytes
2787  * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
2788  * updates till whenever a respective page has been fully processed.
2789  * Note that subsequent recv() calls have to wait till all splice() processing
2790  * completed.
2791  */
2792 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
2793                                struct pipe_inode_info *pipe, size_t len,
2794                                unsigned int flags)
2795 {
2796         struct sock *sk = sock->sk;
2797         struct smc_sock *smc;
2798         int rc = -ENOTCONN;
2799
2800         smc = smc_sk(sk);
2801         lock_sock(sk);
2802         if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
2803                 /* socket was connected before, no more data to read */
2804                 rc = 0;
2805                 goto out;
2806         }
2807         if (sk->sk_state == SMC_INIT ||
2808             sk->sk_state == SMC_LISTEN ||
2809             sk->sk_state == SMC_CLOSED)
2810                 goto out;
2811
2812         if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
2813                 rc = 0;
2814                 goto out;
2815         }
2816
2817         if (smc->use_fallback) {
2818                 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
2819                                                     pipe, len, flags);
2820         } else {
2821                 if (*ppos) {
2822                         rc = -ESPIPE;
2823                         goto out;
2824                 }
2825                 if (flags & SPLICE_F_NONBLOCK)
2826                         flags = MSG_DONTWAIT;
2827                 else
2828                         flags = 0;
2829                 SMC_STAT_INC(smc, splice_cnt);
2830                 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
2831         }
2832 out:
2833         release_sock(sk);
2834
2835         return rc;
2836 }
2837
/* Socket operations installed on AF_SMC sockets by __smc_create().
 * Must look like tcp; socketpair and mmap use the generic sock_no_* stubs.
 */
static const struct proto_ops smc_sock_ops = {
	.family         = PF_SMC,
	.owner          = THIS_MODULE,
	.release        = smc_release,
	.bind           = smc_bind,
	.connect        = smc_connect,
	.socketpair     = sock_no_socketpair,
	.accept         = smc_accept,
	.getname        = smc_getname,
	.poll           = smc_poll,
	.ioctl          = smc_ioctl,
	.listen         = smc_listen,
	.shutdown       = smc_shutdown,
	.setsockopt     = smc_setsockopt,
	.getsockopt     = smc_getsockopt,
	.sendmsg        = smc_sendmsg,
	.recvmsg        = smc_recvmsg,
	.mmap           = sock_no_mmap,
	.sendpage       = smc_sendpage,
	.splice_read    = smc_splice_read,
};
2860
2861 static int __smc_create(struct net *net, struct socket *sock, int protocol,
2862                         int kern, struct socket *clcsock)
2863 {
2864         int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
2865         struct smc_sock *smc;
2866         struct sock *sk;
2867         int rc;
2868
2869         rc = -ESOCKTNOSUPPORT;
2870         if (sock->type != SOCK_STREAM)
2871                 goto out;
2872
2873         rc = -EPROTONOSUPPORT;
2874         if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
2875                 goto out;
2876
2877         rc = -ENOBUFS;
2878         sock->ops = &smc_sock_ops;
2879         sk = smc_sock_alloc(net, sock, protocol);
2880         if (!sk)
2881                 goto out;
2882
2883         /* create internal TCP socket for CLC handshake and fallback */
2884         smc = smc_sk(sk);
2885         smc->use_fallback = false; /* assume rdma capability first */
2886         smc->fallback_rsn = 0;
2887
2888         rc = 0;
2889         if (!clcsock) {
2890                 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
2891                                       &smc->clcsock);
2892                 if (rc) {
2893                         sk_common_release(sk);
2894                         goto out;
2895                 }
2896         } else {
2897                 smc->clcsock = clcsock;
2898         }
2899
2900         smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
2901         smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
2902
2903 out:
2904         return rc;
2905 }
2906
/* .create hook of the PF_SMC socket family: builds the SMC socket via
 * __smc_create() without a pre-existing CLC socket, so an internal one is
 * created.
 */
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	return __smc_create(net, sock, protocol, kern, NULL);
}
2912
/* PF_SMC protocol family description, registered with sock_register() in
 * smc_init().
 */
static const struct net_proto_family smc_sock_family_ops = {
	.family = PF_SMC,
	.owner  = THIS_MODULE,
	.create = smc_create,
};
2918
2919 static int smc_ulp_init(struct sock *sk)
2920 {
2921         struct socket *tcp = sk->sk_socket;
2922         struct net *net = sock_net(sk);
2923         struct socket *smcsock;
2924         int protocol, ret;
2925
2926         /* only TCP can be replaced */
2927         if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
2928             (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
2929                 return -ESOCKTNOSUPPORT;
2930         /* don't handle wq now */
2931         if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
2932                 return -ENOTCONN;
2933
2934         if (sk->sk_family == AF_INET)
2935                 protocol = SMCPROTO_SMC;
2936         else
2937                 protocol = SMCPROTO_SMC6;
2938
2939         smcsock = sock_alloc();
2940         if (!smcsock)
2941                 return -ENFILE;
2942
2943         smcsock->type = SOCK_STREAM;
2944         __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
2945         ret = __smc_create(net, smcsock, protocol, 1, tcp);
2946         if (ret) {
2947                 sock_release(smcsock); /* module_put() which ops won't be NULL */
2948                 return ret;
2949         }
2950
2951         /* replace tcp socket to smc */
2952         smcsock->file = tcp->file;
2953         smcsock->file->private_data = smcsock;
2954         smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
2955         smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
2956         tcp->file = NULL;
2957
2958         return ret;
2959 }
2960
2961 static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
2962                           const gfp_t priority)
2963 {
2964         struct inet_connection_sock *icsk = inet_csk(newsk);
2965
2966         /* don't inherit ulp ops to child when listen */
2967         icsk->icsk_ulp_ops = NULL;
2968 }
2969
/* TCP ULP named "smc", registered with tcp_register_ulp() in smc_init() */
static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
	.name           = "smc",
	.owner          = THIS_MODULE,
	.init           = smc_ulp_init,
	.clone          = smc_ulp_clone,
};
2976
/* pernet id slot referenced by smc_net_ops.id */
unsigned int smc_net_id;
2978
/* .init hook of smc_net_ops: delegates to smc_pnet_net_init() for @net */
static __net_init int smc_net_init(struct net *net)
{
	return smc_pnet_net_init(net);
}
2983
/* .exit hook of smc_net_ops: delegates to smc_pnet_net_exit() for @net */
static void __net_exit smc_net_exit(struct net *net)
{
	smc_pnet_net_exit(net);
}
2988
/* .init hook of smc_net_stat_ops: delegates to smc_stats_init() for @net */
static __net_init int smc_net_stat_init(struct net *net)
{
	return smc_stats_init(net);
}
2993
/* .exit hook of smc_net_stat_ops: delegates to smc_stats_exit() for @net */
static void __net_exit smc_net_stat_exit(struct net *net)
{
	smc_stats_exit(net);
}
2998
/* pernet operations for the pnet handling; requests a per-netns area of
 * sizeof(struct smc_net), identified through smc_net_id
 */
static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};
3005
/* pernet operations for the statistics; registered separately from
 * smc_net_ops in smc_init()
 */
static struct pernet_operations smc_net_stat_ops = {
	.init = smc_net_stat_init,
	.exit = smc_net_stat_exit,
};
3010
3011 static int __init smc_init(void)
3012 {
3013         int rc;
3014
3015         rc = register_pernet_subsys(&smc_net_ops);
3016         if (rc)
3017                 return rc;
3018
3019         rc = register_pernet_subsys(&smc_net_stat_ops);
3020         if (rc)
3021                 return rc;
3022
3023         smc_ism_init();
3024         smc_clc_init();
3025
3026         rc = smc_nl_init();
3027         if (rc)
3028                 goto out_pernet_subsys;
3029
3030         rc = smc_pnet_init();
3031         if (rc)
3032                 goto out_nl;
3033
3034         rc = -ENOMEM;
3035         smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
3036         if (!smc_hs_wq)
3037                 goto out_pnet;
3038
3039         smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
3040         if (!smc_close_wq)
3041                 goto out_alloc_hs_wq;
3042
3043         rc = smc_core_init();
3044         if (rc) {
3045                 pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
3046                 goto out_alloc_wqs;
3047         }
3048
3049         rc = smc_llc_init();
3050         if (rc) {
3051                 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
3052                 goto out_core;
3053         }
3054
3055         rc = smc_cdc_init();
3056         if (rc) {
3057                 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
3058                 goto out_core;
3059         }
3060
3061         rc = proto_register(&smc_proto, 1);
3062         if (rc) {
3063                 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
3064                 goto out_core;
3065         }
3066
3067         rc = proto_register(&smc_proto6, 1);
3068         if (rc) {
3069                 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
3070                 goto out_proto;
3071         }
3072
3073         rc = sock_register(&smc_sock_family_ops);
3074         if (rc) {
3075                 pr_err("%s: sock_register fails with %d\n", __func__, rc);
3076                 goto out_proto6;
3077         }
3078         INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
3079         INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
3080
3081         rc = smc_ib_register_client();
3082         if (rc) {
3083                 pr_err("%s: ib_register fails with %d\n", __func__, rc);
3084                 goto out_sock;
3085         }
3086
3087         rc = tcp_register_ulp(&smc_ulp_ops);
3088         if (rc) {
3089                 pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
3090                 goto out_ib;
3091         }
3092
3093         static_branch_enable(&tcp_have_smc);
3094         return 0;
3095
3096 out_ib:
3097         smc_ib_unregister_client();
3098 out_sock:
3099         sock_unregister(PF_SMC);
3100 out_proto6:
3101         proto_unregister(&smc_proto6);
3102 out_proto:
3103         proto_unregister(&smc_proto);
3104 out_core:
3105         smc_core_exit();
3106 out_alloc_wqs:
3107         destroy_workqueue(smc_close_wq);
3108 out_alloc_hs_wq:
3109         destroy_workqueue(smc_hs_wq);
3110 out_pnet:
3111         smc_pnet_exit();
3112 out_nl:
3113         smc_nl_exit();
3114 out_pernet_subsys:
3115         unregister_pernet_subsys(&smc_net_ops);
3116
3117         return rc;
3118 }
3119
/* Module exit: undo everything smc_init() set up. */
static void __exit smc_exit(void)
{
	/* stop new users first: disable the TCP hook, then drop ULP and family */
	static_branch_disable(&tcp_have_smc);
	tcp_unregister_ulp(&smc_ulp_ops);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	destroy_workqueue(smc_close_wq);
	destroy_workqueue(smc_hs_wq);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	smc_nl_exit();
	smc_clc_exit();
	/* pernet subsystems go last, in reverse order of registration */
	unregister_pernet_subsys(&smc_net_stat_ops);
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}
3138
/* module entry points, metadata and autoload aliases (PF_SMC, ULP "smc") */
module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");