linux/net/smc/smc_core.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting InfiniBand API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

static u32 smc_lgr_num;                 /* unique link group number */

static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
                         bool is_rmb);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        mod_delayed_work(system_wq, &lgr->free_work,
                         lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
                                                 SMC_LGR_FREE_DELAY_SERV);
}
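
/* The server delay corresponds to 10 minutes; with the definition above the
 * client waits 10 seconds longer, so a client link group never disappears
 * before its server counterpart and the two sides stay in sync.
 */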

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
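
/* The tree is keyed by alert_token_local: descend left while the stored
 * token is larger than the new one, right otherwise, mirroring what the
 * lookup side (smc_lgr_find_conn()) does. Duplicates cannot occur because
 * smc_lgr_register_conn() probes each candidate token before inserting.
 */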

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);

        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
}
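
/* nexttoken is global and may wrap after 2^32 registrations; the loop above
 * simply draws again whenever it hits the reserved value 0 or a token that
 * is still in use within this link group.
 */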

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        conn->lgr = NULL;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection and trigger lgr freeing if applicable
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;
        int reduced = 0;

        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                reduced = 1;
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
        if (!reduced || lgr->conns_num)
                return;
        smc_lgr_schedule_free_work(lgr);
}

static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        bool conns;

        spin_lock_bh(&smc_lgr_list.lock);
        if (list_empty(&lgr->list))
                goto free;
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(&smc_lgr_list.lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
        spin_unlock_bh(&smc_lgr_list.lock);
        if (!delayed_work_pending(&lgr->free_work))
                smc_lgr_free(lgr);
}
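
/* If the work got re-armed while running (e.g. another connection came and
 * went, so smc_lgr_schedule_free_work() ran again), delayed_work_pending()
 * is true and the actual freeing is left to the next expiry.
 */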

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev, u8 ibport,
                          char *peer_systemid, unsigned short vlan_id)
{
        struct smc_link_group *lgr;
        struct smc_link *lnk;
        u8 rndvec[3];
        int rc = 0;
        int i;

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = -ENOMEM;
                goto out;
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        lgr->sync_err = false;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        smc_lgr_num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        lgr->conns_all = RB_ROOT;

        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        /* initialize link */
        lnk->state = SMC_LNK_ACTIVATING;
        lnk->link_id = SMC_SINGLE_LINK;
        lnk->smcibdev = smcibdev;
        lnk->ibport = ibport;
        lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
        if (!smcibdev->initialized)
                smc_ib_setup_per_ibdev(smcibdev);
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto free_lgr;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        init_completion(&lnk->llc_confirm);
        init_completion(&lnk->llc_confirm_resp);
        init_completion(&lnk->llc_add);
        init_completion(&lnk->llc_add_resp);

        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
        spin_lock_bh(&smc_lgr_list.lock);
        list_add(&lgr->list, &smc_lgr_list.list);
        spin_unlock_bh(&smc_lgr_list.lock);
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
free_lgr:
        kfree(lgr);
out:
        return rc;
}
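
/* The error path unwinds in strict reverse order of construction: QP, then
 * PD, then link WR memory, then the lgr itself. The link group becomes
 * visible on smc_lgr_list only once every step has succeeded, so the unwind
 * needs no locking.
 */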

static void smc_buf_unuse(struct smc_connection *conn)
{
        if (conn->sndbuf_desc) {
                conn->sndbuf_desc->used = 0;
                conn->sndbuf_size = 0;
        }
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
                        conn->rmb_desc->reused = 1;
                        conn->rmb_desc->used = 0;
                        conn->rmbe_size = 0;
                } else {
                        /* buf registration failed, reuse not possible */
                        struct smc_link_group *lgr = conn->lgr;
                        struct smc_link *lnk;

                        write_lock_bh(&lgr->rmbs_lock);
                        list_del(&conn->rmb_desc->list);
                        write_unlock_bh(&lgr->rmbs_lock);

                        lnk = &lgr->lnk[SMC_SINGLE_LINK];
                        smc_buf_free(conn->rmb_desc, lnk, true);
                }
        }
}
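
/* Freeing is the exception here: normally buffers stay on the lgr lists with
 * used == 0, so a later connection of the same size class can claim them via
 * smc_buf_get_slot() without a fresh allocation and DMA mapping.
 */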

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        if (!conn->lgr)
                return;
        smc_cdc_tx_dismiss_slots(conn);
        smc_lgr_unregister_conn(conn);
        smc_buf_unuse(conn);
}

static void smc_link_clear(struct smc_link *lnk)
{
        lnk->peer_qpn = 0;
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
}

static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
                         bool is_rmb)
{
        if (is_rmb) {
                if (buf_desc->mr_rx[SMC_SINGLE_LINK])
                        smc_ib_put_memory_region(
                                        buf_desc->mr_rx[SMC_SINGLE_LINK]);
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_FROM_DEVICE);
        } else {
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
        if (buf_desc->cpu_addr)
                free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
        kfree(buf_desc);
}
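
/* Teardown mirrors smc_new_buf_create() in reverse: memory region (RMBs
 * only), DMA mapping, sg table, and finally the pages themselves.
 */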

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(buf_desc, lnk, is_rmb);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
        kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
        spin_lock_bh(&smc_lgr_list.lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        smc_lgr_forget(lgr);

        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put in close work */
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                __smc_lgr_unregister_conn(conn);
                write_unlock_bh(&lgr->conns_lock);
                if (!schedule_work(&conn->close_work))
                        sock_put(&smc->sk);
                write_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        write_unlock_bh(&lgr->conns_lock);
        wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
        smc_lgr_schedule_free_work(lgr);
}
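
/* conns_lock is dropped around schedule_work() because the close worker may
 * need locks of its own; rb_first() is re-read after re-acquiring the lock
 * since the tree can change while it is released.
 */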

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        int rc = 0;

        *vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        if (is_vlan_dev(dst->dev))
                *vlan_id = vlan_dev_vlan_id(dst->dev);

out_rel:
        dst_release(dst);
out:
        return rc;
}

/* determine the link gid matching the vlan id of the link group */
static int smc_link_determine_gid(struct smc_link_group *lgr)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
        struct ib_gid_attr gattr;
        union ib_gid gid;
        int i;

        if (!lgr->vlan_id) {
                lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
                return 0;
        }

        for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
             i++) {
                if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
                                 &gattr))
                        continue;
                if (gattr.ndev) {
                        if (is_vlan_dev(gattr.ndev) &&
                            vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
                                lnk->gid = gid;
                                dev_put(gattr.ndev);
                                return 0;
                        }
                        dev_put(gattr.ndev);
                }
        }
        return -ENODEV;
}
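
/* Without a VLAN the port's default GID is sufficient; with a VLAN, the GID
 * table is scanned for the entry bound to a VLAN net_device carrying the
 * lgr's vlan_id, so that RoCE traffic for this link is tagged correctly.
 */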

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr;
        unsigned short vlan_id;
        enum smc_lgr_role role;
        int local_contact = SMC_FIRST_CONTACT;
        int rc = 0;

        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
        if (rc)
                return rc;

        if ((role == SMC_CLNT) && srv_first_contact)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry(lgr, &smc_lgr_list.list, list) {
                write_lock_bh(&lgr->conns_lock);
                if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
                            SMC_SYSTEMID_LEN) &&
                    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
                            SMC_GID_SIZE) &&
                    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
                            sizeof(lcl->mac)) &&
                    !lgr->sync_err &&
                    (lgr->role == role) &&
                    (lgr->vlan_id == vlan_id) &&
                    ((role == SMC_CLNT) ||
                     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
                        /* link group found */
                        local_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        smc_lgr_register_conn(conn); /* add smc conn to lgr */
                        write_unlock_bh(&lgr->conns_lock);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        if (role == SMC_CLNT && !srv_first_contact &&
            (local_contact == SMC_FIRST_CONTACT)) {
                /* Server reuses a link group, but client wants to start
                 * a new one; send an out_of_sync decline, reason:
                 * synchronization error
                 */
                return -ENOLINK;
        }

create:
        if (local_contact == SMC_FIRST_CONTACT) {
                rc = smc_lgr_create(smc, smcibdev, ibport,
                                    lcl->id_for_peer, vlan_id);
                if (rc)
                        goto out;
                smc_lgr_register_conn(conn); /* add smc conn to lgr */
                rc = smc_link_determine_gid(conn->lgr);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc ? rc : local_contact;
}
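
/* A server-side link group is reused only while it has room for another
 * connection (SMC_RMBS_PER_LGR_MAX); the client always follows the server's
 * choice, which is why disagreement about first contact ends in -ENOLINK.
 */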

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static inline
struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
                                      int compressed_bufsize,
                                      rwlock_t *lock,
                                      struct list_head *buf_list)
{
        struct smc_buf_desc *buf_slot;

        read_lock_bh(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
                        read_unlock_bh(lock);
                        return buf_slot;
                }
        }
        read_unlock_bh(lock);
        return NULL;
}
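
/* cmpxchg() claims a slot atomically, so several tasks holding the read lock
 * at the same time can never grab the same buffer; the write lock is needed
 * only when the list itself is modified.
 */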

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
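
/* Example: with a 64 KiB RMB the 10% term is 6553 bytes, and the limit is
 * whichever is smaller, that or SOCK_MIN_SNDBUF / 2. Capping at half of
 * SOCK_MIN_SNDBUF keeps window updates flowing even for very large RMBs.
 */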

static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
                                               bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        struct smc_link *lnk;
        int rc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        buf_desc->cpu_addr =
                (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
                                         __GFP_NOMEMALLOC |
                                         __GFP_NORETRY | __GFP_ZERO,
                                         get_order(bufsize));
        if (!buf_desc->cpu_addr) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->order = get_order(bufsize);

        /* build the sg table from the pages */
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
                            GFP_KERNEL);
        if (rc) {
                smc_buf_free(buf_desc, lnk, is_rmb);
                return ERR_PTR(rc);
        }
        sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
                   buf_desc->cpu_addr, bufsize);

        /* map sg table to DMA address */
        rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != 1)  {
                smc_buf_free(buf_desc, lnk, is_rmb);
                return ERR_PTR(-EAGAIN);
        }

        /* create a new memory region for the RMB */
        if (is_rmb) {
                rc = smc_ib_get_memory_region(lnk->roce_pd,
                                              IB_ACCESS_REMOTE_WRITE |
                                              IB_ACCESS_LOCAL_WRITE,
                                              buf_desc);
                if (rc) {
                        smc_buf_free(buf_desc, lnk, is_rmb);
                        return ERR_PTR(rc);
                }
        }

        return buf_desc;
}
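
/* Buffers are physically contiguous (__get_free_pages()) and mapped as a
 * single-entry sg table because the protocol expects one DMA address per
 * buffer; __GFP_NORETRY keeps a failed high-order allocation cheap, and the
 * resulting -EAGAIN tells __smc_buf_create() to retry with a smaller size.
 */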

static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        int sk_buf_size;
        rwlock_t *lock;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;

        for (bufsize_short = smc_compress_bufsize(sk_buf_size);
             bufsize_short >= 0; bufsize_short--) {

                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
                if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
                        continue;

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
                }

                buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
                        continue;

                buf_desc->used = 1;
                write_lock_bh(lock);
                list_add(&buf_desc->list, buf_list);
                write_unlock_bh(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return -ENOMEM;

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size = bufsize;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
        } else {
                conn->sndbuf_desc = buf_desc;
                conn->sndbuf_size = bufsize;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}
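
/* Example of the size descent, assuming the usual compressed encoding where
 * smc_uncompress_bufsize(n) == 1 << (n + 14): a 128 KiB request starts at
 * bufsize_short 3 and, on -EAGAIN from smc_new_buf_create(), retries with
 * 64 KiB, 32 KiB and finally 16 KiB before failing with -ENOMEM.
 */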

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, false);
        if (rc)
                return rc;
        /* create rmb */
        rc = __smc_buf_create(smc, true);
        if (rc)
                smc_buf_free(smc->conn.sndbuf_desc,
                             &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
        return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return i;
}
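
/* A duplicate announcement of an already known (rkey, vaddr) pair returns
 * the existing index instead of reserving a new slot, so at most
 * SMC_RMBS_PER_LGR_MAX distinct rtokens can exist per link group.
 */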

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
                        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}