linux/net/smc/smc_pnet.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  Generic netlink support functions to configure an SMC-R PNET table
   6 *
   7 *  Copyright IBM Corp. 2016
   8 *
   9 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/list.h>
  14#include <linux/ctype.h>
  15#include <net/netlink.h>
  16#include <net/genetlink.h>
  17
  18#include <uapi/linux/if.h>
  19#include <uapi/linux/smc.h>
  20
  21#include <rdma/ib_verbs.h>
  22
  23#include <net/netns/generic.h>
  24#include "smc_netns.h"
  25
  26#include "smc_pnet.h"
  27#include "smc_ib.h"
  28#include "smc_ism.h"
  29#include "smc_core.h"
  30
  31#define SMC_ASCII_BLANK 32
  32
  33static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
  34
  35static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
  36        [SMC_PNETID_NAME] = {
  37                .type = NLA_NUL_STRING,
  38                .len = SMC_MAX_PNETID_LEN
  39        },
  40        [SMC_PNETID_ETHNAME] = {
  41                .type = NLA_NUL_STRING,
  42                .len = IFNAMSIZ - 1
  43        },
  44        [SMC_PNETID_IBNAME] = {
  45                .type = NLA_NUL_STRING,
  46                .len = IB_DEVICE_NAME_MAX - 1
  47        },
  48        [SMC_PNETID_IBPORT] = { .type = NLA_U8 }
  49};
  50
  51static struct genl_family smc_pnet_nl_family;
  52
  53/**
  54 * struct smc_user_pnetentry - pnet identifier name entry for/from user
  55 * @list: List node.
  56 * @pnet_name: Pnet identifier name
  57 * @ndev: pointer to network device.
  58 * @smcibdev: Pointer to IB device.
  59 * @ib_port: Port of IB device.
  60 * @smcd_dev: Pointer to smcd device.
  61 */
  62struct smc_user_pnetentry {
  63        struct list_head list;
  64        char pnet_name[SMC_MAX_PNETID_LEN + 1];
  65        struct net_device *ndev;
  66        struct smc_ib_device *smcibdev;
  67        u8 ib_port;
  68        struct smcd_dev *smcd_dev;
  69};
  70
  71/* pnet entry stored in pnet table */
  72struct smc_pnetentry {
  73        struct list_head list;
  74        char pnet_name[SMC_MAX_PNETID_LEN + 1];
  75        struct net_device *ndev;
  76};
  77
  78/* Check if two given pnetids match */
  79static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
  80{
  81        int i;
  82
  83        for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
  84                if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
  85                    (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
  86                        break;
  87                if (pnetid1[i] != pnetid2[i])
  88                        return false;
  89        }
  90        return true;
  91}
  92
  93/* Remove a pnetid from the pnet table.
  94 */
  95static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
  96{
  97        struct smc_pnetentry *pnetelem, *tmp_pe;
  98        struct smc_pnettable *pnettable;
  99        struct smc_ib_device *ibdev;
 100        struct smcd_dev *smcd_dev;
 101        struct smc_net *sn;
 102        int rc = -ENOENT;
 103        int ibport;
 104
 105        /* get pnettable for namespace */
 106        sn = net_generic(net, smc_net_id);
 107        pnettable = &sn->pnettable;
 108
 109        /* remove netdevices */
 110        write_lock(&pnettable->lock);
 111        list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
 112                                 list) {
 113                if (!pnet_name ||
 114                    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
 115                        list_del(&pnetelem->list);
 116                        dev_put(pnetelem->ndev);
 117                        kfree(pnetelem);
 118                        rc = 0;
 119                }
 120        }
 121        write_unlock(&pnettable->lock);
 122
 123        /* if this is not the initial namespace, stop here */
 124        if (net != &init_net)
 125                return rc;
 126
 127        /* remove ib devices */
 128        spin_lock(&smc_ib_devices.lock);
 129        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 130                for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 131                        if (ibdev->pnetid_by_user[ibport] &&
 132                            (!pnet_name ||
 133                             smc_pnet_match(pnet_name,
 134                                            ibdev->pnetid[ibport]))) {
 135                                memset(ibdev->pnetid[ibport], 0,
 136                                       SMC_MAX_PNETID_LEN);
 137                                ibdev->pnetid_by_user[ibport] = false;
 138                                rc = 0;
 139                        }
 140                }
 141        }
 142        spin_unlock(&smc_ib_devices.lock);
 143        /* remove smcd devices */
 144        spin_lock(&smcd_dev_list.lock);
 145        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 146                if (smcd_dev->pnetid_by_user &&
 147                    (!pnet_name ||
 148                     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
 149                        memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
 150                        smcd_dev->pnetid_by_user = false;
 151                        rc = 0;
 152                }
 153        }
 154        spin_unlock(&smcd_dev_list.lock);
 155        return rc;
 156}
 157
 158/* Remove a pnet entry mentioning a given network device from the pnet table.
 159 */
 160static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 161{
 162        struct smc_pnetentry *pnetelem, *tmp_pe;
 163        struct smc_pnettable *pnettable;
 164        struct net *net = dev_net(ndev);
 165        struct smc_net *sn;
 166        int rc = -ENOENT;
 167
 168        /* get pnettable for namespace */
 169        sn = net_generic(net, smc_net_id);
 170        pnettable = &sn->pnettable;
 171
 172        write_lock(&pnettable->lock);
 173        list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
 174                if (pnetelem->ndev == ndev) {
 175                        list_del(&pnetelem->list);
 176                        dev_put(pnetelem->ndev);
 177                        kfree(pnetelem);
 178                        rc = 0;
 179                        break;
 180                }
 181        }
 182        write_unlock(&pnettable->lock);
 183        return rc;
 184}
 185
 186/* Append a pnetid to the end of the pnet table if not already on this list.
 187 */
 188static int smc_pnet_enter(struct smc_pnettable *pnettable,
 189                          struct smc_user_pnetentry *new_pnetelem)
 190{
 191        u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
 192        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 193        struct smc_pnetentry *tmp_pnetelem;
 194        struct smc_pnetentry *pnetelem;
 195        bool new_smcddev = false;
 196        struct net_device *ndev;
 197        bool new_netdev = true;
 198        bool new_ibdev = false;
 199
 200        if (new_pnetelem->smcibdev) {
 201                struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
 202                int ib_port = new_pnetelem->ib_port;
 203
 204                spin_lock(&smc_ib_devices.lock);
 205                if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
 206                        memcpy(ib_dev->pnetid[ib_port - 1],
 207                               new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 208                        ib_dev->pnetid_by_user[ib_port - 1] = true;
 209                        new_ibdev = true;
 210                }
 211                spin_unlock(&smc_ib_devices.lock);
 212        }
 213        if (new_pnetelem->smcd_dev) {
 214                struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
 215
 216                spin_lock(&smcd_dev_list.lock);
 217                if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
 218                        memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
 219                               SMC_MAX_PNETID_LEN);
 220                        smcd_dev->pnetid_by_user = true;
 221                        new_smcddev = true;
 222                }
 223                spin_unlock(&smcd_dev_list.lock);
 224        }
 225
 226        if (!new_pnetelem->ndev)
 227                return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 228
 229        /* check if (base) netdev already has a pnetid. If there is one, we do
 230         * not want to add a pnet table entry
 231         */
 232        ndev = pnet_find_base_ndev(new_pnetelem->ndev);
 233        if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 234                                    ndev_pnetid))
 235                return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 236
 237        /* add a new netdev entry to the pnet table if there isn't one */
 238        tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
 239        if (!tmp_pnetelem)
 240                return -ENOMEM;
 241        memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
 242               SMC_MAX_PNETID_LEN);
 243        tmp_pnetelem->ndev = new_pnetelem->ndev;
 244
 245        write_lock(&pnettable->lock);
 246        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 247                if (pnetelem->ndev == new_pnetelem->ndev)
 248                        new_netdev = false;
 249        }
 250        if (new_netdev) {
 251                dev_hold(tmp_pnetelem->ndev);
 252                list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
 253                write_unlock(&pnettable->lock);
 254        } else {
 255                write_unlock(&pnettable->lock);
 256                kfree(tmp_pnetelem);
 257        }
 258
 259        return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
 260}
 261
 262/* The limit for pnetid is 16 characters.
 263 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
 264 * Lower case letters are converted to upper case.
 265 * Interior blanks should not be used.
 266 */
 267static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
 268{
 269        char *bf = skip_spaces(pnet_name);
 270        size_t len = strlen(bf);
 271        char *end = bf + len;
 272
 273        if (!len)
 274                return false;
 275        while (--end >= bf && isspace(*end))
 276                ;
 277        if (end - bf >= SMC_MAX_PNETID_LEN)
 278                return false;
 279        while (bf <= end) {
 280                if (!isalnum(*bf))
 281                        return false;
 282                *pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
 283                bf++;
 284        }
 285        *pnetid = '\0';
 286        return true;
 287}
 288
 289/* Find an infiniband device by a given name. The device might not exist. */
 290static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
 291{
 292        struct smc_ib_device *ibdev;
 293
 294        spin_lock(&smc_ib_devices.lock);
 295        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 296                if (!strncmp(ibdev->ibdev->name, ib_name,
 297                             sizeof(ibdev->ibdev->name)) ||
 298                    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
 299                             IB_DEVICE_NAME_MAX - 1)) {
 300                        goto out;
 301                }
 302        }
 303        ibdev = NULL;
 304out:
 305        spin_unlock(&smc_ib_devices.lock);
 306        return ibdev;
 307}
 308
 309/* Find an smcd device by a given name. The device might not exist. */
 310static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
 311{
 312        struct smcd_dev *smcd_dev;
 313
 314        spin_lock(&smcd_dev_list.lock);
 315        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 316                if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
 317                             IB_DEVICE_NAME_MAX - 1))
 318                        goto out;
 319        }
 320        smcd_dev = NULL;
 321out:
 322        spin_unlock(&smcd_dev_list.lock);
 323        return smcd_dev;
 324}
 325
 326/* Parse the supplied netlink attributes and fill a pnetentry structure.
 327 * For ethernet and infiniband device names verify that the devices exist.
 328 */
 329static int smc_pnet_fill_entry(struct net *net,
 330                               struct smc_user_pnetentry *pnetelem,
 331                               struct nlattr *tb[])
 332{
 333        char *string, *ibname;
 334        int rc;
 335
 336        memset(pnetelem, 0, sizeof(*pnetelem));
 337        INIT_LIST_HEAD(&pnetelem->list);
 338
 339        rc = -EINVAL;
 340        if (!tb[SMC_PNETID_NAME])
 341                goto error;
 342        string = (char *)nla_data(tb[SMC_PNETID_NAME]);
 343        if (!smc_pnetid_valid(string, pnetelem->pnet_name))
 344                goto error;
 345
 346        rc = -EINVAL;
 347        if (tb[SMC_PNETID_ETHNAME]) {
 348                string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
 349                pnetelem->ndev = dev_get_by_name(net, string);
 350                if (!pnetelem->ndev)
 351                        goto error;
 352        }
 353
 354        /* if this is not the initial namespace, stop here */
 355        if (net != &init_net)
 356                return 0;
 357
 358        rc = -EINVAL;
 359        if (tb[SMC_PNETID_IBNAME]) {
 360                ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
 361                ibname = strim(ibname);
 362                pnetelem->smcibdev = smc_pnet_find_ib(ibname);
 363                pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
 364                if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
 365                        goto error;
 366                if (pnetelem->smcibdev) {
 367                        if (!tb[SMC_PNETID_IBPORT])
 368                                goto error;
 369                        pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
 370                        if (pnetelem->ib_port < 1 ||
 371                            pnetelem->ib_port > SMC_MAX_PORTS)
 372                                goto error;
 373                }
 374        }
 375
 376        return 0;
 377
 378error:
 379        if (pnetelem->ndev)
 380                dev_put(pnetelem->ndev);
 381        return rc;
 382}
 383
 384/* Convert an smc_pnetentry to a netlink attribute sequence */
 385static int smc_pnet_set_nla(struct sk_buff *msg,
 386                            struct smc_user_pnetentry *pnetelem)
 387{
 388        if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
 389                return -1;
 390        if (pnetelem->ndev) {
 391                if (nla_put_string(msg, SMC_PNETID_ETHNAME,
 392                                   pnetelem->ndev->name))
 393                        return -1;
 394        } else {
 395                if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
 396                        return -1;
 397        }
 398        if (pnetelem->smcibdev) {
 399                if (nla_put_string(msg, SMC_PNETID_IBNAME,
 400                        dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
 401                    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
 402                        return -1;
 403        } else if (pnetelem->smcd_dev) {
 404                if (nla_put_string(msg, SMC_PNETID_IBNAME,
 405                                   dev_name(&pnetelem->smcd_dev->dev)) ||
 406                    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
 407                        return -1;
 408        } else {
 409                if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
 410                    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
 411                        return -1;
 412        }
 413
 414        return 0;
 415}
 416
 417static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 418{
 419        struct net *net = genl_info_net(info);
 420        struct smc_user_pnetentry pnetelem;
 421        struct smc_pnettable *pnettable;
 422        struct smc_net *sn;
 423        int rc;
 424
 425        /* get pnettable for namespace */
 426        sn = net_generic(net, smc_net_id);
 427        pnettable = &sn->pnettable;
 428
 429        rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
 430        if (!rc)
 431                rc = smc_pnet_enter(pnettable, &pnetelem);
 432        if (pnetelem.ndev)
 433                dev_put(pnetelem.ndev);
 434        return rc;
 435}
 436
 437static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
 438{
 439        struct net *net = genl_info_net(info);
 440
 441        if (!info->attrs[SMC_PNETID_NAME])
 442                return -EINVAL;
 443        return smc_pnet_remove_by_pnetid(net,
 444                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 445}
 446
 447static int smc_pnet_dump_start(struct netlink_callback *cb)
 448{
 449        cb->args[0] = 0;
 450        return 0;
 451}
 452
 453static int smc_pnet_dumpinfo(struct sk_buff *skb,
 454                             u32 portid, u32 seq, u32 flags,
 455                             struct smc_user_pnetentry *pnetelem)
 456{
 457        void *hdr;
 458
 459        hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
 460                          flags, SMC_PNETID_GET);
 461        if (!hdr)
 462                return -ENOMEM;
 463        if (smc_pnet_set_nla(skb, pnetelem) < 0) {
 464                genlmsg_cancel(skb, hdr);
 465                return -EMSGSIZE;
 466        }
 467        genlmsg_end(skb, hdr);
 468        return 0;
 469}
 470
 471static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
 472                          u32 seq, u8 *pnetid, int start_idx)
 473{
 474        struct smc_user_pnetentry tmp_entry;
 475        struct smc_pnettable *pnettable;
 476        struct smc_pnetentry *pnetelem;
 477        struct smc_ib_device *ibdev;
 478        struct smcd_dev *smcd_dev;
 479        struct smc_net *sn;
 480        int idx = 0;
 481        int ibport;
 482
 483        /* get pnettable for namespace */
 484        sn = net_generic(net, smc_net_id);
 485        pnettable = &sn->pnettable;
 486
 487        /* dump netdevices */
 488        read_lock(&pnettable->lock);
 489        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 490                if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
 491                        continue;
 492                if (idx++ < start_idx)
 493                        continue;
 494                memset(&tmp_entry, 0, sizeof(tmp_entry));
 495                memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
 496                       SMC_MAX_PNETID_LEN);
 497                tmp_entry.ndev = pnetelem->ndev;
 498                if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 499                                      &tmp_entry)) {
 500                        --idx;
 501                        break;
 502                }
 503        }
 504        read_unlock(&pnettable->lock);
 505
 506        /* if this is not the initial namespace, stop here */
 507        if (net != &init_net)
 508                return idx;
 509
 510        /* dump ib devices */
 511        spin_lock(&smc_ib_devices.lock);
 512        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 513                for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 514                        if (ibdev->pnetid_by_user[ibport]) {
 515                                if (pnetid &&
 516                                    !smc_pnet_match(ibdev->pnetid[ibport],
 517                                                    pnetid))
 518                                        continue;
 519                                if (idx++ < start_idx)
 520                                        continue;
 521                                memset(&tmp_entry, 0, sizeof(tmp_entry));
 522                                memcpy(&tmp_entry.pnet_name,
 523                                       ibdev->pnetid[ibport],
 524                                       SMC_MAX_PNETID_LEN);
 525                                tmp_entry.smcibdev = ibdev;
 526                                tmp_entry.ib_port = ibport + 1;
 527                                if (smc_pnet_dumpinfo(skb, portid, seq,
 528                                                      NLM_F_MULTI,
 529                                                      &tmp_entry)) {
 530                                        --idx;
 531                                        break;
 532                                }
 533                        }
 534                }
 535        }
 536        spin_unlock(&smc_ib_devices.lock);
 537
 538        /* dump smcd devices */
 539        spin_lock(&smcd_dev_list.lock);
 540        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 541                if (smcd_dev->pnetid_by_user) {
 542                        if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
 543                                continue;
 544                        if (idx++ < start_idx)
 545                                continue;
 546                        memset(&tmp_entry, 0, sizeof(tmp_entry));
 547                        memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
 548                               SMC_MAX_PNETID_LEN);
 549                        tmp_entry.smcd_dev = smcd_dev;
 550                        if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 551                                              &tmp_entry)) {
 552                                --idx;
 553                                break;
 554                        }
 555                }
 556        }
 557        spin_unlock(&smcd_dev_list.lock);
 558
 559        return idx;
 560}
 561
 562static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
 563{
 564        struct net *net = sock_net(skb->sk);
 565        int idx;
 566
 567        idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
 568                             cb->nlh->nlmsg_seq, NULL, cb->args[0]);
 569
 570        cb->args[0] = idx;
 571        return skb->len;
 572}
 573
 574/* Retrieve one PNETID entry */
 575static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
 576{
 577        struct net *net = genl_info_net(info);
 578        struct sk_buff *msg;
 579        void *hdr;
 580
 581        if (!info->attrs[SMC_PNETID_NAME])
 582                return -EINVAL;
 583
 584        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 585        if (!msg)
 586                return -ENOMEM;
 587
 588        _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
 589                       nla_data(info->attrs[SMC_PNETID_NAME]), 0);
 590
 591        /* finish multi part message and send it */
 592        hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
 593                        NLM_F_MULTI);
 594        if (!hdr) {
 595                nlmsg_free(msg);
 596                return -EMSGSIZE;
 597        }
 598        return genlmsg_reply(msg, info);
 599}
 600
 601/* Remove and delete all pnetids from pnet table.
 602 */
 603static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
 604{
 605        struct net *net = genl_info_net(info);
 606
 607        smc_pnet_remove_by_pnetid(net, NULL);
 608        return 0;
 609}
 610
 611/* SMC_PNETID generic netlink operation definition */
 612static const struct genl_ops smc_pnet_ops[] = {
 613        {
 614                .cmd = SMC_PNETID_GET,
 615                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 616                .flags = GENL_ADMIN_PERM,
 617                .doit = smc_pnet_get,
 618                .dumpit = smc_pnet_dump,
 619                .start = smc_pnet_dump_start
 620        },
 621        {
 622                .cmd = SMC_PNETID_ADD,
 623                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 624                .flags = GENL_ADMIN_PERM,
 625                .doit = smc_pnet_add
 626        },
 627        {
 628                .cmd = SMC_PNETID_DEL,
 629                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 630                .flags = GENL_ADMIN_PERM,
 631                .doit = smc_pnet_del
 632        },
 633        {
 634                .cmd = SMC_PNETID_FLUSH,
 635                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 636                .flags = GENL_ADMIN_PERM,
 637                .doit = smc_pnet_flush
 638        }
 639};
 640
 641/* SMC_PNETID family definition */
 642static struct genl_family smc_pnet_nl_family __ro_after_init = {
 643        .hdrsize = 0,
 644        .name = SMCR_GENL_FAMILY_NAME,
 645        .version = SMCR_GENL_FAMILY_VERSION,
 646        .maxattr = SMC_PNETID_MAX,
 647        .policy = smc_pnet_policy,
 648        .netnsok = true,
 649        .module = THIS_MODULE,
 650        .ops = smc_pnet_ops,
 651        .n_ops =  ARRAY_SIZE(smc_pnet_ops)
 652};
 653
 654static int smc_pnet_netdev_event(struct notifier_block *this,
 655                                 unsigned long event, void *ptr)
 656{
 657        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 658
 659        switch (event) {
 660        case NETDEV_REBOOT:
 661        case NETDEV_UNREGISTER:
 662                smc_pnet_remove_by_ndev(event_dev);
 663                return NOTIFY_OK;
 664        default:
 665                return NOTIFY_DONE;
 666        }
 667}
 668
 669static struct notifier_block smc_netdev_notifier = {
 670        .notifier_call = smc_pnet_netdev_event
 671};
 672
 673/* init network namespace */
 674int smc_pnet_net_init(struct net *net)
 675{
 676        struct smc_net *sn = net_generic(net, smc_net_id);
 677        struct smc_pnettable *pnettable = &sn->pnettable;
 678
 679        INIT_LIST_HEAD(&pnettable->pnetlist);
 680        rwlock_init(&pnettable->lock);
 681
 682        return 0;
 683}
 684
 685int __init smc_pnet_init(void)
 686{
 687        int rc;
 688
 689        rc = genl_register_family(&smc_pnet_nl_family);
 690        if (rc)
 691                return rc;
 692        rc = register_netdevice_notifier(&smc_netdev_notifier);
 693        if (rc)
 694                genl_unregister_family(&smc_pnet_nl_family);
 695        return rc;
 696}
 697
 698/* exit network namespace */
 699void smc_pnet_net_exit(struct net *net)
 700{
 701        /* flush pnet table */
 702        smc_pnet_remove_by_pnetid(net, NULL);
 703}
 704
 705void smc_pnet_exit(void)
 706{
 707        unregister_netdevice_notifier(&smc_netdev_notifier);
 708        genl_unregister_family(&smc_pnet_nl_family);
 709}
 710
 711/* Determine one base device for stacked net devices.
 712 * If the lower device level contains more than one devices
 713 * (for instance with bonding slaves), just the first device
 714 * is used to reach a base device.
 715 */
 716static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 717{
 718        int i, nest_lvl;
 719
 720        rtnl_lock();
 721        nest_lvl = dev_get_nest_level(ndev);
 722        for (i = 0; i < nest_lvl; i++) {
 723                struct list_head *lower = &ndev->adj_list.lower;
 724
 725                if (list_empty(lower))
 726                        break;
 727                lower = lower->next;
 728                ndev = netdev_lower_get_next(ndev, &lower);
 729        }
 730        rtnl_unlock();
 731        return ndev;
 732}
 733
 734static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
 735                                              u8 *pnetid)
 736{
 737        struct smc_pnettable *pnettable;
 738        struct net *net = dev_net(ndev);
 739        struct smc_pnetentry *pnetelem;
 740        struct smc_net *sn;
 741        int rc = -ENOENT;
 742
 743        /* get pnettable for namespace */
 744        sn = net_generic(net, smc_net_id);
 745        pnettable = &sn->pnettable;
 746
 747        read_lock(&pnettable->lock);
 748        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 749                if (ndev == pnetelem->ndev) {
 750                        /* get pnetid of netdev device */
 751                        memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 752                        rc = 0;
 753                        break;
 754                }
 755        }
 756        read_unlock(&pnettable->lock);
 757        return rc;
 758}
 759
 760/* if handshake network device belongs to a roce device, return its
 761 * IB device and port
 762 */
 763static void smc_pnet_find_rdma_dev(struct net_device *netdev,
 764                                   struct smc_init_info *ini)
 765{
 766        struct smc_ib_device *ibdev;
 767
 768        spin_lock(&smc_ib_devices.lock);
 769        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 770                struct net_device *ndev;
 771                int i;
 772
 773                for (i = 1; i <= SMC_MAX_PORTS; i++) {
 774                        if (!rdma_is_port_valid(ibdev->ibdev, i))
 775                                continue;
 776                        if (!ibdev->ibdev->ops.get_netdev)
 777                                continue;
 778                        ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
 779                        if (!ndev)
 780                                continue;
 781                        dev_put(ndev);
 782                        if (netdev == ndev &&
 783                            smc_ib_port_active(ibdev, i) &&
 784                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
 785                                                  ini->ib_gid, NULL)) {
 786                                ini->ib_dev = ibdev;
 787                                ini->ib_port = i;
 788                                break;
 789                        }
 790                }
 791        }
 792        spin_unlock(&smc_ib_devices.lock);
 793}
 794
 795/* Determine the corresponding IB device port based on the hardware PNETID.
 796 * Searching stops at the first matching active IB device port with vlan_id
 797 * configured.
 798 * If nothing found, check pnetid table.
 799 * If nothing found, try to use handshake device
 800 */
 801static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 802                                         struct smc_init_info *ini)
 803{
 804        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 805        struct smc_ib_device *ibdev;
 806        int i;
 807
 808        ndev = pnet_find_base_ndev(ndev);
 809        if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 810                                   ndev_pnetid) &&
 811            smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
 812                smc_pnet_find_rdma_dev(ndev, ini);
 813                return; /* pnetid could not be determined */
 814        }
 815
 816        spin_lock(&smc_ib_devices.lock);
 817        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 818                for (i = 1; i <= SMC_MAX_PORTS; i++) {
 819                        if (!rdma_is_port_valid(ibdev->ibdev, i))
 820                                continue;
 821                        if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
 822                            smc_ib_port_active(ibdev, i) &&
 823                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
 824                                                  ini->ib_gid, NULL)) {
 825                                ini->ib_dev = ibdev;
 826                                ini->ib_port = i;
 827                                goto out;
 828                        }
 829                }
 830        }
 831out:
 832        spin_unlock(&smc_ib_devices.lock);
 833}
 834
 835static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 836                                        struct smc_init_info *ini)
 837{
 838        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 839        struct smcd_dev *ismdev;
 840
 841        ndev = pnet_find_base_ndev(ndev);
 842        if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 843                                   ndev_pnetid) &&
 844            smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
 845                return; /* pnetid could not be determined */
 846
 847        spin_lock(&smcd_dev_list.lock);
 848        list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
 849                if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
 850                        ini->ism_dev = ismdev;
 851                        break;
 852                }
 853        }
 854        spin_unlock(&smcd_dev_list.lock);
 855}
 856
 857/* PNET table analysis for a given sock:
 858 * determine ib_device and port belonging to used internal TCP socket
 859 * ethernet interface.
 860 */
 861void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
 862{
 863        struct dst_entry *dst = sk_dst_get(sk);
 864
 865        ini->ib_dev = NULL;
 866        ini->ib_port = 0;
 867        if (!dst)
 868                goto out;
 869        if (!dst->dev)
 870                goto out_rel;
 871
 872        smc_pnet_find_roce_by_pnetid(dst->dev, ini);
 873
 874out_rel:
 875        dst_release(dst);
 876out:
 877        return;
 878}
 879
 880void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
 881{
 882        struct dst_entry *dst = sk_dst_get(sk);
 883
 884        ini->ism_dev = NULL;
 885        if (!dst)
 886                goto out;
 887        if (!dst->dev)
 888                goto out_rel;
 889
 890        smc_pnet_find_ism_by_pnetid(dst->dev, ini);
 891
 892out_rel:
 893        dst_release(dst);
 894out:
 895        return;
 896}
 897