linux/net/smc/smc_pnet.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  Generic netlink support functions to configure an SMC-R PNET table
   6 *
   7 *  Copyright IBM Corp. 2016
   8 *
   9 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/list.h>
  14#include <linux/ctype.h>
  15#include <net/netlink.h>
  16#include <net/genetlink.h>
  17
  18#include <uapi/linux/if.h>
  19#include <uapi/linux/smc.h>
  20
  21#include <rdma/ib_verbs.h>
  22
  23#include <net/netns/generic.h>
  24#include "smc_netns.h"
  25
  26#include "smc_pnet.h"
  27#include "smc_ib.h"
  28#include "smc_ism.h"
  29#include "smc_core.h"
  30
  /* ASCII code of the blank character; smc_pnet_match() treats a blank like
   * a terminating NUL when comparing pnetids
   */
  #define SMC_ASCII_BLANK 0x20

  /* forward declaration, the definition follows further down in this file */
  static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
  34
  35static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
  36        [SMC_PNETID_NAME] = {
  37                .type = NLA_NUL_STRING,
  38                .len = SMC_MAX_PNETID_LEN
  39        },
  40        [SMC_PNETID_ETHNAME] = {
  41                .type = NLA_NUL_STRING,
  42                .len = IFNAMSIZ - 1
  43        },
  44        [SMC_PNETID_IBNAME] = {
  45                .type = NLA_NUL_STRING,
  46                .len = IB_DEVICE_NAME_MAX - 1
  47        },
  48        [SMC_PNETID_IBPORT] = { .type = NLA_U8 }
  49};
  50
  51static struct genl_family smc_pnet_nl_family;
  52
  53/**
  54 * struct smc_user_pnetentry - pnet identifier name entry for/from user
  55 * @list: List node.
  56 * @pnet_name: Pnet identifier name
  57 * @ndev: pointer to network device.
  58 * @smcibdev: Pointer to IB device.
  59 * @ib_port: Port of IB device.
  60 * @smcd_dev: Pointer to smcd device.
  61 */
  62struct smc_user_pnetentry {
  63        struct list_head list;
  64        char pnet_name[SMC_MAX_PNETID_LEN + 1];
  65        struct net_device *ndev;
  66        struct smc_ib_device *smcibdev;
  67        u8 ib_port;
  68        struct smcd_dev *smcd_dev;
  69};
  70
  71/* pnet entry stored in pnet table */
  72struct smc_pnetentry {
  73        struct list_head list;
  74        char pnet_name[SMC_MAX_PNETID_LEN + 1];
  75        struct net_device *ndev;
  76};
  77
  78/* Check if two given pnetids match */
  79static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
  80{
  81        int i;
  82
  83        for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
  84                if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) &&
  85                    (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK))
  86                        break;
  87                if (pnetid1[i] != pnetid2[i])
  88                        return false;
  89        }
  90        return true;
  91}
  92
  93/* Remove a pnetid from the pnet table.
  94 */
  95static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
  96{
  97        struct smc_pnetentry *pnetelem, *tmp_pe;
  98        struct smc_pnettable *pnettable;
  99        struct smc_ib_device *ibdev;
 100        struct smcd_dev *smcd_dev;
 101        struct smc_net *sn;
 102        int rc = -ENOENT;
 103        int ibport;
 104
 105        /* get pnettable for namespace */
 106        sn = net_generic(net, smc_net_id);
 107        pnettable = &sn->pnettable;
 108
 109        /* remove netdevices */
 110        write_lock(&pnettable->lock);
 111        list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
 112                                 list) {
 113                if (!pnet_name ||
 114                    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
 115                        list_del(&pnetelem->list);
 116                        dev_put(pnetelem->ndev);
 117                        kfree(pnetelem);
 118                        rc = 0;
 119                }
 120        }
 121        write_unlock(&pnettable->lock);
 122
 123        /* if this is not the initial namespace, stop here */
 124        if (net != &init_net)
 125                return rc;
 126
 127        /* remove ib devices */
 128        spin_lock(&smc_ib_devices.lock);
 129        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 130                for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 131                        if (ibdev->pnetid_by_user[ibport] &&
 132                            (!pnet_name ||
 133                             smc_pnet_match(pnet_name,
 134                                            ibdev->pnetid[ibport]))) {
 135                                memset(ibdev->pnetid[ibport], 0,
 136                                       SMC_MAX_PNETID_LEN);
 137                                ibdev->pnetid_by_user[ibport] = false;
 138                                rc = 0;
 139                        }
 140                }
 141        }
 142        spin_unlock(&smc_ib_devices.lock);
 143        /* remove smcd devices */
 144        spin_lock(&smcd_dev_list.lock);
 145        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 146                if (smcd_dev->pnetid_by_user &&
 147                    (!pnet_name ||
 148                     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
 149                        memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
 150                        smcd_dev->pnetid_by_user = false;
 151                        rc = 0;
 152                }
 153        }
 154        spin_unlock(&smcd_dev_list.lock);
 155        return rc;
 156}
 157
 158/* Remove a pnet entry mentioning a given network device from the pnet table.
 159 */
 160static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 161{
 162        struct smc_pnetentry *pnetelem, *tmp_pe;
 163        struct smc_pnettable *pnettable;
 164        struct net *net = dev_net(ndev);
 165        struct smc_net *sn;
 166        int rc = -ENOENT;
 167
 168        /* get pnettable for namespace */
 169        sn = net_generic(net, smc_net_id);
 170        pnettable = &sn->pnettable;
 171
 172        write_lock(&pnettable->lock);
 173        list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
 174                if (pnetelem->ndev == ndev) {
 175                        list_del(&pnetelem->list);
 176                        dev_put(pnetelem->ndev);
 177                        kfree(pnetelem);
 178                        rc = 0;
 179                        break;
 180                }
 181        }
 182        write_unlock(&pnettable->lock);
 183        return rc;
 184}
 185
 186/* Append a pnetid to the end of the pnet table if not already on this list.
 187 */
 188static int smc_pnet_enter(struct smc_pnettable *pnettable,
 189                          struct smc_user_pnetentry *new_pnetelem)
 190{
 191        u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
 192        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 193        struct smc_pnetentry *tmp_pnetelem;
 194        struct smc_pnetentry *pnetelem;
 195        bool new_smcddev = false;
 196        struct net_device *ndev;
 197        bool new_netdev = true;
 198        bool new_ibdev = false;
 199
 200        if (new_pnetelem->smcibdev) {
 201                struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
 202                int ib_port = new_pnetelem->ib_port;
 203
 204                spin_lock(&smc_ib_devices.lock);
 205                if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
 206                        memcpy(ib_dev->pnetid[ib_port - 1],
 207                               new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 208                        ib_dev->pnetid_by_user[ib_port - 1] = true;
 209                        new_ibdev = true;
 210                }
 211                spin_unlock(&smc_ib_devices.lock);
 212        }
 213        if (new_pnetelem->smcd_dev) {
 214                struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
 215
 216                spin_lock(&smcd_dev_list.lock);
 217                if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
 218                        memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
 219                               SMC_MAX_PNETID_LEN);
 220                        smcd_dev->pnetid_by_user = true;
 221                        new_smcddev = true;
 222                }
 223                spin_unlock(&smcd_dev_list.lock);
 224        }
 225
 226        if (!new_pnetelem->ndev)
 227                return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 228
 229        /* check if (base) netdev already has a pnetid. If there is one, we do
 230         * not want to add a pnet table entry
 231         */
 232        ndev = pnet_find_base_ndev(new_pnetelem->ndev);
 233        if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 234                                    ndev_pnetid))
 235                return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 236
 237        /* add a new netdev entry to the pnet table if there isn't one */
 238        tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
 239        if (!tmp_pnetelem)
 240                return -ENOMEM;
 241        memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
 242               SMC_MAX_PNETID_LEN);
 243        tmp_pnetelem->ndev = new_pnetelem->ndev;
 244
 245        write_lock(&pnettable->lock);
 246        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 247                if (pnetelem->ndev == new_pnetelem->ndev)
 248                        new_netdev = false;
 249        }
 250        if (new_netdev) {
 251                dev_hold(tmp_pnetelem->ndev);
 252                list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
 253                write_unlock(&pnettable->lock);
 254        } else {
 255                write_unlock(&pnettable->lock);
 256                kfree(tmp_pnetelem);
 257        }
 258
 259        return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
 260}
 261
 262/* The limit for pnetid is 16 characters.
 263 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
 264 * Lower case letters are converted to upper case.
 265 * Interior blanks should not be used.
 266 */
 267static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
 268{
 269        char *bf = skip_spaces(pnet_name);
 270        size_t len = strlen(bf);
 271        char *end = bf + len;
 272
 273        if (!len)
 274                return false;
 275        while (--end >= bf && isspace(*end))
 276                ;
 277        if (end - bf >= SMC_MAX_PNETID_LEN)
 278                return false;
 279        while (bf <= end) {
 280                if (!isalnum(*bf))
 281                        return false;
 282                *pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
 283                bf++;
 284        }
 285        *pnetid = '\0';
 286        return true;
 287}
 288
 289/* Find an infiniband device by a given name. The device might not exist. */
 290static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
 291{
 292        struct smc_ib_device *ibdev;
 293
 294        spin_lock(&smc_ib_devices.lock);
 295        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 296                if (!strncmp(ibdev->ibdev->name, ib_name,
 297                             sizeof(ibdev->ibdev->name)) ||
 298                    !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
 299                             IB_DEVICE_NAME_MAX - 1)) {
 300                        goto out;
 301                }
 302        }
 303        ibdev = NULL;
 304out:
 305        spin_unlock(&smc_ib_devices.lock);
 306        return ibdev;
 307}
 308
 309/* Find an smcd device by a given name. The device might not exist. */
 310static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
 311{
 312        struct smcd_dev *smcd_dev;
 313
 314        spin_lock(&smcd_dev_list.lock);
 315        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 316                if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
 317                             IB_DEVICE_NAME_MAX - 1))
 318                        goto out;
 319        }
 320        smcd_dev = NULL;
 321out:
 322        spin_unlock(&smcd_dev_list.lock);
 323        return smcd_dev;
 324}
 325
 326/* Parse the supplied netlink attributes and fill a pnetentry structure.
 327 * For ethernet and infiniband device names verify that the devices exist.
 328 */
 329static int smc_pnet_fill_entry(struct net *net,
 330                               struct smc_user_pnetentry *pnetelem,
 331                               struct nlattr *tb[])
 332{
 333        char *string, *ibname;
 334        int rc;
 335
 336        memset(pnetelem, 0, sizeof(*pnetelem));
 337        INIT_LIST_HEAD(&pnetelem->list);
 338
 339        rc = -EINVAL;
 340        if (!tb[SMC_PNETID_NAME])
 341                goto error;
 342        string = (char *)nla_data(tb[SMC_PNETID_NAME]);
 343        if (!smc_pnetid_valid(string, pnetelem->pnet_name))
 344                goto error;
 345
 346        rc = -EINVAL;
 347        if (tb[SMC_PNETID_ETHNAME]) {
 348                string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
 349                pnetelem->ndev = dev_get_by_name(net, string);
 350                if (!pnetelem->ndev)
 351                        goto error;
 352        }
 353
 354        /* if this is not the initial namespace, stop here */
 355        if (net != &init_net)
 356                return 0;
 357
 358        rc = -EINVAL;
 359        if (tb[SMC_PNETID_IBNAME]) {
 360                ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
 361                ibname = strim(ibname);
 362                pnetelem->smcibdev = smc_pnet_find_ib(ibname);
 363                pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
 364                if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
 365                        goto error;
 366                if (pnetelem->smcibdev) {
 367                        if (!tb[SMC_PNETID_IBPORT])
 368                                goto error;
 369                        pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
 370                        if (pnetelem->ib_port < 1 ||
 371                            pnetelem->ib_port > SMC_MAX_PORTS)
 372                                goto error;
 373                }
 374        }
 375
 376        return 0;
 377
 378error:
 379        return rc;
 380}
 381
 382/* Convert an smc_pnetentry to a netlink attribute sequence */
 383static int smc_pnet_set_nla(struct sk_buff *msg,
 384                            struct smc_user_pnetentry *pnetelem)
 385{
 386        if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
 387                return -1;
 388        if (pnetelem->ndev) {
 389                if (nla_put_string(msg, SMC_PNETID_ETHNAME,
 390                                   pnetelem->ndev->name))
 391                        return -1;
 392        } else {
 393                if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
 394                        return -1;
 395        }
 396        if (pnetelem->smcibdev) {
 397                if (nla_put_string(msg, SMC_PNETID_IBNAME,
 398                        dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
 399                    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
 400                        return -1;
 401        } else if (pnetelem->smcd_dev) {
 402                if (nla_put_string(msg, SMC_PNETID_IBNAME,
 403                                   dev_name(&pnetelem->smcd_dev->dev)) ||
 404                    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
 405                        return -1;
 406        } else {
 407                if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
 408                    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
 409                        return -1;
 410        }
 411
 412        return 0;
 413}
 414
 415static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 416{
 417        struct net *net = genl_info_net(info);
 418        struct smc_user_pnetentry pnetelem;
 419        struct smc_pnettable *pnettable;
 420        struct smc_net *sn;
 421        int rc;
 422
 423        /* get pnettable for namespace */
 424        sn = net_generic(net, smc_net_id);
 425        pnettable = &sn->pnettable;
 426
 427        rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
 428        if (!rc)
 429                rc = smc_pnet_enter(pnettable, &pnetelem);
 430        if (pnetelem.ndev)
 431                dev_put(pnetelem.ndev);
 432        return rc;
 433}
 434
 435static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
 436{
 437        struct net *net = genl_info_net(info);
 438
 439        if (!info->attrs[SMC_PNETID_NAME])
 440                return -EINVAL;
 441        return smc_pnet_remove_by_pnetid(net,
 442                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 443}
 444
 445static int smc_pnet_dump_start(struct netlink_callback *cb)
 446{
 447        cb->args[0] = 0;
 448        return 0;
 449}
 450
 451static int smc_pnet_dumpinfo(struct sk_buff *skb,
 452                             u32 portid, u32 seq, u32 flags,
 453                             struct smc_user_pnetentry *pnetelem)
 454{
 455        void *hdr;
 456
 457        hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
 458                          flags, SMC_PNETID_GET);
 459        if (!hdr)
 460                return -ENOMEM;
 461        if (smc_pnet_set_nla(skb, pnetelem) < 0) {
 462                genlmsg_cancel(skb, hdr);
 463                return -EMSGSIZE;
 464        }
 465        genlmsg_end(skb, hdr);
 466        return 0;
 467}
 468
 469static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
 470                          u32 seq, u8 *pnetid, int start_idx)
 471{
 472        struct smc_user_pnetentry tmp_entry;
 473        struct smc_pnettable *pnettable;
 474        struct smc_pnetentry *pnetelem;
 475        struct smc_ib_device *ibdev;
 476        struct smcd_dev *smcd_dev;
 477        struct smc_net *sn;
 478        int idx = 0;
 479        int ibport;
 480
 481        /* get pnettable for namespace */
 482        sn = net_generic(net, smc_net_id);
 483        pnettable = &sn->pnettable;
 484
 485        /* dump netdevices */
 486        read_lock(&pnettable->lock);
 487        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 488                if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
 489                        continue;
 490                if (idx++ < start_idx)
 491                        continue;
 492                memset(&tmp_entry, 0, sizeof(tmp_entry));
 493                memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
 494                       SMC_MAX_PNETID_LEN);
 495                tmp_entry.ndev = pnetelem->ndev;
 496                if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 497                                      &tmp_entry)) {
 498                        --idx;
 499                        break;
 500                }
 501        }
 502        read_unlock(&pnettable->lock);
 503
 504        /* if this is not the initial namespace, stop here */
 505        if (net != &init_net)
 506                return idx;
 507
 508        /* dump ib devices */
 509        spin_lock(&smc_ib_devices.lock);
 510        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 511                for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
 512                        if (ibdev->pnetid_by_user[ibport]) {
 513                                if (pnetid &&
 514                                    !smc_pnet_match(ibdev->pnetid[ibport],
 515                                                    pnetid))
 516                                        continue;
 517                                if (idx++ < start_idx)
 518                                        continue;
 519                                memset(&tmp_entry, 0, sizeof(tmp_entry));
 520                                memcpy(&tmp_entry.pnet_name,
 521                                       ibdev->pnetid[ibport],
 522                                       SMC_MAX_PNETID_LEN);
 523                                tmp_entry.smcibdev = ibdev;
 524                                tmp_entry.ib_port = ibport + 1;
 525                                if (smc_pnet_dumpinfo(skb, portid, seq,
 526                                                      NLM_F_MULTI,
 527                                                      &tmp_entry)) {
 528                                        --idx;
 529                                        break;
 530                                }
 531                        }
 532                }
 533        }
 534        spin_unlock(&smc_ib_devices.lock);
 535
 536        /* dump smcd devices */
 537        spin_lock(&smcd_dev_list.lock);
 538        list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
 539                if (smcd_dev->pnetid_by_user) {
 540                        if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
 541                                continue;
 542                        if (idx++ < start_idx)
 543                                continue;
 544                        memset(&tmp_entry, 0, sizeof(tmp_entry));
 545                        memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
 546                               SMC_MAX_PNETID_LEN);
 547                        tmp_entry.smcd_dev = smcd_dev;
 548                        if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
 549                                              &tmp_entry)) {
 550                                --idx;
 551                                break;
 552                        }
 553                }
 554        }
 555        spin_unlock(&smcd_dev_list.lock);
 556
 557        return idx;
 558}
 559
 560static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
 561{
 562        struct net *net = sock_net(skb->sk);
 563        int idx;
 564
 565        idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
 566                             cb->nlh->nlmsg_seq, NULL, cb->args[0]);
 567
 568        cb->args[0] = idx;
 569        return skb->len;
 570}
 571
 572/* Retrieve one PNETID entry */
 573static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
 574{
 575        struct net *net = genl_info_net(info);
 576        struct sk_buff *msg;
 577        void *hdr;
 578
 579        if (!info->attrs[SMC_PNETID_NAME])
 580                return -EINVAL;
 581
 582        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 583        if (!msg)
 584                return -ENOMEM;
 585
 586        _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
 587                       nla_data(info->attrs[SMC_PNETID_NAME]), 0);
 588
 589        /* finish multi part message and send it */
 590        hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
 591                        NLM_F_MULTI);
 592        if (!hdr) {
 593                nlmsg_free(msg);
 594                return -EMSGSIZE;
 595        }
 596        return genlmsg_reply(msg, info);
 597}
 598
 599/* Remove and delete all pnetids from pnet table.
 600 */
 601static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
 602{
 603        struct net *net = genl_info_net(info);
 604
 605        smc_pnet_remove_by_pnetid(net, NULL);
 606        return 0;
 607}
 608
 609/* SMC_PNETID generic netlink operation definition */
 610static const struct genl_ops smc_pnet_ops[] = {
 611        {
 612                .cmd = SMC_PNETID_GET,
 613                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 614                .flags = GENL_ADMIN_PERM,
 615                .doit = smc_pnet_get,
 616                .dumpit = smc_pnet_dump,
 617                .start = smc_pnet_dump_start
 618        },
 619        {
 620                .cmd = SMC_PNETID_ADD,
 621                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 622                .flags = GENL_ADMIN_PERM,
 623                .doit = smc_pnet_add
 624        },
 625        {
 626                .cmd = SMC_PNETID_DEL,
 627                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 628                .flags = GENL_ADMIN_PERM,
 629                .doit = smc_pnet_del
 630        },
 631        {
 632                .cmd = SMC_PNETID_FLUSH,
 633                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 634                .flags = GENL_ADMIN_PERM,
 635                .doit = smc_pnet_flush
 636        }
 637};
 638
 639/* SMC_PNETID family definition */
 640static struct genl_family smc_pnet_nl_family __ro_after_init = {
 641        .hdrsize = 0,
 642        .name = SMCR_GENL_FAMILY_NAME,
 643        .version = SMCR_GENL_FAMILY_VERSION,
 644        .maxattr = SMC_PNETID_MAX,
 645        .policy = smc_pnet_policy,
 646        .netnsok = true,
 647        .module = THIS_MODULE,
 648        .ops = smc_pnet_ops,
 649        .n_ops =  ARRAY_SIZE(smc_pnet_ops)
 650};
 651
 652static int smc_pnet_netdev_event(struct notifier_block *this,
 653                                 unsigned long event, void *ptr)
 654{
 655        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 656
 657        switch (event) {
 658        case NETDEV_REBOOT:
 659        case NETDEV_UNREGISTER:
 660                smc_pnet_remove_by_ndev(event_dev);
 661                return NOTIFY_OK;
 662        default:
 663                return NOTIFY_DONE;
 664        }
 665}
 666
 667static struct notifier_block smc_netdev_notifier = {
 668        .notifier_call = smc_pnet_netdev_event
 669};
 670
 671/* init network namespace */
 672int smc_pnet_net_init(struct net *net)
 673{
 674        struct smc_net *sn = net_generic(net, smc_net_id);
 675        struct smc_pnettable *pnettable = &sn->pnettable;
 676
 677        INIT_LIST_HEAD(&pnettable->pnetlist);
 678        rwlock_init(&pnettable->lock);
 679
 680        return 0;
 681}
 682
 683int __init smc_pnet_init(void)
 684{
 685        int rc;
 686
 687        rc = genl_register_family(&smc_pnet_nl_family);
 688        if (rc)
 689                return rc;
 690        rc = register_netdevice_notifier(&smc_netdev_notifier);
 691        if (rc)
 692                genl_unregister_family(&smc_pnet_nl_family);
 693        return rc;
 694}
 695
 696/* exit network namespace */
 697void smc_pnet_net_exit(struct net *net)
 698{
 699        /* flush pnet table */
 700        smc_pnet_remove_by_pnetid(net, NULL);
 701}
 702
 703void smc_pnet_exit(void)
 704{
 705        unregister_netdevice_notifier(&smc_netdev_notifier);
 706        genl_unregister_family(&smc_pnet_nl_family);
 707}
 708
 709/* Determine one base device for stacked net devices.
 710 * If the lower device level contains more than one devices
 711 * (for instance with bonding slaves), just the first device
 712 * is used to reach a base device.
 713 */
 714static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 715{
 716        int i, nest_lvl;
 717
 718        rtnl_lock();
 719        nest_lvl = ndev->lower_level;
 720        for (i = 0; i < nest_lvl; i++) {
 721                struct list_head *lower = &ndev->adj_list.lower;
 722
 723                if (list_empty(lower))
 724                        break;
 725                lower = lower->next;
 726                ndev = netdev_lower_get_next(ndev, &lower);
 727        }
 728        rtnl_unlock();
 729        return ndev;
 730}
 731
 732static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
 733                                              u8 *pnetid)
 734{
 735        struct smc_pnettable *pnettable;
 736        struct net *net = dev_net(ndev);
 737        struct smc_pnetentry *pnetelem;
 738        struct smc_net *sn;
 739        int rc = -ENOENT;
 740
 741        /* get pnettable for namespace */
 742        sn = net_generic(net, smc_net_id);
 743        pnettable = &sn->pnettable;
 744
 745        read_lock(&pnettable->lock);
 746        list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 747                if (ndev == pnetelem->ndev) {
 748                        /* get pnetid of netdev device */
 749                        memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 750                        rc = 0;
 751                        break;
 752                }
 753        }
 754        read_unlock(&pnettable->lock);
 755        return rc;
 756}
 757
 758/* if handshake network device belongs to a roce device, return its
 759 * IB device and port
 760 */
 761static void smc_pnet_find_rdma_dev(struct net_device *netdev,
 762                                   struct smc_init_info *ini)
 763{
 764        struct smc_ib_device *ibdev;
 765
 766        spin_lock(&smc_ib_devices.lock);
 767        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 768                struct net_device *ndev;
 769                int i;
 770
 771                for (i = 1; i <= SMC_MAX_PORTS; i++) {
 772                        if (!rdma_is_port_valid(ibdev->ibdev, i))
 773                                continue;
 774                        if (!ibdev->ibdev->ops.get_netdev)
 775                                continue;
 776                        ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
 777                        if (!ndev)
 778                                continue;
 779                        dev_put(ndev);
 780                        if (netdev == ndev &&
 781                            smc_ib_port_active(ibdev, i) &&
 782                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
 783                                                  ini->ib_gid, NULL)) {
 784                                ini->ib_dev = ibdev;
 785                                ini->ib_port = i;
 786                                break;
 787                        }
 788                }
 789        }
 790        spin_unlock(&smc_ib_devices.lock);
 791}
 792
 793/* Determine the corresponding IB device port based on the hardware PNETID.
 794 * Searching stops at the first matching active IB device port with vlan_id
 795 * configured.
 796 * If nothing found, check pnetid table.
 797 * If nothing found, try to use handshake device
 798 */
 799static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 800                                         struct smc_init_info *ini)
 801{
 802        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 803        struct smc_ib_device *ibdev;
 804        int i;
 805
 806        ndev = pnet_find_base_ndev(ndev);
 807        if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 808                                   ndev_pnetid) &&
 809            smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
 810                smc_pnet_find_rdma_dev(ndev, ini);
 811                return; /* pnetid could not be determined */
 812        }
 813
 814        spin_lock(&smc_ib_devices.lock);
 815        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 816                for (i = 1; i <= SMC_MAX_PORTS; i++) {
 817                        if (!rdma_is_port_valid(ibdev->ibdev, i))
 818                                continue;
 819                        if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
 820                            smc_ib_port_active(ibdev, i) &&
 821                            !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
 822                                                  ini->ib_gid, NULL)) {
 823                                ini->ib_dev = ibdev;
 824                                ini->ib_port = i;
 825                                goto out;
 826                        }
 827                }
 828        }
 829out:
 830        spin_unlock(&smc_ib_devices.lock);
 831}
 832
 833static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 834                                        struct smc_init_info *ini)
 835{
 836        u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 837        struct smcd_dev *ismdev;
 838
 839        ndev = pnet_find_base_ndev(ndev);
 840        if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 841                                   ndev_pnetid) &&
 842            smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
 843                return; /* pnetid could not be determined */
 844
 845        spin_lock(&smcd_dev_list.lock);
 846        list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
 847                if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
 848                        ini->ism_dev = ismdev;
 849                        break;
 850                }
 851        }
 852        spin_unlock(&smcd_dev_list.lock);
 853}
 854
 855/* PNET table analysis for a given sock:
 856 * determine ib_device and port belonging to used internal TCP socket
 857 * ethernet interface.
 858 */
 859void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
 860{
 861        struct dst_entry *dst = sk_dst_get(sk);
 862
 863        ini->ib_dev = NULL;
 864        ini->ib_port = 0;
 865        if (!dst)
 866                goto out;
 867        if (!dst->dev)
 868                goto out_rel;
 869
 870        smc_pnet_find_roce_by_pnetid(dst->dev, ini);
 871
 872out_rel:
 873        dst_release(dst);
 874out:
 875        return;
 876}
 877
 878void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
 879{
 880        struct dst_entry *dst = sk_dst_get(sk);
 881
 882        ini->ism_dev = NULL;
 883        if (!dst)
 884                goto out;
 885        if (!dst->dev)
 886                goto out_rel;
 887
 888        smc_pnet_find_ism_by_pnetid(dst->dev, ini);
 889
 890out_rel:
 891        dst_release(dst);
 892out:
 893        return;
 894}
 895