linux/net/smc/smc_pnet.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  Generic netlink support functions to configure an SMC-R PNET table
   6 *
   7 *  Copyright IBM Corp. 2016
   8 *
   9 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/list.h>
  14#include <linux/ctype.h>
  15#include <net/netlink.h>
  16#include <net/genetlink.h>
  17
  18#include <uapi/linux/if.h>
  19#include <uapi/linux/smc.h>
  20
  21#include <rdma/ib_verbs.h>
  22
  23#include "smc_pnet.h"
  24#include "smc_ib.h"
  25
  26#define SMC_MAX_PNET_ID_LEN     16      /* Max. length of PNET id */
  27
  28static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
  29        [SMC_PNETID_NAME] = {
  30                .type = NLA_NUL_STRING,
  31                .len = SMC_MAX_PNET_ID_LEN - 1
  32        },
  33        [SMC_PNETID_ETHNAME] = {
  34                .type = NLA_NUL_STRING,
  35                .len = IFNAMSIZ - 1
  36        },
  37        [SMC_PNETID_IBNAME] = {
  38                .type = NLA_NUL_STRING,
  39                .len = IB_DEVICE_NAME_MAX - 1
  40        },
  41        [SMC_PNETID_IBPORT] = { .type = NLA_U8 }
  42};
  43
  44static struct genl_family smc_pnet_nl_family;
  45
/**
 * struct smc_pnettable - SMC PNET table anchor
 * @lock: Reader/writer lock guarding @pnetlist. Lookups and dumps take it
 *        for reading; add, remove and flush take it for writing.
 * @pnetlist: List of struct smc_pnetentry, linked through their list member.
 *
 * There is a single, statically initialised instance of this table.
 */
static struct smc_pnettable {
        rwlock_t lock;
        struct list_head pnetlist;
} smc_pnettable = {
        .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
        .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
};
  58
/**
 * struct smc_pnetentry - pnet identifier name entry
 * @list: List node linking the entry into smc_pnettable.pnetlist.
 * @pnet_name: Pnet identifier name, NUL-terminated; lower case letters are
 *             converted to upper case when the entry is filled from netlink.
 * @ndev: Pointer to network device. A device reference is taken with
 *        dev_get_by_name() when the entry is filled and dropped with
 *        dev_put() when the entry is removed from the table.
 * @smcibdev: Pointer to IB device.
 * @ib_port: IB port number; checked to be within 1..SMC_MAX_PORTS when the
 *           entry is filled from netlink.
 */
struct smc_pnetentry {
        struct list_head list;
        char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
        struct net_device *ndev;
        struct smc_ib_device *smcibdev;
        u8 ib_port;
};
  73
  74/* Check if two RDMA device entries are identical. Use device name and port
  75 * number for comparison.
  76 */
  77static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
  78                                 u8 ibport)
  79{
  80        return pnetelem->ib_port == ibport &&
  81               !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
  82                        sizeof(pnetelem->smcibdev->ibdev->name));
  83}
  84
  85/* Find a pnetid in the pnet table.
  86 */
  87static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
  88{
  89        struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
  90
  91        read_lock(&smc_pnettable.lock);
  92        list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
  93                if (!strncmp(pnetelem->pnet_name, pnet_name,
  94                             sizeof(pnetelem->pnet_name))) {
  95                        found_pnetelem = pnetelem;
  96                        break;
  97                }
  98        }
  99        read_unlock(&smc_pnettable.lock);
 100        return found_pnetelem;
 101}
 102
 103/* Remove a pnetid from the pnet table.
 104 */
 105static int smc_pnet_remove_by_pnetid(char *pnet_name)
 106{
 107        struct smc_pnetentry *pnetelem, *tmp_pe;
 108        int rc = -ENOENT;
 109
 110        write_lock(&smc_pnettable.lock);
 111        list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
 112                                 list) {
 113                if (!strncmp(pnetelem->pnet_name, pnet_name,
 114                             sizeof(pnetelem->pnet_name))) {
 115                        list_del(&pnetelem->list);
 116                        dev_put(pnetelem->ndev);
 117                        kfree(pnetelem);
 118                        rc = 0;
 119                        break;
 120                }
 121        }
 122        write_unlock(&smc_pnettable.lock);
 123        return rc;
 124}
 125
 126/* Remove a pnet entry mentioning a given network device from the pnet table.
 127 */
 128static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 129{
 130        struct smc_pnetentry *pnetelem, *tmp_pe;
 131        int rc = -ENOENT;
 132
 133        write_lock(&smc_pnettable.lock);
 134        list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
 135                                 list) {
 136                if (pnetelem->ndev == ndev) {
 137                        list_del(&pnetelem->list);
 138                        dev_put(pnetelem->ndev);
 139                        kfree(pnetelem);
 140                        rc = 0;
 141                        break;
 142                }
 143        }
 144        write_unlock(&smc_pnettable.lock);
 145        return rc;
 146}
 147
 148/* Remove a pnet entry mentioning a given ib device from the pnet table.
 149 */
 150int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
 151{
 152        struct smc_pnetentry *pnetelem, *tmp_pe;
 153        int rc = -ENOENT;
 154
 155        write_lock(&smc_pnettable.lock);
 156        list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
 157                                 list) {
 158                if (pnetelem->smcibdev == ibdev) {
 159                        list_del(&pnetelem->list);
 160                        dev_put(pnetelem->ndev);
 161                        kfree(pnetelem);
 162                        rc = 0;
 163                        break;
 164                }
 165        }
 166        write_unlock(&smc_pnettable.lock);
 167        return rc;
 168}
 169
 170/* Append a pnetid to the end of the pnet table if not already on this list.
 171 */
 172static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
 173{
 174        struct smc_pnetentry *pnetelem;
 175        int rc = -EEXIST;
 176
 177        write_lock(&smc_pnettable.lock);
 178        list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
 179                if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
 180                             sizeof(new_pnetelem->pnet_name)) ||
 181                    !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
 182                             sizeof(new_pnetelem->ndev->name)) ||
 183                    smc_pnet_same_ibname(pnetelem,
 184                                         new_pnetelem->smcibdev->ibdev->name,
 185                                         new_pnetelem->ib_port)) {
 186                        dev_put(pnetelem->ndev);
 187                        goto found;
 188                }
 189        }
 190        list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
 191        rc = 0;
 192found:
 193        write_unlock(&smc_pnettable.lock);
 194        return rc;
 195}
 196
 197/* The limit for pnetid is 16 characters.
 198 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
 199 * Lower case letters are converted to upper case.
 200 * Interior blanks should not be used.
 201 */
 202static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
 203{
 204        char *bf = skip_spaces(pnet_name);
 205        size_t len = strlen(bf);
 206        char *end = bf + len;
 207
 208        if (!len)
 209                return false;
 210        while (--end >= bf && isspace(*end))
 211                ;
 212        if (end - bf >= SMC_MAX_PNET_ID_LEN)
 213                return false;
 214        while (bf <= end) {
 215                if (!isalnum(*bf))
 216                        return false;
 217                *pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
 218                bf++;
 219        }
 220        *pnetid = '\0';
 221        return true;
 222}
 223
 224/* Find an infiniband device by a given name. The device might not exist. */
 225static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
 226{
 227        struct smc_ib_device *ibdev;
 228
 229        spin_lock(&smc_ib_devices.lock);
 230        list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 231                if (!strncmp(ibdev->ibdev->name, ib_name,
 232                             sizeof(ibdev->ibdev->name))) {
 233                        goto out;
 234                }
 235        }
 236        ibdev = NULL;
 237out:
 238        spin_unlock(&smc_ib_devices.lock);
 239        return ibdev;
 240}
 241
 242/* Parse the supplied netlink attributes and fill a pnetentry structure.
 243 * For ethernet and infiniband device names verify that the devices exist.
 244 */
 245static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
 246                               struct nlattr *tb[])
 247{
 248        char *string, *ibname;
 249        int rc;
 250
 251        memset(pnetelem, 0, sizeof(*pnetelem));
 252        INIT_LIST_HEAD(&pnetelem->list);
 253
 254        rc = -EINVAL;
 255        if (!tb[SMC_PNETID_NAME])
 256                goto error;
 257        string = (char *)nla_data(tb[SMC_PNETID_NAME]);
 258        if (!smc_pnetid_valid(string, pnetelem->pnet_name))
 259                goto error;
 260
 261        rc = -EINVAL;
 262        if (!tb[SMC_PNETID_ETHNAME])
 263                goto error;
 264        rc = -ENOENT;
 265        string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
 266        pnetelem->ndev = dev_get_by_name(net, string);
 267        if (!pnetelem->ndev)
 268                goto error;
 269
 270        rc = -EINVAL;
 271        if (!tb[SMC_PNETID_IBNAME])
 272                goto error;
 273        rc = -ENOENT;
 274        ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
 275        ibname = strim(ibname);
 276        pnetelem->smcibdev = smc_pnet_find_ib(ibname);
 277        if (!pnetelem->smcibdev)
 278                goto error;
 279
 280        rc = -EINVAL;
 281        if (!tb[SMC_PNETID_IBPORT])
 282                goto error;
 283        pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
 284        if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
 285                goto error;
 286
 287        return 0;
 288
 289error:
 290        if (pnetelem->ndev)
 291                dev_put(pnetelem->ndev);
 292        return rc;
 293}
 294
 295/* Convert an smc_pnetentry to a netlink attribute sequence */
 296static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
 297{
 298        if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
 299            nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
 300            nla_put_string(msg, SMC_PNETID_IBNAME,
 301                           pnetelem->smcibdev->ibdev->name) ||
 302            nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
 303                return -1;
 304        return 0;
 305}
 306
 307/* Retrieve one PNETID entry */
 308static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
 309{
 310        struct smc_pnetentry *pnetelem;
 311        struct sk_buff *msg;
 312        void *hdr;
 313        int rc;
 314
 315        if (!info->attrs[SMC_PNETID_NAME])
 316                return -EINVAL;
 317        pnetelem = smc_pnet_find_pnetid(
 318                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 319        if (!pnetelem)
 320                return -ENOENT;
 321        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 322        if (!msg)
 323                return -ENOMEM;
 324
 325        hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
 326                          &smc_pnet_nl_family, 0, SMC_PNETID_GET);
 327        if (!hdr) {
 328                rc = -EMSGSIZE;
 329                goto err_out;
 330        }
 331
 332        if (smc_pnet_set_nla(msg, pnetelem)) {
 333                rc = -ENOBUFS;
 334                goto err_out;
 335        }
 336
 337        genlmsg_end(msg, hdr);
 338        return genlmsg_reply(msg, info);
 339
 340err_out:
 341        nlmsg_free(msg);
 342        return rc;
 343}
 344
 345static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 346{
 347        struct net *net = genl_info_net(info);
 348        struct smc_pnetentry *pnetelem;
 349        int rc;
 350
 351        pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
 352        if (!pnetelem)
 353                return -ENOMEM;
 354        rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
 355        if (!rc)
 356                rc = smc_pnet_enter(pnetelem);
 357        if (rc) {
 358                kfree(pnetelem);
 359                return rc;
 360        }
 361        rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
 362        if (rc)
 363                smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
 364        return rc;
 365}
 366
 367static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
 368{
 369        if (!info->attrs[SMC_PNETID_NAME])
 370                return -EINVAL;
 371        return smc_pnet_remove_by_pnetid(
 372                                (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
 373}
 374
/* Start of a PNETID table dump: reset the resume position kept in
 * cb->args[0], which smc_pnet_dump() uses as the number of table entries
 * to skip. Always returns 0.
 */
static int smc_pnet_dump_start(struct netlink_callback *cb)
{
        cb->args[0] = 0;
        return 0;
}
 380
 381static int smc_pnet_dumpinfo(struct sk_buff *skb,
 382                             u32 portid, u32 seq, u32 flags,
 383                             struct smc_pnetentry *pnetelem)
 384{
 385        void *hdr;
 386
 387        hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
 388                          flags, SMC_PNETID_GET);
 389        if (!hdr)
 390                return -ENOMEM;
 391        if (smc_pnet_set_nla(skb, pnetelem) < 0) {
 392                genlmsg_cancel(skb, hdr);
 393                return -EMSGSIZE;
 394        }
 395        genlmsg_end(skb, hdr);
 396        return 0;
 397}
 398
 399static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
 400{
 401        struct smc_pnetentry *pnetelem;
 402        int idx = 0;
 403
 404        read_lock(&smc_pnettable.lock);
 405        list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
 406                if (idx++ < cb->args[0])
 407                        continue;
 408                if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
 409                                      cb->nlh->nlmsg_seq, NLM_F_MULTI,
 410                                      pnetelem)) {
 411                        --idx;
 412                        break;
 413                }
 414        }
 415        cb->args[0] = idx;
 416        read_unlock(&smc_pnettable.lock);
 417        return skb->len;
 418}
 419
 420/* Remove and delete all pnetids from pnet table.
 421 */
 422static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
 423{
 424        struct smc_pnetentry *pnetelem, *tmp_pe;
 425
 426        write_lock(&smc_pnettable.lock);
 427        list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
 428                                 list) {
 429                list_del(&pnetelem->list);
 430                dev_put(pnetelem->ndev);
 431                kfree(pnetelem);
 432        }
 433        write_unlock(&smc_pnettable.lock);
 434        return 0;
 435}
 436
/* SMC_PNETID generic netlink operation definition.
 *
 * Four commands are provided: GET (single entry via .doit, whole table via
 * .dumpit), ADD, DEL and FLUSH. All of them carry GENL_ADMIN_PERM and share
 * the smc_pnet_policy attribute policy.
 */
static const struct genl_ops smc_pnet_ops[] = {
        {
                .cmd = SMC_PNETID_GET,
                .flags = GENL_ADMIN_PERM,
                .policy = smc_pnet_policy,
                .doit = smc_pnet_get,
                .dumpit = smc_pnet_dump,
                .start = smc_pnet_dump_start
        },
        {
                .cmd = SMC_PNETID_ADD,
                .flags = GENL_ADMIN_PERM,
                .policy = smc_pnet_policy,
                .doit = smc_pnet_add
        },
        {
                .cmd = SMC_PNETID_DEL,
                .flags = GENL_ADMIN_PERM,
                .policy = smc_pnet_policy,
                .doit = smc_pnet_del
        },
        {
                .cmd = SMC_PNETID_FLUSH,
                .flags = GENL_ADMIN_PERM,
                .policy = smc_pnet_policy,
                .doit = smc_pnet_flush
        }
};
 466
/* SMC_PNETID family definition.
 *
 * Registered in smc_pnet_init() and unregistered in smc_pnet_exit(). The
 * family has no family-specific header (hdrsize 0) and accepts attributes
 * up to SMC_PNETID_MAX.
 */
static struct genl_family smc_pnet_nl_family = {
        .hdrsize = 0,
        .name = SMCR_GENL_FAMILY_NAME,
        .version = SMCR_GENL_FAMILY_VERSION,
        .maxattr = SMC_PNETID_MAX,
        .netnsok = true,
        .module = THIS_MODULE,
        .ops = smc_pnet_ops,
        .n_ops =  ARRAY_SIZE(smc_pnet_ops)
};
 478
 479static int smc_pnet_netdev_event(struct notifier_block *this,
 480                                 unsigned long event, void *ptr)
 481{
 482        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 483
 484        switch (event) {
 485        case NETDEV_REBOOT:
 486        case NETDEV_UNREGISTER:
 487                smc_pnet_remove_by_ndev(event_dev);
 488        default:
 489                break;
 490        }
 491        return NOTIFY_DONE;
 492}
 493
/* Notifier block hooking smc_pnet_netdev_event() into the netdevice
 * notifier chain; registered in smc_pnet_init().
 */
static struct notifier_block smc_netdev_notifier = {
        .notifier_call = smc_pnet_netdev_event
};
 497
 498int __init smc_pnet_init(void)
 499{
 500        int rc;
 501
 502        rc = genl_register_family(&smc_pnet_nl_family);
 503        if (rc)
 504                return rc;
 505        rc = register_netdevice_notifier(&smc_netdev_notifier);
 506        if (rc)
 507                genl_unregister_family(&smc_pnet_nl_family);
 508        return rc;
 509}
 510
 511void smc_pnet_exit(void)
 512{
 513        smc_pnet_flush(NULL, NULL);
 514        unregister_netdevice_notifier(&smc_netdev_notifier);
 515        genl_unregister_family(&smc_pnet_nl_family);
 516}
 517
 518/* PNET table analysis for a given sock:
 519 * determine ib_device and port belonging to used internal TCP socket
 520 * ethernet interface.
 521 */
 522void smc_pnet_find_roce_resource(struct sock *sk,
 523                                 struct smc_ib_device **smcibdev, u8 *ibport)
 524{
 525        struct dst_entry *dst = sk_dst_get(sk);
 526        struct smc_pnetentry *pnetelem;
 527
 528        *smcibdev = NULL;
 529        *ibport = 0;
 530
 531        if (!dst)
 532                return;
 533        if (!dst->dev)
 534                goto out_rel;
 535        read_lock(&smc_pnettable.lock);
 536        list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
 537                if (dst->dev == pnetelem->ndev) {
 538                        if (smc_ib_port_active(pnetelem->smcibdev,
 539                                               pnetelem->ib_port)) {
 540                                *smcibdev = pnetelem->smcibdev;
 541                                *ibport = pnetelem->ib_port;
 542                        }
 543                        break;
 544                }
 545        }
 546        read_unlock(&smc_pnettable.lock);
 547out_rel:
 548        dst_release(dst);
 549}
 550