linux/net/ipv4/udp_tunnel_nic.c
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (c) 2020 Facebook Inc.

#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/udp_tunnel.h>
#include <net/vxlan.h>

enum udp_tunnel_nic_table_entry_flags {
        UDP_TUNNEL_NIC_ENTRY_ADD        = BIT(0),
        UDP_TUNNEL_NIC_ENTRY_DEL        = BIT(1),
        UDP_TUNNEL_NIC_ENTRY_OP_FAIL    = BIT(2),
        UDP_TUNNEL_NIC_ENTRY_FROZEN     = BIT(3),
};
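
/* Lifecycle of the flags above: ADD and DEL mark an operation queued
 * towards the device, OP_FAIL records that the last operation on the
 * entry failed and must be retried, and FROZEN pins an entry while a
 * replay re-adds ports so its use count is not doubled up.
 */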

struct udp_tunnel_nic_table_entry {
        __be16 port;
        u8 type;
        u8 flags;
        u16 use_cnt;
#define UDP_TUNNEL_NIC_USE_CNT_MAX      U16_MAX
        u8 hw_priv;
};

/**
 * struct udp_tunnel_nic - UDP tunnel port offload state
 * @work:       async work for talking to hardware from process context
 * @dev:        netdev pointer
 * @need_sync:  at least one port state changed
 * @need_replay: space was freed, we need a replay of all ports
 * @work_pending: @work is currently scheduled
 * @n_tables:   number of tables under @entries
 * @missed:     bitmap of tables which overflowed
 * @entries:    table of tables of ports currently offloaded
 */
struct udp_tunnel_nic {
        struct work_struct work;

        struct net_device *dev;

        u8 need_sync:1;
        u8 need_replay:1;
        u8 work_pending:1;

        unsigned int n_tables;
        unsigned long missed;
        struct udp_tunnel_nic_table_entry **entries;
};

/* Work structs are guaranteed to finish before the driver state they use
 * goes away, but nothing pins this module's code, so we need a workqueue
 * we can flush before the module gets removed.
 */
static struct workqueue_struct *udp_tunnel_nic_workqueue;

static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
{
        switch (type) {
        case UDP_TUNNEL_TYPE_VXLAN:
                return "vxlan";
        case UDP_TUNNEL_TYPE_GENEVE:
                return "geneve";
        case UDP_TUNNEL_TYPE_VXLAN_GPE:
                return "vxlan-gpe";
        default:
                return "unknown";
        }
}

static bool
udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
{
        return entry->use_cnt == 0 && !entry->flags;
}

static bool
udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
{
        return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
}

static bool
udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
{
        return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
{
        if (!udp_tunnel_nic_entry_is_free(entry))
                entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static void
udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
{
        entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
}

static bool
udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
{
        return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
                               UDP_TUNNEL_NIC_ENTRY_DEL);
}

static void
udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
                           struct udp_tunnel_nic_table_entry *entry,
                           unsigned int flag)
{
        entry->flags |= flag;
        utn->need_sync = 1;
}

static void
udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
                             struct udp_tunnel_info *ti)
{
        memset(ti, 0, sizeof(*ti));
        ti->port = entry->port;
        ti->type = entry->type;
        ti->hw_priv = entry->hw_priv;
}

static bool
udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++)
                        if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
                                return false;
        return true;
}

static bool
udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_table_info *table;
        unsigned int i, j;

        if (!utn->missed)
                return false;

        for (i = 0; i < utn->n_tables; i++) {
                table = &dev->udp_tunnel_nic_info->tables[i];
                if (!test_bit(i, &utn->missed))
                        continue;

                for (j = 0; j < table->n_entries; j++)
                        if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
                                return true;
        }

        return false;
}

static void
__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
                          unsigned int idx, struct udp_tunnel_info *ti)
{
        struct udp_tunnel_nic_table_entry *entry;
        struct udp_tunnel_nic *utn;

        utn = dev->udp_tunnel_nic;
        entry = &utn->entries[table][idx];

        if (entry->use_cnt)
                udp_tunnel_nic_ti_from_entry(entry, ti);
}

static void
__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
                               unsigned int idx, u8 priv)
{
        dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
}

static void
udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
                                 int err)
{
        bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;

        WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
                     entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);

        if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
            (!err || (err == -EEXIST && dodgy)))
                entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;

        if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
            (!err || (err == -ENOENT && dodgy)))
                entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;

        if (!err)
                entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
        else
                entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
}
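
/* Note on the OP_FAIL reconciliation above: if an earlier op on the entry
 * failed ("dodgy"), the device state is unknown, so -EEXIST on a retried
 * add (or -ENOENT on a retried delete) means the port is already in the
 * requested state and the op is treated as complete.
 */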

static void
udp_tunnel_nic_device_sync_one(struct net_device *dev,
                               struct udp_tunnel_nic *utn,
                               unsigned int table, unsigned int idx)
{
        struct udp_tunnel_nic_table_entry *entry;
        struct udp_tunnel_info ti;
        int err;

        entry = &utn->entries[table][idx];
        if (!udp_tunnel_nic_entry_is_queued(entry))
                return;

        udp_tunnel_nic_ti_from_entry(entry, &ti);
        if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
                err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
        else
                err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
                                                           &ti);
        udp_tunnel_nic_entry_update_done(entry, err);

        if (err)
                netdev_warn(dev,
                            "UDP tunnel port sync failed port %d type %s: %d\n",
                            be16_to_cpu(entry->port),
                            udp_tunnel_nic_tunnel_type_name(entry->type),
                            err);
}

static void
udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
                                   struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++)
                        udp_tunnel_nic_device_sync_one(dev, utn, i, j);
}

static void
udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
                                    struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        unsigned int i, j;
        int err;

        for (i = 0; i < utn->n_tables; i++) {
                /* Find something that needs sync in this table */
                for (j = 0; j < info->tables[i].n_entries; j++)
                        if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
                                break;
                if (j == info->tables[i].n_entries)
                        continue;

                err = info->sync_table(dev, i);
                if (err)
                        netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
                                    i, err);

                for (j = 0; j < info->tables[i].n_entries; j++) {
                        struct udp_tunnel_nic_table_entry *entry;

                        entry = &utn->entries[i][j];
                        if (udp_tunnel_nic_entry_is_queued(entry))
                                udp_tunnel_nic_entry_update_done(entry, err);
                }
        }
}

static void
__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        if (!utn->need_sync)
                return;

        if (dev->udp_tunnel_nic_info->sync_table)
                udp_tunnel_nic_device_sync_by_table(dev, utn);
        else
                udp_tunnel_nic_device_sync_by_port(dev, utn);

        utn->need_sync = 0;
        /* Can't replay directly here, in case we come from the tunnel driver's
         * notification - trying to replay may deadlock inside tunnel driver.
         */
        utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
}

static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        bool may_sleep;

        if (!utn->need_sync)
                return;

        /* Drivers which sleep in the callback need to update from
         * the workqueue, if we come from the tunnel driver's notification.
         */
        may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
        if (!may_sleep)
                __udp_tunnel_nic_device_sync(dev, utn);
        if (may_sleep || utn->need_replay) {
                queue_work(udp_tunnel_nic_workqueue, &utn->work);
                utn->work_pending = 1;
        }
}

static bool
udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
                                struct udp_tunnel_info *ti)
{
        return table->tunnel_types & ti->type;
}

static bool
udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
                          struct udp_tunnel_info *ti)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        unsigned int i;

        /* Special case IPv4-only NICs */
        if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
            ti->sa_family != AF_INET)
                return false;

        for (i = 0; i < utn->n_tables; i++)
                if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
                        return true;
        return false;
}

static int
udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
                             struct udp_tunnel_info *ti)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic_table_entry *entry;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++) {
                        entry = &utn->entries[i][j];

                        if (!udp_tunnel_nic_entry_is_free(entry) &&
                            entry->port == ti->port &&
                            entry->type != ti->type) {
                                __set_bit(i, &utn->missed);
                                return true;
                        }
                }
        return false;
}

static void
udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
                         unsigned int table, unsigned int idx, int use_cnt_adj)
{
        struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
        bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
        unsigned int from, to;

        WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);

        /* If not going from used to unused or vice versa - all done.
         * For dodgy entries make sure we try to sync again (queue the entry).
         */
        entry->use_cnt += use_cnt_adj;
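        /* (!use_cnt == !(use_cnt - use_cnt_adj)) compares "is zero" before
         * and after the adjustment; equal means the entry did not cross
         * the used<->unused boundary.
         */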
        if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
                return;

        /* Cancel the op before it was sent to the device, if possible,
         * otherwise we'd need to take special care to issue commands
         * in the same order the ports arrived.
         */
        if (use_cnt_adj < 0) {
                from = UDP_TUNNEL_NIC_ENTRY_ADD;
                to = UDP_TUNNEL_NIC_ENTRY_DEL;
        } else {
                from = UDP_TUNNEL_NIC_ENTRY_DEL;
                to = UDP_TUNNEL_NIC_ENTRY_ADD;
        }

        if (entry->flags & from) {
                entry->flags &= ~from;
                if (!dodgy)
                        return;
        }

        udp_tunnel_nic_entry_queue(utn, entry, to);
}

static bool
udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
                             unsigned int table, unsigned int idx,
                             struct udp_tunnel_info *ti, int use_cnt_adj)
{
        struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];

        if (udp_tunnel_nic_entry_is_free(entry) ||
            entry->port != ti->port ||
            entry->type != ti->type)
                return false;

        if (udp_tunnel_nic_entry_is_frozen(entry))
                return true;

        udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
        return true;
}

/* Try to find existing matching entry and adjust its use count, instead of
 * adding a new one. Returns true if entry was found. In case of delete the
 * entry may have gotten removed in the process, in which case it will be
 * queued for removal.
 */
static bool
udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
                            struct udp_tunnel_info *ti, int use_cnt_adj)
{
        const struct udp_tunnel_nic_table_info *table;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++) {
                table = &dev->udp_tunnel_nic_info->tables[i];
                if (!udp_tunnel_nic_table_is_capable(table, ti))
                        continue;

                for (j = 0; j < table->n_entries; j++)
                        if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
                                                         use_cnt_adj))
                                return true;
        }

        return false;
}

static bool
udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
                            struct udp_tunnel_info *ti)
{
        return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
}

static bool
udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
                            struct udp_tunnel_info *ti)
{
        return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
}

static bool
udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
                       struct udp_tunnel_info *ti)
{
        const struct udp_tunnel_nic_table_info *table;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++) {
                table = &dev->udp_tunnel_nic_info->tables[i];
                if (!udp_tunnel_nic_table_is_capable(table, ti))
                        continue;

                for (j = 0; j < table->n_entries; j++) {
                        struct udp_tunnel_nic_table_entry *entry;

                        entry = &utn->entries[i][j];
                        if (!udp_tunnel_nic_entry_is_free(entry))
                                continue;

                        entry->port = ti->port;
                        entry->type = ti->type;
                        entry->use_cnt = 1;
                        udp_tunnel_nic_entry_queue(utn, entry,
                                                   UDP_TUNNEL_NIC_ENTRY_ADD);
                        return true;
                }

                /* A different table may still fit this port in, but no
                 * current device has multiple tables accepting the same
                 * tunnel type, and false positives are okay.
                 */
                __set_bit(i, &utn->missed);
        }

        return false;
}

static void
__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic *utn;

        utn = dev->udp_tunnel_nic;
        if (!utn)
                return;
        if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
                return;
        if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN &&
            ti->port == htons(IANA_VXLAN_UDP_PORT)) {
                if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
                        netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n");
                return;
        }

        if (!udp_tunnel_nic_is_capable(dev, utn, ti))
                return;

        /* It may happen that a tunnel of one type is removed and a different
         * tunnel type tries to reuse its port before the device was informed.
         * Rely on utn->missed to re-add this port later.
         */
        if (udp_tunnel_nic_has_collision(dev, utn, ti))
                return;

        if (!udp_tunnel_nic_add_existing(dev, utn, ti))
                udp_tunnel_nic_add_new(dev, utn, ti);

        udp_tunnel_nic_device_sync(dev, utn);
}
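
/* __udp_tunnel_nic_add_port() is reached through the udp_tunnel_nic_add_port()
 * wrapper in include/net/udp_tunnel.h, which dispatches via udp_tunnel_nic_ops.
 * Rough sketch of the call path, simplified from the tunnel core:
 *
 *      // tunnel driver (e.g. vxlan) announces its RX port
 *      udp_tunnel_push_rx_port(dev, sock, UDP_TUNNEL_TYPE_VXLAN);
 *              -> udp_tunnel_nic_add_port(dev, &ti);
 *                      -> udp_tunnel_nic_ops->add_port(dev, &ti);  // this file
 */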

static void
__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
{
        struct udp_tunnel_nic *utn;

        utn = dev->udp_tunnel_nic;
        if (!utn)
                return;

        if (!udp_tunnel_nic_is_capable(dev, utn, ti))
                return;

        udp_tunnel_nic_del_existing(dev, utn, ti);

        udp_tunnel_nic_device_sync(dev, utn);
}

static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic *utn;
        unsigned int i, j;

        ASSERT_RTNL();

        utn = dev->udp_tunnel_nic;
        if (!utn)
                return;

        utn->need_sync = false;
        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++) {
                        struct udp_tunnel_nic_table_entry *entry;

                        entry = &utn->entries[i][j];

                        entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
                                          UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
                        /* We don't release rtnl across ops */
                        WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
                        if (!entry->use_cnt)
                                continue;

                        udp_tunnel_nic_entry_queue(utn, entry,
                                                   UDP_TUNNEL_NIC_ENTRY_ADD);
                }

        __udp_tunnel_nic_device_sync(dev, utn);
}

static size_t
__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic *utn;
        unsigned int j;
        size_t size;

        utn = dev->udp_tunnel_nic;
        if (!utn)
                return 0;

        size = 0;
        for (j = 0; j < info->tables[table].n_entries; j++) {
                if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
                        continue;

                size += nla_total_size(0) +              /* _TABLE_ENTRY */
                        nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
                        nla_total_size(sizeof(u32));     /* _ENTRY_TYPE */
        }

        return size;
}

static int
__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
                            struct sk_buff *skb)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic *utn;
        struct nlattr *nest;
        unsigned int j;

        utn = dev->udp_tunnel_nic;
        if (!utn)
                return 0;

        for (j = 0; j < info->tables[table].n_entries; j++) {
                if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
                        continue;

                nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);

                if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
                                 utn->entries[table][j].port) ||
                    nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
                                ilog2(utn->entries[table][j].type)))
                        goto err_cancel;

                nla_nest_end(skb, nest);
        }

        return 0;

err_cancel:
        nla_nest_cancel(skb, nest);
        return -EMSGSIZE;
}

static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
        .get_port       = __udp_tunnel_nic_get_port,
        .set_port_priv  = __udp_tunnel_nic_set_port_priv,
        .add_port       = __udp_tunnel_nic_add_port,
        .del_port       = __udp_tunnel_nic_del_port,
        .reset_ntf      = __udp_tunnel_nic_reset_ntf,
        .dump_size      = __udp_tunnel_nic_dump_size,
        .dump_write     = __udp_tunnel_nic_dump_write,
};
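
/* These ops are published through the global udp_tunnel_nic_ops pointer in
 * udp_tunnel_nic_init_module() below; the stack reaches them through the
 * udp_tunnel_nic_*() wrappers in include/net/udp_tunnel.h, which are no-ops
 * while the pointer is NULL.
 */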

static void
udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        unsigned int i, j;

        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++) {
                        int adj_cnt = -utn->entries[i][j].use_cnt;

                        if (adj_cnt)
                                udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
                }

        __udp_tunnel_nic_device_sync(dev, utn);

        for (i = 0; i < utn->n_tables; i++)
                memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
                                                      sizeof(**utn->entries)));
        WARN_ON(utn->need_sync);
        utn->need_replay = 0;
}

static void
udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic_shared_node *node;
        unsigned int i, j;

        /* Freeze all the ports we are already tracking so that the replay
         * does not double up the refcount.
         */
        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++)
                        udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
        utn->missed = 0;
        utn->need_replay = 0;

        if (!info->shared) {
                udp_tunnel_get_rx_info(dev);
        } else {
                list_for_each_entry(node, &info->shared->devices, list)
                        udp_tunnel_get_rx_info(node->dev);
        }

        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++)
                        udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
}

static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
{
        struct udp_tunnel_nic *utn =
                container_of(work, struct udp_tunnel_nic, work);

        rtnl_lock();
        utn->work_pending = 0;
        __udp_tunnel_nic_device_sync(utn->dev, utn);

        if (utn->need_replay)
                udp_tunnel_nic_replay(utn->dev, utn);
        rtnl_unlock();
}

static struct udp_tunnel_nic *
udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
                     unsigned int n_tables)
{
        struct udp_tunnel_nic *utn;
        unsigned int i;

        utn = kzalloc(sizeof(*utn), GFP_KERNEL);
        if (!utn)
                return NULL;
        utn->n_tables = n_tables;
        INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);

        utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL);
        if (!utn->entries)
                goto err_free_utn;

        for (i = 0; i < n_tables; i++) {
                utn->entries[i] = kcalloc(info->tables[i].n_entries,
                                          sizeof(*utn->entries[i]), GFP_KERNEL);
                if (!utn->entries[i])
                        goto err_free_prev_entries;
        }

        return utn;

err_free_prev_entries:
        while (i--)
                kfree(utn->entries[i]);
        kfree(utn->entries);
err_free_utn:
        kfree(utn);
        return NULL;
}

static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
{
        unsigned int i;

        for (i = 0; i < utn->n_tables; i++)
                kfree(utn->entries[i]);
        kfree(utn->entries);
        kfree(utn);
}

static int udp_tunnel_nic_register(struct net_device *dev)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
        struct udp_tunnel_nic_shared_node *node = NULL;
        struct udp_tunnel_nic *utn;
        unsigned int n_tables, i;

        BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
                     UDP_TUNNEL_NIC_MAX_TABLES);
        /* Expect use count of at most 2 (IPv4, IPv6) per device */
        BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
                     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);

        /* Check that the driver info is sane */
        if (WARN_ON(!info->set_port != !info->unset_port) ||
            WARN_ON(!info->set_port == !info->sync_table) ||
            WARN_ON(!info->tables[0].n_entries))
                return -EINVAL;

        if (WARN_ON(info->shared &&
                    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
                return -EINVAL;

        n_tables = 1;
        for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
                if (!info->tables[i].n_entries)
                        continue;

                n_tables++;
                if (WARN_ON(!info->tables[i - 1].n_entries))
                        return -EINVAL;
        }

        /* Create UDP tunnel state structures */
        if (info->shared) {
                node = kzalloc(sizeof(*node), GFP_KERNEL);
                if (!node)
                        return -ENOMEM;

                node->dev = dev;
        }

        if (info->shared && info->shared->udp_tunnel_nic_info) {
                utn = info->shared->udp_tunnel_nic_info;
        } else {
                utn = udp_tunnel_nic_alloc(info, n_tables);
                if (!utn) {
                        kfree(node);
                        return -ENOMEM;
                }
        }

        if (info->shared) {
                if (!info->shared->udp_tunnel_nic_info) {
                        INIT_LIST_HEAD(&info->shared->devices);
                        info->shared->udp_tunnel_nic_info = utn;
                }

                list_add_tail(&node->list, &info->shared->devices);
        }

        utn->dev = dev;
        dev_hold(dev);
        dev->udp_tunnel_nic = utn;

        if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
                udp_tunnel_get_rx_info(dev);

        return 0;
}
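
/* A driver opts in by pointing dev->udp_tunnel_nic_info at a static info
 * struct before register_netdev().  A minimal sketch, modeled on in-tree
 * users; the set/unset callback names here are hypothetical:
 *
 *      static const struct udp_tunnel_nic_info foo_udp_tunnels = {
 *              .set_port       = foo_udp_tunnel_set_port,
 *              .unset_port     = foo_udp_tunnel_unset_port,
 *              .flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
 *              .tables         = {
 *                      { .n_entries = 4, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
 *                      { .n_entries = 4, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
 *              },
 *      };
 */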

static void
udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;

        /* For a shared table, remove this dev from the list of sharing
         * devices; if other devices remain, just detach.
         */
        if (info->shared) {
                struct udp_tunnel_nic_shared_node *node, *first;

                list_for_each_entry(node, &info->shared->devices, list)
                        if (node->dev == dev)
                                break;
                if (node->dev != dev)
                        return;

                list_del(&node->list);
                kfree(node);

                first = list_first_entry_or_null(&info->shared->devices,
                                                 typeof(*first), list);
                if (first) {
                        udp_tunnel_drop_rx_info(dev);
                        utn->dev = first->dev;
                        goto release_dev;
                }

                info->shared->udp_tunnel_nic_info = NULL;
        }

        /* Flush before we check work, so we don't waste time adding entries
         * from the work only to remove them again immediately.
         */
        udp_tunnel_nic_flush(dev, utn);

        /* Wait for the work to be done using the state; netdev core will
         * retry unregister until we give up our reference on this device.
         */
        if (utn->work_pending)
                return;

        udp_tunnel_nic_free(utn);
release_dev:
        dev->udp_tunnel_nic = NULL;
        dev_put(dev);
}

static int
udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
                               unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        const struct udp_tunnel_nic_info *info;
        struct udp_tunnel_nic *utn;

        info = dev->udp_tunnel_nic_info;
        if (!info)
                return NOTIFY_DONE;

        if (event == NETDEV_REGISTER) {
                int err;

                err = udp_tunnel_nic_register(dev);
                if (err)
                        netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
                return notifier_from_errno(err);
        }
        /* All other events will need the udp_tunnel_nic state */
        utn = dev->udp_tunnel_nic;
        if (!utn)
                return NOTIFY_DONE;

        if (event == NETDEV_UNREGISTER) {
                udp_tunnel_nic_unregister(dev, utn);
                return NOTIFY_OK;
        }

        /* All other events only matter if NIC has to be programmed open */
        if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
                return NOTIFY_DONE;

        if (event == NETDEV_UP) {
                WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
                udp_tunnel_get_rx_info(dev);
                return NOTIFY_OK;
        }
        if (event == NETDEV_GOING_DOWN) {
                udp_tunnel_nic_flush(dev, utn);
                return NOTIFY_OK;
        }

        return NOTIFY_DONE;
}

static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
        .notifier_call = udp_tunnel_nic_netdevice_event,
};

static int __init udp_tunnel_nic_init_module(void)
{
        int err;

        udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
        if (!udp_tunnel_nic_workqueue)
                return -ENOMEM;

        rtnl_lock();
        udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
        rtnl_unlock();

        err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
        if (err)
                goto err_unset_ops;

        return 0;

err_unset_ops:
        rtnl_lock();
        udp_tunnel_nic_ops = NULL;
        rtnl_unlock();
        destroy_workqueue(udp_tunnel_nic_workqueue);
        return err;
}
late_initcall(udp_tunnel_nic_init_module);

static void __exit udp_tunnel_nic_cleanup_module(void)
{
        unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);

        rtnl_lock();
        udp_tunnel_nic_ops = NULL;
        rtnl_unlock();

        destroy_workqueue(udp_tunnel_nic_workqueue);
}
module_exit(udp_tunnel_nic_cleanup_module);

MODULE_LICENSE("GPL");
 974