// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c  Packet classifier API.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

#ifdef CONFIG_NET_CLS_ACT
DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
EXPORT_SYMBOL(tc_skb_ext_tc);

void tc_skb_ext_tc_enable(void)
{
        static_branch_inc(&tc_skb_ext_tc);
}
EXPORT_SYMBOL(tc_skb_ext_tc_enable);

void tc_skb_ext_tc_disable(void)
{
        static_branch_dec(&tc_skb_ext_tc);
}
EXPORT_SYMBOL(tc_skb_ext_tc_disable);
#endif
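
/* Usage sketch (illustrative, not taken from this file): a user of the
 * tc_skb_ext_tc key pairs the enable/disable calls around the lifetime of
 * the feature, so the static branch in the fast path stays patched out
 * while unused. The foo_* names are placeholders:
 *
 *	err = foo_setup();
 *	if (!err)
 *		tc_skb_ext_tc_enable();
 *	...
 *	tc_skb_ext_tc_disable();
 *	foo_teardown();
 */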

static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
        return jhash_3words(tp->chain->index, tp->prio,
                            (__force __u32)tp->protocol, 0);
}

static void tcf_proto_signal_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
                     destroy_obj_hashfn(tp));
        mutex_unlock(&block->proto_destroy_lock);
}

static bool tcf_proto_cmp(const struct tcf_proto *tp1,
                          const struct tcf_proto *tp2)
{
        return tp1->chain->index == tp2->chain->index &&
               tp1->prio == tp2->prio &&
               tp1->protocol == tp2->protocol;
}

static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        u32 hash = destroy_obj_hashfn(tp);
        struct tcf_proto *iter;
        bool found = false;

        rcu_read_lock();
        hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
                                   destroy_ht_node, hash) {
                if (tcf_proto_cmp(tp, iter)) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();

        return found;
}

static void
tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        if (hash_hashed(&tp->destroy_ht_node))
                hash_del_rcu(&tp->destroy_ht_node);
        mutex_unlock(&block->proto_destroy_lock);
}

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
        const struct tcf_proto_ops *t, *res = NULL;

        if (kind) {
                read_lock(&cls_mod_lock);
                list_for_each_entry(t, &tcf_proto_base, head) {
                        if (strcmp(kind, t->kind) == 0) {
                                if (try_module_get(t->owner))
                                        res = t;
                                break;
                        }
                }
                read_unlock(&cls_mod_lock);
        }
        return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
                     struct netlink_ext_ack *extack)
{
        const struct tcf_proto_ops *ops;

        ops = __tcf_proto_lookup_ops(kind);
        if (ops)
                return ops;
#ifdef CONFIG_MODULES
        if (rtnl_held)
                rtnl_unlock();
        request_module("cls_%s", kind);
        if (rtnl_held)
                rtnl_lock();
        ops = __tcf_proto_lookup_ops(kind);
        /* We dropped the RTNL semaphore in order to perform
         * the module load. So, even if we succeeded in loading
         * the module we have to replay the request. We indicate
         * this using -EAGAIN.
         */
        if (ops) {
                module_put(ops->owner);
                return ERR_PTR(-EAGAIN);
        }
#endif
        NL_SET_ERR_MSG(extack, "TC classifier not found");
        return ERR_PTR(-ENOENT);
}

/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -EEXIST;

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head)
                if (!strcmp(ops->kind, t->kind))
                        goto out;

        list_add_tail(&ops->head, &tcf_proto_base);
        rc = 0;
out:
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -ENOENT;

        /* Wait for outstanding call_rcu()s, if any, from a
         * tcf_proto_ops's destroy() handler.
         */
        rcu_barrier();
        flush_workqueue(tc_filter_wq);

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head) {
                if (t == ops) {
                        list_del(&t->head);
                        rc = 0;
                        break;
                }
        }
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
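
/* Usage sketch (illustrative, not part of this file): a classifier module
 * registers its ops on load and unregisters on unload. The "foo" kind and
 * the foo_* callbacks are placeholders:
 *
 *	static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *		.kind		= "foo",
 *		.classify	= foo_classify,
 *		.init		= foo_init,
 *		.destroy	= foo_destroy,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init cls_foo_init(void)
 *	{
 *		return register_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 *	static void __exit cls_foo_exit(void)
 *	{
 *		unregister_tcf_proto_ops(&cls_foo_ops);
 *	}
 */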

bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
        INIT_RCU_WORK(rwork, func);
        return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);
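
/* Usage sketch (illustrative): classifiers typically free filters only after
 * an RCU grace period by embedding a struct rcu_work in the filter and
 * queueing it here; the work function then performs the actual free. The
 * foo_* names are placeholders:
 *
 *	static void foo_delete_filter_work(struct work_struct *work)
 *	{
 *		struct foo_filter *f = container_of(to_rcu_work(work),
 *						    struct foo_filter, rwork);
 *		kfree(f);
 *	}
 *
 *	tcf_queue_work(&f->rwork, foo_delete_filter_work);
 */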

/* Select a new prio value from the range managed by the kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
        u32 first = TC_H_MAKE(0xC0000000U, 0U);

        if (tp)
                first = tp->prio - 1;

        return TC_H_MAJ(first);
}
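
/* Worked example (values assumed): prio lives in the major part of the
 * handle, i.e. tp->prio == user_prio << 16. If the head of the chain was
 * installed with user prio 12 (tp->prio == 0x000C0000), then
 * TC_H_MAJ(0x000C0000 - 1) == 0x000B0000, so the new filter is auto-assigned
 * user prio 11. On an empty chain the default 0xC0000000 yields user prio
 * 0xC000 (49152).
 */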

static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
{
        if (kind)
                return nla_strscpy(name, kind, IFNAMSIZ) < 0;
        memset(name, 0, IFNAMSIZ);
        return false;
}

static bool tcf_proto_is_unlocked(const char *kind)
{
        const struct tcf_proto_ops *ops;
        bool ret;

        if (strlen(kind) == 0)
                return false;

        ops = tcf_proto_lookup_ops(kind, false, NULL);
        /* On error return false to take rtnl lock. Proto lookup/create
         * functions will perform lookup again and properly handle errors.
         */
        if (IS_ERR(ops))
                return false;

        ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
        module_put(ops->owner);
        return ret;
}

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
                                          bool rtnl_held,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_proto *tp;
        int err;

        tp = kzalloc(sizeof(*tp), GFP_KERNEL);
        if (!tp)
                return ERR_PTR(-ENOBUFS);

        tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto errout;
        }
        tp->classify = tp->ops->classify;
        tp->protocol = protocol;
        tp->prio = prio;
        tp->chain = chain;
        spin_lock_init(&tp->lock);
        refcount_set(&tp->refcnt, 1);

        err = tp->ops->init(tp);
        if (err) {
                module_put(tp->ops->owner);
                goto errout;
        }
        return tp;

errout:
        kfree(tp);
        return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
        refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
                              bool sig_destroy, struct netlink_ext_ack *extack)
{
        tp->ops->destroy(tp, rtnl_held, extack);
        if (sig_destroy)
                tcf_proto_signal_destroyed(tp->chain, tp);
        tcf_chain_put(tp->chain);
        module_put(tp->ops->owner);
        kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
                          struct netlink_ext_ack *extack)
{
        if (refcount_dec_and_test(&tp->refcnt))
                tcf_proto_destroy(tp, rtnl_held, true, extack);
}

static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
        if (tp->ops->delete_empty)
                return tp->ops->delete_empty(tp);

        tp->deleting = true;
        return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
        spin_lock(&tp->lock);
        tp->deleting = true;
        spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
        bool deleting;

        spin_lock(&tp->lock);
        deleting = tp->deleting;
        spin_unlock(&tp->lock);

        return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)                                      \
        lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
        struct list_head list;
        tcf_chain_head_change_t *chain_head_change;
        void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
        if (!chain)
                return NULL;
        list_add_tail_rcu(&chain->list, &block->chain_list);
        mutex_init(&chain->filter_chain_lock);
        chain->block = block;
        chain->index = chain_index;
        chain->refcnt = 1;
        if (!chain->index)
                block->chain0.chain = chain;
        return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
                                       struct tcf_proto *tp_head)
{
        if (item->chain_head_change)
                item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
                                   struct tcf_proto *tp_head)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_block *block = chain->block;

        if (chain->index)
                return;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list)
                tcf_chain_head_change_item(item, tp_head);
        mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
        struct tcf_block *block = chain->block;

        ASSERT_BLOCK_LOCKED(block);

        list_del_rcu(&chain->list);
        if (!chain->index)
                block->chain0.chain = NULL;

        if (list_empty(&block->chain_list) &&
            refcount_read(&block->refcnt) == 0)
                return true;

        return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
        mutex_destroy(&block->lock);
        mutex_destroy(&block->proto_destroy_lock);
        kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
        struct tcf_block *block = chain->block;

        mutex_destroy(&chain->filter_chain_lock);
        kfree_rcu(chain, rcu);
        if (free_block)
                tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        ++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        /* In case all the references are action references, this
         * chain should not be shown to the user.
         */
        return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        list_for_each_entry(chain, &block->chain_list, list) {
                if (chain->index == chain_index)
                        return chain;
        }
        return NULL;
}

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
                                              u32 chain_index)
{
        struct tcf_chain *chain;

        list_for_each_entry_rcu(chain, &block->chain_list, list) {
                if (chain->index == chain_index)
                        return chain;
        }
        return NULL;
}
#endif

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
                           u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
                                         u32 chain_index, bool create,
                                         bool by_act)
{
        struct tcf_chain *chain = NULL;
        bool is_first_reference;

        mutex_lock(&block->lock);
        chain = tcf_chain_lookup(block, chain_index);
        if (chain) {
                tcf_chain_hold(chain);
        } else {
                if (!create)
                        goto errout;
                chain = tcf_chain_create(block, chain_index);
                if (!chain)
                        goto errout;
        }

        if (by_act)
                ++chain->action_refcnt;
        is_first_reference = chain->refcnt - chain->action_refcnt == 1;
        mutex_unlock(&block->lock);

        /* Send notification only in case we got the first
         * non-action reference. Until then, the chain acts only as
         * a placeholder for actions pointing to it and the user ought
         * not to know about them.
         */
        if (is_first_reference && !by_act)
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);

        return chain;

errout:
        mutex_unlock(&block->lock);
        return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
                                       bool create)
{
        return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
        return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
                               void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
                                  void *tmplt_priv, u32 chain_index,
                                  struct tcf_block *block, struct sk_buff *oskb,
                                  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
                            bool explicitly_created)
{
        struct tcf_block *block = chain->block;
        const struct tcf_proto_ops *tmplt_ops;
        bool free_block = false;
        unsigned int refcnt;
        void *tmplt_priv;

        mutex_lock(&block->lock);
        if (explicitly_created) {
                if (!chain->explicitly_created) {
                        mutex_unlock(&block->lock);
                        return;
                }
                chain->explicitly_created = false;
        }

        if (by_act)
                chain->action_refcnt--;

        /* tc_chain_notify_delete can't be called while holding the block lock.
         * However, when the block is unlocked the chain can be changed
         * concurrently, so save these to temporary variables.
         */
        refcnt = --chain->refcnt;
        tmplt_ops = chain->tmplt_ops;
        tmplt_priv = chain->tmplt_priv;

        /* The last dropped non-action reference will trigger notification. */
        if (refcnt - chain->action_refcnt == 0 && !by_act) {
                tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
                                       block, NULL, 0, 0, false);
                /* Last reference to chain, no need to lock. */
                chain->flushing = false;
        }

        if (refcnt == 0)
                free_block = tcf_chain_detach(chain);
        mutex_unlock(&block->lock);

        if (refcnt == 0) {
                tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
                tcf_chain_destroy(chain, free_block);
        }
}

static void tcf_chain_put(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
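
/* Usage sketch (illustrative): an action that references a chain (e.g. for
 * goto_chain) pins it with the _by_act pair for the action's lifetime, so
 * the chain exists as a placeholder without becoming user-visible:
 *
 *	chain = tcf_chain_get_by_act(block, chain_index);
 *	if (!chain)
 *		return -ENOMEM;
 *	...
 *	tcf_chain_put_by_act(chain);	// when the action is released
 */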

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, true);
}

static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
        struct tcf_proto *tp, *tp_next;

        mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_signal_destroying(chain, tp);
                tp = tp_next;
        }
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        RCU_INIT_POINTER(chain->filter_chain, NULL);
        tcf_chain0_head_change(chain, NULL);
        chain->flushing = true;
        mutex_unlock(&chain->filter_chain_lock);

        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_put(tp, rtnl_held, NULL);
                tp = tp_next;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo);

static void tcf_block_offload_init(struct flow_block_offload *bo,
                                   struct net_device *dev, struct Qdisc *sch,
                                   enum flow_block_command command,
                                   enum flow_block_binder_type binder_type,
                                   struct flow_block *flow_block,
                                   bool shared, struct netlink_ext_ack *extack)
{
        bo->net = dev_net(dev);
        bo->command = command;
        bo->binder_type = binder_type;
        bo->block = flow_block;
        bo->block_shared = shared;
        bo->extack = extack;
        bo->sch = sch;
        bo->cb_list_head = &flow_block->cb_list;
        INIT_LIST_HEAD(&bo->cb_list);
}

static void tcf_block_unbind(struct tcf_block *block,
                             struct flow_block_offload *bo);

static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
        struct tcf_block *block = block_cb->indr.data;
        struct net_device *dev = block_cb->indr.dev;
        struct Qdisc *sch = block_cb->indr.sch;
        struct netlink_ext_ack extack = {};
        struct flow_block_offload bo = {};

        tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
                               block_cb->indr.binder_type,
                               &block->flow_block, tcf_block_shared(block),
                               &extack);
        rtnl_lock();
        down_write(&block->cb_lock);
        list_del(&block_cb->driver_list);
        list_move(&block_cb->list, &bo.cb_list);
        tcf_block_unbind(block, &bo);
        up_write(&block->cb_lock);
        rtnl_unlock();
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
        return atomic_read(&block->offloadcnt);
}

static int tcf_block_offload_cmd(struct tcf_block *block,
                                 struct net_device *dev, struct Qdisc *sch,
                                 struct tcf_block_ext_info *ei,
                                 enum flow_block_command command,
                                 struct netlink_ext_ack *extack)
{
        struct flow_block_offload bo = {};

        tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
                               &block->flow_block, tcf_block_shared(block),
                               extack);

        if (dev->netdev_ops->ndo_setup_tc) {
                int err;

                err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
                if (err < 0) {
                        if (err != -EOPNOTSUPP)
                                NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
                        return err;
                }

                return tcf_block_setup(block, &bo);
        }

        flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
                                    tc_block_indr_cleanup);
        tcf_block_setup(block, &bo);

        return -EOPNOTSUPP;
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
                                  struct tcf_block_ext_info *ei,
                                  struct netlink_ext_ack *extack)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);

        /* If the tc offload feature is disabled and the block we try to bind
         * to already has some offloaded filters, refuse the bind.
         */
        if (dev->netdev_ops->ndo_setup_tc &&
            !tc_can_offload(dev) &&
            tcf_block_offload_in_use(block)) {
                NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
                err = -EOPNOTSUPP;
                goto err_unlock;
        }

        err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_inc;
        if (err)
                goto err_unlock;

        up_write(&block->cb_lock);
        return 0;

no_offload_dev_inc:
        if (tcf_block_offload_in_use(block))
                goto err_unlock;

        err = 0;
        block->nooffloaddevcnt++;
err_unlock:
        up_write(&block->cb_lock);
        return err;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
                                     struct tcf_block_ext_info *ei)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);
        err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_dec;
        up_write(&block->cb_lock);
        return;

no_offload_dev_dec:
        WARN_ON(block->nooffloaddevcnt-- == 0);
        up_write(&block->cb_lock);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
                              struct tcf_block_ext_info *ei,
                              struct netlink_ext_ack *extack)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_chain *chain0;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item) {
                NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
                return -ENOMEM;
        }
        item->chain_head_change = ei->chain_head_change;
        item->chain_head_change_priv = ei->chain_head_change_priv;

        mutex_lock(&block->lock);
        chain0 = block->chain0.chain;
        if (chain0)
                tcf_chain_hold(chain0);
        else
                list_add(&item->list, &block->chain0.filter_chain_list);
        mutex_unlock(&block->lock);

        if (chain0) {
                struct tcf_proto *tp_head;

                mutex_lock(&chain0->filter_chain_lock);

                tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
                if (tp_head)
                        tcf_chain_head_change_item(item, tp_head);

                mutex_lock(&block->lock);
                list_add(&item->list, &block->chain0.filter_chain_list);
                mutex_unlock(&block->lock);

                mutex_unlock(&chain0->filter_chain_lock);
                tcf_chain_put(chain0);
        }

        return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
                              struct tcf_block_ext_info *ei)
{
        struct tcf_filter_chain_list_item *item;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
                        if (block->chain0.chain)
                                tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
                        mutex_unlock(&block->lock);

                        kfree(item);
                        return;
                }
        }
        mutex_unlock(&block->lock);
        WARN_ON(1);
}

struct tcf_net {
        spinlock_t idr_lock; /* Protects idr */
        struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
                            struct netlink_ext_ack *extack)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);
        int err;

        idr_preload(GFP_KERNEL);
        spin_lock(&tn->idr_lock);
        err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
                            GFP_NOWAIT);
        spin_unlock(&tn->idr_lock);
        idr_preload_end();

        return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        spin_lock(&tn->idr_lock);
        idr_remove(&tn->idr, block->index);
        spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        block = kzalloc(sizeof(*block), GFP_KERNEL);
        if (!block) {
                NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
                return ERR_PTR(-ENOMEM);
        }
        mutex_init(&block->lock);
        mutex_init(&block->proto_destroy_lock);
        init_rwsem(&block->cb_lock);
        flow_block_init(&block->flow_block);
        INIT_LIST_HEAD(&block->chain_list);
        INIT_LIST_HEAD(&block->owner_list);
        INIT_LIST_HEAD(&block->chain0.filter_chain_list);

        refcount_set(&block->refcnt, 1);
        block->net = net;
        block->index = block_index;

        /* Don't store q pointer for blocks which are shared */
        if (!tcf_block_shared(block))
                block->q = q;
        return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
        struct tcf_block *block;

        rcu_read_lock();
        block = tcf_block_lookup(net, block_index);
        if (block && !refcount_inc_not_zero(&block->refcnt))
                block = NULL;
        rcu_read_unlock();

        return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        mutex_lock(&block->lock);
        if (chain)
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);
        else
                chain = list_first_entry_or_null(&block->chain_list,
                                                 struct tcf_chain, list);

        /* skip all action-only chains */
        while (chain && tcf_chain_held_by_acts_only(chain))
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);

        if (chain)
                tcf_chain_hold(chain);
        mutex_unlock(&block->lock);

        return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * a block. It properly obtains block->lock and takes a reference to the chain
 * before returning it. Users of this function must be tolerant to concurrent
 * chain insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

        if (chain)
                tcf_chain_put(chain);

        return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);
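
/* Usage sketch (illustrative): walk every user-visible chain on a block.
 * Passing the previous chain back releases its reference, and a NULL return
 * ends the walk with no references held:
 *
 *	struct tcf_chain *chain;
 *
 *	for (chain = tcf_get_next_chain(block, NULL);
 *	     chain;
 *	     chain = tcf_get_next_chain(block, chain))
 *		visit(chain);	// placeholder
 */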

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
        u32 prio = 0;

        ASSERT_RTNL();
        mutex_lock(&chain->filter_chain_lock);

        if (!tp) {
                tp = tcf_chain_dereference(chain->filter_chain, chain);
        } else if (tcf_proto_is_deleting(tp)) {
                /* 'deleting' flag is set and chain->filter_chain_lock was
                 * unlocked, which means next pointer could be invalid. Restart
                 * search.
                 */
                prio = tp->prio + 1;
                tp = tcf_chain_dereference(chain->filter_chain, chain);

                for (; tp; tp = tcf_chain_dereference(tp->next, chain))
                        if (!tp->deleting && tp->prio >= prio)
                                break;
        } else {
                tp = tcf_chain_dereference(tp->next, chain);
        }

        if (tp)
                tcf_proto_get(tp);

        mutex_unlock(&chain->filter_chain_lock);

        return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * a chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
        struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

        if (tp)
                tcf_proto_put(tp, true, NULL);

        return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);
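
/* Usage sketch (illustrative): walk every tp on a chain under rtnl; as with
 * the chain iterator, the previously returned tp is put on each step:
 *
 *	struct tcf_proto *tp;
 *
 *	for (tp = tcf_get_next_proto(chain, NULL);
 *	     tp;
 *	     tp = tcf_get_next_proto(chain, tp))
 *		visit(tp);	// placeholder
 */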

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
        struct tcf_chain *chain;

        /* Last reference to block. At this point chains cannot be added or
         * removed concurrently.
         */
        for (chain = tcf_get_next_chain(block, NULL);
             chain;
             chain = tcf_get_next_chain(block, chain)) {
                tcf_chain_put_explicitly_created(chain);
                tcf_chain_flush(chain, rtnl_held);
        }
}

/* Look up the Qdisc and increment its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
                            u32 *parent, int ifindex, bool rtnl_held,
                            struct netlink_ext_ack *extack)
{
        const struct Qdisc_class_ops *cops;
        struct net_device *dev;
        int err = 0;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        rcu_read_lock();

        /* Find link */
        dev = dev_get_by_index_rcu(net, ifindex);
        if (!dev) {
                rcu_read_unlock();
                return -ENODEV;
        }

        /* Find qdisc */
        if (!*parent) {
                *q = rcu_dereference(dev->qdisc);
                *parent = (*q)->handle;
        } else {
                *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                        err = -EINVAL;
                        goto errout_rcu;
                }
        }

        *q = qdisc_refcount_inc_nz(*q);
        if (!*q) {
                NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                err = -EINVAL;
                goto errout_rcu;
        }

        /* Is it classful? */
        cops = (*q)->ops->cl_ops;
        if (!cops) {
                NL_SET_ERR_MSG(extack, "Qdisc not classful");
                err = -EINVAL;
                goto errout_qdisc;
        }

        if (!cops->tcf_block) {
                NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
                err = -EOPNOTSUPP;
                goto errout_qdisc;
        }

errout_rcu:
        /* At this point we know that qdisc is not noop_qdisc,
         * which means that qdisc holds a reference to net_device
         * and we hold a reference to qdisc, so it is safe to release
         * rcu read lock.
         */
        rcu_read_unlock();
        return err;

errout_qdisc:
        rcu_read_unlock();

        if (rtnl_held)
                qdisc_put(*q);
        else
                qdisc_put_unlocked(*q);
        *q = NULL;

        return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
                               int ifindex, struct netlink_ext_ack *extack)
{
        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        /* Are we searching for a filter attached to a class? */
        if (TC_H_MIN(parent)) {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                *cl = cops->find(q, parent);
                if (*cl == 0) {
                        NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
                        return -ENOENT;
                }
        }

        return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
                                          unsigned long cl, int ifindex,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
                block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
                        NL_SET_ERR_MSG(extack, "Block of given index was not found");
                        return ERR_PTR(-EINVAL);
                }
        } else {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                block = cops->tcf_block(q, cl, extack);
                if (!block)
                        return ERR_PTR(-EINVAL);

                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
                        return ERR_PTR(-EOPNOTSUPP);
                }

                /* Always take a reference to the block in order to support
                 * execution of the cls API rules update path without the rtnl
                 * lock. The caller must release the block when finished with
                 * it. The 'if' branch of this conditional obtains its
                 * reference to the block by calling tcf_block_refcnt_get().
                 */
                refcount_inc(&block->refcnt);
        }

        return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
                            struct tcf_block_ext_info *ei, bool rtnl_held)
{
        if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
                /* Flushing/putting all chains will cause the block to be
                 * deallocated when the last chain is freed. However, if
                 * chain_list is empty, the block has to be deallocated
                 * manually. After the block's reference counter has reached
                 * 0, it is no longer possible to increment it or add new
                 * chains to the block.
                 */
                bool free_block = list_empty(&block->chain_list);

                mutex_unlock(&block->lock);
                if (tcf_block_shared(block))
                        tcf_block_remove(block, block->net);

                if (q)
                        tcf_block_offload_unbind(block, q, ei);

                if (free_block)
                        tcf_block_destroy(block);
                else
                        tcf_block_flush_all_chains(block, rtnl_held);
        } else if (q) {
                tcf_block_offload_unbind(block, q, ei);
        }
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
        __tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
                                        u32 *parent, unsigned long *cl,
                                        int ifindex, u32 block_index,
                                        struct netlink_ext_ack *extack)
{
        struct tcf_block *block;
        int err = 0;

        ASSERT_RTNL();

        err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
        if (err)
                goto errout;

        err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
        if (err)
                goto errout_qdisc;

        block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout_qdisc;
        }

        return block;

errout_qdisc:
        if (*q)
                qdisc_put(*q);
errout:
        *q = NULL;
        return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
                              bool rtnl_held)
{
        if (!IS_ERR_OR_NULL(block))
                tcf_block_refcnt_put(block, rtnl_held);

        if (q) {
                if (rtnl_held)
                        qdisc_put(q);
                else
                        qdisc_put_unlocked(q);
        }
}

struct tcf_block_owner_item {
        struct list_head list;
        struct Qdisc *q;
        enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        if (block->keep_dst &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
                netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
        struct tcf_block_owner_item *item;

        block->keep_dst = true;
        list_for_each_entry(item, &block->owner_list, list)
                tcf_block_owner_netif_keep_dst(block, item->q,
                                               item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item)
                return -ENOMEM;
        item->q = q;
        item->binder_type = binder_type;
        list_add(&item->list, &block->owner_list);
        return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
                                struct Qdisc *q,
                                enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        list_for_each_entry(item, &block->owner_list, list) {
                if (item->q == q && item->binder_type == binder_type) {
                        list_del(&item->list);
                        kfree(item);
                        return;
                }
        }
        WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
                      struct tcf_block_ext_info *ei,
                      struct netlink_ext_ack *extack)
{
        struct net *net = qdisc_net(q);
        struct tcf_block *block = NULL;
        int err;

        if (ei->block_index)
                /* A non-zero block_index means a shared block is requested */
                block = tcf_block_refcnt_get(net, ei->block_index);

        if (!block) {
                block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                if (tcf_block_shared(block)) {
                        err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
        }

        err = tcf_block_owner_add(block, q, ei->binder_type);
        if (err)
                goto err_block_owner_add;

        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

        err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
                goto err_chain0_head_change_cb_add;

        err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;

        *p_block = block;
        return 0;

err_block_offload_bind:
        tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
        tcf_block_refcnt_put(block, true);
        return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
        struct tcf_proto __rcu **p_filter_chain = priv;

        rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
                  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
                  struct netlink_ext_ack *extack)
{
        struct tcf_block_ext_info ei = {
                .chain_head_change = tcf_chain_head_change_dflt,
                .chain_head_change_priv = p_filter_chain,
        };

        WARN_ON(!p_filter_chain);
        return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
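
/* Usage sketch (illustrative): a classful qdisc usually acquires its block
 * in ->init() and releases it with tcf_block_put() in ->destroy(). The
 * foo_sched_data fields are assumed:
 *
 *	static int foo_init(struct Qdisc *sch, struct nlattr *opt,
 *			    struct netlink_ext_ack *extack)
 *	{
 *		struct foo_sched_data *q = qdisc_priv(sch);
 *
 *		return tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *	}
 *
 *	static void foo_destroy(struct Qdisc *sch)
 *	{
 *		struct foo_sched_data *q = qdisc_priv(sch);
 *
 *		tcf_block_put(q->block);
 *	}
 */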

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should all be removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
                       struct tcf_block_ext_info *ei)
{
        if (!block)
                return;
        tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);

        __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
        struct tcf_block_ext_info ei = {0, };

        if (!block)
                return;
        tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
                            void *cb_priv, bool add, bool offload_in_use,
                            struct netlink_ext_ack *extack)
{
        struct tcf_chain *chain, *chain_prev;
        struct tcf_proto *tp, *tp_prev;
        int err;

        lockdep_assert_held(&block->cb_lock);

        for (chain = __tcf_get_next_chain(block, NULL);
             chain;
             chain_prev = chain,
                     chain = __tcf_get_next_chain(block, chain),
                     tcf_chain_put(chain_prev)) {
                for (tp = __tcf_get_next_proto(chain, NULL); tp;
                     tp_prev = tp,
                             tp = __tcf_get_next_proto(chain, tp),
                             tcf_proto_put(tp_prev, true, NULL)) {
                        if (tp->ops->reoffload) {
                                err = tp->ops->reoffload(tp, add, cb, cb_priv,
                                                         extack);
                                if (err && add)
                                        goto err_playback_remove;
                        } else if (add && offload_in_use) {
                                err = -EOPNOTSUPP;
                                NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
                                goto err_playback_remove;
                        }
                }
        }

        return 0;

err_playback_remove:
        tcf_proto_put(tp, true, NULL);
        tcf_chain_put(chain);
        tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
                                    extack);
        return err;
}

static int tcf_block_bind(struct tcf_block *block,
                          struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;
        int err, i = 0;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry(block_cb, &bo->cb_list, list) {
                err = tcf_block_playback_offloads(block, block_cb->cb,
                                                  block_cb->cb_priv, true,
                                                  tcf_block_offload_in_use(block),
                                                  bo->extack);
                if (err)
                        goto err_unroll;
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt++;

                i++;
        }
        list_splice(&bo->cb_list, &block->flow_block.cb_list);

        return 0;

err_unroll:
        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                if (i-- > 0) {
                        list_del(&block_cb->list);
                        tcf_block_playback_offloads(block, block_cb->cb,
                                                    block_cb->cb_priv, false,
                                                    tcf_block_offload_in_use(block),
                                                    NULL);
                        if (!bo->unlocked_driver_cb)
                                block->lockeddevcnt--;
                }
                flow_block_cb_free(block_cb);
        }

        return err;
}

static void tcf_block_unbind(struct tcf_block *block,
                             struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                tcf_block_playback_offloads(block, block_cb->cb,
                                            block_cb->cb_priv, false,
                                            tcf_block_offload_in_use(block),
                                            NULL);
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt--;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo)
{
        int err;

        switch (bo->command) {
        case FLOW_BLOCK_BIND:
                err = tcf_block_bind(block, bo);
                break;
        case FLOW_BLOCK_UNBIND:
                err = 0;
                tcf_block_unbind(block, bo);
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for the protocol and asks
 * the specific classifiers.
 */
1544static inline int __tcf_classify(struct sk_buff *skb,
1545                                 const struct tcf_proto *tp,
1546                                 const struct tcf_proto *orig_tp,
1547                                 struct tcf_result *res,
1548                                 bool compat_mode,
1549                                 u32 *last_executed_chain)
1550{
1551#ifdef CONFIG_NET_CLS_ACT
1552        const int max_reclassify_loop = 16;
1553        const struct tcf_proto *first_tp;
1554        int limit = 0;
1555
1556reclassify:
1557#endif
1558        for (; tp; tp = rcu_dereference_bh(tp->next)) {
1559                __be16 protocol = skb_protocol(skb, false);
1560                int err;
1561
1562                if (tp->protocol != protocol &&
1563                    tp->protocol != htons(ETH_P_ALL))
1564                        continue;
1565
1566                err = tp->classify(skb, tp, res);
1567#ifdef CONFIG_NET_CLS_ACT
1568                if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
1569                        first_tp = orig_tp;
1570                        *last_executed_chain = first_tp->chain->index;
1571                        goto reset;
1572                } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
1573                        first_tp = res->goto_tp;
1574                        *last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
1575                        goto reset;
1576                }
1577#endif
1578                if (err >= 0)
1579                        return err;
1580        }
1581
1582        return TC_ACT_UNSPEC; /* signal: continue lookup */
1583#ifdef CONFIG_NET_CLS_ACT
1584reset:
1585        if (unlikely(limit++ >= max_reclassify_loop)) {
1586                net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1587                                       tp->chain->block->index,
1588                                       tp->prio & 0xffff,
1589                                       ntohs(tp->protocol));
1590                return TC_ACT_SHOT;
1591        }
1592
1593        tp = first_tp;
1594        goto reclassify;
1595#endif
1596}
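
/*
 * Editor's note (not part of the original file): TC_ACT_GOTO_CHAIN is an
 * "extended" verdict: the high bits of the return code carry an opcode and
 * the low TC_ACT_EXT_VAL_MASK bits carry a value, here the destination
 * chain index, which is what __tcf_classify() masks out above. A minimal
 * sketch of composing such a verdict (the helper name is hypothetical):
 */
static inline int example_goto_chain_verdict(u32 chain_index)
{
        /* chain_index must fit in TC_ACT_EXT_VAL_MASK; filter installation
         * enforces the same upper limit.
         */
        return TC_ACT_GOTO_CHAIN | (chain_index & TC_ACT_EXT_VAL_MASK);
}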
1597
1598int tcf_classify(struct sk_buff *skb,
1599                 const struct tcf_block *block,
1600                 const struct tcf_proto *tp,
1601                 struct tcf_result *res, bool compat_mode)
1602{
1603#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1604        u32 last_executed_chain = 0;
1605
1606        return __tcf_classify(skb, tp, tp, res, compat_mode,
1607                              &last_executed_chain);
1608#else
1609        u32 last_executed_chain = tp ? tp->chain->index : 0;
1610        const struct tcf_proto *orig_tp = tp;
1611        struct tc_skb_ext *ext;
1612        int ret;
1613
1614        if (block) {
1615                ext = skb_ext_find(skb, TC_SKB_EXT);
1616
1617                if (ext && ext->chain) {
1618                        struct tcf_chain *fchain;
1619
1620                        fchain = tcf_chain_lookup_rcu(block, ext->chain);
1621                        if (!fchain)
1622                                return TC_ACT_SHOT;
1623
1624                        /* Consume, so cloned/redirect skbs won't inherit ext */
1625                        skb_ext_del(skb, TC_SKB_EXT);
1626
1627                        tp = rcu_dereference_bh(fchain->filter_chain);
1628                        last_executed_chain = fchain->index;
1629                }
1630        }
1631
1632        ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
1633                             &last_executed_chain);
1634
1635        if (tc_skb_ext_tc_enabled()) {
1636                /* If we missed on some chain, record it for later resumption */
1637                if (ret == TC_ACT_UNSPEC && last_executed_chain) {
1638                        struct tc_skb_cb *cb = tc_skb_cb(skb);
1639
1640                        ext = tc_skb_ext_alloc(skb);
1641                        if (WARN_ON_ONCE(!ext))
1642                                return TC_ACT_SHOT;
1643                        ext->chain = last_executed_chain;
1644                        ext->mru = cb->mru;
1645                        ext->post_ct = cb->post_ct;
1646                        ext->post_ct_snat = cb->post_ct_snat;
1647                        ext->post_ct_dnat = cb->post_ct_dnat;
1648                        ext->zone = cb->zone;
1649                }
1650        }
1651
1652        return ret;
1653#endif
1654}
1655EXPORT_SYMBOL(tcf_classify);
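
/*
 * Editor's sketch (not part of the original file): the typical caller
 * pattern for tcf_classify(), loosely modelled on the ingress hook. It
 * runs in softirq context under rcu_read_lock_bh(); the example_* name is
 * illustrative and the error handling is simplified.
 */
static int example_ingress_classify(struct sk_buff *skb,
                                    struct mini_Qdisc *miniq)
{
        struct tcf_result res = {};

        switch (tcf_classify(skb, miniq->block, miniq->filter_list, &res,
                             false)) {
        case TC_ACT_OK:
        case TC_ACT_RECLASSIFY:
                /* A filter matched; its classid is the classification. */
                skb->tc_index = TC_H_MIN(res.classid);
                return 0;
        case TC_ACT_SHOT:
                kfree_skb(skb);
                return -EPERM;
        default:
                /* TC_ACT_UNSPEC and friends: no terminal verdict. */
                return 0;
        }
}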
1656
1657struct tcf_chain_info {
1658        struct tcf_proto __rcu **pprev;
1659        struct tcf_proto __rcu *next;
1660};
1661
1662static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1663                                           struct tcf_chain_info *chain_info)
1664{
1665        return tcf_chain_dereference(*chain_info->pprev, chain);
1666}
1667
1668static int tcf_chain_tp_insert(struct tcf_chain *chain,
1669                               struct tcf_chain_info *chain_info,
1670                               struct tcf_proto *tp)
1671{
1672        if (chain->flushing)
1673                return -EAGAIN;
1674
1675        RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1676        if (*chain_info->pprev == chain->filter_chain)
1677                tcf_chain0_head_change(chain, tp);
1678        tcf_proto_get(tp);
1679        rcu_assign_pointer(*chain_info->pprev, tp);
1680
1681        return 0;
1682}
1683
1684static void tcf_chain_tp_remove(struct tcf_chain *chain,
1685                                struct tcf_chain_info *chain_info,
1686                                struct tcf_proto *tp)
1687{
1688        struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1689
1690        tcf_proto_mark_delete(tp);
1691        if (tp == chain->filter_chain)
1692                tcf_chain0_head_change(chain, next);
1693        RCU_INIT_POINTER(*chain_info->pprev, next);
1694}
1695
1696static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1697                                           struct tcf_chain_info *chain_info,
1698                                           u32 protocol, u32 prio,
1699                                           bool prio_allocate);
1700
1701/* Try to insert a new proto.
1702 * If a proto with the specified priority already exists, free the new
1703 * proto and return the existing one.
1704 */
1705
1706static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1707                                                    struct tcf_proto *tp_new,
1708                                                    u32 protocol, u32 prio,
1709                                                    bool rtnl_held)
1710{
1711        struct tcf_chain_info chain_info;
1712        struct tcf_proto *tp;
1713        int err = 0;
1714
1715        mutex_lock(&chain->filter_chain_lock);
1716
1717        if (tcf_proto_exists_destroying(chain, tp_new)) {
1718                mutex_unlock(&chain->filter_chain_lock);
1719                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1720                return ERR_PTR(-EAGAIN);
1721        }
1722
1723        tp = tcf_chain_tp_find(chain, &chain_info,
1724                               protocol, prio, false);
1725        if (!tp)
1726                err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1727        mutex_unlock(&chain->filter_chain_lock);
1728
1729        if (tp) {
1730                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1731                tp_new = tp;
1732        } else if (err) {
1733                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1734                tp_new = ERR_PTR(err);
1735        }
1736
1737        return tp_new;
1738}
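
/*
 * Editor's note (not part of the original file): the
 * tcf_proto_exists_destroying() check above closes a race with concurrent
 * deletion. A deleter publishes the dying (chain, prio, protocol) triple
 * via tcf_proto_signal_destroying() while still holding filter_chain_lock,
 * so an insert that would alias a half-destroyed proto sees it here, fails
 * with -EAGAIN and the whole request is replayed from scratch.
 */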
1739
1740static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1741                                      struct tcf_proto *tp, bool rtnl_held,
1742                                      struct netlink_ext_ack *extack)
1743{
1744        struct tcf_chain_info chain_info;
1745        struct tcf_proto *tp_iter;
1746        struct tcf_proto **pprev;
1747        struct tcf_proto *next;
1748
1749        mutex_lock(&chain->filter_chain_lock);
1750
1751        /* Atomically find and remove tp from chain. */
1752        for (pprev = &chain->filter_chain;
1753             (tp_iter = tcf_chain_dereference(*pprev, chain));
1754             pprev = &tp_iter->next) {
1755                if (tp_iter == tp) {
1756                        chain_info.pprev = pprev;
1757                        chain_info.next = tp_iter->next;
1758                        WARN_ON(tp_iter->deleting);
1759                        break;
1760                }
1761        }
1762        /* Verify that tp still exists and no new filters were inserted
1763         * concurrently.
1764         * Mark tp for deletion if it is empty.
1765         */
1766        if (!tp_iter || !tcf_proto_check_delete(tp)) {
1767                mutex_unlock(&chain->filter_chain_lock);
1768                return;
1769        }
1770
1771        tcf_proto_signal_destroying(chain, tp);
1772        next = tcf_chain_dereference(chain_info.next, chain);
1773        if (tp == chain->filter_chain)
1774                tcf_chain0_head_change(chain, next);
1775        RCU_INIT_POINTER(*chain_info.pprev, next);
1776        mutex_unlock(&chain->filter_chain_lock);
1777
1778        tcf_proto_put(tp, rtnl_held, extack);
1779}
1780
1781static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1782                                           struct tcf_chain_info *chain_info,
1783                                           u32 protocol, u32 prio,
1784                                           bool prio_allocate)
1785{
1786        struct tcf_proto **pprev;
1787        struct tcf_proto *tp;
1788
1789        /* Check the chain for an existing proto-tcf with this priority */
1790        for (pprev = &chain->filter_chain;
1791             (tp = tcf_chain_dereference(*pprev, chain));
1792             pprev = &tp->next) {
1793                if (tp->prio >= prio) {
1794                        if (tp->prio == prio) {
1795                                if (prio_allocate ||
1796                                    (tp->protocol != protocol && protocol))
1797                                        return ERR_PTR(-EINVAL);
1798                        } else {
1799                                tp = NULL;
1800                        }
1801                        break;
1802                }
1803        }
1804        chain_info->pprev = pprev;
1805        if (tp) {
1806                chain_info->next = tp->next;
1807                tcf_proto_get(tp);
1808        } else {
1809                chain_info->next = NULL;
1810        }
1811        return tp;
1812}
1813
1814static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1815                         struct tcf_proto *tp, struct tcf_block *block,
1816                         struct Qdisc *q, u32 parent, void *fh,
1817                         u32 portid, u32 seq, u16 flags, int event,
1818                         bool terse_dump, bool rtnl_held)
1819{
1820        struct tcmsg *tcm;
1821        struct nlmsghdr  *nlh;
1822        unsigned char *b = skb_tail_pointer(skb);
1823
1824        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1825        if (!nlh)
1826                goto out_nlmsg_trim;
1827        tcm = nlmsg_data(nlh);
1828        tcm->tcm_family = AF_UNSPEC;
1829        tcm->tcm__pad1 = 0;
1830        tcm->tcm__pad2 = 0;
1831        if (q) {
1832                tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1833                tcm->tcm_parent = parent;
1834        } else {
1835                tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1836                tcm->tcm_block_index = block->index;
1837        }
1838        tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1839        if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1840                goto nla_put_failure;
1841        if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1842                goto nla_put_failure;
1843        if (!fh) {
1844                tcm->tcm_handle = 0;
1845        } else if (terse_dump) {
1846                if (tp->ops->terse_dump) {
1847                        if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
1848                                                rtnl_held) < 0)
1849                                goto nla_put_failure;
1850                } else {
1851                        goto cls_op_not_supp;
1852                }
1853        } else {
1854                if (tp->ops->dump &&
1855                    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1856                        goto nla_put_failure;
1857        }
1858        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1859        return skb->len;
1860
1861out_nlmsg_trim:
1862nla_put_failure:
1863cls_op_not_supp:
1864        nlmsg_trim(skb, b);
1865        return -1;
1866}
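
/*
 * Editor's note (not part of the original file): tcm_info multiplexes both
 * filter keys into a single u32 using the TC_H_*() handle macros: the
 * priority sits in the upper 16 bits and the big-endian EtherType in the
 * lower 16. tc_new_tfilter() below undoes the split with TC_H_MAJ() and
 * TC_H_MIN(). A hypothetical packing helper:
 */
static inline u32 example_pack_tcm_info(u16 user_prio, __be16 protocol)
{
        return TC_H_MAKE((u32)user_prio << 16, (__force u32)protocol);
}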
1867
1868static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1869                          struct nlmsghdr *n, struct tcf_proto *tp,
1870                          struct tcf_block *block, struct Qdisc *q,
1871                          u32 parent, void *fh, int event, bool unicast,
1872                          bool rtnl_held)
1873{
1874        struct sk_buff *skb;
1875        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1876        int err = 0;
1877
1878        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1879        if (!skb)
1880                return -ENOBUFS;
1881
1882        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1883                          n->nlmsg_seq, n->nlmsg_flags, event,
1884                          false, rtnl_held) <= 0) {
1885                kfree_skb(skb);
1886                return -EINVAL;
1887        }
1888
1889        if (unicast)
1890                err = rtnl_unicast(skb, net, portid);
1891        else
1892                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1893                                     n->nlmsg_flags & NLM_F_ECHO);
1894        return err;
1895}
1896
1897static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1898                              struct nlmsghdr *n, struct tcf_proto *tp,
1899                              struct tcf_block *block, struct Qdisc *q,
1900                              u32 parent, void *fh, bool unicast, bool *last,
1901                              bool rtnl_held, struct netlink_ext_ack *extack)
1902{
1903        struct sk_buff *skb;
1904        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1905        int err;
1906
1907        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1908        if (!skb)
1909                return -ENOBUFS;
1910
1911        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1912                          n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1913                          false, rtnl_held) <= 0) {
1914                NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1915                kfree_skb(skb);
1916                return -EINVAL;
1917        }
1918
1919        err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
1920        if (err) {
1921                kfree_skb(skb);
1922                return err;
1923        }
1924
1925        if (unicast)
1926                err = rtnl_unicast(skb, net, portid);
1927        else
1928                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1929                                     n->nlmsg_flags & NLM_F_ECHO);
1930        if (err < 0)
1931                NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1932
1933        return err;
1934}
1935
1936static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1937                                 struct tcf_block *block, struct Qdisc *q,
1938                                 u32 parent, struct nlmsghdr *n,
1939                                 struct tcf_chain *chain, int event)
1940{
1941        struct tcf_proto *tp;
1942
1943        for (tp = tcf_get_next_proto(chain, NULL);
1944             tp; tp = tcf_get_next_proto(chain, tp))
1945                tfilter_notify(net, oskb, n, tp, block,
1946                               q, parent, NULL, event, false, true);
1947}
1948
1949static void tfilter_put(struct tcf_proto *tp, void *fh)
1950{
1951        if (tp->ops->put && fh)
1952                tp->ops->put(tp, fh);
1953}
1954
1955static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1956                          struct netlink_ext_ack *extack)
1957{
1958        struct net *net = sock_net(skb->sk);
1959        struct nlattr *tca[TCA_MAX + 1];
1960        char name[IFNAMSIZ];
1961        struct tcmsg *t;
1962        u32 protocol;
1963        u32 prio;
1964        bool prio_allocate;
1965        u32 parent;
1966        u32 chain_index;
1967        struct Qdisc *q;
1968        struct tcf_chain_info chain_info;
1969        struct tcf_chain *chain;
1970        struct tcf_block *block;
1971        struct tcf_proto *tp;
1972        unsigned long cl;
1973        void *fh;
1974        int err;
1975        int tp_created;
1976        bool rtnl_held = false;
1977        u32 flags;
1978
1979        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1980                return -EPERM;
1981
1982replay:
1983        tp_created = 0;
1984
1985        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1986                                     rtm_tca_policy, extack);
1987        if (err < 0)
1988                return err;
1989
1990        t = nlmsg_data(n);
1991        protocol = TC_H_MIN(t->tcm_info);
1992        prio = TC_H_MAJ(t->tcm_info);
1993        prio_allocate = false;
1994        parent = t->tcm_parent;
1995        tp = NULL;
1996        cl = 0;
1997        block = NULL;
1998        q = NULL;
1999        chain = NULL;
2000        flags = 0;
2001
2002        if (prio == 0) {
2003                /* If no priority is provided by the user,
2004                 * we allocate one.
2005                 */
2006                if (n->nlmsg_flags & NLM_F_CREATE) {
2007                        prio = TC_H_MAKE(0x80000000U, 0U);
2008                        prio_allocate = true;
2009                } else {
2010                        NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2011                        return -ENOENT;
2012                }
2013        }
2014
2015        /* Find head of filter chain. */
2016
2017        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2018        if (err)
2019                return err;
2020
2021        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2022                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2023                err = -EINVAL;
2024                goto errout;
2025        }
2026
2027        /* Take the rtnl mutex if: rtnl_held was set on a previous iteration;
2028         * the block is shared (no qdisc found); the qdisc is not unlocked;
2029         * the kind is unset; or the classifier itself is not unlocked.
2030         */
2031        if (rtnl_held ||
2032            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2033            !tcf_proto_is_unlocked(name)) {
2034                rtnl_held = true;
2035                rtnl_lock();
2036        }
2037
2038        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2039        if (err)
2040                goto errout;
2041
2042        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2043                                 extack);
2044        if (IS_ERR(block)) {
2045                err = PTR_ERR(block);
2046                goto errout;
2047        }
2048        block->classid = parent;
2049
2050        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2051        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2052                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2053                err = -EINVAL;
2054                goto errout;
2055        }
2056        chain = tcf_chain_get(block, chain_index, true);
2057        if (!chain) {
2058                NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2059                err = -ENOMEM;
2060                goto errout;
2061        }
2062
2063        mutex_lock(&chain->filter_chain_lock);
2064        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2065                               prio, prio_allocate);
2066        if (IS_ERR(tp)) {
2067                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2068                err = PTR_ERR(tp);
2069                goto errout_locked;
2070        }
2071
2072        if (tp == NULL) {
2073                struct tcf_proto *tp_new = NULL;
2074
2075                if (chain->flushing) {
2076                        err = -EAGAIN;
2077                        goto errout_locked;
2078                }
2079
2080                /* Proto-tcf does not exist, create a new one */
2081
2082                if (tca[TCA_KIND] == NULL || !protocol) {
2083                        NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2084                        err = -EINVAL;
2085                        goto errout_locked;
2086                }
2087
2088                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2089                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2090                        err = -ENOENT;
2091                        goto errout_locked;
2092                }
2093
2094                if (prio_allocate)
2095                        prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2096                                                               &chain_info));
2097
2098                mutex_unlock(&chain->filter_chain_lock);
2099                tp_new = tcf_proto_create(name, protocol, prio, chain,
2100                                          rtnl_held, extack);
2101                if (IS_ERR(tp_new)) {
2102                        err = PTR_ERR(tp_new);
2103                        goto errout_tp;
2104                }
2105
2106                tp_created = 1;
2107                tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2108                                                rtnl_held);
2109                if (IS_ERR(tp)) {
2110                        err = PTR_ERR(tp);
2111                        goto errout_tp;
2112                }
2113        } else {
2114                mutex_unlock(&chain->filter_chain_lock);
2115        }
2116
2117        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2118                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2119                err = -EINVAL;
2120                goto errout;
2121        }
2122
2123        fh = tp->ops->get(tp, t->tcm_handle);
2124
2125        if (!fh) {
2126                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2127                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2128                        err = -ENOENT;
2129                        goto errout;
2130                }
2131        } else if (n->nlmsg_flags & NLM_F_EXCL) {
2132                tfilter_put(tp, fh);
2133                NL_SET_ERR_MSG(extack, "Filter already exists");
2134                err = -EEXIST;
2135                goto errout;
2136        }
2137
2138        if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2139                NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2140                err = -EINVAL;
2141                goto errout;
2142        }
2143
2144        if (!(n->nlmsg_flags & NLM_F_CREATE))
2145                flags |= TCA_ACT_FLAGS_REPLACE;
2146        if (!rtnl_held)
2147                flags |= TCA_ACT_FLAGS_NO_RTNL;
2148        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2149                              flags, extack);
2150        if (err == 0) {
2151                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2152                               RTM_NEWTFILTER, false, rtnl_held);
2153                tfilter_put(tp, fh);
2154                /* q pointer is NULL for shared blocks */
2155                if (q)
2156                        q->flags &= ~TCQ_F_CAN_BYPASS;
2157        }
2158
2159errout:
2160        if (err && tp_created)
2161                tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2162errout_tp:
2163        if (chain) {
2164                if (tp && !IS_ERR(tp))
2165                        tcf_proto_put(tp, rtnl_held, NULL);
2166                if (!tp_created)
2167                        tcf_chain_put(chain);
2168        }
2169        tcf_block_release(q, block, rtnl_held);
2170
2171        if (rtnl_held)
2172                rtnl_unlock();
2173
2174        if (err == -EAGAIN) {
2175                /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2176                 * of target chain.
2177                 */
2178                rtnl_held = true;
2179                /* Replay the request. */
2180                goto replay;
2181        }
2182        return err;
2183
2184errout_locked:
2185        mutex_unlock(&chain->filter_chain_lock);
2186        goto errout;
2187}
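
/*
 * Editor's usage note (not part of the original file): a request such as
 *
 *   tc filter add dev eth0 ingress prio 1 protocol ip \
 *           flower dst_ip 192.0.2.1 action drop
 *
 * reaches this handler as RTM_NEWTFILTER with NLM_F_CREATE | NLM_F_EXCL.
 * Priority and protocol arrive packed in tcm_info, the classifier kind in
 * TCA_KIND, and the flower-specific keys and actions in nested attributes
 * that tp->ops->change() parses.
 */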
2188
2189static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2190                          struct netlink_ext_ack *extack)
2191{
2192        struct net *net = sock_net(skb->sk);
2193        struct nlattr *tca[TCA_MAX + 1];
2194        char name[IFNAMSIZ];
2195        struct tcmsg *t;
2196        u32 protocol;
2197        u32 prio;
2198        u32 parent;
2199        u32 chain_index;
2200        struct Qdisc *q = NULL;
2201        struct tcf_chain_info chain_info;
2202        struct tcf_chain *chain = NULL;
2203        struct tcf_block *block = NULL;
2204        struct tcf_proto *tp = NULL;
2205        unsigned long cl = 0;
2206        void *fh = NULL;
2207        int err;
2208        bool rtnl_held = false;
2209
2210        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2211                return -EPERM;
2212
2213        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2214                                     rtm_tca_policy, extack);
2215        if (err < 0)
2216                return err;
2217
2218        t = nlmsg_data(n);
2219        protocol = TC_H_MIN(t->tcm_info);
2220        prio = TC_H_MAJ(t->tcm_info);
2221        parent = t->tcm_parent;
2222
2223        if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2224                NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2225                return -ENOENT;
2226        }
2227
2228        /* Find head of filter chain. */
2229
2230        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2231        if (err)
2232                return err;
2233
2234        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2235                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2236                err = -EINVAL;
2237                goto errout;
2238        }
2239        /* Take the rtnl mutex if: the whole chain is being flushed; the
2240         * block is shared (no qdisc found); the qdisc is not unlocked; the
2241         * kind is unset; or the classifier itself is not unlocked.
2242         */
2243        if (!prio ||
2244            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2245            !tcf_proto_is_unlocked(name)) {
2246                rtnl_held = true;
2247                rtnl_lock();
2248        }
2249
2250        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2251        if (err)
2252                goto errout;
2253
2254        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2255                                 extack);
2256        if (IS_ERR(block)) {
2257                err = PTR_ERR(block);
2258                goto errout;
2259        }
2260
2261        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2262        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2263                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2264                err = -EINVAL;
2265                goto errout;
2266        }
2267        chain = tcf_chain_get(block, chain_index, false);
2268        if (!chain) {
2269                /* User requested flush on non-existent chain. Nothing to do,
2270                 * so just return success.
2271                 */
2272                if (prio == 0) {
2273                        err = 0;
2274                        goto errout;
2275                }
2276                NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2277                err = -ENOENT;
2278                goto errout;
2279        }
2280
2281        if (prio == 0) {
2282                tfilter_notify_chain(net, skb, block, q, parent, n,
2283                                     chain, RTM_DELTFILTER);
2284                tcf_chain_flush(chain, rtnl_held);
2285                err = 0;
2286                goto errout;
2287        }
2288
2289        mutex_lock(&chain->filter_chain_lock);
2290        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2291                               prio, false);
2292        if (!tp || IS_ERR(tp)) {
2293                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2294                err = tp ? PTR_ERR(tp) : -ENOENT;
2295                goto errout_locked;
2296        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2297                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2298                err = -EINVAL;
2299                goto errout_locked;
2300        } else if (t->tcm_handle == 0) {
2301                tcf_proto_signal_destroying(chain, tp);
2302                tcf_chain_tp_remove(chain, &chain_info, tp);
2303                mutex_unlock(&chain->filter_chain_lock);
2304
2305                tcf_proto_put(tp, rtnl_held, NULL);
2306                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2307                               RTM_DELTFILTER, false, rtnl_held);
2308                err = 0;
2309                goto errout;
2310        }
2311        mutex_unlock(&chain->filter_chain_lock);
2312
2313        fh = tp->ops->get(tp, t->tcm_handle);
2314
2315        if (!fh) {
2316                NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2317                err = -ENOENT;
2318        } else {
2319                bool last;
2320
2321                err = tfilter_del_notify(net, skb, n, tp, block,
2322                                         q, parent, fh, false, &last,
2323                                         rtnl_held, extack);
2324
2325                if (err)
2326                        goto errout;
2327                if (last)
2328                        tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2329        }
2330
2331errout:
2332        if (chain) {
2333                if (tp && !IS_ERR(tp))
2334                        tcf_proto_put(tp, rtnl_held, NULL);
2335                tcf_chain_put(chain);
2336        }
2337        tcf_block_release(q, block, rtnl_held);
2338
2339        if (rtnl_held)
2340                rtnl_unlock();
2341
2342        return err;
2343
2344errout_locked:
2345        mutex_unlock(&chain->filter_chain_lock);
2346        goto errout;
2347}
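
/*
 * Editor's usage note (not part of the original file): this handler covers
 * three cases. With prio == 0 it flushes the whole chain ("tc filter del
 * dev eth0 ingress"); with a priority but tcm_handle == 0 it unlinks the
 * entire tcf_proto instance; with a specific handle it deletes a single
 * filter and prunes the proto afterwards only if ->delete() reported that
 * it became empty.
 */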
2348
2349static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2350                          struct netlink_ext_ack *extack)
2351{
2352        struct net *net = sock_net(skb->sk);
2353        struct nlattr *tca[TCA_MAX + 1];
2354        char name[IFNAMSIZ];
2355        struct tcmsg *t;
2356        u32 protocol;
2357        u32 prio;
2358        u32 parent;
2359        u32 chain_index;
2360        struct Qdisc *q = NULL;
2361        struct tcf_chain_info chain_info;
2362        struct tcf_chain *chain = NULL;
2363        struct tcf_block *block = NULL;
2364        struct tcf_proto *tp = NULL;
2365        unsigned long cl = 0;
2366        void *fh = NULL;
2367        int err;
2368        bool rtnl_held = false;
2369
2370        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2371                                     rtm_tca_policy, extack);
2372        if (err < 0)
2373                return err;
2374
2375        t = nlmsg_data(n);
2376        protocol = TC_H_MIN(t->tcm_info);
2377        prio = TC_H_MAJ(t->tcm_info);
2378        parent = t->tcm_parent;
2379
2380        if (prio == 0) {
2381                NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2382                return -ENOENT;
2383        }
2384
2385        /* Find head of filter chain. */
2386
2387        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2388        if (err)
2389                return err;
2390
2391        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2392                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2393                err = -EINVAL;
2394                goto errout;
2395        }
2396        /* Take the rtnl mutex if: the block is shared (no qdisc found); the
2397         * qdisc is not unlocked; the kind is unset; or the classifier itself
2398         * is not unlocked.
2399         */
2400        if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2401            !tcf_proto_is_unlocked(name)) {
2402                rtnl_held = true;
2403                rtnl_lock();
2404        }
2405
2406        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2407        if (err)
2408                goto errout;
2409
2410        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2411                                 extack);
2412        if (IS_ERR(block)) {
2413                err = PTR_ERR(block);
2414                goto errout;
2415        }
2416
2417        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2418        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2419                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2420                err = -EINVAL;
2421                goto errout;
2422        }
2423        chain = tcf_chain_get(block, chain_index, false);
2424        if (!chain) {
2425                NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2426                err = -EINVAL;
2427                goto errout;
2428        }
2429
2430        mutex_lock(&chain->filter_chain_lock);
2431        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2432                               prio, false);
2433        mutex_unlock(&chain->filter_chain_lock);
2434        if (!tp || IS_ERR(tp)) {
2435                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2436                err = tp ? PTR_ERR(tp) : -ENOENT;
2437                goto errout;
2438        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2439                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2440                err = -EINVAL;
2441                goto errout;
2442        }
2443
2444        fh = tp->ops->get(tp, t->tcm_handle);
2445
2446        if (!fh) {
2447                NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2448                err = -ENOENT;
2449        } else {
2450                err = tfilter_notify(net, skb, n, tp, block, q, parent,
2451                                     fh, RTM_NEWTFILTER, true, rtnl_held);
2452                if (err < 0)
2453                        NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2454        }
2455
2456        tfilter_put(tp, fh);
2457errout:
2458        if (chain) {
2459                if (tp && !IS_ERR(tp))
2460                        tcf_proto_put(tp, rtnl_held, NULL);
2461                tcf_chain_put(chain);
2462        }
2463        tcf_block_release(q, block, rtnl_held);
2464
2465        if (rtnl_held)
2466                rtnl_unlock();
2467
2468        return err;
2469}
2470
2471struct tcf_dump_args {
2472        struct tcf_walker w;
2473        struct sk_buff *skb;
2474        struct netlink_callback *cb;
2475        struct tcf_block *block;
2476        struct Qdisc *q;
2477        u32 parent;
2478        bool terse_dump;
2479};
2480
2481static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2482{
2483        struct tcf_dump_args *a = (void *)arg;
2484        struct net *net = sock_net(a->skb->sk);
2485
2486        return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2487                             n, NETLINK_CB(a->cb->skb).portid,
2488                             a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2489                             RTM_NEWTFILTER, a->terse_dump, true);
2490}
2491
2492static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2493                           struct sk_buff *skb, struct netlink_callback *cb,
2494                           long index_start, long *p_index, bool terse)
2495{
2496        struct net *net = sock_net(skb->sk);
2497        struct tcf_block *block = chain->block;
2498        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2499        struct tcf_proto *tp, *tp_prev;
2500        struct tcf_dump_args arg;
2501
2502        for (tp = __tcf_get_next_proto(chain, NULL);
2503             tp;
2504             tp_prev = tp,
2505                     tp = __tcf_get_next_proto(chain, tp),
2506                     tcf_proto_put(tp_prev, true, NULL),
2507                     (*p_index)++) {
2508                if (*p_index < index_start)
2509                        continue;
2510                if (TC_H_MAJ(tcm->tcm_info) &&
2511                    TC_H_MAJ(tcm->tcm_info) != tp->prio)
2512                        continue;
2513                if (TC_H_MIN(tcm->tcm_info) &&
2514                    TC_H_MIN(tcm->tcm_info) != tp->protocol)
2515                        continue;
2516                if (*p_index > index_start)
2517                        memset(&cb->args[1], 0,
2518                               sizeof(cb->args) - sizeof(cb->args[0]));
2519                if (cb->args[1] == 0) {
2520                        if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2521                                          NETLINK_CB(cb->skb).portid,
2522                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
2523                                          RTM_NEWTFILTER, false, true) <= 0)
2524                                goto errout;
2525                        cb->args[1] = 1;
2526                }
2527                if (!tp->ops->walk)
2528                        continue;
2529                arg.w.fn = tcf_node_dump;
2530                arg.skb = skb;
2531                arg.cb = cb;
2532                arg.block = block;
2533                arg.q = q;
2534                arg.parent = parent;
2535                arg.w.stop = 0;
2536                arg.w.skip = cb->args[1] - 1;
2537                arg.w.count = 0;
2538                arg.w.cookie = cb->args[2];
2539                arg.terse_dump = terse;
2540                tp->ops->walk(tp, &arg.w, true);
2541                cb->args[2] = arg.w.cookie;
2542                cb->args[1] = arg.w.count + 1;
2543                if (arg.w.stop)
2544                        goto errout;
2545        }
2546        return true;
2547
2548errout:
2549        tcf_proto_put(tp, true, NULL);
2550        return false;
2551}
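
/*
 * Editor's note (not part of the original file): dump resumption state is
 * kept in the netlink callback scratch area: cb->args[0] is the flattened
 * proto index across all chains, cb->args[1] tracks progress inside the
 * current proto (0 means its header has not been emitted yet) and
 * cb->args[2] carries the classifier walker's opaque cookie between dump
 * invocations.
 */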
2552
2553static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
2554        [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
2555};
2556
2557/* called with RTNL */
2558static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2559{
2560        struct tcf_chain *chain, *chain_prev;
2561        struct net *net = sock_net(skb->sk);
2562        struct nlattr *tca[TCA_MAX + 1];
2563        struct Qdisc *q = NULL;
2564        struct tcf_block *block;
2565        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2566        bool terse_dump = false;
2567        long index_start;
2568        long index;
2569        u32 parent;
2570        int err;
2571
2572        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2573                return skb->len;
2574
2575        err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2576                                     tcf_tfilter_dump_policy, cb->extack);
2577        if (err)
2578                return err;
2579
2580        if (tca[TCA_DUMP_FLAGS]) {
2581                struct nla_bitfield32 flags =
2582                        nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
2583
2584                terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
2585        }
2586
2587        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2588                block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2589                if (!block)
2590                        goto out;
2591                /* If we are working with a block index, q is NULL and the
2592                 * parent value is never used in the following code. The check
2593                 * in tcf_fill_node prevents it. However, the compiler cannot
2594                 * see that far, so set parent to zero to silence the warning
2595                 * about parent being uninitialized.
2596                 */
2597                parent = 0;
2598        } else {
2599                const struct Qdisc_class_ops *cops;
2600                struct net_device *dev;
2601                unsigned long cl = 0;
2602
2603                dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2604                if (!dev)
2605                        return skb->len;
2606
2607                parent = tcm->tcm_parent;
2608                if (!parent)
2609                        q = rtnl_dereference(dev->qdisc);
2610                else
2611                        q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2612                if (!q)
2613                        goto out;
2614                cops = q->ops->cl_ops;
2615                if (!cops)
2616                        goto out;
2617                if (!cops->tcf_block)
2618                        goto out;
2619                if (TC_H_MIN(tcm->tcm_parent)) {
2620                        cl = cops->find(q, tcm->tcm_parent);
2621                        if (cl == 0)
2622                                goto out;
2623                }
2624                block = cops->tcf_block(q, cl, NULL);
2625                if (!block)
2626                        goto out;
2627                parent = block->classid;
2628                if (tcf_block_shared(block))
2629                        q = NULL;
2630        }
2631
2632        index_start = cb->args[0];
2633        index = 0;
2634
2635        for (chain = __tcf_get_next_chain(block, NULL);
2636             chain;
2637             chain_prev = chain,
2638                     chain = __tcf_get_next_chain(block, chain),
2639                     tcf_chain_put(chain_prev)) {
2640                if (tca[TCA_CHAIN] &&
2641                    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2642                        continue;
2643                if (!tcf_chain_dump(chain, q, parent, skb, cb,
2644                                    index_start, &index, terse_dump)) {
2645                        tcf_chain_put(chain);
2646                        err = -EMSGSIZE;
2647                        break;
2648                }
2649        }
2650
2651        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2652                tcf_block_refcnt_put(block, true);
2653        cb->args[0] = index;
2654
2655out:
2656        /* If we made no progress, the error (EMSGSIZE) is real */
2657        if (skb->len == 0 && err)
2658                return err;
2659        return skb->len;
2660}
2661
2662static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2663                              void *tmplt_priv, u32 chain_index,
2664                              struct net *net, struct sk_buff *skb,
2665                              struct tcf_block *block,
2666                              u32 portid, u32 seq, u16 flags, int event)
2667{
2668        unsigned char *b = skb_tail_pointer(skb);
2669        const struct tcf_proto_ops *ops;
2670        struct nlmsghdr *nlh;
2671        struct tcmsg *tcm;
2672        void *priv;
2673
2674        ops = tmplt_ops;
2675        priv = tmplt_priv;
2676
2677        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2678        if (!nlh)
2679                goto out_nlmsg_trim;
2680        tcm = nlmsg_data(nlh);
2681        tcm->tcm_family = AF_UNSPEC;
2682        tcm->tcm__pad1 = 0;
2683        tcm->tcm__pad2 = 0;
2684        tcm->tcm_handle = 0;
2685        if (block->q) {
2686                tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2687                tcm->tcm_parent = block->q->handle;
2688        } else {
2689                tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2690                tcm->tcm_block_index = block->index;
2691        }
2692
2693        if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2694                goto nla_put_failure;
2695
2696        if (ops) {
2697                if (nla_put_string(skb, TCA_KIND, ops->kind))
2698                        goto nla_put_failure;
2699                if (ops->tmplt_dump(skb, net, priv) < 0)
2700                        goto nla_put_failure;
2701        }
2702
2703        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2704        return skb->len;
2705
2706out_nlmsg_trim:
2707nla_put_failure:
2708        nlmsg_trim(skb, b);
2709        return -EMSGSIZE;
2710}
2711
2712static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2713                           u32 seq, u16 flags, int event, bool unicast)
2714{
2715        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2716        struct tcf_block *block = chain->block;
2717        struct net *net = block->net;
2718        struct sk_buff *skb;
2719        int err = 0;
2720
2721        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2722        if (!skb)
2723                return -ENOBUFS;
2724
2725        if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2726                               chain->index, net, skb, block, portid,
2727                               seq, flags, event) <= 0) {
2728                kfree_skb(skb);
2729                return -EINVAL;
2730        }
2731
2732        if (unicast)
2733                err = rtnl_unicast(skb, net, portid);
2734        else
2735                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2736                                     flags & NLM_F_ECHO);
2737
2738        return err;
2739}
2740
2741static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2742                                  void *tmplt_priv, u32 chain_index,
2743                                  struct tcf_block *block, struct sk_buff *oskb,
2744                                  u32 seq, u16 flags, bool unicast)
2745{
2746        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2747        struct net *net = block->net;
2748        struct sk_buff *skb;
2749
2750        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2751        if (!skb)
2752                return -ENOBUFS;
2753
2754        if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2755                               block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
2756                kfree_skb(skb);
2757                return -EINVAL;
2758        }
2759
2760        if (unicast)
2761                return rtnl_unicast(skb, net, portid);
2762
2763        return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2764}
2765
2766static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2767                              struct nlattr **tca,
2768                              struct netlink_ext_ack *extack)
2769{
2770        const struct tcf_proto_ops *ops;
2771        char name[IFNAMSIZ];
2772        void *tmplt_priv;
2773
2774        /* If kind is not set, the user did not specify a template. */
2775        if (!tca[TCA_KIND])
2776                return 0;
2777
2778        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2779                NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2780                return -EINVAL;
2781        }
2782
2783        ops = tcf_proto_lookup_ops(name, true, extack);
2784        if (IS_ERR(ops))
2785                return PTR_ERR(ops);
2786        if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2787                NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2788                return -EOPNOTSUPP;
2789        }
2790
2791        tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2792        if (IS_ERR(tmplt_priv)) {
2793                module_put(ops->owner);
2794                return PTR_ERR(tmplt_priv);
2795        }
2796        chain->tmplt_ops = ops;
2797        chain->tmplt_priv = tmplt_priv;
2798        return 0;
2799}
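
/*
 * Editor's sketch (not part of the original file): the three ops a
 * classifier must provide to pass the check above. The signatures follow
 * struct tcf_proto_ops; the example_* bodies are illustrative stubs only.
 */
static void *example_tmplt_create(struct net *net, struct tcf_chain *chain,
                                  struct nlattr **tca,
                                  struct netlink_ext_ack *extack)
{
        /* Parse TCA_OPTIONS into a template object that new filters will
         * later be validated against; return it or an ERR_PTR().
         */
        return ERR_PTR(-EOPNOTSUPP);
}

static void example_tmplt_destroy(void *tmplt_priv)
{
        /* Free whatever example_tmplt_create() allocated. */
}

static int example_tmplt_dump(struct sk_buff *skb, struct net *net,
                              void *tmplt_priv)
{
        /* Emit classifier-specific attributes describing the template. */
        return 0;
}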
2800
2801static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2802                               void *tmplt_priv)
2803{
2804        /* If template ops are not set, there is no template to destroy. */
2805        if (!tmplt_ops)
2806                return;
2807
2808        tmplt_ops->tmplt_destroy(tmplt_priv);
2809        module_put(tmplt_ops->owner);
2810}
2811
2812/* Add/delete/get a chain */
2813
2814static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2815                        struct netlink_ext_ack *extack)
2816{
2817        struct net *net = sock_net(skb->sk);
2818        struct nlattr *tca[TCA_MAX + 1];
2819        struct tcmsg *t;
2820        u32 parent;
2821        u32 chain_index;
2822        struct Qdisc *q;
2823        struct tcf_chain *chain;
2824        struct tcf_block *block;
2825        unsigned long cl;
2826        int err;
2827
2828        if (n->nlmsg_type != RTM_GETCHAIN &&
2829            !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2830                return -EPERM;
2831
2832replay:
2833        q = NULL;
2834        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2835                                     rtm_tca_policy, extack);
2836        if (err < 0)
2837                return err;
2838
2839        t = nlmsg_data(n);
2840        parent = t->tcm_parent;
2841        cl = 0;
2842
2843        block = tcf_block_find(net, &q, &parent, &cl,
2844                               t->tcm_ifindex, t->tcm_block_index, extack);
2845        if (IS_ERR(block))
2846                return PTR_ERR(block);
2847
2848        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2849        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2850                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2851                err = -EINVAL;
2852                goto errout_block;
2853        }
2854
2855        mutex_lock(&block->lock);
2856        chain = tcf_chain_lookup(block, chain_index);
2857        if (n->nlmsg_type == RTM_NEWCHAIN) {
2858                if (chain) {
2859                        if (tcf_chain_held_by_acts_only(chain)) {
2860                                /* The chain exists only because there is
2861                                 * some action referencing it.
2862                                 */
2863                                tcf_chain_hold(chain);
2864                        } else {
2865                                NL_SET_ERR_MSG(extack, "Filter chain already exists");
2866                                err = -EEXIST;
2867                                goto errout_block_locked;
2868                        }
2869                } else {
2870                        if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2871                                NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2872                                err = -ENOENT;
2873                                goto errout_block_locked;
2874                        }
2875                        chain = tcf_chain_create(block, chain_index);
2876                        if (!chain) {
2877                                NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2878                                err = -ENOMEM;
2879                                goto errout_block_locked;
2880                        }
2881                }
2882        } else {
2883                if (!chain || tcf_chain_held_by_acts_only(chain)) {
2884                        NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2885                        err = -EINVAL;
2886                        goto errout_block_locked;
2887                }
2888                tcf_chain_hold(chain);
2889        }
2890
2891        if (n->nlmsg_type == RTM_NEWCHAIN) {
2892                /* Modifying chain requires holding parent block lock. In case
2893                 * the chain was successfully added, take a reference to the
2894                 * chain. This ensures that an empty chain does not disappear at
2895                 * the end of this function.
2896                 */
2897                tcf_chain_hold(chain);
2898                chain->explicitly_created = true;
2899        }
2900        mutex_unlock(&block->lock);
2901
2902        switch (n->nlmsg_type) {
2903        case RTM_NEWCHAIN:
2904                err = tc_chain_tmplt_add(chain, net, tca, extack);
2905                if (err) {
2906                        tcf_chain_put_explicitly_created(chain);
2907                        goto errout;
2908                }
2909
2910                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2911                                RTM_NEWCHAIN, false);
2912                break;
2913        case RTM_DELCHAIN:
2914                tfilter_notify_chain(net, skb, block, q, parent, n,
2915                                     chain, RTM_DELTFILTER);
2916                /* Flush the chain first as the user requested chain removal. */
2917                tcf_chain_flush(chain, true);
2918                /* In case the chain was successfully deleted, put a reference
2919                 * to the chain previously taken during addition.
2920                 */
2921                tcf_chain_put_explicitly_created(chain);
2922                break;
2923        case RTM_GETCHAIN:
2924                err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2925                                      n->nlmsg_flags, n->nlmsg_type, true);
2926                if (err < 0)
2927                        NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2928                break;
2929        default:
2930                err = -EOPNOTSUPP;
2931                NL_SET_ERR_MSG(extack, "Unsupported message type");
2932                goto errout;
2933        }
2934
2935errout:
2936        tcf_chain_put(chain);
2937errout_block:
2938        tcf_block_release(q, block, true);
2939        if (err == -EAGAIN)
2940                /* Replay the request. */
2941                goto replay;
2942        return err;
2943
2944errout_block_locked:
2945        mutex_unlock(&block->lock);
2946        goto errout_block;
2947}
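
/*
 * Editor's usage note (not part of the original file): "tc chain add dev
 * eth0 ingress chain 7 flower" arrives here as RTM_NEWCHAIN, optionally
 * carrying a template as TCA_KIND plus classifier attributes, while
 * "tc chain del ..." maps to RTM_DELCHAIN and flushes the chain's filters
 * before dropping it.
 */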
2948
2949/* called with RTNL */
2950static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2951{
2952        struct net *net = sock_net(skb->sk);
2953        struct nlattr *tca[TCA_MAX + 1];
2954        struct Qdisc *q = NULL;
2955        struct tcf_block *block;
2956        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2957        struct tcf_chain *chain;
2958        long index_start;
2959        long index;
2960        int err;
2961
2962        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2963                return skb->len;
2964
2965        err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2966                                     rtm_tca_policy, cb->extack);
2967        if (err)
2968                return err;
2969
2970        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2971                block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2972                if (!block)
2973                        goto out;
2974        } else {
2975                const struct Qdisc_class_ops *cops;
2976                struct net_device *dev;
2977                unsigned long cl = 0;
2978
2979                dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2980                if (!dev)
2981                        return skb->len;
2982
2983                if (!tcm->tcm_parent)
2984                        q = rtnl_dereference(dev->qdisc);
2985                else
2986                        q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2987
2988                if (!q)
2989                        goto out;
2990                cops = q->ops->cl_ops;
2991                if (!cops)
2992                        goto out;
2993                if (!cops->tcf_block)
2994                        goto out;
2995                if (TC_H_MIN(tcm->tcm_parent)) {
2996                        cl = cops->find(q, tcm->tcm_parent);
2997                        if (cl == 0)
2998                                goto out;
2999                }
3000                block = cops->tcf_block(q, cl, NULL);
3001                if (!block)
3002                        goto out;
3003                if (tcf_block_shared(block))
3004                        q = NULL;
3005        }
3006
3007        index_start = cb->args[0];
3008        index = 0;
3009
3010        mutex_lock(&block->lock);
3011        list_for_each_entry(chain, &block->chain_list, list) {
3012                if ((tca[TCA_CHAIN] &&
3013                     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
3014                        continue;
3015                if (index < index_start) {
3016                        index++;
3017                        continue;
3018                }
3019                if (tcf_chain_held_by_acts_only(chain))
3020                        continue;
3021                err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
3022                                         chain->index, net, skb, block,
3023                                         NETLINK_CB(cb->skb).portid,
3024                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
3025                                         RTM_NEWCHAIN);
3026                if (err <= 0)
3027                        break;
3028                index++;
3029        }
3030        mutex_unlock(&block->lock);
3031
3032        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3033                tcf_block_refcnt_put(block, true);
3034        cb->args[0] = index;
3035
3036out:
3037        /* If we made no progress, the error (EMSGSIZE) is real */
3038        if (skb->len == 0 && err)
3039                return err;
3040        return skb->len;
3041}
3042
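    /* Unbind and destroy all actions attached to @exts and free the action
     * array. A no-op when CONFIG_NET_CLS_ACT is disabled.
     */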
3043void tcf_exts_destroy(struct tcf_exts *exts)
3044{
3045#ifdef CONFIG_NET_CLS_ACT
3046        if (exts->actions) {
3047                tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3048                kfree(exts->actions);
3049        }
3050        exts->nr_actions = 0;
3051#endif
3052}
3053EXPORT_SYMBOL(tcf_exts_destroy);
3054
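    /* Parse and bind the actions referenced by a classifier: either a single
     * old-style police action (tb[exts->police]) or a list of actions under
     * tb[exts->action]. Initialized actions are stored in @exts. Without
     * CONFIG_NET_CLS_ACT, the presence of either attribute is rejected with
     * -EOPNOTSUPP.
     */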
3055int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3056                         struct nlattr *rate_tlv, struct tcf_exts *exts,
3057                         u32 flags, u32 fl_flags, struct netlink_ext_ack *extack)
3058{
3059#ifdef CONFIG_NET_CLS_ACT
3060        {
3061                int init_res[TCA_ACT_MAX_PRIO] = {};
3062                struct tc_action *act;
3063                size_t attr_size = 0;
3064
3065                if (exts->police && tb[exts->police]) {
3066                        struct tc_action_ops *a_o;
3067
3068                        a_o = tc_action_load_ops(tb[exts->police], true,
3069                                                 !(flags & TCA_ACT_FLAGS_NO_RTNL),
3070                                                 extack);
3071                        if (IS_ERR(a_o))
3072                                return PTR_ERR(a_o);
3073                        flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
3074                        act = tcf_action_init_1(net, tp, tb[exts->police],
3075                                                rate_tlv, a_o, init_res, flags,
3076                                                extack);
3077                        module_put(a_o->owner);
3078                        if (IS_ERR(act))
3079                                return PTR_ERR(act);
3080
3081                        act->type = exts->type = TCA_OLD_COMPAT;
3082                        exts->actions[0] = act;
3083                        exts->nr_actions = 1;
3084                        tcf_idr_insert_many(exts->actions);
3085                } else if (exts->action && tb[exts->action]) {
3086                        int err;
3087
3088                        flags |= TCA_ACT_FLAGS_BIND;
3089                        err = tcf_action_init(net, tp, tb[exts->action],
3090                                              rate_tlv, exts->actions, init_res,
3091                                              &attr_size, flags, fl_flags,
3092                                              extack);
3093                        if (err < 0)
3094                                return err;
3095                        exts->nr_actions = err;
3096                }
3097        }
3098#else
3099        if ((exts->action && tb[exts->action]) ||
3100            (exts->police && tb[exts->police])) {
3101                NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3102                return -EOPNOTSUPP;
3103        }
3104#endif
3105
3106        return 0;
3107}
3108EXPORT_SYMBOL(tcf_exts_validate_ex);
3109
3110int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3111                      struct nlattr *rate_tlv, struct tcf_exts *exts,
3112                      u32 flags, struct netlink_ext_ack *extack)
3113{
3114        return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts,
3115                                    flags, 0, extack);
3116}
3117EXPORT_SYMBOL(tcf_exts_validate);
3118
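    /* Replace the actions of @dst with those of @src and destroy the old
     * ones. Plain struct assignment is used, so callers are expected to
     * ensure exclusive access to @dst.
     */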
3119void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3120{
3121#ifdef CONFIG_NET_CLS_ACT
3122        struct tcf_exts old = *dst;
3123
3124        *dst = *src;
3125        tcf_exts_destroy(&old);
3126#endif
3127}
3128EXPORT_SYMBOL(tcf_exts_change);
3129
3130#ifdef CONFIG_NET_CLS_ACT
3131static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3132{
3133        if (exts->nr_actions == 0)
3134                return NULL;
3135
3136        return exts->actions[0];
3137}
3138#endif
3139
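    /* Dump the actions attached to @exts in the nested attribute format, or
     * in the legacy single-police format for TCA_OLD_COMPAT extensions.
     */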
3140int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3141{
3142#ifdef CONFIG_NET_CLS_ACT
3143        struct nlattr *nest;
3144
3145        if (exts->action && tcf_exts_has_actions(exts)) {
3146                /*
3147                 * Again for backward-compatible mode - we want to work
3148                 * with both old and new modes of entering tc data even
3149                 * if iproute2 was newer - jhs
3150                 */
3151                if (exts->type != TCA_OLD_COMPAT) {
3152                        nest = nla_nest_start_noflag(skb, exts->action);
3153                        if (nest == NULL)
3154                                goto nla_put_failure;
3155
3156                        if (tcf_action_dump(skb, exts->actions, 0, 0,
3157                                            false) < 0)
3158                                goto nla_put_failure;
3159                        nla_nest_end(skb, nest);
3160                } else if (exts->police) {
3161                        struct tc_action *act = tcf_exts_first_act(exts);
3162                        nest = nla_nest_start_noflag(skb, exts->police);
3163                        if (nest == NULL || !act)
3164                                goto nla_put_failure;
3165                        if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3166                                goto nla_put_failure;
3167                        nla_nest_end(skb, nest);
3168                }
3169        }
3170        return 0;
3171
3172nla_put_failure:
3173        nla_nest_cancel(skb, nest);
3174        return -1;
3175#else
3176        return 0;
3177#endif
3178}
3179EXPORT_SYMBOL(tcf_exts_dump);
3180
3181int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
3182{
3183#ifdef CONFIG_NET_CLS_ACT
3184        struct nlattr *nest;
3185
3186        if (!exts->action || !tcf_exts_has_actions(exts))
3187                return 0;
3188
3189        nest = nla_nest_start_noflag(skb, exts->action);
3190        if (!nest)
3191                goto nla_put_failure;
3192
3193        if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
3194                goto nla_put_failure;
3195        nla_nest_end(skb, nest);
3196        return 0;
3197
3198nla_put_failure:
3199        nla_nest_cancel(skb, nest);
3200        return -1;
3201#else
3202        return 0;
3203#endif
3204}
3205EXPORT_SYMBOL(tcf_exts_terse_dump);
3206
3207int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3208{
3209#ifdef CONFIG_NET_CLS_ACT
3210        struct tc_action *a = tcf_exts_first_act(exts);
3211        if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3212                return -1;
3213#endif
3214        return 0;
3215}
3216EXPORT_SYMBOL(tcf_exts_dump_stats);
3217
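    /* Mark a filter as present in hardware and account for it in the block's
     * offloadcnt; the TCA_CLS_FLAGS_IN_HW flag guards against double
     * counting. tcf_block_offload_dec() is the inverse.
     */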
3218static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
3219{
3220        if (*flags & TCA_CLS_FLAGS_IN_HW)
3221                return;
3222        *flags |= TCA_CLS_FLAGS_IN_HW;
3223        atomic_inc(&block->offloadcnt);
3224}
3225
3226static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3227{
3228        if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3229                return;
3230        *flags &= ~TCA_CLS_FLAGS_IN_HW;
3231        atomic_dec(&block->offloadcnt);
3232}
3233
3234static void tc_cls_offload_cnt_update(struct tcf_block *block,
3235                                      struct tcf_proto *tp, u32 *cnt,
3236                                      u32 *flags, u32 diff, bool add)
3237{
3238        lockdep_assert_held(&block->cb_lock);
3239
3240        spin_lock(&tp->lock);
3241        if (add) {
3242                if (!*cnt)
3243                        tcf_block_offload_inc(block, flags);
3244                *cnt += diff;
3245        } else {
3246                *cnt -= diff;
3247                if (!*cnt)
3248                        tcf_block_offload_dec(block, flags);
3249        }
3250        spin_unlock(&tp->lock);
3251}
3252
3253static void
3254tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3255                         u32 *cnt, u32 *flags)
3256{
3257        lockdep_assert_held(&block->cb_lock);
3258
3259        spin_lock(&tp->lock);
3260        tcf_block_offload_dec(block, flags);
3261        *cnt = 0;
3262        spin_unlock(&tp->lock);
3263}
3264
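    /* Invoke every callback registered on the block's flow_block. Returns
     * the number of callbacks that succeeded, or the first error when
     * @err_stop is set.
     */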
3265static int
3266__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3267                   void *type_data, bool err_stop)
3268{
3269        struct flow_block_cb *block_cb;
3270        int ok_count = 0;
3271        int err;
3272
3273        list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3274                err = block_cb->cb(type, type_data, block_cb->cb_priv);
3275                if (err) {
3276                        if (err_stop)
3277                                return err;
3278                } else {
3279                        ok_count++;
3280                }
3281        }
3282        return ok_count;
3283}
3284
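    /* Call the block's offload callbacks without touching any offload
     * counters, taking rtnl first when the block is bound to a locked device
     * (see the ordering comment below).
     */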
3285int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3286                     void *type_data, bool err_stop, bool rtnl_held)
3287{
3288        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3289        int ok_count;
3290
3291retry:
3292        if (take_rtnl)
3293                rtnl_lock();
3294        down_read(&block->cb_lock);
3295        /* Need to obtain the rtnl lock if the block is bound to devs that
3296         * require it. In the block bind code, cb_lock is obtained while
3297         * holding rtnl, so we must obtain the locks in the same order here.
3298         */
3299        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3300                up_read(&block->cb_lock);
3301                take_rtnl = true;
3302                goto retry;
3303        }
3304
3305        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3306
3307        up_read(&block->cb_lock);
3308        if (take_rtnl)
3309                rtnl_unlock();
3310        return ok_count;
3311}
3312EXPORT_SYMBOL(tc_setup_cb_call);
3313
3314/* Non-destructive filter add. If a filter that wasn't already in hardware
3315 * is successfully offloaded, increment the block's offload counter. On
3316 * failure, a previously offloaded filter is considered intact and the
3317 * offload counter is not decremented.
3318 */
3319
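    /* A usage sketch from a classifier's perspective (the local variable
     * names are illustrative):
     *
     *	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
     *			      skip_sw, &flags, &in_hw_count, true);
     */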
3320int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3321                    enum tc_setup_type type, void *type_data, bool err_stop,
3322                    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3323{
3324        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3325        int ok_count;
3326
3327retry:
3328        if (take_rtnl)
3329                rtnl_lock();
3330        down_read(&block->cb_lock);
3331        /* Need to obtain the rtnl lock if the block is bound to devs that
3332         * require it. In the block bind code, cb_lock is obtained while
3333         * holding rtnl, so we must obtain the locks in the same order here.
3334         */
3335        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3336                up_read(&block->cb_lock);
3337                take_rtnl = true;
3338                goto retry;
3339        }
3340
3341        /* Make sure all netdevs sharing this block are offload-capable. */
3342        if (block->nooffloaddevcnt && err_stop) {
3343                ok_count = -EOPNOTSUPP;
3344                goto err_unlock;
3345        }
3346
3347        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3348        if (ok_count < 0)
3349                goto err_unlock;
3350
3351        if (tp->ops->hw_add)
3352                tp->ops->hw_add(tp, type_data);
3353        if (ok_count > 0)
3354                tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3355                                          ok_count, true);
3356err_unlock:
3357        up_read(&block->cb_lock);
3358        if (take_rtnl)
3359                rtnl_unlock();
3360        return min(ok_count, 0);
3361}
3362EXPORT_SYMBOL(tc_setup_cb_add);
3363
3364/* Destructive filter replace. If a filter that wasn't already in hardware
3365 * is successfully offloaded, increment the block's offload counter. On
3366 * failure, a previously offloaded filter is considered destroyed and the
3367 * offload counter is decremented.
3368 */
3369
3370int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3371                        enum tc_setup_type type, void *type_data, bool err_stop,
3372                        u32 *old_flags, unsigned int *old_in_hw_count,
3373                        u32 *new_flags, unsigned int *new_in_hw_count,
3374                        bool rtnl_held)
3375{
3376        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3377        int ok_count;
3378
3379retry:
3380        if (take_rtnl)
3381                rtnl_lock();
3382        down_read(&block->cb_lock);
3383        /* Need to obtain the rtnl lock if the block is bound to devs that
3384         * require it. In the block bind code, cb_lock is obtained while
3385         * holding rtnl, so we must obtain the locks in the same order here.
3386         */
3387        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3388                up_read(&block->cb_lock);
3389                take_rtnl = true;
3390                goto retry;
3391        }
3392
3393        /* Make sure all netdevs sharing this block are offload-capable. */
3394        if (block->nooffloaddevcnt && err_stop) {
3395                ok_count = -EOPNOTSUPP;
3396                goto err_unlock;
3397        }
3398
3399        tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3400        if (tp->ops->hw_del)
3401                tp->ops->hw_del(tp, type_data);
3402
3403        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3404        if (ok_count < 0)
3405                goto err_unlock;
3406
3407        if (tp->ops->hw_add)
3408                tp->ops->hw_add(tp, type_data);
3409        if (ok_count > 0)
3410                tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3411                                          new_flags, ok_count, true);
3412err_unlock:
3413        up_read(&block->cb_lock);
3414        if (take_rtnl)
3415                rtnl_unlock();
3416        return min(ok_count, 0);
3417}
3418EXPORT_SYMBOL(tc_setup_cb_replace);
3419
3420/* Destroy a filter and decrement the block's offload counter if the filter
3421 * was previously offloaded.
3422 */
3423
3424int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3425                        enum tc_setup_type type, void *type_data, bool err_stop,
3426                        u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3427{
3428        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3429        int ok_count;
3430
3431retry:
3432        if (take_rtnl)
3433                rtnl_lock();
3434        down_read(&block->cb_lock);
3435        /* Need to obtain the rtnl lock if the block is bound to devs that
3436         * require it. In the block bind code, cb_lock is obtained while
3437         * holding rtnl, so we must obtain the locks in the same order here.
3438         */
3439        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3440                up_read(&block->cb_lock);
3441                take_rtnl = true;
3442                goto retry;
3443        }
3444
3445        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3446
3447        tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3448        if (tp->ops->hw_del)
3449                tp->ops->hw_del(tp, type_data);
3450
3451        up_read(&block->cb_lock);
3452        if (take_rtnl)
3453                rtnl_unlock();
3454        return min(ok_count, 0);
3455}
3456EXPORT_SYMBOL(tc_setup_cb_destroy);
3457
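    /* Replay a single filter add or delete to one callback, e.g. when a
     * callback is (un)registered on a block with existing filters, and
     * update the filter's in_hw accounting. A failed add is only reported
     * as an error when the filter must be in hardware (skip_sw).
     */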
3458int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3459                          bool add, flow_setup_cb_t *cb,
3460                          enum tc_setup_type type, void *type_data,
3461                          void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3462{
3463        int err = cb(type, type_data, cb_priv);
3464
3465        if (err) {
3466                if (add && tc_skip_sw(*flags))
3467                        return err;
3468        } else {
3469                tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3470                                          add);
3471        }
3472
3473        return 0;
3474}
3475EXPORT_SYMBOL(tc_setup_cb_reoffload);
3476
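    /* Duplicate the action's user cookie, if set, into the flow_action
     * entry; done under RCU with an atomic allocation.
     */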
3477static int tcf_act_get_cookie(struct flow_action_entry *entry,
3478                              const struct tc_action *act)
3479{
3480        struct tc_cookie *cookie;
3481        int err = 0;
3482
3483        rcu_read_lock();
3484        cookie = rcu_dereference(act->act_cookie);
3485        if (cookie) {
3486                entry->cookie = flow_action_cookie_create(cookie->data,
3487                                                          cookie->len,
3488                                                          GFP_ATOMIC);
3489                if (!entry->cookie)
3490                        err = -ENOMEM;
3491        }
3492        rcu_read_unlock();
3493        return err;
3494}
3495
3496static void tcf_act_put_cookie(struct flow_action_entry *entry)
3497{
3498        flow_action_cookie_destroy(entry->cookie);
3499}
3500
3501void tc_cleanup_offload_action(struct flow_action *flow_action)
3502{
3503        struct flow_action_entry *entry;
3504        int i;
3505
3506        flow_action_for_each(i, entry, flow_action) {
3507                tcf_act_put_cookie(entry);
3508                if (entry->destructor)
3509                        entry->destructor(entry->destructor_priv);
3510        }
3511}
3512EXPORT_SYMBOL(tc_cleanup_offload_action);
3513
3514static int tc_setup_offload_act(struct tc_action *act,
3515                                struct flow_action_entry *entry,
3516                                u32 *index_inc)
3517{
3518#ifdef CONFIG_NET_CLS_ACT
3519        if (!act->ops->offload_act_setup)
3520                return -EOPNOTSUPP;
3521
3522        return act->ops->offload_act_setup(act, entry, index_inc, true);
3523#else
3524        return 0;
3525#endif
3526}
3527
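    /* Translate an array of tc actions into the flow_action entries that
     * drivers consume. One action may expand to several entries (see
     * tcf_exts_num_actions()); on failure, entries created so far are
     * cleaned up via tc_cleanup_offload_action().
     */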
3528int tc_setup_action(struct flow_action *flow_action,
3529                    struct tc_action *actions[])
3530{
3531        int i, j, index, err = 0;
3532        struct tc_action *act;
3533
3534        BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
3535        BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
3536        BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
3537
3538        if (!actions)
3539                return 0;
3540
3541        j = 0;
3542        tcf_act_for_each_action(i, act, actions) {
3543                struct flow_action_entry *entry;
3544
3545                entry = &flow_action->entries[j];
3546                spin_lock_bh(&act->tcfa_lock);
3547                err = tcf_act_get_cookie(entry, act);
3548                if (err)
3549                        goto err_out_locked;
3550
3551                entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3552                entry->hw_index = act->tcfa_index;
3553                index = 0;
3554                err = tc_setup_offload_act(act, entry, &index);
3555                if (!err)
3556                        j += index;
3557                else
3558                        goto err_out_locked;
3559                spin_unlock_bh(&act->tcfa_lock);
3560        }
3561
3562err_out:
3563        if (err)
3564                tc_cleanup_offload_action(flow_action);
3565
3566        return err;
3567err_out_locked:
3568        spin_unlock_bh(&act->tcfa_lock);
3569        goto err_out;
3570}
3571
3572int tc_setup_offload_action(struct flow_action *flow_action,
3573                            const struct tcf_exts *exts)
3574{
3575#ifdef CONFIG_NET_CLS_ACT
3576        if (!exts)
3577                return 0;
3578
3579        return tc_setup_action(flow_action, exts->actions);
3580#else
3581        return 0;
3582#endif
3583}
3584EXPORT_SYMBOL(tc_setup_offload_action);
3585
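    /* Return the number of flow_action entries needed to offload @exts;
     * a pedit action contributes one entry per key, all others one each.
     */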
3586unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3587{
3588        unsigned int num_acts = 0;
3589        struct tc_action *act;
3590        int i;
3591
3592        tcf_exts_for_each_action(i, act, exts) {
3593                if (is_tcf_pedit(act))
3594                        num_acts += tcf_pedit_nkeys(act);
3595                else
3596                        num_acts++;
3597        }
3598        return num_acts;
3599}
3600EXPORT_SYMBOL(tcf_exts_num_actions);
3601
3602#ifdef CONFIG_NET_CLS_ACT
3603static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
3604                                        u32 *p_block_index,
3605                                        struct netlink_ext_ack *extack)
3606{
3607        *p_block_index = nla_get_u32(block_index_attr);
3608        if (!*p_block_index) {
3609                NL_SET_ERR_MSG(extack, "Block number may not be zero");
3610                return -EINVAL;
3611        }
3612
3613        return 0;
3614}
3615
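    /* Qevents let a qdisc bind a block of classifiers to an event it emits
     * (e.g. early drop) and run them via tcf_qevent_handle(). Bind the block
     * identified by @block_index_attr, if given, to @sch.
     *
     * A minimal usage sketch from a hypothetical qdisc (attribute and field
     * names are illustrative):
     *
     *	err = tcf_qevent_init(&q->qe_early_drop, sch,
     *			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
     *			      tb[TCA_FOO_EARLY_DROP_BLOCK], extack);
     */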
3616int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
3617                    enum flow_block_binder_type binder_type,
3618                    struct nlattr *block_index_attr,
3619                    struct netlink_ext_ack *extack)
3620{
3621        u32 block_index;
3622        int err;
3623
3624        if (!block_index_attr)
3625                return 0;
3626
3627        err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3628        if (err)
3629                return err;
3630
3631        if (!block_index)
3632                return 0;
3633
3634        qe->info.binder_type = binder_type;
3635        qe->info.chain_head_change = tcf_chain_head_change_dflt;
3636        qe->info.chain_head_change_priv = &qe->filter_chain;
3637        qe->info.block_index = block_index;
3638
3639        return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
3640}
3641EXPORT_SYMBOL(tcf_qevent_init);
3642
3643void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
3644{
3645        if (qe->info.block_index)
3646                tcf_block_put_ext(qe->block, sch, &qe->info);
3647}
3648EXPORT_SYMBOL(tcf_qevent_destroy);
3649
3650int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
3651                               struct netlink_ext_ack *extack)
3652{
3653        u32 block_index;
3654        int err;
3655
3656        if (!block_index_attr)
3657                return 0;
3658
3659        err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3660        if (err)
3661                return err;
3662
3663        /* Reject a newly configured block or a change of the bound block. */
3664        if (block_index != qe->info.block_index) {
3665                NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
3666                return -EINVAL;
3667        }
3668
3669        return 0;
3670}
3671EXPORT_SYMBOL(tcf_qevent_validate_change);
3672
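    /* Run the qevent's classifier chain on @skb. Returns the skb so the
     * caller can continue processing it, or NULL when the verdict dropped or
     * consumed the packet, with *ret set to the __NET_XMIT_* result. A usage
     * sketch (field names are illustrative):
     *
     *	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
     *	if (!skb)
     *		return NET_XMIT_CN | ret;
     */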
3673struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
3674                                  struct sk_buff **to_free, int *ret)
3675{
3676        struct tcf_result cl_res;
3677        struct tcf_proto *fl;
3678
3679        if (!qe->info.block_index)
3680                return skb;
3681
3682        fl = rcu_dereference_bh(qe->filter_chain);
3683
3684        switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
3685        case TC_ACT_SHOT:
3686                qdisc_qstats_drop(sch);
3687                __qdisc_drop(skb, to_free);
3688                *ret = __NET_XMIT_BYPASS;
3689                return NULL;
3690        case TC_ACT_STOLEN:
3691        case TC_ACT_QUEUED:
3692        case TC_ACT_TRAP:
3693                __qdisc_drop(skb, to_free);
3694                *ret = __NET_XMIT_STOLEN;
3695                return NULL;
3696        case TC_ACT_REDIRECT:
3697                skb_do_redirect(skb);
3698                *ret = __NET_XMIT_STOLEN;
3699                return NULL;
3700        }
3701
3702        return skb;
3703}
3704EXPORT_SYMBOL(tcf_qevent_handle);
3705
3706int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
3707{
3708        if (!qe->info.block_index)
3709                return 0;
3710        return nla_put_u32(skb, attr_name, qe->info.block_index);
3711}
3712EXPORT_SYMBOL(tcf_qevent_dump);
3713#endif
3714
3715static __net_init int tcf_net_init(struct net *net)
3716{
3717        struct tcf_net *tn = net_generic(net, tcf_net_id);
3718
3719        spin_lock_init(&tn->idr_lock);
3720        idr_init(&tn->idr);
3721        return 0;
3722}
3723
3724static void __net_exit tcf_net_exit(struct net *net)
3725{
3726        struct tcf_net *tn = net_generic(net, tcf_net_id);
3727
3728        idr_destroy(&tn->idr);
3729}
3730
3731static struct pernet_operations tcf_net_ops = {
3732        .init = tcf_net_init,
3733        .exit = tcf_net_exit,
3734        .id   = &tcf_net_id,
3735        .size = sizeof(struct tcf_net),
3736};
3737
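    /* Module init: set up the ordered workqueue used to defer filter
     * destruction work, then register the per-netns state and the rtnetlink
     * handlers for filters and chains.
     */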
3738static int __init tc_filter_init(void)
3739{
3740        int err;
3741
3742        tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3743        if (!tc_filter_wq)
3744                return -ENOMEM;
3745
3746        err = register_pernet_subsys(&tcf_net_ops);
3747        if (err)
3748                goto err_register_pernet_subsys;
3749
3750        rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3751                      RTNL_FLAG_DOIT_UNLOCKED);
3752        rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3753                      RTNL_FLAG_DOIT_UNLOCKED);
3754        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3755                      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3756        rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3757        rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3758        rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3759                      tc_dump_chain, 0);
3760
3761        return 0;
3762
3763err_register_pernet_subsys:
3764        destroy_workqueue(tc_filter_wq);
3765        return err;
3766}
3767
3768subsys_initcall(tc_filter_init);
3769