linux/net/sched/cls_api.c
/*
 * net/sched/cls_api.c  Packet classifier API.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_police.h>
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/flow_offload.h>

extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);

static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
        return jhash_3words(tp->chain->index, tp->prio,
                            (__force __u32)tp->protocol, 0);
}

static void tcf_proto_signal_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
                     destroy_obj_hashfn(tp));
        mutex_unlock(&block->proto_destroy_lock);
}

static bool tcf_proto_cmp(const struct tcf_proto *tp1,
                          const struct tcf_proto *tp2)
{
        return tp1->chain->index == tp2->chain->index &&
               tp1->prio == tp2->prio &&
               tp1->protocol == tp2->protocol;
}

static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
                                        struct tcf_proto *tp)
{
        u32 hash = destroy_obj_hashfn(tp);
        struct tcf_proto *iter;
        bool found = false;

        rcu_read_lock();
        hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
                                   destroy_ht_node, hash) {
                if (tcf_proto_cmp(tp, iter)) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();

        return found;
}

static void
tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
{
        struct tcf_block *block = chain->block;

        mutex_lock(&block->proto_destroy_lock);
        if (hash_hashed(&tp->destroy_ht_node))
                hash_del_rcu(&tp->destroy_ht_node);
        mutex_unlock(&block->proto_destroy_lock);
}

/* Find classifier type by string name */

static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
{
        const struct tcf_proto_ops *t, *res = NULL;

        if (kind) {
                read_lock(&cls_mod_lock);
                list_for_each_entry(t, &tcf_proto_base, head) {
                        if (strcmp(kind, t->kind) == 0) {
                                if (try_module_get(t->owner))
                                        res = t;
                                break;
                        }
                }
                read_unlock(&cls_mod_lock);
        }
        return res;
}

static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
                     struct netlink_ext_ack *extack)
{
        const struct tcf_proto_ops *ops;

        ops = __tcf_proto_lookup_ops(kind);
        if (ops)
                return ops;
#ifdef CONFIG_MODULES
        if (rtnl_held)
                rtnl_unlock();
        request_module("cls_%s", kind);
        if (rtnl_held)
                rtnl_lock();
        ops = __tcf_proto_lookup_ops(kind);
        /* We dropped the RTNL semaphore in order to perform
         * the module load. So, even if we succeeded in loading
         * the module we have to replay the request. We indicate
         * this using -EAGAIN.
         */
        if (ops) {
                module_put(ops->owner);
                return ERR_PTR(-EAGAIN);
        }
#endif
        NL_SET_ERR_MSG(extack, "TC classifier not found");
        return ERR_PTR(-ENOENT);
}
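
/* Usage sketch (hypothetical caller, not from this file): the -EAGAIN
 * result above is meant to be propagated so the whole netlink request is
 * replayed once the freshly loaded module has registered its ops:
 *
 *      ops = tcf_proto_lookup_ops(name, rtnl_held, extack);
 *      if (IS_ERR(ops))
 *              return PTR_ERR(ops);    // -EAGAIN makes the caller replay
 */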

/* Register/unregister a new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -EEXIST;

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head)
                if (!strcmp(ops->kind, t->kind))
                        goto out;

        list_add_tail(&ops->head, &tcf_proto_base);
        rc = 0;
out:
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
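
/* Usage sketch (hedged; the "foo" names are hypothetical): a classifier
 * module typically registers its ops from module_init() and unregisters
 * them from module_exit():
 *
 *      static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *              .kind           = "foo",
 *              .classify       = foo_classify,
 *              .init           = foo_init,
 *              .destroy        = foo_destroy,
 *              .owner          = THIS_MODULE,
 *      };
 *
 *      static int __init init_foo(void)
 *      {
 *              return register_tcf_proto_ops(&cls_foo_ops);
 *      }
 */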

static struct workqueue_struct *tc_filter_wq;

int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
        struct tcf_proto_ops *t;
        int rc = -ENOENT;

        /* Wait for outstanding call_rcu()s, if any, from a
         * tcf_proto_ops's destroy() handler.
         */
        rcu_barrier();
        flush_workqueue(tc_filter_wq);

        write_lock(&cls_mod_lock);
        list_for_each_entry(t, &tcf_proto_base, head) {
                if (t == ops) {
                        list_del(&t->head);
                        rc = 0;
                        break;
                }
        }
        write_unlock(&cls_mod_lock);
        return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);

bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
        INIT_RCU_WORK(rwork, func);
        return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);
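
/* Usage sketch (hedged; "my_filter" is hypothetical): classifiers use
 * tcf_queue_work() to defer freeing a filter until after an RCU grace
 * period, on the shared tc workqueue:
 *
 *      static void my_filter_destroy_work(struct work_struct *work)
 *      {
 *              struct my_filter *f = container_of(to_rcu_work(work),
 *                                                 struct my_filter, rwork);
 *              kfree(f);
 *      }
 *      ...
 *      tcf_queue_work(&f->rwork, my_filter_destroy_work);
 */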

/* Select a new prio value from the range managed by the kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
        u32 first = TC_H_MAKE(0xC0000000U, 0U);

        if (tp)
                first = tp->prio - 1;

        return TC_H_MAJ(first);
}
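
/* Worked example: TC_H_MAJ() keeps only the upper 16 bits, so with no
 * existing filter the auto prio is TC_H_MAJ(0xC0000000) == 0xC0000000.
 * If the current head has prio 0xC0000000, the next auto-allocated prio
 * is TC_H_MAJ(0xC0000000 - 1) == 0xBFFF0000, i.e. kernel-managed
 * priorities count down from 0xC000 in the major part.
 */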

static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
{
        if (kind)
                return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
        memset(name, 0, IFNAMSIZ);
        return false;
}

static bool tcf_proto_is_unlocked(const char *kind)
{
        const struct tcf_proto_ops *ops;
        bool ret;

        if (strlen(kind) == 0)
                return false;

        ops = tcf_proto_lookup_ops(kind, false, NULL);
        /* On error return false to take rtnl lock. Proto lookup/create
         * functions will perform lookup again and properly handle errors.
         */
        if (IS_ERR(ops))
                return false;

        ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
        module_put(ops->owner);
        return ret;
}

static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
                                          bool rtnl_held,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_proto *tp;
        int err;

        tp = kzalloc(sizeof(*tp), GFP_KERNEL);
        if (!tp)
                return ERR_PTR(-ENOBUFS);

        tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto errout;
        }
        tp->classify = tp->ops->classify;
        tp->protocol = protocol;
        tp->prio = prio;
        tp->chain = chain;
        spin_lock_init(&tp->lock);
        refcount_set(&tp->refcnt, 1);

        err = tp->ops->init(tp);
        if (err) {
                module_put(tp->ops->owner);
                goto errout;
        }
        return tp;

errout:
        kfree(tp);
        return ERR_PTR(err);
}

static void tcf_proto_get(struct tcf_proto *tp)
{
        refcount_inc(&tp->refcnt);
}

static void tcf_chain_put(struct tcf_chain *chain);

static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
                              bool sig_destroy, struct netlink_ext_ack *extack)
{
        tp->ops->destroy(tp, rtnl_held, extack);
        if (sig_destroy)
                tcf_proto_signal_destroyed(tp->chain, tp);
        tcf_chain_put(tp->chain);
        module_put(tp->ops->owner);
        kfree_rcu(tp, rcu);
}

static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
                          struct netlink_ext_ack *extack)
{
        if (refcount_dec_and_test(&tp->refcnt))
                tcf_proto_destroy(tp, rtnl_held, true, extack);
}

static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
        if (tp->ops->delete_empty)
                return tp->ops->delete_empty(tp);

        tp->deleting = true;
        return tp->deleting;
}

static void tcf_proto_mark_delete(struct tcf_proto *tp)
{
        spin_lock(&tp->lock);
        tp->deleting = true;
        spin_unlock(&tp->lock);
}

static bool tcf_proto_is_deleting(struct tcf_proto *tp)
{
        bool deleting;

        spin_lock(&tp->lock);
        deleting = tp->deleting;
        spin_unlock(&tp->lock);

        return deleting;
}

#define ASSERT_BLOCK_LOCKED(block)                                      \
        lockdep_assert_held(&(block)->lock)

struct tcf_filter_chain_list_item {
        struct list_head list;
        tcf_chain_head_change_t *chain_head_change;
        void *chain_head_change_priv;
};

static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
        if (!chain)
                return NULL;
        list_add_tail(&chain->list, &block->chain_list);
        mutex_init(&chain->filter_chain_lock);
        chain->block = block;
        chain->index = chain_index;
        chain->refcnt = 1;
        if (!chain->index)
                block->chain0.chain = chain;
        return chain;
}

static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
                                       struct tcf_proto *tp_head)
{
        if (item->chain_head_change)
                item->chain_head_change(tp_head, item->chain_head_change_priv);
}

static void tcf_chain0_head_change(struct tcf_chain *chain,
                                   struct tcf_proto *tp_head)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_block *block = chain->block;

        if (chain->index)
                return;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list)
                tcf_chain_head_change_item(item, tp_head);
        mutex_unlock(&block->lock);
}

/* Returns true if block can be safely freed. */

static bool tcf_chain_detach(struct tcf_chain *chain)
{
        struct tcf_block *block = chain->block;

        ASSERT_BLOCK_LOCKED(block);

        list_del(&chain->list);
        if (!chain->index)
                block->chain0.chain = NULL;

        if (list_empty(&block->chain_list) &&
            refcount_read(&block->refcnt) == 0)
                return true;

        return false;
}

static void tcf_block_destroy(struct tcf_block *block)
{
        mutex_destroy(&block->lock);
        mutex_destroy(&block->proto_destroy_lock);
        kfree_rcu(block, rcu);
}

static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
{
        struct tcf_block *block = chain->block;

        mutex_destroy(&chain->filter_chain_lock);
        kfree_rcu(chain, rcu);
        if (free_block)
                tcf_block_destroy(block);
}

static void tcf_chain_hold(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        ++chain->refcnt;
}

static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
        ASSERT_BLOCK_LOCKED(chain->block);

        /* In case all the references are action references, this
         * chain should not be shown to the user.
         */
        return chain->refcnt == chain->action_refcnt;
}

static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
                                          u32 chain_index)
{
        struct tcf_chain *chain;

        ASSERT_BLOCK_LOCKED(block);

        list_for_each_entry(chain, &block->chain_list, list) {
                if (chain->index == chain_index)
                        return chain;
        }
        return NULL;
}

static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
                           u32 seq, u16 flags, int event, bool unicast);

static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
                                         u32 chain_index, bool create,
                                         bool by_act)
{
        struct tcf_chain *chain = NULL;
        bool is_first_reference;

        mutex_lock(&block->lock);
        chain = tcf_chain_lookup(block, chain_index);
        if (chain) {
                tcf_chain_hold(chain);
        } else {
                if (!create)
                        goto errout;
                chain = tcf_chain_create(block, chain_index);
                if (!chain)
                        goto errout;
        }

        if (by_act)
                ++chain->action_refcnt;
        is_first_reference = chain->refcnt - chain->action_refcnt == 1;
        mutex_unlock(&block->lock);

        /* Send notification only in case we got the first
         * non-action reference. Until then, the chain acts only as
         * a placeholder for actions pointing to it and the user ought
         * not to know about them.
         */
        if (is_first_reference && !by_act)
                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
                                RTM_NEWCHAIN, false);

        return chain;

errout:
        mutex_unlock(&block->lock);
        return chain;
}

static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
                                       bool create)
{
        return __tcf_chain_get(block, chain_index, create, false);
}

struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
        return __tcf_chain_get(block, chain_index, true, true);
}
EXPORT_SYMBOL(tcf_chain_get_by_act);
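
/* Usage sketch (hedged): the actions code takes a chain reference when a
 * "goto chain" action is initialized and drops it when the action is
 * released, roughly:
 *
 *      chain = tcf_chain_get_by_act(block, chain_index);
 *      ...
 *      tcf_chain_put_by_act(chain);
 */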

static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
                               void *tmplt_priv);
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
                                  void *tmplt_priv, u32 chain_index,
                                  struct tcf_block *block, struct sk_buff *oskb,
                                  u32 seq, u16 flags, bool unicast);

static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
                            bool explicitly_created)
{
        struct tcf_block *block = chain->block;
        const struct tcf_proto_ops *tmplt_ops;
        bool free_block = false;
        unsigned int refcnt;
        void *tmplt_priv;

        mutex_lock(&block->lock);
        if (explicitly_created) {
                if (!chain->explicitly_created) {
                        mutex_unlock(&block->lock);
                        return;
                }
                chain->explicitly_created = false;
        }

        if (by_act)
                chain->action_refcnt--;

        /* tc_chain_notify_delete can't be called while holding block lock.
         * However, when block is unlocked chain can be changed concurrently, so
         * save these to temporary variables.
         */
        refcnt = --chain->refcnt;
        tmplt_ops = chain->tmplt_ops;
        tmplt_priv = chain->tmplt_priv;

        /* The last dropped non-action reference will trigger notification. */
        if (refcnt - chain->action_refcnt == 0 && !by_act) {
                tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
                                       block, NULL, 0, 0, false);
                /* Last reference to chain, no need to lock. */
                chain->flushing = false;
        }

        if (refcnt == 0)
                free_block = tcf_chain_detach(chain);
        mutex_unlock(&block->lock);

        if (refcnt == 0) {
                tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
                tcf_chain_destroy(chain, free_block);
        }
}

static void tcf_chain_put(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, false);
}

void tcf_chain_put_by_act(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);

static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
        __tcf_chain_put(chain, false, true);
}

static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
        struct tcf_proto *tp, *tp_next;

        mutex_lock(&chain->filter_chain_lock);
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_signal_destroying(chain, tp);
                tp = tp_next;
        }
        tp = tcf_chain_dereference(chain->filter_chain, chain);
        RCU_INIT_POINTER(chain->filter_chain, NULL);
        tcf_chain0_head_change(chain, NULL);
        chain->flushing = true;
        mutex_unlock(&chain->filter_chain_lock);

        while (tp) {
                tp_next = rcu_dereference_protected(tp->next, 1);
                tcf_proto_put(tp, rtnl_held, NULL);
                tp = tp_next;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo);

static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
                              flow_indr_block_bind_cb_t *cb, void *cb_priv,
                              enum flow_block_command command, bool ingress)
{
        struct flow_block_offload bo = {
                .command        = command,
                .binder_type    = ingress ?
                                  FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
                                  FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
                .net            = dev_net(dev),
                .block_shared   = tcf_block_non_null_shared(block),
        };
        INIT_LIST_HEAD(&bo.cb_list);

        if (!block)
                return;

        bo.block = &block->flow_block;

        down_write(&block->cb_lock);
        cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);

        tcf_block_setup(block, &bo);
        up_write(&block->cb_lock);
}

static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
{
        const struct Qdisc_class_ops *cops;
        const struct Qdisc_ops *ops;
        struct Qdisc *qdisc;

        if (!dev_ingress_queue(dev))
                return NULL;

        qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
        if (!qdisc)
                return NULL;

        ops = qdisc->ops;
        if (!ops)
                return NULL;

        if (!ingress && !strcmp("ingress", ops->id))
                return NULL;

        cops = ops->cl_ops;
        if (!cops)
                return NULL;

        if (!cops->tcf_block)
                return NULL;

        return cops->tcf_block(qdisc,
                               ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
                               NULL);
}

static void tc_indr_block_get_and_cmd(struct net_device *dev,
                                      flow_indr_block_bind_cb_t *cb,
                                      void *cb_priv,
                                      enum flow_block_command command)
{
        struct tcf_block *block;

        block = tc_dev_block(dev, true);
        tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);

        block = tc_dev_block(dev, false);
        tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
}

static void tc_indr_block_call(struct tcf_block *block,
                               struct net_device *dev,
                               struct tcf_block_ext_info *ei,
                               enum flow_block_command command,
                               struct netlink_ext_ack *extack)
{
        struct flow_block_offload bo = {
                .command        = command,
                .binder_type    = ei->binder_type,
                .net            = dev_net(dev),
                .block          = &block->flow_block,
                .block_shared   = tcf_block_shared(block),
                .extack         = extack,
        };
        INIT_LIST_HEAD(&bo.cb_list);

        flow_indr_block_call(dev, &bo, command);
        tcf_block_setup(block, &bo);
}

static bool tcf_block_offload_in_use(struct tcf_block *block)
{
        return atomic_read(&block->offloadcnt);
}

static int tcf_block_offload_cmd(struct tcf_block *block,
                                 struct net_device *dev,
                                 struct tcf_block_ext_info *ei,
                                 enum flow_block_command command,
                                 struct netlink_ext_ack *extack)
{
        struct flow_block_offload bo = {};
        int err;

        bo.net = dev_net(dev);
        bo.command = command;
        bo.binder_type = ei->binder_type;
        bo.block = &block->flow_block;
        bo.block_shared = tcf_block_shared(block);
        bo.extack = extack;
        INIT_LIST_HEAD(&bo.cb_list);

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
        if (err < 0)
                return err;

        return tcf_block_setup(block, &bo);
}

static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
                                  struct tcf_block_ext_info *ei,
                                  struct netlink_ext_ack *extack)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);
        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_inc;

        /* If the tc offload feature is disabled and the block we try to bind
         * to already has some offloaded filters, refuse to bind.
         */
        if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
                NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
                err = -EOPNOTSUPP;
                goto err_unlock;
        }

        err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_inc;
        if (err)
                goto err_unlock;

        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
        up_write(&block->cb_lock);
        return 0;

no_offload_dev_inc:
        if (tcf_block_offload_in_use(block)) {
                err = -EOPNOTSUPP;
                goto err_unlock;
        }
        err = 0;
        block->nooffloaddevcnt++;
        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
err_unlock:
        up_write(&block->cb_lock);
        return err;
}

static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
                                     struct tcf_block_ext_info *ei)
{
        struct net_device *dev = q->dev_queue->dev;
        int err;

        down_write(&block->cb_lock);
        tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);

        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_dec;
        err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_dec;
        up_write(&block->cb_lock);
        return;

no_offload_dev_dec:
        WARN_ON(block->nooffloaddevcnt-- == 0);
        up_write(&block->cb_lock);
}

static int
tcf_chain0_head_change_cb_add(struct tcf_block *block,
                              struct tcf_block_ext_info *ei,
                              struct netlink_ext_ack *extack)
{
        struct tcf_filter_chain_list_item *item;
        struct tcf_chain *chain0;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item) {
                NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
                return -ENOMEM;
        }
        item->chain_head_change = ei->chain_head_change;
        item->chain_head_change_priv = ei->chain_head_change_priv;

        mutex_lock(&block->lock);
        chain0 = block->chain0.chain;
        if (chain0)
                tcf_chain_hold(chain0);
        else
                list_add(&item->list, &block->chain0.filter_chain_list);
        mutex_unlock(&block->lock);

        if (chain0) {
                struct tcf_proto *tp_head;

                mutex_lock(&chain0->filter_chain_lock);

                tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
                if (tp_head)
                        tcf_chain_head_change_item(item, tp_head);

                mutex_lock(&block->lock);
                list_add(&item->list, &block->chain0.filter_chain_list);
                mutex_unlock(&block->lock);

                mutex_unlock(&chain0->filter_chain_lock);
                tcf_chain_put(chain0);
        }

        return 0;
}

static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
                              struct tcf_block_ext_info *ei)
{
        struct tcf_filter_chain_list_item *item;

        mutex_lock(&block->lock);
        list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
                        if (block->chain0.chain)
                                tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
                        mutex_unlock(&block->lock);

                        kfree(item);
                        return;
                }
        }
        mutex_unlock(&block->lock);
        WARN_ON(1);
}

struct tcf_net {
        spinlock_t idr_lock; /* Protects idr */
        struct idr idr;
};

static unsigned int tcf_net_id;

static int tcf_block_insert(struct tcf_block *block, struct net *net,
                            struct netlink_ext_ack *extack)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);
        int err;

        idr_preload(GFP_KERNEL);
        spin_lock(&tn->idr_lock);
        err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
                            GFP_NOWAIT);
        spin_unlock(&tn->idr_lock);
        idr_preload_end();

        return err;
}

static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        spin_lock(&tn->idr_lock);
        idr_remove(&tn->idr, block->index);
        spin_unlock(&tn->idr_lock);
}

static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        block = kzalloc(sizeof(*block), GFP_KERNEL);
        if (!block) {
                NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
                return ERR_PTR(-ENOMEM);
        }
        mutex_init(&block->lock);
        mutex_init(&block->proto_destroy_lock);
        init_rwsem(&block->cb_lock);
        flow_block_init(&block->flow_block);
        INIT_LIST_HEAD(&block->chain_list);
        INIT_LIST_HEAD(&block->owner_list);
        INIT_LIST_HEAD(&block->chain0.filter_chain_list);

        refcount_set(&block->refcnt, 1);
        block->net = net;
        block->index = block_index;

        /* Don't store q pointer for blocks which are shared */
        if (!tcf_block_shared(block))
                block->q = q;
        return block;
}

static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
        struct tcf_net *tn = net_generic(net, tcf_net_id);

        return idr_find(&tn->idr, block_index);
}

static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
        struct tcf_block *block;

        rcu_read_lock();
        block = tcf_block_lookup(net, block_index);
        if (block && !refcount_inc_not_zero(&block->refcnt))
                block = NULL;
        rcu_read_unlock();

        return block;
}

static struct tcf_chain *
__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        mutex_lock(&block->lock);
        if (chain)
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);
        else
                chain = list_first_entry_or_null(&block->chain_list,
                                                 struct tcf_chain, list);

        /* skip all action-only chains */
        while (chain && tcf_chain_held_by_acts_only(chain))
                chain = list_is_last(&chain->list, &block->chain_list) ?
                        NULL : list_next_entry(chain, list);

        if (chain)
                tcf_chain_hold(chain);
        mutex_unlock(&block->lock);

        return chain;
}

/* Function to be used by all clients that want to iterate over all chains on
 * a block. It properly obtains block->lock and takes a reference to the chain
 * before returning it. Users of this function must be tolerant to concurrent
 * chain insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user-space.
 */

struct tcf_chain *
tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
        struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);

        if (chain)
                tcf_chain_put(chain);

        return chain_next;
}
EXPORT_SYMBOL(tcf_get_next_chain);

static struct tcf_proto *
__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
        u32 prio = 0;

        ASSERT_RTNL();
        mutex_lock(&chain->filter_chain_lock);

        if (!tp) {
                tp = tcf_chain_dereference(chain->filter_chain, chain);
        } else if (tcf_proto_is_deleting(tp)) {
                /* 'deleting' flag is set and chain->filter_chain_lock was
                 * unlocked, which means next pointer could be invalid. Restart
                 * search.
                 */
                prio = tp->prio + 1;
                tp = tcf_chain_dereference(chain->filter_chain, chain);

                for (; tp; tp = tcf_chain_dereference(tp->next, chain))
                        if (!tp->deleting && tp->prio >= prio)
                                break;
        } else {
                tp = tcf_chain_dereference(tp->next, chain);
        }

        if (tp)
                tcf_proto_get(tp);

        mutex_unlock(&chain->filter_chain_lock);

        return tp;
}

/* Function to be used by all clients that want to iterate over all tp's on
 * a chain. Users of this function must be tolerant to concurrent tp
 * insertion/deletion or ensure that no concurrent chain modification is
 * possible. Note that netlink dump callbacks cannot guarantee a consistent
 * dump because the rtnl lock is released each time the skb is filled with
 * data and sent to user-space.
 */

struct tcf_proto *
tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
                   bool rtnl_held)
{
        struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);

        if (tp)
                tcf_proto_put(tp, rtnl_held, NULL);

        return tp_next;
}
EXPORT_SYMBOL(tcf_get_next_proto);
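
/* Usage sketch (hedged): combined with tcf_get_next_chain() above, this
 * lets a dumper visit every filter on a block while the iterators handle
 * all reference counting:
 *
 *      for (chain = tcf_get_next_chain(block, NULL); chain;
 *           chain = tcf_get_next_chain(block, chain))
 *              for (tp = tcf_get_next_proto(chain, NULL, true); tp;
 *                   tp = tcf_get_next_proto(chain, tp, true))
 *                      visit(tp);      // "visit" is a placeholder
 */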

static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
{
        struct tcf_chain *chain;

        /* Last reference to block. At this point chains cannot be added or
         * removed concurrently.
         */
        for (chain = tcf_get_next_chain(block, NULL);
             chain;
             chain = tcf_get_next_chain(block, chain)) {
                tcf_chain_put_explicitly_created(chain);
                tcf_chain_flush(chain, rtnl_held);
        }
}

/* Look up the Qdisc and increment its reference counter.
 * Set parent, if necessary.
 */

static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
                            u32 *parent, int ifindex, bool rtnl_held,
                            struct netlink_ext_ack *extack)
{
        const struct Qdisc_class_ops *cops;
        struct net_device *dev;
        int err = 0;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        rcu_read_lock();

        /* Find link */
        dev = dev_get_by_index_rcu(net, ifindex);
        if (!dev) {
                rcu_read_unlock();
                return -ENODEV;
        }

        /* Find qdisc */
        if (!*parent) {
                *q = dev->qdisc;
                *parent = (*q)->handle;
        } else {
                *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                        err = -EINVAL;
                        goto errout_rcu;
                }
        }

        *q = qdisc_refcount_inc_nz(*q);
        if (!*q) {
                NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
                err = -EINVAL;
                goto errout_rcu;
        }

        /* Is it classful? */
        cops = (*q)->ops->cl_ops;
        if (!cops) {
                NL_SET_ERR_MSG(extack, "Qdisc not classful");
                err = -EINVAL;
                goto errout_qdisc;
        }

        if (!cops->tcf_block) {
                NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
                err = -EOPNOTSUPP;
                goto errout_qdisc;
        }

errout_rcu:
        /* At this point we know that qdisc is not noop_qdisc,
         * which means that qdisc holds a reference to net_device
         * and we hold a reference to qdisc, so it is safe to release
         * rcu read lock.
         */
        rcu_read_unlock();
        return err;

errout_qdisc:
        rcu_read_unlock();

        if (rtnl_held)
                qdisc_put(*q);
        else
                qdisc_put_unlocked(*q);
        *q = NULL;

        return err;
}

static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
                               int ifindex, struct netlink_ext_ack *extack)
{
        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
                return 0;

        /* Are we searching for a filter attached to a class? */
        if (TC_H_MIN(parent)) {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                *cl = cops->find(q, parent);
                if (*cl == 0) {
                        NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
                        return -ENOENT;
                }
        }

        return 0;
}

static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
                                          unsigned long cl, int ifindex,
                                          u32 block_index,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_block *block;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
                block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
                        NL_SET_ERR_MSG(extack, "Block of given index was not found");
                        return ERR_PTR(-EINVAL);
                }
        } else {
                const struct Qdisc_class_ops *cops = q->ops->cl_ops;

                block = cops->tcf_block(q, cl, extack);
                if (!block)
                        return ERR_PTR(-EINVAL);

                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
                        return ERR_PTR(-EOPNOTSUPP);
                }

                /* Always take a reference to the block in order to support
                 * execution of the rules update path of the cls API without
                 * the rtnl lock. The caller must release the block when it is
                 * finished using it. The 'if' branch of this conditional
                 * obtains its reference by calling tcf_block_refcnt_get().
                 */
                refcount_inc(&block->refcnt);
        }

        return block;
}

static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
                            struct tcf_block_ext_info *ei, bool rtnl_held)
{
        if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
                /* Flushing/putting all chains will cause the block to be
                 * deallocated when last chain is freed. However, if chain_list
                 * is empty, block has to be manually deallocated. After block
                 * reference counter reached 0, it is no longer possible to
                 * increment it or add new chains to block.
                 */
                bool free_block = list_empty(&block->chain_list);

                mutex_unlock(&block->lock);
                if (tcf_block_shared(block))
                        tcf_block_remove(block, block->net);

                if (q)
                        tcf_block_offload_unbind(block, q, ei);

                if (free_block)
                        tcf_block_destroy(block);
                else
                        tcf_block_flush_all_chains(block, rtnl_held);
        } else if (q) {
                tcf_block_offload_unbind(block, q, ei);
        }
}

static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
{
        __tcf_block_put(block, NULL, NULL, rtnl_held);
}

/* Find tcf block.
 * Set q, parent, cl when appropriate.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
                                        u32 *parent, unsigned long *cl,
                                        int ifindex, u32 block_index,
                                        struct netlink_ext_ack *extack)
{
        struct tcf_block *block;
        int err = 0;

        ASSERT_RTNL();

        err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
        if (err)
                goto errout;

        err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
        if (err)
                goto errout_qdisc;

        block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
        if (IS_ERR(block)) {
                err = PTR_ERR(block);
                goto errout_qdisc;
        }

        return block;

errout_qdisc:
        if (*q)
                qdisc_put(*q);
errout:
        *q = NULL;
        return ERR_PTR(err);
}

static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
                              bool rtnl_held)
{
        if (!IS_ERR_OR_NULL(block))
                tcf_block_refcnt_put(block, rtnl_held);

        if (q) {
                if (rtnl_held)
                        qdisc_put(q);
                else
                        qdisc_put_unlocked(q);
        }
}

struct tcf_block_owner_item {
        struct list_head list;
        struct Qdisc *q;
        enum flow_block_binder_type binder_type;
};

static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        if (block->keep_dst &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
            binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
                netif_keep_dst(qdisc_dev(q));
}

void tcf_block_netif_keep_dst(struct tcf_block *block)
{
        struct tcf_block_owner_item *item;

        block->keep_dst = true;
        list_for_each_entry(item, &block->owner_list, list)
                tcf_block_owner_netif_keep_dst(block, item->q,
                                               item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);

static int tcf_block_owner_add(struct tcf_block *block,
                               struct Qdisc *q,
                               enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        item = kmalloc(sizeof(*item), GFP_KERNEL);
        if (!item)
                return -ENOMEM;
        item->q = q;
        item->binder_type = binder_type;
        list_add(&item->list, &block->owner_list);
        return 0;
}

static void tcf_block_owner_del(struct tcf_block *block,
                                struct Qdisc *q,
                                enum flow_block_binder_type binder_type)
{
        struct tcf_block_owner_item *item;

        list_for_each_entry(item, &block->owner_list, list) {
                if (item->q == q && item->binder_type == binder_type) {
                        list_del(&item->list);
                        kfree(item);
                        return;
                }
        }
        WARN_ON(1);
}

int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
                      struct tcf_block_ext_info *ei,
                      struct netlink_ext_ack *extack)
{
        struct net *net = qdisc_net(q);
        struct tcf_block *block = NULL;
        int err;

        if (ei->block_index)
                /* block_index not 0 means the shared block is requested */
                block = tcf_block_refcnt_get(net, ei->block_index);

        if (!block) {
                block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                if (tcf_block_shared(block)) {
                        err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
        }

        err = tcf_block_owner_add(block, q, ei->binder_type);
        if (err)
                goto err_block_owner_add;

        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

        err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
                goto err_chain0_head_change_cb_add;

        err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;

        *p_block = block;
        return 0;

err_block_offload_bind:
        tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
        tcf_block_refcnt_put(block, true);
        return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);

static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
        struct tcf_proto __rcu **p_filter_chain = priv;

        rcu_assign_pointer(*p_filter_chain, tp_head);
}

int tcf_block_get(struct tcf_block **p_block,
                  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
                  struct netlink_ext_ack *extack)
{
        struct tcf_block_ext_info ei = {
                .chain_head_change = tcf_chain_head_change_dflt,
                .chain_head_change_priv = p_filter_chain,
        };

        WARN_ON(!p_filter_chain);
        return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
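
/* Usage sketch (hedged): a classful qdisc typically obtains its block in
 * its ->init() callback and releases it in ->destroy():
 *
 *      err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *      if (err)
 *              return err;
 *      ...
 *      tcf_block_put(q->block);
 */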

/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
                       struct tcf_block_ext_info *ei)
{
        if (!block)
                return;
        tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);

        __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);

void tcf_block_put(struct tcf_block *block)
{
        struct tcf_block_ext_info ei = {0, };

        if (!block)
                return;
        tcf_block_put_ext(block, block->q, &ei);
}

EXPORT_SYMBOL(tcf_block_put);

static int
tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
                            void *cb_priv, bool add, bool offload_in_use,
                            struct netlink_ext_ack *extack)
{
        struct tcf_chain *chain, *chain_prev;
        struct tcf_proto *tp, *tp_prev;
        int err;

        lockdep_assert_held(&block->cb_lock);

        for (chain = __tcf_get_next_chain(block, NULL);
             chain;
             chain_prev = chain,
                     chain = __tcf_get_next_chain(block, chain),
                     tcf_chain_put(chain_prev)) {
                for (tp = __tcf_get_next_proto(chain, NULL); tp;
                     tp_prev = tp,
                             tp = __tcf_get_next_proto(chain, tp),
                             tcf_proto_put(tp_prev, true, NULL)) {
                        if (tp->ops->reoffload) {
                                err = tp->ops->reoffload(tp, add, cb, cb_priv,
                                                         extack);
                                if (err && add)
                                        goto err_playback_remove;
                        } else if (add && offload_in_use) {
                                err = -EOPNOTSUPP;
                                NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
                                goto err_playback_remove;
                        }
                }
        }

        return 0;

err_playback_remove:
        tcf_proto_put(tp, true, NULL);
        tcf_chain_put(chain);
        tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
                                    extack);
        return err;
}

static int tcf_block_bind(struct tcf_block *block,
                          struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;
        int err, i = 0;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry(block_cb, &bo->cb_list, list) {
                err = tcf_block_playback_offloads(block, block_cb->cb,
                                                  block_cb->cb_priv, true,
                                                  tcf_block_offload_in_use(block),
                                                  bo->extack);
                if (err)
                        goto err_unroll;
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt++;

                i++;
        }
        list_splice(&bo->cb_list, &block->flow_block.cb_list);

        return 0;

err_unroll:
        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                if (i-- > 0) {
                        list_del(&block_cb->list);
                        tcf_block_playback_offloads(block, block_cb->cb,
                                                    block_cb->cb_priv, false,
                                                    tcf_block_offload_in_use(block),
                                                    NULL);
                        if (!bo->unlocked_driver_cb)
                                block->lockeddevcnt--;
                }
                flow_block_cb_free(block_cb);
        }

        return err;
}

static void tcf_block_unbind(struct tcf_block *block,
                             struct flow_block_offload *bo)
{
        struct flow_block_cb *block_cb, *next;

        lockdep_assert_held(&block->cb_lock);

        list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                tcf_block_playback_offloads(block, block_cb->cb,
                                            block_cb->cb_priv, false,
                                            tcf_block_offload_in_use(block),
                                            NULL);
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
                if (!bo->unlocked_driver_cb)
                        block->lockeddevcnt--;
        }
}

static int tcf_block_setup(struct tcf_block *block,
                           struct flow_block_offload *bo)
{
        int err;

        switch (bo->command) {
        case FLOW_BLOCK_BIND:
                err = tcf_block_bind(block, bo);
                break;
        case FLOW_BLOCK_UNBIND:
                err = 0;
                tcf_block_unbind(block, bo);
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for the protocol and asks
 * specific classifiers.
 */
1567int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1568                 struct tcf_result *res, bool compat_mode)
1569{
1570#ifdef CONFIG_NET_CLS_ACT
1571        const int max_reclassify_loop = 4;
1572        const struct tcf_proto *orig_tp = tp;
1573        const struct tcf_proto *first_tp;
1574        int limit = 0;
1575
1576reclassify:
1577#endif
1578        for (; tp; tp = rcu_dereference_bh(tp->next)) {
1579                __be16 protocol = tc_skb_protocol(skb);
1580                int err;
1581
1582                if (tp->protocol != protocol &&
1583                    tp->protocol != htons(ETH_P_ALL))
1584                        continue;
1585
1586                err = tp->classify(skb, tp, res);
1587#ifdef CONFIG_NET_CLS_ACT
1588                if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
1589                        first_tp = orig_tp;
1590                        goto reset;
1591                } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
1592                        first_tp = res->goto_tp;
1593
1594#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1595                        {
1596                                struct tc_skb_ext *ext;
1597
1598                                ext = skb_ext_add(skb, TC_SKB_EXT);
1599                                if (WARN_ON_ONCE(!ext))
1600                                        return TC_ACT_SHOT;
1601
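                                    /* Record the goto_chain target in the tc
                                     * skb extension so consumers of the skb
                                     * outside this classification pass can
                                     * tell which chain the packet was headed
                                     * to.
                                     */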
1602                                ext->chain = err & TC_ACT_EXT_VAL_MASK;
1603                        }
1604#endif
1605                        goto reset;
1606                }
1607#endif
1608                if (err >= 0)
1609                        return err;
1610        }
1611
1612        return TC_ACT_UNSPEC; /* signal: continue lookup */
1613#ifdef CONFIG_NET_CLS_ACT
1614reset:
1615        if (unlikely(limit++ >= max_reclassify_loop)) {
1616                net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1617                                       tp->chain->block->index,
1618                                       tp->prio & 0xffff,
1619                                       ntohs(tp->protocol));
1620                return TC_ACT_SHOT;
1621        }
1622
1623        tp = first_tp;
1624        goto reclassify;
1625#endif
1626}
1627EXPORT_SYMBOL(tcf_classify);
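
    /* Illustrative sketch (not part of this file): a classful qdisc's
     * ->enqueue() typically uses tcf_classify() along these lines, with 'q'
     * standing for a hypothetical qdisc private struct that keeps an
     * RCU-managed filter_list:
     *
     *      struct tcf_result res;
     *      struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
     *      int result = tcf_classify(skb, fl, &res, false);
     *
     *      if (result == TC_ACT_SHOT)
     *              drop the packet;
     *      else if (result == TC_ACT_UNSPEC)
     *              enqueue to the qdisc's default class;
     *      else
     *              map res.classid to a class and enqueue there;
     */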
1628
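    /* Cursor into a chain's linked list of tps, as produced by
     * tcf_chain_tp_find(): pprev points at the link (chain->filter_chain or
     * some tp->next) where the tp was found or would be inserted; next
     * caches the tp that follows.
     */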
1629struct tcf_chain_info {
1630        struct tcf_proto __rcu **pprev;
1631        struct tcf_proto __rcu *next;
1632};
1633
1634static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1635                                           struct tcf_chain_info *chain_info)
1636{
1637        return tcf_chain_dereference(*chain_info->pprev, chain);
1638}
1639
1640static int tcf_chain_tp_insert(struct tcf_chain *chain,
1641                               struct tcf_chain_info *chain_info,
1642                               struct tcf_proto *tp)
1643{
1644        if (chain->flushing)
1645                return -EAGAIN;
1646
1647        if (*chain_info->pprev == chain->filter_chain)
1648                tcf_chain0_head_change(chain, tp);
1649        tcf_proto_get(tp);
1650        RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1651        rcu_assign_pointer(*chain_info->pprev, tp);
1652
1653        return 0;
1654}
1655
1656static void tcf_chain_tp_remove(struct tcf_chain *chain,
1657                                struct tcf_chain_info *chain_info,
1658                                struct tcf_proto *tp)
1659{
1660        struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1661
1662        tcf_proto_mark_delete(tp);
1663        if (tp == chain->filter_chain)
1664                tcf_chain0_head_change(chain, next);
1665        RCU_INIT_POINTER(*chain_info->pprev, next);
1666}
1667
1668static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1669                                           struct tcf_chain_info *chain_info,
1670                                           u32 protocol, u32 prio,
1671                                           bool prio_allocate);
1672
1673/* Try to insert new proto.
1674 * If proto with specified priority already exists, free new proto
1675 * and return existing one.
1676 */
1677
1678static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1679                                                    struct tcf_proto *tp_new,
1680                                                    u32 protocol, u32 prio,
1681                                                    bool rtnl_held)
1682{
1683        struct tcf_chain_info chain_info;
1684        struct tcf_proto *tp;
1685        int err = 0;
1686
1687        mutex_lock(&chain->filter_chain_lock);
1688
1689        if (tcf_proto_exists_destroying(chain, tp_new)) {
1690                mutex_unlock(&chain->filter_chain_lock);
1691                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1692                return ERR_PTR(-EAGAIN);
1693        }
1694
1695        tp = tcf_chain_tp_find(chain, &chain_info,
1696                               protocol, prio, false);
1697        if (!tp)
1698                err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1699        mutex_unlock(&chain->filter_chain_lock);
1700
1701        if (tp) {
1702                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1703                tp_new = tp;
1704        } else if (err) {
1705                tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1706                tp_new = ERR_PTR(err);
1707        }
1708
1709        return tp_new;
1710}
1711
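    /* Unlink tp from its chain and release it, but only if it is still on
     * the chain and holds no filters; the lookup and the emptiness check
     * both happen under filter_chain_lock so that a concurrently inserted
     * filter cannot be lost.
     */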
1712static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1713                                      struct tcf_proto *tp, bool rtnl_held,
1714                                      struct netlink_ext_ack *extack)
1715{
1716        struct tcf_chain_info chain_info;
1717        struct tcf_proto *tp_iter;
1718        struct tcf_proto **pprev;
1719        struct tcf_proto *next;
1720
1721        mutex_lock(&chain->filter_chain_lock);
1722
1723        /* Atomically find and remove tp from chain. */
1724        for (pprev = &chain->filter_chain;
1725             (tp_iter = tcf_chain_dereference(*pprev, chain));
1726             pprev = &tp_iter->next) {
1727                if (tp_iter == tp) {
1728                        chain_info.pprev = pprev;
1729                        chain_info.next = tp_iter->next;
1730                        WARN_ON(tp_iter->deleting);
1731                        break;
1732                }
1733        }
1734        /* Verify that tp still exists and no new filters were inserted
1735         * concurrently.
1736         * Mark tp for deletion if it is empty.
1737         */
1738        if (!tp_iter || !tcf_proto_check_delete(tp)) {
1739                mutex_unlock(&chain->filter_chain_lock);
1740                return;
1741        }
1742
1743        tcf_proto_signal_destroying(chain, tp);
1744        next = tcf_chain_dereference(chain_info.next, chain);
1745        if (tp == chain->filter_chain)
1746                tcf_chain0_head_change(chain, next);
1747        RCU_INIT_POINTER(*chain_info.pprev, next);
1748        mutex_unlock(&chain->filter_chain_lock);
1749
1750        tcf_proto_put(tp, rtnl_held, extack);
1751}
1752
1753static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1754                                           struct tcf_chain_info *chain_info,
1755                                           u32 protocol, u32 prio,
1756                                           bool prio_allocate)
1757{
1758        struct tcf_proto **pprev;
1759        struct tcf_proto *tp;
1760
1761        /* Check the chain for existence of proto-tcf with this priority */
1762        for (pprev = &chain->filter_chain;
1763             (tp = tcf_chain_dereference(*pprev, chain));
1764             pprev = &tp->next) {
1765                if (tp->prio >= prio) {
1766                        if (tp->prio == prio) {
1767                                if (prio_allocate ||
1768                                    (tp->protocol != protocol && protocol))
1769                                        return ERR_PTR(-EINVAL);
1770                        } else {
1771                                tp = NULL;
1772                        }
1773                        break;
1774                }
1775        }
1776        chain_info->pprev = pprev;
1777        if (tp) {
1778                chain_info->next = tp->next;
1779                tcf_proto_get(tp);
1780        } else {
1781                chain_info->next = NULL;
1782        }
1783        return tp;
1784}
1785
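    /* Build a netlink message describing a single filter. A NULL fh yields
     * a message with tcm_handle of zero (used when notifying about a whole
     * tp, e.g. from tfilter_notify_chain()); otherwise the classifier's own
     * ->dump() callback adds the filter-specific attributes.
     */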
1786static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1787                         struct tcf_proto *tp, struct tcf_block *block,
1788                         struct Qdisc *q, u32 parent, void *fh,
1789                         u32 portid, u32 seq, u16 flags, int event,
1790                         bool rtnl_held)
1791{
1792        struct tcmsg *tcm;
1793        struct nlmsghdr  *nlh;
1794        unsigned char *b = skb_tail_pointer(skb);
1795
1796        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1797        if (!nlh)
1798                goto out_nlmsg_trim;
1799        tcm = nlmsg_data(nlh);
1800        tcm->tcm_family = AF_UNSPEC;
1801        tcm->tcm__pad1 = 0;
1802        tcm->tcm__pad2 = 0;
1803        if (q) {
1804                tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1805                tcm->tcm_parent = parent;
1806        } else {
1807                tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1808                tcm->tcm_block_index = block->index;
1809        }
1810        tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1811        if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1812                goto nla_put_failure;
1813        if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1814                goto nla_put_failure;
1815        if (!fh) {
1816                tcm->tcm_handle = 0;
1817        } else {
1818                if (tp->ops->dump &&
1819                    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1820                        goto nla_put_failure;
1821        }
1822        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1823        return skb->len;
1824
1825out_nlmsg_trim:
1826nla_put_failure:
1827        nlmsg_trim(skb, b);
1828        return -1;
1829}
1830
1831static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1832                          struct nlmsghdr *n, struct tcf_proto *tp,
1833                          struct tcf_block *block, struct Qdisc *q,
1834                          u32 parent, void *fh, int event, bool unicast,
1835                          bool rtnl_held)
1836{
1837        struct sk_buff *skb;
1838        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1839        int err = 0;
1840
1841        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1842        if (!skb)
1843                return -ENOBUFS;
1844
1845        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1846                          n->nlmsg_seq, n->nlmsg_flags, event,
1847                          rtnl_held) <= 0) {
1848                kfree_skb(skb);
1849                return -EINVAL;
1850        }
1851
1852        if (unicast)
1853                err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1854        else
1855                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1856                                     n->nlmsg_flags & NLM_F_ECHO);
1857
1858        if (err > 0)
1859                err = 0;
1860        return err;
1861}
1862
1863static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1864                              struct nlmsghdr *n, struct tcf_proto *tp,
1865                              struct tcf_block *block, struct Qdisc *q,
1866                              u32 parent, void *fh, bool unicast, bool *last,
1867                              bool rtnl_held, struct netlink_ext_ack *extack)
1868{
1869        struct sk_buff *skb;
1870        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1871        int err;
1872
1873        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1874        if (!skb)
1875                return -ENOBUFS;
1876
1877        if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1878                          n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1879                          rtnl_held) <= 0) {
1880                NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1881                kfree_skb(skb);
1882                return -EINVAL;
1883        }
1884
1885        err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
1886        if (err) {
1887                kfree_skb(skb);
1888                return err;
1889        }
1890
1891        if (unicast)
1892                err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1893        else
1894                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1895                                     n->nlmsg_flags & NLM_F_ECHO);
1896        if (err < 0)
1897                NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1898
1899        if (err > 0)
1900                err = 0;
1901        return err;
1902}
1903
1904static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1905                                 struct tcf_block *block, struct Qdisc *q,
1906                                 u32 parent, struct nlmsghdr *n,
1907                                 struct tcf_chain *chain, int event,
1908                                 bool rtnl_held)
1909{
1910        struct tcf_proto *tp;
1911
1912        for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1913             tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
1914                tfilter_notify(net, oskb, n, tp, block,
1915                               q, parent, NULL, event, false, rtnl_held);
1916}
1917
1918static void tfilter_put(struct tcf_proto *tp, void *fh)
1919{
1920        if (tp->ops->put && fh)
1921                tp->ops->put(tp, fh);
1922}
1923
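    /* RTM_NEWTFILTER handler: create a new filter or change an existing
     * one. Runs without rtnl when both the qdisc and the classifier support
     * unlocked execution, and replays the whole request on -EAGAIN (e.g.
     * when racing with a flush of the target chain).
     */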
1924static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1925                          struct netlink_ext_ack *extack)
1926{
1927        struct net *net = sock_net(skb->sk);
1928        struct nlattr *tca[TCA_MAX + 1];
1929        char name[IFNAMSIZ];
1930        struct tcmsg *t;
1931        u32 protocol;
1932        u32 prio;
1933        bool prio_allocate;
1934        u32 parent;
1935        u32 chain_index;
1936        struct Qdisc *q = NULL;
1937        struct tcf_chain_info chain_info;
1938        struct tcf_chain *chain = NULL;
1939        struct tcf_block *block;
1940        struct tcf_proto *tp;
1941        unsigned long cl;
1942        void *fh;
1943        int err;
1944        int tp_created;
1945        bool rtnl_held = false;
1946
1947        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1948                return -EPERM;
1949
1950replay:
1951        tp_created = 0;
1952
1953        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1954                                     rtm_tca_policy, extack);
1955        if (err < 0)
1956                return err;
1957
1958        t = nlmsg_data(n);
1959        protocol = TC_H_MIN(t->tcm_info);
1960        prio = TC_H_MAJ(t->tcm_info);
1961        prio_allocate = false;
1962        parent = t->tcm_parent;
1963        tp = NULL;
1964        cl = 0;
1965        block = NULL;
1966
1967        if (prio == 0) {
1968                /* If no priority is provided by the user,
1969                 * we allocate one.
1970                 */
1971                if (n->nlmsg_flags & NLM_F_CREATE) {
1972                        prio = TC_H_MAKE(0x80000000U, 0U);
1973                        prio_allocate = true;
1974                } else {
1975                        NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1976                        return -ENOENT;
1977                }
1978        }
1979
1980        /* Find head of filter chain. */
1981
1982        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
1983        if (err)
1984                return err;
1985
1986        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
1987                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
1988                err = -EINVAL;
1989                goto errout;
1990        }
1991
1992        /* Take rtnl mutex if: rtnl_held was set to true on a previous iteration,
1993         * the block is shared (no qdisc found), the qdisc is not unlocked, the
1994         * classifier type is not specified, or the classifier is not unlocked.
1995         */
1996        if (rtnl_held ||
1997            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
1998            !tcf_proto_is_unlocked(name)) {
1999                rtnl_held = true;
2000                rtnl_lock();
2001        }
2002
2003        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2004        if (err)
2005                goto errout;
2006
2007        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2008                                 extack);
2009        if (IS_ERR(block)) {
2010                err = PTR_ERR(block);
2011                goto errout;
2012        }
2013
2014        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2015        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2016                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2017                err = -EINVAL;
2018                goto errout;
2019        }
2020        chain = tcf_chain_get(block, chain_index, true);
2021        if (!chain) {
2022                NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2023                err = -ENOMEM;
2024                goto errout;
2025        }
2026
2027        mutex_lock(&chain->filter_chain_lock);
2028        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2029                               prio, prio_allocate);
2030        if (IS_ERR(tp)) {
2031                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2032                err = PTR_ERR(tp);
2033                goto errout_locked;
2034        }
2035
2036        if (tp == NULL) {
2037                struct tcf_proto *tp_new = NULL;
2038
2039                if (chain->flushing) {
2040                        err = -EAGAIN;
2041                        goto errout_locked;
2042                }
2043
2044                /* Proto-tcf does not exist, create new one */
2045
2046                if (tca[TCA_KIND] == NULL || !protocol) {
2047                        NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2048                        err = -EINVAL;
2049                        goto errout_locked;
2050                }
2051
2052                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2053                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2054                        err = -ENOENT;
2055                        goto errout_locked;
2056                }
2057
2058                if (prio_allocate)
2059                        prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2060                                                               &chain_info));
2061
2062                mutex_unlock(&chain->filter_chain_lock);
2063                tp_new = tcf_proto_create(name, protocol, prio, chain,
2064                                          rtnl_held, extack);
2065                if (IS_ERR(tp_new)) {
2066                        err = PTR_ERR(tp_new);
2067                        goto errout_tp;
2068                }
2069
2070                tp_created = 1;
2071                tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2072                                                rtnl_held);
2073                if (IS_ERR(tp)) {
2074                        err = PTR_ERR(tp);
2075                        goto errout_tp;
2076                }
2077        } else {
2078                mutex_unlock(&chain->filter_chain_lock);
2079        }
2080
2081        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2082                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2083                err = -EINVAL;
2084                goto errout;
2085        }
2086
2087        fh = tp->ops->get(tp, t->tcm_handle);
2088
2089        if (!fh) {
2090                if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2091                        NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2092                        err = -ENOENT;
2093                        goto errout;
2094                }
2095        } else if (n->nlmsg_flags & NLM_F_EXCL) {
2096                tfilter_put(tp, fh);
2097                NL_SET_ERR_MSG(extack, "Filter already exists");
2098                err = -EEXIST;
2099                goto errout;
2100        }
2101
2102        if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2103                NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2104                err = -EINVAL;
2105                goto errout;
2106        }
2107
2108        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2109                              n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
2110                              rtnl_held, extack);
2111        if (err == 0) {
2112                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2113                               RTM_NEWTFILTER, false, rtnl_held);
2114                tfilter_put(tp, fh);
2115                /* q pointer is NULL for shared blocks */
2116                if (q)
2117                        q->flags &= ~TCQ_F_CAN_BYPASS;
2118        }
2119
2120errout:
2121        if (err && tp_created)
2122                tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2123errout_tp:
2124        if (chain) {
2125                if (tp && !IS_ERR(tp))
2126                        tcf_proto_put(tp, rtnl_held, NULL);
2127                if (!tp_created)
2128                        tcf_chain_put(chain);
2129        }
2130        tcf_block_release(q, block, rtnl_held);
2131
2132        if (rtnl_held)
2133                rtnl_unlock();
2134
2135        if (err == -EAGAIN) {
2136                /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2137                 * of target chain.
2138                 */
2139                rtnl_held = true;
2140                /* Replay the request. */
2141                goto replay;
2142        }
2143        return err;
2144
2145errout_locked:
2146        mutex_unlock(&chain->filter_chain_lock);
2147        goto errout;
2148}
2149
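    /* RTM_DELTFILTER handler: delete a single filter, delete a whole tp
     * when tcm_handle is zero, or flush the whole chain when prio is zero.
     */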
2150static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2151                          struct netlink_ext_ack *extack)
2152{
2153        struct net *net = sock_net(skb->sk);
2154        struct nlattr *tca[TCA_MAX + 1];
2155        char name[IFNAMSIZ];
2156        struct tcmsg *t;
2157        u32 protocol;
2158        u32 prio;
2159        u32 parent;
2160        u32 chain_index;
2161        struct Qdisc *q = NULL;
2162        struct tcf_chain_info chain_info;
2163        struct tcf_chain *chain = NULL;
2164        struct tcf_block *block = NULL;
2165        struct tcf_proto *tp = NULL;
2166        unsigned long cl = 0;
2167        void *fh = NULL;
2168        int err;
2169        bool rtnl_held = false;
2170
2171        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2172                return -EPERM;
2173
2174        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2175                                     rtm_tca_policy, extack);
2176        if (err < 0)
2177                return err;
2178
2179        t = nlmsg_data(n);
2180        protocol = TC_H_MIN(t->tcm_info);
2181        prio = TC_H_MAJ(t->tcm_info);
2182        parent = t->tcm_parent;
2183
2184        if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2185                NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2186                return -ENOENT;
2187        }
2188
2189        /* Find head of filter chain. */
2190
2191        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2192        if (err)
2193                return err;
2194
2195        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2196                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2197                err = -EINVAL;
2198                goto errout;
2199        }
2200        /* Take rtnl mutex if: flushing the whole chain, the block is shared (no
2201         * qdisc found), the qdisc is not unlocked, the classifier type is not
2202         * specified, or the classifier is not unlocked.
2203         */
2204        if (!prio ||
2205            (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2206            !tcf_proto_is_unlocked(name)) {
2207                rtnl_held = true;
2208                rtnl_lock();
2209        }
2210
2211        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2212        if (err)
2213                goto errout;
2214
2215        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2216                                 extack);
2217        if (IS_ERR(block)) {
2218                err = PTR_ERR(block);
2219                goto errout;
2220        }
2221
2222        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2223        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2224                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2225                err = -EINVAL;
2226                goto errout;
2227        }
2228        chain = tcf_chain_get(block, chain_index, false);
2229        if (!chain) {
2230                /* User requested flush on non-existent chain. Nothing to do,
2231                 * so just return success.
2232                 */
2233                if (prio == 0) {
2234                        err = 0;
2235                        goto errout;
2236                }
2237                NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2238                err = -ENOENT;
2239                goto errout;
2240        }
2241
2242        if (prio == 0) {
2243                tfilter_notify_chain(net, skb, block, q, parent, n,
2244                                     chain, RTM_DELTFILTER, rtnl_held);
2245                tcf_chain_flush(chain, rtnl_held);
2246                err = 0;
2247                goto errout;
2248        }
2249
2250        mutex_lock(&chain->filter_chain_lock);
2251        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2252                               prio, false);
2253        if (!tp || IS_ERR(tp)) {
2254                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2255                err = tp ? PTR_ERR(tp) : -ENOENT;
2256                goto errout_locked;
2257        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2258                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2259                err = -EINVAL;
2260                goto errout_locked;
2261        } else if (t->tcm_handle == 0) {
2262                tcf_proto_signal_destroying(chain, tp);
2263                tcf_chain_tp_remove(chain, &chain_info, tp);
2264                mutex_unlock(&chain->filter_chain_lock);
2265
2266                tcf_proto_put(tp, rtnl_held, NULL);
2267                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2268                               RTM_DELTFILTER, false, rtnl_held);
2269                err = 0;
2270                goto errout;
2271        }
2272        mutex_unlock(&chain->filter_chain_lock);
2273
2274        fh = tp->ops->get(tp, t->tcm_handle);
2275
2276        if (!fh) {
2277                NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2278                err = -ENOENT;
2279        } else {
2280                bool last;
2281
2282                err = tfilter_del_notify(net, skb, n, tp, block,
2283                                         q, parent, fh, false, &last,
2284                                         rtnl_held, extack);
2285
2286                if (err)
2287                        goto errout;
2288                if (last)
2289                        tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2290        }
2291
2292errout:
2293        if (chain) {
2294                if (tp && !IS_ERR(tp))
2295                        tcf_proto_put(tp, rtnl_held, NULL);
2296                tcf_chain_put(chain);
2297        }
2298        tcf_block_release(q, block, rtnl_held);
2299
2300        if (rtnl_held)
2301                rtnl_unlock();
2302
2303        return err;
2304
2305errout_locked:
2306        mutex_unlock(&chain->filter_chain_lock);
2307        goto errout;
2308}
2309
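    /* RTM_GETTFILTER handler: look up a single filter and unicast it back
     * to the requesting socket as an RTM_NEWTFILTER message.
     */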
2310static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2311                          struct netlink_ext_ack *extack)
2312{
2313        struct net *net = sock_net(skb->sk);
2314        struct nlattr *tca[TCA_MAX + 1];
2315        char name[IFNAMSIZ];
2316        struct tcmsg *t;
2317        u32 protocol;
2318        u32 prio;
2319        u32 parent;
2320        u32 chain_index;
2321        struct Qdisc *q = NULL;
2322        struct tcf_chain_info chain_info;
2323        struct tcf_chain *chain = NULL;
2324        struct tcf_block *block = NULL;
2325        struct tcf_proto *tp = NULL;
2326        unsigned long cl = 0;
2327        void *fh = NULL;
2328        int err;
2329        bool rtnl_held = false;
2330
2331        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2332                                     rtm_tca_policy, extack);
2333        if (err < 0)
2334                return err;
2335
2336        t = nlmsg_data(n);
2337        protocol = TC_H_MIN(t->tcm_info);
2338        prio = TC_H_MAJ(t->tcm_info);
2339        parent = t->tcm_parent;
2340
2341        if (prio == 0) {
2342                NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2343                return -ENOENT;
2344        }
2345
2346        /* Find head of filter chain. */
2347
2348        err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2349        if (err)
2350                return err;
2351
2352        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2353                NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2354                err = -EINVAL;
2355                goto errout;
2356        }
2357        /* Take rtnl mutex if: the block is shared (no qdisc found), the qdisc is
2358         * not unlocked, the classifier type is not specified, or the classifier
2359         * is not unlocked.
2360         */
2361        if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2362            !tcf_proto_is_unlocked(name)) {
2363                rtnl_held = true;
2364                rtnl_lock();
2365        }
2366
2367        err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2368        if (err)
2369                goto errout;
2370
2371        block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2372                                 extack);
2373        if (IS_ERR(block)) {
2374                err = PTR_ERR(block);
2375                goto errout;
2376        }
2377
2378        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2379        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2380                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2381                err = -EINVAL;
2382                goto errout;
2383        }
2384        chain = tcf_chain_get(block, chain_index, false);
2385        if (!chain) {
2386                NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2387                err = -EINVAL;
2388                goto errout;
2389        }
2390
2391        mutex_lock(&chain->filter_chain_lock);
2392        tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2393                               prio, false);
2394        mutex_unlock(&chain->filter_chain_lock);
2395        if (!tp || IS_ERR(tp)) {
2396                NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2397                err = tp ? PTR_ERR(tp) : -ENOENT;
2398                goto errout;
2399        } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2400                NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2401                err = -EINVAL;
2402                goto errout;
2403        }
2404
2405        fh = tp->ops->get(tp, t->tcm_handle);
2406
2407        if (!fh) {
2408                NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2409                err = -ENOENT;
2410        } else {
2411                err = tfilter_notify(net, skb, n, tp, block, q, parent,
2412                                     fh, RTM_NEWTFILTER, true, rtnl_held);
2413                if (err < 0)
2414                        NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2415        }
2416
2417        tfilter_put(tp, fh);
2418errout:
2419        if (chain) {
2420                if (tp && !IS_ERR(tp))
2421                        tcf_proto_put(tp, rtnl_held, NULL);
2422                tcf_chain_put(chain);
2423        }
2424        tcf_block_release(q, block, rtnl_held);
2425
2426        if (rtnl_held)
2427                rtnl_unlock();
2428
2429        return err;
2430}
2431
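    /* Filter dump helpers. Netlink dump progress lives in cb->args:
     * args[0] is the flat index of the last fully dumped tp across all
     * chains, args[1] - 1 is how many filters of the current tp have been
     * walked (args[1] == 0 meaning even the tp header is not dumped yet),
     * and args[2] carries the classifier walker cookie between dump calls.
     */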
2432struct tcf_dump_args {
2433        struct tcf_walker w;
2434        struct sk_buff *skb;
2435        struct netlink_callback *cb;
2436        struct tcf_block *block;
2437        struct Qdisc *q;
2438        u32 parent;
2439};
2440
2441static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2442{
2443        struct tcf_dump_args *a = (void *)arg;
2444        struct net *net = sock_net(a->skb->sk);
2445
2446        return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2447                             n, NETLINK_CB(a->cb->skb).portid,
2448                             a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2449                             RTM_NEWTFILTER, true);
2450}
2451
2452static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2453                           struct sk_buff *skb, struct netlink_callback *cb,
2454                           long index_start, long *p_index)
2455{
2456        struct net *net = sock_net(skb->sk);
2457        struct tcf_block *block = chain->block;
2458        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2459        struct tcf_proto *tp, *tp_prev;
2460        struct tcf_dump_args arg;
2461
2462        for (tp = __tcf_get_next_proto(chain, NULL);
2463             tp;
2464             tp_prev = tp,
2465                     tp = __tcf_get_next_proto(chain, tp),
2466                     tcf_proto_put(tp_prev, true, NULL),
2467                     (*p_index)++) {
2468                if (*p_index < index_start)
2469                        continue;
2470                if (TC_H_MAJ(tcm->tcm_info) &&
2471                    TC_H_MAJ(tcm->tcm_info) != tp->prio)
2472                        continue;
2473                if (TC_H_MIN(tcm->tcm_info) &&
2474                    TC_H_MIN(tcm->tcm_info) != tp->protocol)
2475                        continue;
2476                if (*p_index > index_start)
2477                        memset(&cb->args[1], 0,
2478                               sizeof(cb->args) - sizeof(cb->args[0]));
2479                if (cb->args[1] == 0) {
2480                        if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2481                                          NETLINK_CB(cb->skb).portid,
2482                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
2483                                          RTM_NEWTFILTER, true) <= 0)
2484                                goto errout;
2485                        cb->args[1] = 1;
2486                }
2487                if (!tp->ops->walk)
2488                        continue;
2489                arg.w.fn = tcf_node_dump;
2490                arg.skb = skb;
2491                arg.cb = cb;
2492                arg.block = block;
2493                arg.q = q;
2494                arg.parent = parent;
2495                arg.w.stop = 0;
2496                arg.w.skip = cb->args[1] - 1;
2497                arg.w.count = 0;
2498                arg.w.cookie = cb->args[2];
2499                tp->ops->walk(tp, &arg.w, true);
2500                cb->args[2] = arg.w.cookie;
2501                cb->args[1] = arg.w.count + 1;
2502                if (arg.w.stop)
2503                        goto errout;
2504        }
2505        return true;
2506
2507errout:
2508        tcf_proto_put(tp, true, NULL);
2509        return false;
2510}
2511
2512/* called with RTNL */
2513static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2514{
2515        struct tcf_chain *chain, *chain_prev;
2516        struct net *net = sock_net(skb->sk);
2517        struct nlattr *tca[TCA_MAX + 1];
2518        struct Qdisc *q = NULL;
2519        struct tcf_block *block;
2520        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2521        long index_start;
2522        long index;
2523        u32 parent;
2524        int err;
2525
2526        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2527                return skb->len;
2528
2529        err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2530                                     NULL, cb->extack);
2531        if (err)
2532                return err;
2533
2534        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2535                block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2536                if (!block)
2537                        goto out;
2538                /* If we work with block index, q is NULL and parent value
2539                 * will never be used in the following code. The check
2540                 * in tcf_fill_node prevents it. However, the compiler does not
2541                 * see that far, so set parent to zero to silence the warning
2542                 * about parent being uninitialized.
2543                 */
2544                parent = 0;
2545        } else {
2546                const struct Qdisc_class_ops *cops;
2547                struct net_device *dev;
2548                unsigned long cl = 0;
2549
2550                dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2551                if (!dev)
2552                        return skb->len;
2553
2554                parent = tcm->tcm_parent;
2555                if (!parent) {
2556                        q = dev->qdisc;
2557                        parent = q->handle;
2558                } else {
2559                        q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2560                }
2561                if (!q)
2562                        goto out;
2563                cops = q->ops->cl_ops;
2564                if (!cops)
2565                        goto out;
2566                if (!cops->tcf_block)
2567                        goto out;
2568                if (TC_H_MIN(tcm->tcm_parent)) {
2569                        cl = cops->find(q, tcm->tcm_parent);
2570                        if (cl == 0)
2571                                goto out;
2572                }
2573                block = cops->tcf_block(q, cl, NULL);
2574                if (!block)
2575                        goto out;
2576                if (tcf_block_shared(block))
2577                        q = NULL;
2578        }
2579
2580        index_start = cb->args[0];
2581        index = 0;
2582
2583        for (chain = __tcf_get_next_chain(block, NULL);
2584             chain;
2585             chain_prev = chain,
2586                     chain = __tcf_get_next_chain(block, chain),
2587                     tcf_chain_put(chain_prev)) {
2588                if (tca[TCA_CHAIN] &&
2589                    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2590                        continue;
2591                if (!tcf_chain_dump(chain, q, parent, skb, cb,
2592                                    index_start, &index)) {
2593                        tcf_chain_put(chain);
2594                        err = -EMSGSIZE;
2595                        break;
2596                }
2597        }
2598
2599        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2600                tcf_block_refcnt_put(block, true);
2601        cb->args[0] = index;
2602
2603out:
2604        /* If we made no progress, the error (EMSGSIZE) is real */
2605        if (skb->len == 0 && err)
2606                return err;
2607        return skb->len;
2608}
2609
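    /* Build a netlink message describing a chain. The template ops/priv are
     * passed in explicitly rather than read from the chain so that delete
     * notifications can still be built after the chain itself is gone (see
     * tc_chain_notify_delete()).
     */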
2610static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2611                              void *tmplt_priv, u32 chain_index,
2612                              struct net *net, struct sk_buff *skb,
2613                              struct tcf_block *block,
2614                              u32 portid, u32 seq, u16 flags, int event)
2615{
2616        unsigned char *b = skb_tail_pointer(skb);
2617        const struct tcf_proto_ops *ops;
2618        struct nlmsghdr *nlh;
2619        struct tcmsg *tcm;
2620        void *priv;
2621
2622        ops = tmplt_ops;
2623        priv = tmplt_priv;
2624
2625        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2626        if (!nlh)
2627                goto out_nlmsg_trim;
2628        tcm = nlmsg_data(nlh);
2629        tcm->tcm_family = AF_UNSPEC;
2630        tcm->tcm__pad1 = 0;
2631        tcm->tcm__pad2 = 0;
2632        tcm->tcm_handle = 0;
2633        if (block->q) {
2634                tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2635                tcm->tcm_parent = block->q->handle;
2636        } else {
2637                tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2638                tcm->tcm_block_index = block->index;
2639        }
2640
2641        if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2642                goto nla_put_failure;
2643
2644        if (ops) {
2645                if (nla_put_string(skb, TCA_KIND, ops->kind))
2646                        goto nla_put_failure;
2647                if (ops->tmplt_dump(skb, net, priv) < 0)
2648                        goto nla_put_failure;
2649        }
2650
2651        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2652        return skb->len;
2653
2654out_nlmsg_trim:
2655nla_put_failure:
2656        nlmsg_trim(skb, b);
2657        return -EMSGSIZE;
2658}
2659
2660static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2661                           u32 seq, u16 flags, int event, bool unicast)
2662{
2663        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2664        struct tcf_block *block = chain->block;
2665        struct net *net = block->net;
2666        struct sk_buff *skb;
2667        int err = 0;
2668
2669        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2670        if (!skb)
2671                return -ENOBUFS;
2672
2673        if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2674                               chain->index, net, skb, block, portid,
2675                               seq, flags, event) <= 0) {
2676                kfree_skb(skb);
2677                return -EINVAL;
2678        }
2679
2680        if (unicast)
2681                err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2682        else
2683                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2684                                     flags & NLM_F_ECHO);
2685
2686        if (err > 0)
2687                err = 0;
2688        return err;
2689}
2690
2691static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2692                                  void *tmplt_priv, u32 chain_index,
2693                                  struct tcf_block *block, struct sk_buff *oskb,
2694                                  u32 seq, u16 flags, bool unicast)
2695{
2696        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2697        struct net *net = block->net;
2698        struct sk_buff *skb;
2699
2700        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2701        if (!skb)
2702                return -ENOBUFS;
2703
2704        if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2705                               block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
2706                kfree_skb(skb);
2707                return -EINVAL;
2708        }
2709
2710        if (unicast)
2711                return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2712
2713        return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2714}
2715
2716static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2717                              struct nlattr **tca,
2718                              struct netlink_ext_ack *extack)
2719{
2720        const struct tcf_proto_ops *ops;
2721        char name[IFNAMSIZ];
2722        void *tmplt_priv;
2723
2724        /* If kind is not set, user did not specify template. */
2725        if (!tca[TCA_KIND])
2726                return 0;
2727
2728        if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2729                NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2730                return -EINVAL;
2731        }
2732
2733        ops = tcf_proto_lookup_ops(name, true, extack);
2734        if (IS_ERR(ops))
2735                return PTR_ERR(ops);
2736        if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2737                NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2738                return -EOPNOTSUPP;
2739        }
2740
2741        tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2742        if (IS_ERR(tmplt_priv)) {
2743                module_put(ops->owner);
2744                return PTR_ERR(tmplt_priv);
2745        }
2746        chain->tmplt_ops = ops;
2747        chain->tmplt_priv = tmplt_priv;
2748        return 0;
2749}
2750
2751static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2752                               void *tmplt_priv)
2753{
2754        /* If template ops were never set, there is no work to do. */
2755        if (!tmplt_ops)
2756                return;
2757
2758        tmplt_ops->tmplt_destroy(tmplt_priv);
2759        module_put(tmplt_ops->owner);
2760}
2761
2762/* Add/delete/get a chain */
2763
2764static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2765                        struct netlink_ext_ack *extack)
2766{
2767        struct net *net = sock_net(skb->sk);
2768        struct nlattr *tca[TCA_MAX + 1];
2769        struct tcmsg *t;
2770        u32 parent;
2771        u32 chain_index;
2772        struct Qdisc *q = NULL;
2773        struct tcf_chain *chain = NULL;
2774        struct tcf_block *block;
2775        unsigned long cl;
2776        int err;
2777
2778        if (n->nlmsg_type != RTM_GETCHAIN &&
2779            !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2780                return -EPERM;
2781
2782replay:
2783        err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2784                                     rtm_tca_policy, extack);
2785        if (err < 0)
2786                return err;
2787
2788        t = nlmsg_data(n);
2789        parent = t->tcm_parent;
2790        cl = 0;
2791
2792        block = tcf_block_find(net, &q, &parent, &cl,
2793                               t->tcm_ifindex, t->tcm_block_index, extack);
2794        if (IS_ERR(block))
2795                return PTR_ERR(block);
2796
2797        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2798        if (chain_index > TC_ACT_EXT_VAL_MASK) {
2799                NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2800                err = -EINVAL;
2801                goto errout_block;
2802        }
2803
2804        mutex_lock(&block->lock);
2805        chain = tcf_chain_lookup(block, chain_index);
2806        if (n->nlmsg_type == RTM_NEWCHAIN) {
2807                if (chain) {
2808                        if (tcf_chain_held_by_acts_only(chain)) {
2809                                /* The chain exists only because there is
2810                                 * some action referencing it.
2811                                 */
2812                                tcf_chain_hold(chain);
2813                        } else {
2814                                NL_SET_ERR_MSG(extack, "Filter chain already exists");
2815                                err = -EEXIST;
2816                                goto errout_block_locked;
2817                        }
2818                } else {
2819                        if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2820                                NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2821                                err = -ENOENT;
2822                                goto errout_block_locked;
2823                        }
2824                        chain = tcf_chain_create(block, chain_index);
2825                        if (!chain) {
2826                                NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2827                                err = -ENOMEM;
2828                                goto errout_block_locked;
2829                        }
2830                }
2831        } else {
2832                if (!chain || tcf_chain_held_by_acts_only(chain)) {
2833                        NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2834                        err = -EINVAL;
2835                        goto errout_block_locked;
2836                }
2837                tcf_chain_hold(chain);
2838        }
2839
2840        if (n->nlmsg_type == RTM_NEWCHAIN) {
2841                /* Modifying chain requires holding parent block lock. In case
2842                 * the chain was successfully added, take a reference to the
2843                 * chain. This ensures that an empty chain does not disappear at
2844                 * the end of this function.
2845                 */
2846                tcf_chain_hold(chain);
2847                chain->explicitly_created = true;
2848        }
2849        mutex_unlock(&block->lock);
2850
2851        switch (n->nlmsg_type) {
2852        case RTM_NEWCHAIN:
2853                err = tc_chain_tmplt_add(chain, net, tca, extack);
2854                if (err) {
2855                        tcf_chain_put_explicitly_created(chain);
2856                        goto errout;
2857                }
2858
2859                tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2860                                RTM_NEWCHAIN, false);
2861                break;
2862        case RTM_DELCHAIN:
2863                tfilter_notify_chain(net, skb, block, q, parent, n,
2864                                     chain, RTM_DELTFILTER, true);
2865                /* Flush the chain first as the user requested chain removal. */
2866                tcf_chain_flush(chain, true);
2867                /* In case the chain was successfully deleted, put a reference
2868                 * to the chain previously taken during addition.
2869                 */
2870                tcf_chain_put_explicitly_created(chain);
2871                break;
2872        case RTM_GETCHAIN:
2873                err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2874                                      n->nlmsg_flags, n->nlmsg_type, true);
2875                if (err < 0)
2876                        NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2877                break;
2878        default:
2879                err = -EOPNOTSUPP;
2880                NL_SET_ERR_MSG(extack, "Unsupported message type");
2881                goto errout;
2882        }
2883
2884errout:
2885        tcf_chain_put(chain);
2886errout_block:
2887        tcf_block_release(q, block, true);
2888        if (err == -EAGAIN)
2889                /* Replay the request. */
2890                goto replay;
2891        return err;
2892
2893errout_block_locked:
2894        mutex_unlock(&block->lock);
2895        goto errout_block;
2896}
2897
2898/* called with RTNL */
2899static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2900{
2901        struct net *net = sock_net(skb->sk);
2902        struct nlattr *tca[TCA_MAX + 1];
2903        struct Qdisc *q = NULL;
2904        struct tcf_block *block;
2905        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2906        struct tcf_chain *chain;
2907        long index_start;
2908        long index;
2909        u32 parent;
2910        int err;
2911
2912        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2913                return skb->len;
2914
2915        err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2916                                     rtm_tca_policy, cb->extack);
2917        if (err)
2918                return err;
2919
2920        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2921                block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2922                if (!block)
2923                        goto out;
2924                /* If we work with block index, q is NULL and parent value
2925                 * will never be used in the following code. The check
2926                 * in tcf_fill_node prevents it. However, the compiler does not
2927                 * see that far, so set parent to zero to silence the warning
2928                 * about parent being uninitialized.
2929                 */
2930                parent = 0;
2931        } else {
2932                const struct Qdisc_class_ops *cops;
2933                struct net_device *dev;
2934                unsigned long cl = 0;
2935
2936                dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2937                if (!dev)
2938                        return skb->len;
2939
2940                parent = tcm->tcm_parent;
2941                if (!parent) {
2942                        q = dev->qdisc;
2943                        parent = q->handle;
2944                } else {
2945                        q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2946                }
2947                if (!q)
2948                        goto out;
2949                cops = q->ops->cl_ops;
2950                if (!cops)
2951                        goto out;
2952                if (!cops->tcf_block)
2953                        goto out;
2954                if (TC_H_MIN(tcm->tcm_parent)) {
2955                        cl = cops->find(q, tcm->tcm_parent);
2956                        if (cl == 0)
2957                                goto out;
2958                }
2959                block = cops->tcf_block(q, cl, NULL);
2960                if (!block)
2961                        goto out;
2962                if (tcf_block_shared(block))
2963                        q = NULL;
2964        }
2965
2966        index_start = cb->args[0];
2967        index = 0;
2968
2969        mutex_lock(&block->lock);
2970        list_for_each_entry(chain, &block->chain_list, list) {
2971                if ((tca[TCA_CHAIN] &&
2972                     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
2973                        continue;
2974                if (index < index_start) {
2975                        index++;
2976                        continue;
2977                }
2978                if (tcf_chain_held_by_acts_only(chain))
2979                        continue;
2980                err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2981                                         chain->index, net, skb, block,
2982                                         NETLINK_CB(cb->skb).portid,
2983                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
2984                                         RTM_NEWCHAIN);
2985                if (err <= 0)
2986                        break;
2987                index++;
2988        }
2989        mutex_unlock(&block->lock);
2990
2991        if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2992                tcf_block_refcnt_put(block, true);
2993        cb->args[0] = index;
2994
2995out:
2996        /* If we made no progress, the error (EMSGSIZE) is real */
2997        if (skb->len == 0 && err)
2998                return err;
2999        return skb->len;
3000}
3001
3002void tcf_exts_destroy(struct tcf_exts *exts)
3003{
3004#ifdef CONFIG_NET_CLS_ACT
3005        if (exts->actions) {
3006                tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3007                kfree(exts->actions);
3008        }
3009        exts->nr_actions = 0;
3010#endif
3011}
3012EXPORT_SYMBOL(tcf_exts_destroy);
3013
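    /* Parse the action attributes of a filter change request into exts.
     * exts->action and exts->police hold the classifier-specific attribute
     * ids; a present legacy police attribute takes precedence and is parsed
     * into a single TCA_OLD_COMPAT action.
     */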
3014int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3015                      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
3016                      bool rtnl_held, struct netlink_ext_ack *extack)
3017{
3018#ifdef CONFIG_NET_CLS_ACT
3019        {
3020                struct tc_action *act;
3021                size_t attr_size = 0;
3022
3023                if (exts->police && tb[exts->police]) {
3024                        act = tcf_action_init_1(net, tp, tb[exts->police],
3025                                                rate_tlv, "police", ovr,
3026                                                TCA_ACT_BIND, rtnl_held,
3027                                                extack);
3028                        if (IS_ERR(act))
3029                                return PTR_ERR(act);
3030
3031                        act->type = exts->type = TCA_OLD_COMPAT;
3032                        exts->actions[0] = act;
3033                        exts->nr_actions = 1;
3034                } else if (exts->action && tb[exts->action]) {
3035                        int err;
3036
3037                        err = tcf_action_init(net, tp, tb[exts->action],
3038                                              rate_tlv, NULL, ovr, TCA_ACT_BIND,
3039                                              exts->actions, &attr_size,
3040                                              rtnl_held, extack);
3041                        if (err < 0)
3042                                return err;
3043                        exts->nr_actions = err;
3044                }
3045        }
3046#else
3047        if ((exts->action && tb[exts->action]) ||
3048            (exts->police && tb[exts->police])) {
3049                NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3050                return -EOPNOTSUPP;
3051        }
3052#endif
3053
3054        return 0;
3055}
3056EXPORT_SYMBOL(tcf_exts_validate);
3057
3058void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3059{
3060#ifdef CONFIG_NET_CLS_ACT
3061        struct tcf_exts old = *dst;
3062
3063        *dst = *src;
3064        tcf_exts_destroy(&old);
3065#endif
3066}
3067EXPORT_SYMBOL(tcf_exts_change);
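
/* Illustrative sketch of how a classifier's ->change() path can use the
 * two helpers above: validate a new action set into a temporary tcf_exts
 * and commit it only on success, so the old actions are released exactly
 * once.  The ex_* name is hypothetical, "e" is assumed to have been set
 * up with tcf_exts_init() beforehand, and rtnl is assumed held.
 */
static int __maybe_unused ex_cls_commit_exts(struct net *net,
					     struct tcf_proto *tp,
					     struct tcf_exts *live,
					     struct tcf_exts *e,
					     struct nlattr **tb,
					     struct nlattr *est, bool ovr,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = tcf_exts_validate(net, tp, tb, est, e, ovr, true, extack);
	if (err < 0) {
		tcf_exts_destroy(e);	/* drop the half-built set */
		return err;
	}
	tcf_exts_change(live, e);	/* "live" takes over e's actions */
	return 0;
}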
3068
3069#ifdef CONFIG_NET_CLS_ACT
3070static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3071{
3072        if (exts->nr_actions == 0)
3073                return NULL;
3074        else
3075                return exts->actions[0];
3076}
3077#endif
3078
3079int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3080{
3081#ifdef CONFIG_NET_CLS_ACT
3082        struct nlattr *nest;
3083
3084        if (exts->action && tcf_exts_has_actions(exts)) {
3085                /*
3086                 * Again, for backward-compatible mode: we want to
3087                 * work with both old and new modes of entering tc
3088                 * data even if iproute2 is newer - jhs
3089                 */
3090                if (exts->type != TCA_OLD_COMPAT) {
3091                        nest = nla_nest_start_noflag(skb, exts->action);
3092                        if (nest == NULL)
3093                                goto nla_put_failure;
3094
3095                        if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
3096                                goto nla_put_failure;
3097                        nla_nest_end(skb, nest);
3098                } else if (exts->police) {
3099                        struct tc_action *act = tcf_exts_first_act(exts);
3100                        nest = nla_nest_start_noflag(skb, exts->police);
3101                        if (nest == NULL || !act)
3102                                goto nla_put_failure;
3103                        if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3104                                goto nla_put_failure;
3105                        nla_nest_end(skb, nest);
3106                }
3107        }
3108        return 0;
3109
3110nla_put_failure:
3111        nla_nest_cancel(skb, nest);
3112        return -1;
3113#else
3114        return 0;
3115#endif
3116}
3117EXPORT_SYMBOL(tcf_exts_dump);
3118
3120int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3121{
3122#ifdef CONFIG_NET_CLS_ACT
3123        struct tc_action *a = tcf_exts_first_act(exts);
3124        if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3125                return -1;
3126#endif
3127        return 0;
3128}
3129EXPORT_SYMBOL(tcf_exts_dump_stats);
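
/* Illustrative sketch of the matching ->dump() side: emit the actions,
 * then their stats, into the same netlink message.  The ex_* name is
 * hypothetical.
 */
static int __maybe_unused ex_cls_dump_exts(struct sk_buff *skb,
					   struct tcf_exts *exts)
{
	if (tcf_exts_dump(skb, exts) < 0)
		return -1;
	if (tcf_exts_dump_stats(skb, exts) < 0)
		return -1;
	return 0;
}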
3130
3131static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
3132{
3133        if (*flags & TCA_CLS_FLAGS_IN_HW)
3134                return;
3135        *flags |= TCA_CLS_FLAGS_IN_HW;
3136        atomic_inc(&block->offloadcnt);
3137}
3138
3139static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3140{
3141        if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3142                return;
3143        *flags &= ~TCA_CLS_FLAGS_IN_HW;
3144        atomic_dec(&block->offloadcnt);
3145}
3146
3147static void tc_cls_offload_cnt_update(struct tcf_block *block,
3148                                      struct tcf_proto *tp, u32 *cnt,
3149                                      u32 *flags, u32 diff, bool add)
3150{
3151        lockdep_assert_held(&block->cb_lock);
3152
3153        spin_lock(&tp->lock);
3154        if (add) {
3155                if (!*cnt)
3156                        tcf_block_offload_inc(block, flags);
3157                *cnt += diff;
3158        } else {
3159                *cnt -= diff;
3160                if (!*cnt)
3161                        tcf_block_offload_dec(block, flags);
3162        }
3163        spin_unlock(&tp->lock);
3164}
3165
3166static void
3167tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3168                         u32 *cnt, u32 *flags)
3169{
3170        lockdep_assert_held(&block->cb_lock);
3171
3172        spin_lock(&tp->lock);
3173        tcf_block_offload_dec(block, flags);
3174        *cnt = 0;
3175        spin_unlock(&tp->lock);
3176}
3177
3178static int
3179__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3180                   void *type_data, bool err_stop)
3181{
3182        struct flow_block_cb *block_cb;
3183        int ok_count = 0;
3184        int err;
3185
3186        list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3187                err = block_cb->cb(type, type_data, block_cb->cb_priv);
3188                if (err) {
3189                        if (err_stop)
3190                                return err;
3191                } else {
3192                        ok_count++;
3193                }
3194        }
3195        return ok_count;
3196}
3197
3198int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3199                     void *type_data, bool err_stop, bool rtnl_held)
3200{
3201        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3202        int ok_count;
3203
3204retry:
3205        if (take_rtnl)
3206                rtnl_lock();
3207        down_read(&block->cb_lock);
3208        /* Need to obtain rtnl lock if block is bound to devs that require it.
3209         * In block bind code cb_lock is obtained while holding rtnl, so we must
3210         * obtain the locks in the same order here.
3211         */
3212        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3213                up_read(&block->cb_lock);
3214                take_rtnl = true;
3215                goto retry;
3216        }
3217
3218        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3219
3220        up_read(&block->cb_lock);
3221        if (take_rtnl)
3222                rtnl_unlock();
3223        return ok_count;
3224}
3225EXPORT_SYMBOL(tc_setup_cb_call);
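
/* Illustrative sketch: a read-only hardware stats refresh is a typical
 * tc_setup_cb_call() user, modelled on flower.  err_stop is false so a
 * driver that cannot answer does not abort the walk over the remaining
 * callbacks.  The ex_* name is hypothetical.
 */
static void __maybe_unused ex_cls_hw_update_stats(struct tcf_block *block,
						  struct flow_cls_offload *f,
						  bool rtnl_held)
{
	f->command = FLOW_CLS_STATS;
	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, f, false, rtnl_held);
}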
3226
3227/* Non-destructive filter add. If a filter that wasn't already in hardware is
3228 * successfully offloaded, increment the block's offload counter. On failure,
3229 * a previously offloaded filter is considered intact and the offload counter
3230 * is not decremented.
3231 */
3232
3233int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3234                    enum tc_setup_type type, void *type_data, bool err_stop,
3235                    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3236{
3237        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3238        int ok_count;
3239
3240retry:
3241        if (take_rtnl)
3242                rtnl_lock();
3243        down_read(&block->cb_lock);
3244        /* Need to obtain rtnl lock if block is bound to devs that require it.
3245         * In block bind code cb_lock is obtained while holding rtnl, so we must
3246         * obtain the locks in the same order here.
3247         */
3248        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3249                up_read(&block->cb_lock);
3250                take_rtnl = true;
3251                goto retry;
3252        }
3253
3254        /* Make sure all netdevs sharing this block are offload-capable. */
3255        if (block->nooffloaddevcnt && err_stop) {
3256                ok_count = -EOPNOTSUPP;
3257                goto err_unlock;
3258        }
3259
3260        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3261        if (ok_count < 0)
3262                goto err_unlock;
3263
3264        if (tp->ops->hw_add)
3265                tp->ops->hw_add(tp, type_data);
3266        if (ok_count > 0)
3267                tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3268                                          ok_count, true);
3269err_unlock:
3270        up_read(&block->cb_lock);
3271        if (take_rtnl)
3272                rtnl_unlock();
3273        return ok_count < 0 ? ok_count : 0;
3274}
3275EXPORT_SYMBOL(tc_setup_cb_add);
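
/* Illustrative sketch of the add-path contract, modelled on flower:
 * err_stop is tc_skip_sw(flags), since a driver failure is only fatal
 * when the filter must not fall back to software, and a skip_sw filter
 * that ended up in no hardware at all is rejected afterwards.  The ex_*
 * name is hypothetical and the errno choice varies by classifier.
 */
static int __maybe_unused ex_cls_hw_add(struct tcf_block *block,
					struct tcf_proto *tp, void *type_data,
					u32 *flags, unsigned int *in_hw_count,
					bool rtnl_held)
{
	bool skip_sw = tc_skip_sw(*flags);
	int err;

	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, type_data,
			      skip_sw, flags, in_hw_count, rtnl_held);
	if (err)
		return err;

	if (skip_sw && !(*flags & TCA_CLS_FLAGS_IN_HW))
		return -EINVAL;	/* offload required but nothing took it */

	return 0;
}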
3276
3277/* Destructive filter replace. If a filter that wasn't already in hardware is
3278 * successfully offloaded, increment the block's offload counter. On failure,
3279 * the previously offloaded filter is considered destroyed and the offload
3280 * counter is decremented.
3281 */
3282
3283int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3284                        enum tc_setup_type type, void *type_data, bool err_stop,
3285                        u32 *old_flags, unsigned int *old_in_hw_count,
3286                        u32 *new_flags, unsigned int *new_in_hw_count,
3287                        bool rtnl_held)
3288{
3289        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3290        int ok_count;
3291
3292retry:
3293        if (take_rtnl)
3294                rtnl_lock();
3295        down_read(&block->cb_lock);
3296        /* Need to obtain rtnl lock if block is bound to devs that require it.
3297         * In block bind code cb_lock is obtained while holding rtnl, so we must
3298         * obtain the locks in the same order here.
3299         */
3300        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3301                up_read(&block->cb_lock);
3302                take_rtnl = true;
3303                goto retry;
3304        }
3305
3306        /* Make sure all netdevs sharing this block are offload-capable. */
3307        if (block->nooffloaddevcnt && err_stop) {
3308                ok_count = -EOPNOTSUPP;
3309                goto err_unlock;
3310        }
3311
3312        tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3313        if (tp->ops->hw_del)
3314                tp->ops->hw_del(tp, type_data);
3315
3316        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3317        if (ok_count < 0)
3318                goto err_unlock;
3319
3320        if (tp->ops->hw_add)
3321                tp->ops->hw_add(tp, type_data);
3322        if (ok_count > 0)
3323                tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3324                                          new_flags, ok_count, true);
3325err_unlock:
3326        up_read(&block->cb_lock);
3327        if (take_rtnl)
3328                rtnl_unlock();
3329        return ok_count < 0 ? ok_count : 0;
3330}
3331EXPORT_SYMBOL(tc_setup_cb_replace);
3332
3333/* Destroy a filter and decrement the block's offload counter if the filter
3334 * was previously offloaded.
3335 */
3336
3337int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3338                        enum tc_setup_type type, void *type_data, bool err_stop,
3339                        u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3340{
3341        bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3342        int ok_count;
3343
3344retry:
3345        if (take_rtnl)
3346                rtnl_lock();
3347        down_read(&block->cb_lock);
3348        /* Need to obtain rtnl lock if block is bound to devs that require it.
3349         * In block bind code cb_lock is obtained while holding rtnl, so we must
3350         * obtain the locks in the same order here.
3351         */
3352        if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3353                up_read(&block->cb_lock);
3354                take_rtnl = true;
3355                goto retry;
3356        }
3357
3358        ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3359
3360        tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3361        if (tp->ops->hw_del)
3362                tp->ops->hw_del(tp, type_data);
3363
3364        up_read(&block->cb_lock);
3365        if (take_rtnl)
3366                rtnl_unlock();
3367        return ok_count < 0 ? ok_count : 0;
3368}
3369EXPORT_SYMBOL(tc_setup_cb_destroy);
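
/* Illustrative sketch: the delete path is fire-and-forget, modelled on
 * flower.  err_stop is false and the return value is normally ignored,
 * since the software state goes away regardless of what the drivers
 * report.  The ex_* name is hypothetical.
 */
static void __maybe_unused ex_cls_hw_destroy(struct tcf_block *block,
					     struct tcf_proto *tp,
					     void *type_data, u32 *flags,
					     unsigned int *in_hw_count,
					     bool rtnl_held)
{
	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, type_data, false,
			    flags, in_hw_count, rtnl_held);
}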
3370
3371int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3372                          bool add, flow_setup_cb_t *cb,
3373                          enum tc_setup_type type, void *type_data,
3374                          void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3375{
3376        int err = cb(type, type_data, cb_priv);
3377
3378        if (err) {
3379                if (add && tc_skip_sw(*flags))
3380                        return err;
3381        } else {
3382                tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3383                                          add);
3384        }
3385
3386        return 0;
3387}
3388EXPORT_SYMBOL(tc_setup_cb_reoffload);
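
/* Illustrative sketch: a classifier's ->reoffload() callback replays each
 * of its filters to a newly (un)bound callback through
 * tc_setup_cb_reoffload(), keeping the IN_HW accounting consistent when a
 * device joins or leaves a shared block.  Shown for a single filter; the
 * ex_* name is hypothetical.
 */
static int __maybe_unused
ex_cls_reoffload_one(struct tcf_block *block, struct tcf_proto *tp, bool add,
		     flow_setup_cb_t *cb, void *cb_priv,
		     struct flow_cls_offload *f, u32 *flags,
		     unsigned int *in_hw_count)
{
	f->command = add ? FLOW_CLS_REPLACE : FLOW_CLS_DESTROY;
	return tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSFLOWER,
				     f, cb_priv, flags, in_hw_count);
}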
3389
3390void tc_cleanup_flow_action(struct flow_action *flow_action)
3391{
3392        struct flow_action_entry *entry;
3393        int i;
3394
3395        flow_action_for_each(i, entry, flow_action)
3396                if (entry->destructor)
3397                        entry->destructor(entry->destructor_priv);
3398}
3399EXPORT_SYMBOL(tc_cleanup_flow_action);
3400
3401static void tcf_mirred_get_dev(struct flow_action_entry *entry,
3402                               const struct tc_action *act)
3403{
3404#ifdef CONFIG_NET_CLS_ACT
3405        entry->dev = act->ops->get_dev(act, &entry->destructor);
3406        if (!entry->dev)
3407                return;
3408        entry->destructor_priv = entry->dev;
3409#endif
3410}
3411
3412static void tcf_tunnel_encap_put_tunnel(void *priv)
3413{
3414        struct ip_tunnel_info *tunnel = priv;
3415
3416        kfree(tunnel);
3417}
3418
3419static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
3420                                       const struct tc_action *act)
3421{
3422        entry->tunnel = tcf_tunnel_info_copy(act);
3423        if (!entry->tunnel)
3424                return -ENOMEM;
3425        entry->destructor = tcf_tunnel_encap_put_tunnel;
3426        entry->destructor_priv = entry->tunnel;
3427        return 0;
3428}
3429
3430static void tcf_sample_get_group(struct flow_action_entry *entry,
3431                                 const struct tc_action *act)
3432{
3433#ifdef CONFIG_NET_CLS_ACT
3434        entry->sample.psample_group =
3435                act->ops->get_psample_group(act, &entry->destructor);
3436        entry->destructor_priv = entry->sample.psample_group;
3437#endif
3438}
3439
3440int tc_setup_flow_action(struct flow_action *flow_action,
3441                         const struct tcf_exts *exts)
3442{
3443        struct tc_action *act;
3444        int i, j, k, err = 0;
3445
3446        if (!exts)
3447                return 0;
3448
3449        j = 0;
3450        tcf_exts_for_each_action(i, act, exts) {
3451                struct flow_action_entry *entry;
3452
3453                entry = &flow_action->entries[j];
3454                spin_lock_bh(&act->tcfa_lock);
3455                if (is_tcf_gact_ok(act)) {
3456                        entry->id = FLOW_ACTION_ACCEPT;
3457                } else if (is_tcf_gact_shot(act)) {
3458                        entry->id = FLOW_ACTION_DROP;
3459                } else if (is_tcf_gact_trap(act)) {
3460                        entry->id = FLOW_ACTION_TRAP;
3461                } else if (is_tcf_gact_goto_chain(act)) {
3462                        entry->id = FLOW_ACTION_GOTO;
3463                        entry->chain_index = tcf_gact_goto_chain_index(act);
3464                } else if (is_tcf_mirred_egress_redirect(act)) {
3465                        entry->id = FLOW_ACTION_REDIRECT;
3466                        tcf_mirred_get_dev(entry, act);
3467                } else if (is_tcf_mirred_egress_mirror(act)) {
3468                        entry->id = FLOW_ACTION_MIRRED;
3469                        tcf_mirred_get_dev(entry, act);
3470                } else if (is_tcf_mirred_ingress_redirect(act)) {
3471                        entry->id = FLOW_ACTION_REDIRECT_INGRESS;
3472                        tcf_mirred_get_dev(entry, act);
3473                } else if (is_tcf_mirred_ingress_mirror(act)) {
3474                        entry->id = FLOW_ACTION_MIRRED_INGRESS;
3475                        tcf_mirred_get_dev(entry, act);
3476                } else if (is_tcf_vlan(act)) {
3477                        switch (tcf_vlan_action(act)) {
3478                        case TCA_VLAN_ACT_PUSH:
3479                                entry->id = FLOW_ACTION_VLAN_PUSH;
3480                                entry->vlan.vid = tcf_vlan_push_vid(act);
3481                                entry->vlan.proto = tcf_vlan_push_proto(act);
3482                                entry->vlan.prio = tcf_vlan_push_prio(act);
3483                                break;
3484                        case TCA_VLAN_ACT_POP:
3485                                entry->id = FLOW_ACTION_VLAN_POP;
3486                                break;
3487                        case TCA_VLAN_ACT_MODIFY:
3488                                entry->id = FLOW_ACTION_VLAN_MANGLE;
3489                                entry->vlan.vid = tcf_vlan_push_vid(act);
3490                                entry->vlan.proto = tcf_vlan_push_proto(act);
3491                                entry->vlan.prio = tcf_vlan_push_prio(act);
3492                                break;
3493                        default:
3494                                err = -EOPNOTSUPP;
3495                                goto err_out_locked;
3496                        }
3497                } else if (is_tcf_tunnel_set(act)) {
3498                        entry->id = FLOW_ACTION_TUNNEL_ENCAP;
3499                        err = tcf_tunnel_encap_get_tunnel(entry, act);
3500                        if (err)
3501                                goto err_out_locked;
3502                } else if (is_tcf_tunnel_release(act)) {
3503                        entry->id = FLOW_ACTION_TUNNEL_DECAP;
3504                } else if (is_tcf_pedit(act)) {
3505                        for (k = 0; k < tcf_pedit_nkeys(act); k++) {
3506                                switch (tcf_pedit_cmd(act, k)) {
3507                                case TCA_PEDIT_KEY_EX_CMD_SET:
3508                                        entry->id = FLOW_ACTION_MANGLE;
3509                                        break;
3510                                case TCA_PEDIT_KEY_EX_CMD_ADD:
3511                                        entry->id = FLOW_ACTION_ADD;
3512                                        break;
3513                                default:
3514                                        err = -EOPNOTSUPP;
3515                                        goto err_out_locked;
3516                                }
3517                                entry->mangle.htype = tcf_pedit_htype(act, k);
3518                                entry->mangle.mask = tcf_pedit_mask(act, k);
3519                                entry->mangle.val = tcf_pedit_val(act, k);
3520                                entry->mangle.offset = tcf_pedit_offset(act, k);
3521                                entry = &flow_action->entries[++j];
3522                        }
3523                } else if (is_tcf_csum(act)) {
3524                        entry->id = FLOW_ACTION_CSUM;
3525                        entry->csum_flags = tcf_csum_update_flags(act);
3526                } else if (is_tcf_skbedit_mark(act)) {
3527                        entry->id = FLOW_ACTION_MARK;
3528                        entry->mark = tcf_skbedit_mark(act);
3529                } else if (is_tcf_sample(act)) {
3530                        entry->id = FLOW_ACTION_SAMPLE;
3531                        entry->sample.trunc_size = tcf_sample_trunc_size(act);
3532                        entry->sample.truncate = tcf_sample_truncate(act);
3533                        entry->sample.rate = tcf_sample_rate(act);
3534                        tcf_sample_get_group(entry, act);
3535                } else if (is_tcf_police(act)) {
3536                        entry->id = FLOW_ACTION_POLICE;
3537                        entry->police.burst = tcf_police_tcfp_burst(act);
3538                        entry->police.rate_bytes_ps =
3539                                tcf_police_rate_bytes_ps(act);
3540                } else if (is_tcf_ct(act)) {
3541                        entry->id = FLOW_ACTION_CT;
3542                        entry->ct.action = tcf_ct_action(act);
3543                        entry->ct.zone = tcf_ct_zone(act);
3544                } else if (is_tcf_mpls(act)) {
3545                        switch (tcf_mpls_action(act)) {
3546                        case TCA_MPLS_ACT_PUSH:
3547                                entry->id = FLOW_ACTION_MPLS_PUSH;
3548                                entry->mpls_push.proto = tcf_mpls_proto(act);
3549                                entry->mpls_push.label = tcf_mpls_label(act);
3550                                entry->mpls_push.tc = tcf_mpls_tc(act);
3551                                entry->mpls_push.bos = tcf_mpls_bos(act);
3552                                entry->mpls_push.ttl = tcf_mpls_ttl(act);
3553                                break;
3554                        case TCA_MPLS_ACT_POP:
3555                                entry->id = FLOW_ACTION_MPLS_POP;
3556                                entry->mpls_pop.proto = tcf_mpls_proto(act);
3557                                break;
3558                        case TCA_MPLS_ACT_MODIFY:
3559                                entry->id = FLOW_ACTION_MPLS_MANGLE;
3560                                entry->mpls_mangle.label = tcf_mpls_label(act);
3561                                entry->mpls_mangle.tc = tcf_mpls_tc(act);
3562                                entry->mpls_mangle.bos = tcf_mpls_bos(act);
3563                                entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
3564                                break;
3565                        default:
3566                                err = -EOPNOTSUPP;
3566                                goto err_out_locked;
3567                        }
3568                } else if (is_tcf_skbedit_ptype(act)) {
3569                        entry->id = FLOW_ACTION_PTYPE;
3570                        entry->ptype = tcf_skbedit_ptype(act);
3571                } else {
3572                        err = -EOPNOTSUPP;
3573                        goto err_out_locked;
3574                }
3575                spin_unlock_bh(&act->tcfa_lock);
3576
3577                if (!is_tcf_pedit(act))
3578                        j++;
3579        }
3580
3581err_out:
3582        if (err)
3583                tc_cleanup_flow_action(flow_action);
3584
3585        return err;
3586err_out_locked:
3587        spin_unlock_bh(&act->tcfa_lock);
3588        goto err_out;
3589}
3590EXPORT_SYMBOL(tc_setup_flow_action);
3591
3592unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3593{
3594        unsigned int num_acts = 0;
3595        struct tc_action *act;
3596        int i;
3597
3598        tcf_exts_for_each_action(i, act, exts) {
3599                if (is_tcf_pedit(act))
3600                        num_acts += tcf_pedit_nkeys(act);
3601                else
3602                        num_acts++;
3603        }
3604        return num_acts;
3605}
3606EXPORT_SYMBOL(tcf_exts_num_actions);
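
/* Illustrative sketch tying the helpers together the way flower-style
 * offload paths do: size the rule with tcf_exts_num_actions() (pedit
 * contributes one entry per key), then translate with
 * tc_setup_flow_action(), which already cleans up its entries on
 * failure.  A successful caller later runs
 * tc_cleanup_flow_action(&rule->action) and kfree()s the rule.  The ex_*
 * name is hypothetical.
 */
static __maybe_unused struct flow_rule *
ex_cls_build_rule(struct tcf_exts *exts)
{
	struct flow_rule *rule = flow_rule_alloc(tcf_exts_num_actions(exts));
	int err;

	if (!rule)
		return ERR_PTR(-ENOMEM);

	err = tc_setup_flow_action(&rule->action, exts);
	if (err) {
		kfree(rule);	/* entries were cleaned up on error */
		return ERR_PTR(err);
	}
	return rule;
}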
3607
3608static __net_init int tcf_net_init(struct net *net)
3609{
3610        struct tcf_net *tn = net_generic(net, tcf_net_id);
3611
3612        spin_lock_init(&tn->idr_lock);
3613        idr_init(&tn->idr);
3614        return 0;
3615}
3616
3617static void __net_exit tcf_net_exit(struct net *net)
3618{
3619        struct tcf_net *tn = net_generic(net, tcf_net_id);
3620
3621        idr_destroy(&tn->idr);
3622}
3623
3624static struct pernet_operations tcf_net_ops = {
3625        .init = tcf_net_init,
3626        .exit = tcf_net_exit,
3627        .id   = &tcf_net_id,
3628        .size = sizeof(struct tcf_net),
3629};
3630
3631static struct flow_indr_block_entry block_entry = {
3632        .cb = tc_indr_block_get_and_cmd,
3633        .list = LIST_HEAD_INIT(block_entry.list),
3634};
3635
3636static int __init tc_filter_init(void)
3637{
3638        int err;
3639
3640        tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3641        if (!tc_filter_wq)
3642                return -ENOMEM;
3643
3644        err = register_pernet_subsys(&tcf_net_ops);
3645        if (err)
3646                goto err_register_pernet_subsys;
3647
3648        flow_indr_add_block_cb(&block_entry);
3649
3650        rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3651                      RTNL_FLAG_DOIT_UNLOCKED);
3652        rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3653                      RTNL_FLAG_DOIT_UNLOCKED);
3654        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3655                      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3656        rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3657        rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3658        rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3659                      tc_dump_chain, 0);
3660
3661        return 0;
3662
3663err_register_pernet_subsys:
3664        destroy_workqueue(tc_filter_wq);
3665        return err;
3666}
3667
3668subsys_initcall(tc_filter_init);
3669