linux/net/sched/act_police.c
<<
>>
Prefs
   1/*
   2 * net/sched/police.c   Input police filter.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *              J Hadi Salim (action changes)
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/kernel.h>
  16#include <linux/string.h>
  17#include <linux/errno.h>
  18#include <linux/skbuff.h>
  19#include <linux/rtnetlink.h>
  20#include <linux/init.h>
  21#include <linux/slab.h>
  22#include <net/act_api.h>
  23#include <net/netlink.h>
  24
  25#define L2T(p,L)   qdisc_l2t((p)->tcfp_R_tab, L)
  26#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
  27
  28#define POL_TAB_MASK     15
  29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
  30static u32 police_idx_gen;
  31static DEFINE_RWLOCK(police_lock);
  32
  33static struct tcf_hashinfo police_hash_info = {
  34        .htab   =       tcf_police_ht,
  35        .hmask  =       POL_TAB_MASK,
  36        .lock   =       &police_lock,
  37};
  38
  39/* old policer structure from before tc actions */
  40struct tc_police_compat
  41{
  42        u32                     index;
  43        int                     action;
  44        u32                     limit;
  45        u32                     burst;
  46        u32                     mtu;
  47        struct tc_ratespec      rate;
  48        struct tc_ratespec      peakrate;
  49};
  50
  51/* Each policer is serialized by its individual spinlock */
  52
  53static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
  54                              int type, struct tc_action *a)
  55{
  56        struct tcf_common *p;
  57        int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
  58        struct nlattr *nest;
  59
  60        read_lock_bh(&police_lock);
  61
  62        s_i = cb->args[0];
  63
  64        for (i = 0; i < (POL_TAB_MASK + 1); i++) {
  65                p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
  66
  67                for (; p; p = p->tcfc_next) {
  68                        index++;
  69                        if (index < s_i)
  70                                continue;
  71                        a->priv = p;
  72                        a->order = index;
  73                        nest = nla_nest_start(skb, a->order);
  74                        if (nest == NULL)
  75                                goto nla_put_failure;
  76                        if (type == RTM_DELACTION)
  77                                err = tcf_action_dump_1(skb, a, 0, 1);
  78                        else
  79                                err = tcf_action_dump_1(skb, a, 0, 0);
  80                        if (err < 0) {
  81                                index--;
  82                                nla_nest_cancel(skb, nest);
  83                                goto done;
  84                        }
  85                        nla_nest_end(skb, nest);
  86                        n_i++;
  87                }
  88        }
  89done:
  90        read_unlock_bh(&police_lock);
  91        if (n_i)
  92                cb->args[0] += n_i;
  93        return n_i;
  94
  95nla_put_failure:
  96        nla_nest_cancel(skb, nest);
  97        goto done;
  98}
  99
 100static void tcf_police_free_rcu(struct rcu_head *head)
 101{
 102        kfree(container_of(head, struct tcf_police, tcf_rcu));
 103}
 104
 105static void tcf_police_destroy(struct tcf_police *p)
 106{
 107        unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
 108        struct tcf_common **p1p;
 109
 110        for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
 111                if (*p1p == &p->common) {
 112                        write_lock_bh(&police_lock);
 113                        *p1p = p->tcf_next;
 114                        write_unlock_bh(&police_lock);
 115                        gen_kill_estimator(&p->tcf_bstats,
 116                                           &p->tcf_rate_est);
 117                        if (p->tcfp_R_tab)
 118                                qdisc_put_rtab(p->tcfp_R_tab);
 119                        if (p->tcfp_P_tab)
 120                                qdisc_put_rtab(p->tcfp_P_tab);
 121                        /*
 122                         * gen_estimator est_timer() might access p->tcf_lock
 123                         * or bstats, wait a RCU grace period before freeing p
 124                         */
 125                        call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
 126                        return;
 127                }
 128        }
 129        WARN_ON(1);
 130}
 131
 132static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 133        [TCA_POLICE_RATE]       = { .len = TC_RTAB_SIZE },
 134        [TCA_POLICE_PEAKRATE]   = { .len = TC_RTAB_SIZE },
 135        [TCA_POLICE_AVRATE]     = { .type = NLA_U32 },
 136        [TCA_POLICE_RESULT]     = { .type = NLA_U32 },
 137};
 138
 139static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 140                                 struct tc_action *a, int ovr, int bind)
 141{
 142        unsigned h;
 143        int ret = 0, err;
 144        struct nlattr *tb[TCA_POLICE_MAX + 1];
 145        struct tc_police *parm;
 146        struct tcf_police *police;
 147        struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 148        int size;
 149
 150        if (nla == NULL)
 151                return -EINVAL;
 152
 153        err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
 154        if (err < 0)
 155                return err;
 156
 157        if (tb[TCA_POLICE_TBF] == NULL)
 158                return -EINVAL;
 159        size = nla_len(tb[TCA_POLICE_TBF]);
 160        if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
 161                return -EINVAL;
 162        parm = nla_data(tb[TCA_POLICE_TBF]);
 163
 164        if (parm->index) {
 165                struct tcf_common *pc;
 166
 167                pc = tcf_hash_lookup(parm->index, &police_hash_info);
 168                if (pc != NULL) {
 169                        a->priv = pc;
 170                        police = to_police(pc);
 171                        if (bind) {
 172                                police->tcf_bindcnt += 1;
 173                                police->tcf_refcnt += 1;
 174                        }
 175                        if (ovr)
 176                                goto override;
 177                        return ret;
 178                }
 179        }
 180
 181        police = kzalloc(sizeof(*police), GFP_KERNEL);
 182        if (police == NULL)
 183                return -ENOMEM;
 184        ret = ACT_P_CREATED;
 185        police->tcf_refcnt = 1;
 186        spin_lock_init(&police->tcf_lock);
 187        if (bind)
 188                police->tcf_bindcnt = 1;
 189override:
 190        if (parm->rate.rate) {
 191                err = -ENOMEM;
 192                R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
 193                if (R_tab == NULL)
 194                        goto failure;
 195
 196                if (parm->peakrate.rate) {
 197                        P_tab = qdisc_get_rtab(&parm->peakrate,
 198                                               tb[TCA_POLICE_PEAKRATE]);
 199                        if (P_tab == NULL)
 200                                goto failure;
 201                }
 202        }
 203
 204        spin_lock_bh(&police->tcf_lock);
 205        if (est) {
 206                err = gen_replace_estimator(&police->tcf_bstats,
 207                                            &police->tcf_rate_est,
 208                                            &police->tcf_lock, est);
 209                if (err)
 210                        goto failure_unlock;
 211        } else if (tb[TCA_POLICE_AVRATE] &&
 212                   (ret == ACT_P_CREATED ||
 213                    !gen_estimator_active(&police->tcf_bstats,
 214                                          &police->tcf_rate_est))) {
 215                err = -EINVAL;
 216                goto failure_unlock;
 217        }
 218
 219        /* No failure allowed after this point */
 220        if (R_tab != NULL) {
 221                qdisc_put_rtab(police->tcfp_R_tab);
 222                police->tcfp_R_tab = R_tab;
 223        }
 224        if (P_tab != NULL) {
 225                qdisc_put_rtab(police->tcfp_P_tab);
 226                police->tcfp_P_tab = P_tab;
 227        }
 228
 229        if (tb[TCA_POLICE_RESULT])
 230                police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
 231        police->tcfp_toks = police->tcfp_burst = parm->burst;
 232        police->tcfp_mtu = parm->mtu;
 233        if (police->tcfp_mtu == 0) {
 234                police->tcfp_mtu = ~0;
 235                if (police->tcfp_R_tab)
 236                        police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
 237        }
 238        if (police->tcfp_P_tab)
 239                police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
 240        police->tcf_action = parm->action;
 241
 242        if (tb[TCA_POLICE_AVRATE])
 243                police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 244
 245        spin_unlock_bh(&police->tcf_lock);
 246        if (ret != ACT_P_CREATED)
 247                return ret;
 248
 249        police->tcfp_t_c = psched_get_time();
 250        police->tcf_index = parm->index ? parm->index :
 251                tcf_hash_new_index(&police_idx_gen, &police_hash_info);
 252        h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 253        write_lock_bh(&police_lock);
 254        police->tcf_next = tcf_police_ht[h];
 255        tcf_police_ht[h] = &police->common;
 256        write_unlock_bh(&police_lock);
 257
 258        a->priv = police;
 259        return ret;
 260
 261failure_unlock:
 262        spin_unlock_bh(&police->tcf_lock);
 263failure:
 264        if (P_tab)
 265                qdisc_put_rtab(P_tab);
 266        if (R_tab)
 267                qdisc_put_rtab(R_tab);
 268        if (ret == ACT_P_CREATED)
 269                kfree(police);
 270        return err;
 271}
 272
 273static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 274{
 275        struct tcf_police *p = a->priv;
 276        int ret = 0;
 277
 278        if (p != NULL) {
 279                if (bind)
 280                        p->tcf_bindcnt--;
 281
 282                p->tcf_refcnt--;
 283                if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
 284                        tcf_police_destroy(p);
 285                        ret = 1;
 286                }
 287        }
 288        return ret;
 289}
 290
 291static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 292                          struct tcf_result *res)
 293{
 294        struct tcf_police *police = a->priv;
 295        psched_time_t now;
 296        long toks;
 297        long ptoks = 0;
 298
 299        spin_lock(&police->tcf_lock);
 300
 301        bstats_update(&police->tcf_bstats, skb);
 302
 303        if (police->tcfp_ewma_rate &&
 304            police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
 305                police->tcf_qstats.overlimits++;
 306                if (police->tcf_action == TC_ACT_SHOT)
 307                        police->tcf_qstats.drops++;
 308                spin_unlock(&police->tcf_lock);
 309                return police->tcf_action;
 310        }
 311
 312        if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
 313                if (police->tcfp_R_tab == NULL) {
 314                        spin_unlock(&police->tcf_lock);
 315                        return police->tcfp_result;
 316                }
 317
 318                now = psched_get_time();
 319                toks = psched_tdiff_bounded(now, police->tcfp_t_c,
 320                                            police->tcfp_burst);
 321                if (police->tcfp_P_tab) {
 322                        ptoks = toks + police->tcfp_ptoks;
 323                        if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
 324                                ptoks = (long)L2T_P(police, police->tcfp_mtu);
 325                        ptoks -= L2T_P(police, qdisc_pkt_len(skb));
 326                }
 327                toks += police->tcfp_toks;
 328                if (toks > (long)police->tcfp_burst)
 329                        toks = police->tcfp_burst;
 330                toks -= L2T(police, qdisc_pkt_len(skb));
 331                if ((toks|ptoks) >= 0) {
 332                        police->tcfp_t_c = now;
 333                        police->tcfp_toks = toks;
 334                        police->tcfp_ptoks = ptoks;
 335                        spin_unlock(&police->tcf_lock);
 336                        return police->tcfp_result;
 337                }
 338        }
 339
 340        police->tcf_qstats.overlimits++;
 341        if (police->tcf_action == TC_ACT_SHOT)
 342                police->tcf_qstats.drops++;
 343        spin_unlock(&police->tcf_lock);
 344        return police->tcf_action;
 345}
 346
 347static int
 348tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 349{
 350        unsigned char *b = skb_tail_pointer(skb);
 351        struct tcf_police *police = a->priv;
 352        struct tc_police opt = {
 353                .index = police->tcf_index,
 354                .action = police->tcf_action,
 355                .mtu = police->tcfp_mtu,
 356                .burst = police->tcfp_burst,
 357                .refcnt = police->tcf_refcnt - ref,
 358                .bindcnt = police->tcf_bindcnt - bind,
 359        };
 360
 361        if (police->tcfp_R_tab)
 362                opt.rate = police->tcfp_R_tab->rate;
 363        if (police->tcfp_P_tab)
 364                opt.peakrate = police->tcfp_P_tab->rate;
 365        NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
 366        if (police->tcfp_result)
 367                NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
 368        if (police->tcfp_ewma_rate)
 369                NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
 370        return skb->len;
 371
 372nla_put_failure:
 373        nlmsg_trim(skb, b);
 374        return -1;
 375}
 376
 377MODULE_AUTHOR("Alexey Kuznetsov");
 378MODULE_DESCRIPTION("Policing actions");
 379MODULE_LICENSE("GPL");
 380
 381static struct tc_action_ops act_police_ops = {
 382        .kind           =       "police",
 383        .hinfo          =       &police_hash_info,
 384        .type           =       TCA_ID_POLICE,
 385        .capab          =       TCA_CAP_NONE,
 386        .owner          =       THIS_MODULE,
 387        .act            =       tcf_act_police,
 388        .dump           =       tcf_act_police_dump,
 389        .cleanup        =       tcf_act_police_cleanup,
 390        .lookup         =       tcf_hash_search,
 391        .init           =       tcf_act_police_locate,
 392        .walk           =       tcf_act_police_walker
 393};
 394
 395static int __init
 396police_init_module(void)
 397{
 398        return tcf_register_action(&act_police_ops);
 399}
 400
 401static void __exit
 402police_cleanup_module(void)
 403{
 404        tcf_unregister_action(&act_police_ops);
 405        rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 406}
 407
 408module_init(police_init_module);
 409module_exit(police_cleanup_module);
 410