linux/net/sched/cls_rsvp.h
<<
>>
Prefs
   1/*
   2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 */
  11
  12/*
  13   Comparing to general packet classification problem,
   14   RSVP needs only several relatively simple rules:
  15
  16   * (dst, protocol) are always specified,
  17     so that we are able to hash them.
  18   * src may be exact, or may be wildcard, so that
  19     we can keep a hash table plus one wildcard entry.
  20   * source port (or flow label) is important only if src is given.
  21
  22   IMPLEMENTATION.
  23
  24   We use a two level hash table: The top level is keyed by
  25   destination address and protocol ID, every bucket contains a list
  26   of "rsvp sessions", identified by destination address, protocol and
  27   DPI(="Destination Port ID"): triple (key, mask, offset).
  28
  29   Every bucket has a smaller hash table keyed by source address
  30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
  31   Every bucket is again a list of "RSVP flows", selected by
  32   source address and SPI(="Source Port ID" here rather than
  33   "security parameter index"): triple (key, mask, offset).
  34
  35
  36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
  37   and all fragmented packets go to the best-effort traffic class.
  38
  39
  40   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
  41   only one "Generalized Port Identifier". So that for classic
  42   ah, esp (and udp,tcp) both *pi should coincide or one of them
  43   should be wildcard.
  44
  45   At first sight, this redundancy is just a waste of CPU
  46   resources. But DPI and SPI add the possibility to assign different
  47   priorities to GPIs. Look also at note 4 about tunnels below.
  48
  49
  50   NOTE 3. One complication is the case of tunneled packets.
  51   We implement it as following: if the first lookup
  52   matches a special session with "tunnelhdr" value not zero,
  53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
  54   In this case, we pull tunnelhdr bytes and restart lookup
  55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
  56   It's enough for PIMREG and IPIP.
  57
  58
  59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
  60   F.e. DPI can select ETH_P_IP (and necessary flags to make
  61   tunnelhdr correct) in GRE protocol field and SPI matches
  62   GRE key. Is it not nice? 8)8)
  63
  64
  65   Well, as result, despite its simplicity, we get a pretty
  66   powerful classification engine.  */
  67
  68
  69struct rsvp_head {
  70        u32                     tmap[256/32];
  71        u32                     hgenerator;
  72        u8                      tgenerator;
  73        struct rsvp_session     *ht[256];
  74};
  75
  76struct rsvp_session {
  77        struct rsvp_session     *next;
  78        __be32                  dst[RSVP_DST_LEN];
  79        struct tc_rsvp_gpi      dpi;
  80        u8                      protocol;
  81        u8                      tunnelid;
  82        /* 16 (src,sport) hash slots, and one wildcard source slot */
  83        struct rsvp_filter      *ht[16 + 1];
  84};
  85
  86
  87struct rsvp_filter {
  88        struct rsvp_filter      *next;
  89        __be32                  src[RSVP_DST_LEN];
  90        struct tc_rsvp_gpi      spi;
  91        u8                      tunnelhdr;
  92
  93        struct tcf_result       res;
  94        struct tcf_exts         exts;
  95
  96        u32                     handle;
  97        struct rsvp_session     *sess;
  98};
  99
 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 101{
 102        unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
 103
 104        h ^= h>>16;
 105        h ^= h>>8;
 106        return (h ^ protocol ^ tunnelid) & 0xFF;
 107}
 108
 109static inline unsigned int hash_src(__be32 *src)
 110{
 111        unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
 112
 113        h ^= h>>16;
 114        h ^= h>>8;
 115        h ^= h>>4;
 116        return h & 0xF;
 117}
 118
 119static struct tcf_ext_map rsvp_ext_map = {
 120        .police = TCA_RSVP_POLICE,
 121        .action = TCA_RSVP_ACT
 122};
 123
 124#define RSVP_APPLY_RESULT()                             \
 125{                                                       \
 126        int r = tcf_exts_exec(skb, &f->exts, res);      \
 127        if (r < 0)                                      \
 128                continue;                               \
 129        else if (r > 0)                                 \
 130                return r;                               \
 131}
 132
 133static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 134                         struct tcf_result *res)
 135{
 136        struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 137        struct rsvp_session *s;
 138        struct rsvp_filter *f;
 139        unsigned int h1, h2;
 140        __be32 *dst, *src;
 141        u8 protocol;
 142        u8 tunnelid = 0;
 143        u8 *xprt;
 144#if RSVP_DST_LEN == 4
 145        struct ipv6hdr *nhptr;
 146
 147        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 148                return -1;
 149        nhptr = ipv6_hdr(skb);
 150#else
 151        struct iphdr *nhptr;
 152
 153        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 154                return -1;
 155        nhptr = ip_hdr(skb);
 156#endif
 157
 158restart:
 159
 160#if RSVP_DST_LEN == 4
 161        src = &nhptr->saddr.s6_addr32[0];
 162        dst = &nhptr->daddr.s6_addr32[0];
 163        protocol = nhptr->nexthdr;
 164        xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
 165#else
 166        src = &nhptr->saddr;
 167        dst = &nhptr->daddr;
 168        protocol = nhptr->protocol;
 169        xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
 170        if (ip_is_fragment(nhptr))
 171                return -1;
 172#endif
 173
 174        h1 = hash_dst(dst, protocol, tunnelid);
 175        h2 = hash_src(src);
 176
 177        for (s = sht[h1]; s; s = s->next) {
 178                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
 179                    protocol == s->protocol &&
 180                    !(s->dpi.mask &
 181                      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
 182#if RSVP_DST_LEN == 4
 183                    dst[0] == s->dst[0] &&
 184                    dst[1] == s->dst[1] &&
 185                    dst[2] == s->dst[2] &&
 186#endif
 187                    tunnelid == s->tunnelid) {
 188
 189                        for (f = s->ht[h2]; f; f = f->next) {
 190                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
 191                                    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
 192#if RSVP_DST_LEN == 4
 193                                    &&
 194                                    src[0] == f->src[0] &&
 195                                    src[1] == f->src[1] &&
 196                                    src[2] == f->src[2]
 197#endif
 198                                    ) {
 199                                        *res = f->res;
 200                                        RSVP_APPLY_RESULT();
 201
 202matched:
 203                                        if (f->tunnelhdr == 0)
 204                                                return 0;
 205
 206                                        tunnelid = f->res.classid;
 207                                        nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
 208                                        goto restart;
 209                                }
 210                        }
 211
 212                        /* And wildcard bucket... */
 213                        for (f = s->ht[16]; f; f = f->next) {
 214                                *res = f->res;
 215                                RSVP_APPLY_RESULT();
 216                                goto matched;
 217                        }
 218                        return -1;
 219                }
 220        }
 221        return -1;
 222}
 223
 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 225{
 226        struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 227        struct rsvp_session *s;
 228        struct rsvp_filter *f;
 229        unsigned int h1 = handle & 0xFF;
 230        unsigned int h2 = (handle >> 8) & 0xFF;
 231
 232        if (h2 > 16)
 233                return 0;
 234
 235        for (s = sht[h1]; s; s = s->next) {
 236                for (f = s->ht[h2]; f; f = f->next) {
 237                        if (f->handle == handle)
 238                                return (unsigned long)f;
 239                }
 240        }
 241        return 0;
 242}
 243
 244static void rsvp_put(struct tcf_proto *tp, unsigned long f)
 245{
 246}
 247
 248static int rsvp_init(struct tcf_proto *tp)
 249{
 250        struct rsvp_head *data;
 251
 252        data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 253        if (data) {
 254                tp->root = data;
 255                return 0;
 256        }
 257        return -ENOBUFS;
 258}
 259
 260static void
 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 262{
 263        tcf_unbind_filter(tp, &f->res);
 264        tcf_exts_destroy(tp, &f->exts);
 265        kfree(f);
 266}
 267
 268static void rsvp_destroy(struct tcf_proto *tp)
 269{
 270        struct rsvp_head *data = xchg(&tp->root, NULL);
 271        struct rsvp_session **sht;
 272        int h1, h2;
 273
 274        if (data == NULL)
 275                return;
 276
 277        sht = data->ht;
 278
 279        for (h1 = 0; h1 < 256; h1++) {
 280                struct rsvp_session *s;
 281
 282                while ((s = sht[h1]) != NULL) {
 283                        sht[h1] = s->next;
 284
 285                        for (h2 = 0; h2 <= 16; h2++) {
 286                                struct rsvp_filter *f;
 287
 288                                while ((f = s->ht[h2]) != NULL) {
 289                                        s->ht[h2] = f->next;
 290                                        rsvp_delete_filter(tp, f);
 291                                }
 292                        }
 293                        kfree(s);
 294                }
 295        }
 296        kfree(data);
 297}
 298
 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 300{
 301        struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
 302        unsigned int h = f->handle;
 303        struct rsvp_session **sp;
 304        struct rsvp_session *s = f->sess;
 305        int i;
 306
 307        for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
 308                if (*fp == f) {
 309                        tcf_tree_lock(tp);
 310                        *fp = f->next;
 311                        tcf_tree_unlock(tp);
 312                        rsvp_delete_filter(tp, f);
 313
 314                        /* Strip tree */
 315
 316                        for (i = 0; i <= 16; i++)
 317                                if (s->ht[i])
 318                                        return 0;
 319
 320                        /* OK, session has no flows */
 321                        for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
 322                             *sp; sp = &(*sp)->next) {
 323                                if (*sp == s) {
 324                                        tcf_tree_lock(tp);
 325                                        *sp = s->next;
 326                                        tcf_tree_unlock(tp);
 327
 328                                        kfree(s);
 329                                        return 0;
 330                                }
 331                        }
 332
 333                        return 0;
 334                }
 335        }
 336        return 0;
 337}
 338
 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 340{
 341        struct rsvp_head *data = tp->root;
 342        int i = 0xFFFF;
 343
 344        while (i-- > 0) {
 345                u32 h;
 346
 347                if ((data->hgenerator += 0x10000) == 0)
 348                        data->hgenerator = 0x10000;
 349                h = data->hgenerator|salt;
 350                if (rsvp_get(tp, h) == 0)
 351                        return h;
 352        }
 353        return 0;
 354}
 355
 356static int tunnel_bts(struct rsvp_head *data)
 357{
 358        int n = data->tgenerator >> 5;
 359        u32 b = 1 << (data->tgenerator & 0x1F);
 360
 361        if (data->tmap[n] & b)
 362                return 0;
 363        data->tmap[n] |= b;
 364        return 1;
 365}
 366
 367static void tunnel_recycle(struct rsvp_head *data)
 368{
 369        struct rsvp_session **sht = data->ht;
 370        u32 tmap[256/32];
 371        int h1, h2;
 372
 373        memset(tmap, 0, sizeof(tmap));
 374
 375        for (h1 = 0; h1 < 256; h1++) {
 376                struct rsvp_session *s;
 377                for (s = sht[h1]; s; s = s->next) {
 378                        for (h2 = 0; h2 <= 16; h2++) {
 379                                struct rsvp_filter *f;
 380
 381                                for (f = s->ht[h2]; f; f = f->next) {
 382                                        if (f->tunnelhdr == 0)
 383                                                continue;
 384                                        data->tgenerator = f->res.classid;
 385                                        tunnel_bts(data);
 386                                }
 387                        }
 388                }
 389        }
 390
 391        memcpy(data->tmap, tmap, sizeof(tmap));
 392}
 393
 394static u32 gen_tunnel(struct rsvp_head *data)
 395{
 396        int i, k;
 397
 398        for (k = 0; k < 2; k++) {
 399                for (i = 255; i > 0; i--) {
 400                        if (++data->tgenerator == 0)
 401                                data->tgenerator = 1;
 402                        if (tunnel_bts(data))
 403                                return data->tgenerator;
 404                }
 405                tunnel_recycle(data);
 406        }
 407        return 0;
 408}
 409
 410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 411        [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
 412        [TCA_RSVP_DST]          = { .type = NLA_BINARY,
 413                                    .len = RSVP_DST_LEN * sizeof(u32) },
 414        [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
 415                                    .len = RSVP_DST_LEN * sizeof(u32) },
 416        [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
 417};
 418
 419static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 420                       struct tcf_proto *tp, unsigned long base,
 421                       u32 handle,
 422                       struct nlattr **tca,
 423                       unsigned long *arg)
 424{
 425        struct rsvp_head *data = tp->root;
 426        struct rsvp_filter *f, **fp;
 427        struct rsvp_session *s, **sp;
 428        struct tc_rsvp_pinfo *pinfo = NULL;
 429        struct nlattr *opt = tca[TCA_OPTIONS];
 430        struct nlattr *tb[TCA_RSVP_MAX + 1];
 431        struct tcf_exts e;
 432        unsigned int h1, h2;
 433        __be32 *dst;
 434        int err;
 435
 436        if (opt == NULL)
 437                return handle ? -EINVAL : 0;
 438
 439        err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 440        if (err < 0)
 441                return err;
 442
 443        err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 444        if (err < 0)
 445                return err;
 446
 447        f = (struct rsvp_filter *)*arg;
 448        if (f) {
 449                /* Node exists: adjust only classid */
 450
 451                if (f->handle != handle && handle)
 452                        goto errout2;
 453                if (tb[TCA_RSVP_CLASSID]) {
 454                        f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 455                        tcf_bind_filter(tp, &f->res, base);
 456                }
 457
 458                tcf_exts_change(tp, &f->exts, &e);
 459                return 0;
 460        }
 461
 462        /* Now more serious part... */
 463        err = -EINVAL;
 464        if (handle)
 465                goto errout2;
 466        if (tb[TCA_RSVP_DST] == NULL)
 467                goto errout2;
 468
 469        err = -ENOBUFS;
 470        f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 471        if (f == NULL)
 472                goto errout2;
 473
 474        h2 = 16;
 475        if (tb[TCA_RSVP_SRC]) {
 476                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 477                h2 = hash_src(f->src);
 478        }
 479        if (tb[TCA_RSVP_PINFO]) {
 480                pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 481                f->spi = pinfo->spi;
 482                f->tunnelhdr = pinfo->tunnelhdr;
 483        }
 484        if (tb[TCA_RSVP_CLASSID])
 485                f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 486
 487        dst = nla_data(tb[TCA_RSVP_DST]);
 488        h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 489
 490        err = -ENOMEM;
 491        if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
 492                goto errout;
 493
 494        if (f->tunnelhdr) {
 495                err = -EINVAL;
 496                if (f->res.classid > 255)
 497                        goto errout;
 498
 499                err = -ENOMEM;
 500                if (f->res.classid == 0 &&
 501                    (f->res.classid = gen_tunnel(data)) == 0)
 502                        goto errout;
 503        }
 504
 505        for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
 506                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 507                    pinfo && pinfo->protocol == s->protocol &&
 508                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
 509#if RSVP_DST_LEN == 4
 510                    dst[0] == s->dst[0] &&
 511                    dst[1] == s->dst[1] &&
 512                    dst[2] == s->dst[2] &&
 513#endif
 514                    pinfo->tunnelid == s->tunnelid) {
 515
 516insert:
 517                        /* OK, we found appropriate session */
 518
 519                        fp = &s->ht[h2];
 520
 521                        f->sess = s;
 522                        if (f->tunnelhdr == 0)
 523                                tcf_bind_filter(tp, &f->res, base);
 524
 525                        tcf_exts_change(tp, &f->exts, &e);
 526
 527                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
 528                                if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
 529                                        break;
 530                        f->next = *fp;
 531                        wmb();
 532                        *fp = f;
 533
 534                        *arg = (unsigned long)f;
 535                        return 0;
 536                }
 537        }
 538
 539        /* No session found. Create new one. */
 540
 541        err = -ENOBUFS;
 542        s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 543        if (s == NULL)
 544                goto errout;
 545        memcpy(s->dst, dst, sizeof(s->dst));
 546
 547        if (pinfo) {
 548                s->dpi = pinfo->dpi;
 549                s->protocol = pinfo->protocol;
 550                s->tunnelid = pinfo->tunnelid;
 551        }
 552        for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
 553                if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 554                        break;
 555        }
 556        s->next = *sp;
 557        wmb();
 558        *sp = s;
 559
 560        goto insert;
 561
 562errout:
 563        kfree(f);
 564errout2:
 565        tcf_exts_destroy(tp, &e);
 566        return err;
 567}
 568
 569static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 570{
 571        struct rsvp_head *head = tp->root;
 572        unsigned int h, h1;
 573
 574        if (arg->stop)
 575                return;
 576
 577        for (h = 0; h < 256; h++) {
 578                struct rsvp_session *s;
 579
 580                for (s = head->ht[h]; s; s = s->next) {
 581                        for (h1 = 0; h1 <= 16; h1++) {
 582                                struct rsvp_filter *f;
 583
 584                                for (f = s->ht[h1]; f; f = f->next) {
 585                                        if (arg->count < arg->skip) {
 586                                                arg->count++;
 587                                                continue;
 588                                        }
 589                                        if (arg->fn(tp, (unsigned long)f, arg) < 0) {
 590                                                arg->stop = 1;
 591                                                return;
 592                                        }
 593                                        arg->count++;
 594                                }
 595                        }
 596                }
 597        }
 598}
 599
 600static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 601                     struct sk_buff *skb, struct tcmsg *t)
 602{
 603        struct rsvp_filter *f = (struct rsvp_filter *)fh;
 604        struct rsvp_session *s;
 605        unsigned char *b = skb_tail_pointer(skb);
 606        struct nlattr *nest;
 607        struct tc_rsvp_pinfo pinfo;
 608
 609        if (f == NULL)
 610                return skb->len;
 611        s = f->sess;
 612
 613        t->tcm_handle = f->handle;
 614
 615        nest = nla_nest_start(skb, TCA_OPTIONS);
 616        if (nest == NULL)
 617                goto nla_put_failure;
 618
 619        if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
 620                goto nla_put_failure;
 621        pinfo.dpi = s->dpi;
 622        pinfo.spi = f->spi;
 623        pinfo.protocol = s->protocol;
 624        pinfo.tunnelid = s->tunnelid;
 625        pinfo.tunnelhdr = f->tunnelhdr;
 626        pinfo.pad = 0;
 627        if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
 628                goto nla_put_failure;
 629        if (f->res.classid &&
 630            nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
 631                goto nla_put_failure;
 632        if (((f->handle >> 8) & 0xFF) != 16 &&
 633            nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
 634                goto nla_put_failure;
 635
 636        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 637                goto nla_put_failure;
 638
 639        nla_nest_end(skb, nest);
 640
 641        if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 642                goto nla_put_failure;
 643        return skb->len;
 644
 645nla_put_failure:
 646        nlmsg_trim(skb, b);
 647        return -1;
 648}
 649
 650static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 651        .kind           =       RSVP_ID,
 652        .classify       =       rsvp_classify,
 653        .init           =       rsvp_init,
 654        .destroy        =       rsvp_destroy,
 655        .get            =       rsvp_get,
 656        .put            =       rsvp_put,
 657        .change         =       rsvp_change,
 658        .delete         =       rsvp_delete,
 659        .walk           =       rsvp_walk,
 660        .dump           =       rsvp_dump,
 661        .owner          =       THIS_MODULE,
 662};
 663
 664static int __init init_rsvp(void)
 665{
 666        return register_tcf_proto_ops(&RSVP_OPS);
 667}
 668
 669static void __exit exit_rsvp(void)
 670{
 671        unregister_tcf_proto_ops(&RSVP_OPS);
 672}
 673
 674module_init(init_rsvp)
 675module_exit(exit_rsvp)
 676