linux/net/netfilter/nft_ct.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
   3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License version 2 as
   7 * published by the Free Software Foundation.
   8 *
   9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/init.h>
  14#include <linux/module.h>
  15#include <linux/netlink.h>
  16#include <linux/netfilter.h>
  17#include <linux/netfilter/nf_tables.h>
  18#include <net/netfilter/nf_tables.h>
  19#include <net/netfilter/nf_conntrack.h>
  20#include <net/netfilter/nf_conntrack_acct.h>
  21#include <net/netfilter/nf_conntrack_tuple.h>
  22#include <net/netfilter/nf_conntrack_helper.h>
  23#include <net/netfilter/nf_conntrack_ecache.h>
  24#include <net/netfilter/nf_conntrack_labels.h>
  25#include <net/netfilter/nf_conntrack_timeout.h>
  26#include <net/netfilter/nf_conntrack_l4proto.h>
  27
  28struct nft_ct {
  29        enum nft_ct_keys        key:8;
  30        enum ip_conntrack_dir   dir:8;
  31        union {
  32                enum nft_registers      dreg:8;
  33                enum nft_registers      sreg:8;
  34        };
  35};
  36
  37struct nft_ct_helper_obj  {
  38        struct nf_conntrack_helper *helper4;
  39        struct nf_conntrack_helper *helper6;
  40        u8 l4proto;
  41};
  42
  43#ifdef CONFIG_NF_CONNTRACK_ZONES
  44static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
  45static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
  46#endif
  47
  48static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
  49                                   enum nft_ct_keys k,
  50                                   enum ip_conntrack_dir d)
  51{
  52        if (d < IP_CT_DIR_MAX)
  53                return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
  54                                           atomic64_read(&c[d].packets);
  55
  56        return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
  57               nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
  58}
  59
  60static void nft_ct_get_eval(const struct nft_expr *expr,
  61                            struct nft_regs *regs,
  62                            const struct nft_pktinfo *pkt)
  63{
  64        const struct nft_ct *priv = nft_expr_priv(expr);
  65        u32 *dest = &regs->data[priv->dreg];
  66        enum ip_conntrack_info ctinfo;
  67        const struct nf_conn *ct;
  68        const struct nf_conn_help *help;
  69        const struct nf_conntrack_tuple *tuple;
  70        const struct nf_conntrack_helper *helper;
  71        unsigned int state;
  72
  73        ct = nf_ct_get(pkt->skb, &ctinfo);
  74
  75        switch (priv->key) {
  76        case NFT_CT_STATE:
  77                if (ct)
  78                        state = NF_CT_STATE_BIT(ctinfo);
  79                else if (ctinfo == IP_CT_UNTRACKED)
  80                        state = NF_CT_STATE_UNTRACKED_BIT;
  81                else
  82                        state = NF_CT_STATE_INVALID_BIT;
  83                *dest = state;
  84                return;
  85        default:
  86                break;
  87        }
  88
  89        if (ct == NULL)
  90                goto err;
  91
  92        switch (priv->key) {
  93        case NFT_CT_DIRECTION:
  94                nft_reg_store8(dest, CTINFO2DIR(ctinfo));
  95                return;
  96        case NFT_CT_STATUS:
  97                *dest = ct->status;
  98                return;
  99#ifdef CONFIG_NF_CONNTRACK_MARK
 100        case NFT_CT_MARK:
 101                *dest = ct->mark;
 102                return;
 103#endif
 104#ifdef CONFIG_NF_CONNTRACK_SECMARK
 105        case NFT_CT_SECMARK:
 106                *dest = ct->secmark;
 107                return;
 108#endif
 109        case NFT_CT_EXPIRATION:
 110                *dest = jiffies_to_msecs(nf_ct_expires(ct));
 111                return;
 112        case NFT_CT_HELPER:
 113                if (ct->master == NULL)
 114                        goto err;
 115                help = nfct_help(ct->master);
 116                if (help == NULL)
 117                        goto err;
 118                helper = rcu_dereference(help->helper);
 119                if (helper == NULL)
 120                        goto err;
 121                strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
 122                return;
 123#ifdef CONFIG_NF_CONNTRACK_LABELS
 124        case NFT_CT_LABELS: {
 125                struct nf_conn_labels *labels = nf_ct_labels_find(ct);
 126
 127                if (labels)
 128                        memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
 129                else
 130                        memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
 131                return;
 132        }
 133#endif
 134        case NFT_CT_BYTES: /* fallthrough */
 135        case NFT_CT_PKTS: {
 136                const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
 137                u64 count = 0;
 138
 139                if (acct)
 140                        count = nft_ct_get_eval_counter(acct->counter,
 141                                                        priv->key, priv->dir);
 142                memcpy(dest, &count, sizeof(count));
 143                return;
 144        }
 145        case NFT_CT_AVGPKT: {
 146                const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
 147                u64 avgcnt = 0, bcnt = 0, pcnt = 0;
 148
 149                if (acct) {
 150                        pcnt = nft_ct_get_eval_counter(acct->counter,
 151                                                       NFT_CT_PKTS, priv->dir);
 152                        bcnt = nft_ct_get_eval_counter(acct->counter,
 153                                                       NFT_CT_BYTES, priv->dir);
 154                        if (pcnt != 0)
 155                                avgcnt = div64_u64(bcnt, pcnt);
 156                }
 157
 158                memcpy(dest, &avgcnt, sizeof(avgcnt));
 159                return;
 160        }
 161        case NFT_CT_L3PROTOCOL:
 162                nft_reg_store8(dest, nf_ct_l3num(ct));
 163                return;
 164        case NFT_CT_PROTOCOL:
 165                nft_reg_store8(dest, nf_ct_protonum(ct));
 166                return;
 167#ifdef CONFIG_NF_CONNTRACK_ZONES
 168        case NFT_CT_ZONE: {
 169                const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
 170                u16 zoneid;
 171
 172                if (priv->dir < IP_CT_DIR_MAX)
 173                        zoneid = nf_ct_zone_id(zone, priv->dir);
 174                else
 175                        zoneid = zone->id;
 176
 177                nft_reg_store16(dest, zoneid);
 178                return;
 179        }
 180#endif
 181        default:
 182                break;
 183        }
 184
 185        tuple = &ct->tuplehash[priv->dir].tuple;
 186        switch (priv->key) {
 187        case NFT_CT_SRC:
 188                memcpy(dest, tuple->src.u3.all,
 189                       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
 190                return;
 191        case NFT_CT_DST:
 192                memcpy(dest, tuple->dst.u3.all,
 193                       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
 194                return;
 195        case NFT_CT_PROTO_SRC:
 196                nft_reg_store16(dest, (__force u16)tuple->src.u.all);
 197                return;
 198        case NFT_CT_PROTO_DST:
 199                nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
 200                return;
 201        case NFT_CT_SRC_IP:
 202                if (nf_ct_l3num(ct) != NFPROTO_IPV4)
 203                        goto err;
 204                *dest = tuple->src.u3.ip;
 205                return;
 206        case NFT_CT_DST_IP:
 207                if (nf_ct_l3num(ct) != NFPROTO_IPV4)
 208                        goto err;
 209                *dest = tuple->dst.u3.ip;
 210                return;
 211        case NFT_CT_SRC_IP6:
 212                if (nf_ct_l3num(ct) != NFPROTO_IPV6)
 213                        goto err;
 214                memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
 215                return;
 216        case NFT_CT_DST_IP6:
 217                if (nf_ct_l3num(ct) != NFPROTO_IPV6)
 218                        goto err;
 219                memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
 220                return;
 221        default:
 222                break;
 223        }
 224        return;
 225err:
 226        regs->verdict.code = NFT_BREAK;
 227}
 228
 229#ifdef CONFIG_NF_CONNTRACK_ZONES
 230static void nft_ct_set_zone_eval(const struct nft_expr *expr,
 231                                 struct nft_regs *regs,
 232                                 const struct nft_pktinfo *pkt)
 233{
 234        struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
 235        const struct nft_ct *priv = nft_expr_priv(expr);
 236        struct sk_buff *skb = pkt->skb;
 237        enum ip_conntrack_info ctinfo;
 238        u16 value = nft_reg_load16(&regs->data[priv->sreg]);
 239        struct nf_conn *ct;
 240
 241        ct = nf_ct_get(skb, &ctinfo);
 242        if (ct) /* already tracked */
 243                return;
 244
 245        zone.id = value;
 246
 247        switch (priv->dir) {
 248        case IP_CT_DIR_ORIGINAL:
 249                zone.dir = NF_CT_ZONE_DIR_ORIG;
 250                break;
 251        case IP_CT_DIR_REPLY:
 252                zone.dir = NF_CT_ZONE_DIR_REPL;
 253                break;
 254        default:
 255                break;
 256        }
 257
 258        ct = this_cpu_read(nft_ct_pcpu_template);
 259
 260        if (likely(atomic_read(&ct->ct_general.use) == 1)) {
 261                nf_ct_zone_add(ct, &zone);
 262        } else {
 263                /* previous skb got queued to userspace */
 264                ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
 265                if (!ct) {
 266                        regs->verdict.code = NF_DROP;
 267                        return;
 268                }
 269        }
 270
 271        atomic_inc(&ct->ct_general.use);
 272        nf_ct_set(skb, ct, IP_CT_NEW);
 273}
 274#endif
 275
 276static void nft_ct_set_eval(const struct nft_expr *expr,
 277                            struct nft_regs *regs,
 278                            const struct nft_pktinfo *pkt)
 279{
 280        const struct nft_ct *priv = nft_expr_priv(expr);
 281        struct sk_buff *skb = pkt->skb;
 282#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
 283        u32 value = regs->data[priv->sreg];
 284#endif
 285        enum ip_conntrack_info ctinfo;
 286        struct nf_conn *ct;
 287
 288        ct = nf_ct_get(skb, &ctinfo);
 289        if (ct == NULL || nf_ct_is_template(ct))
 290                return;
 291
 292        switch (priv->key) {
 293#ifdef CONFIG_NF_CONNTRACK_MARK
 294        case NFT_CT_MARK:
 295                if (ct->mark != value) {
 296                        ct->mark = value;
 297                        nf_conntrack_event_cache(IPCT_MARK, ct);
 298                }
 299                break;
 300#endif
 301#ifdef CONFIG_NF_CONNTRACK_SECMARK
 302        case NFT_CT_SECMARK:
 303                if (ct->secmark != value) {
 304                        ct->secmark = value;
 305                        nf_conntrack_event_cache(IPCT_SECMARK, ct);
 306                }
 307                break;
 308#endif
 309#ifdef CONFIG_NF_CONNTRACK_LABELS
 310        case NFT_CT_LABELS:
 311                nf_connlabels_replace(ct,
 312                                      &regs->data[priv->sreg],
 313                                      &regs->data[priv->sreg],
 314                                      NF_CT_LABELS_MAX_SIZE / sizeof(u32));
 315                break;
 316#endif
 317#ifdef CONFIG_NF_CONNTRACK_EVENTS
 318        case NFT_CT_EVENTMASK: {
 319                struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
 320                u32 ctmask = regs->data[priv->sreg];
 321
 322                if (e) {
 323                        if (e->ctmask != ctmask)
 324                                e->ctmask = ctmask;
 325                        break;
 326                }
 327
 328                if (ctmask && !nf_ct_is_confirmed(ct))
 329                        nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
 330                break;
 331        }
 332#endif
 333        default:
 334                break;
 335        }
 336}
 337
 338static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
 339        [NFTA_CT_DREG]          = { .type = NLA_U32 },
 340        [NFTA_CT_KEY]           = { .type = NLA_U32 },
 341        [NFTA_CT_DIRECTION]     = { .type = NLA_U8 },
 342        [NFTA_CT_SREG]          = { .type = NLA_U32 },
 343};
 344
 345#ifdef CONFIG_NF_CONNTRACK_ZONES
 346static void nft_ct_tmpl_put_pcpu(void)
 347{
 348        struct nf_conn *ct;
 349        int cpu;
 350
 351        for_each_possible_cpu(cpu) {
 352                ct = per_cpu(nft_ct_pcpu_template, cpu);
 353                if (!ct)
 354                        break;
 355                nf_ct_put(ct);
 356                per_cpu(nft_ct_pcpu_template, cpu) = NULL;
 357        }
 358}
 359
 360static bool nft_ct_tmpl_alloc_pcpu(void)
 361{
 362        struct nf_conntrack_zone zone = { .id = 0 };
 363        struct nf_conn *tmp;
 364        int cpu;
 365
 366        if (nft_ct_pcpu_template_refcnt)
 367                return true;
 368
 369        for_each_possible_cpu(cpu) {
 370                tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
 371                if (!tmp) {
 372                        nft_ct_tmpl_put_pcpu();
 373                        return false;
 374                }
 375
 376                atomic_set(&tmp->ct_general.use, 1);
 377                per_cpu(nft_ct_pcpu_template, cpu) = tmp;
 378        }
 379
 380        return true;
 381}
 382#endif
 383
 384static int nft_ct_get_init(const struct nft_ctx *ctx,
 385                           const struct nft_expr *expr,
 386                           const struct nlattr * const tb[])
 387{
 388        struct nft_ct *priv = nft_expr_priv(expr);
 389        unsigned int len;
 390        int err;
 391
 392        priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
 393        priv->dir = IP_CT_DIR_MAX;
 394        switch (priv->key) {
 395        case NFT_CT_DIRECTION:
 396                if (tb[NFTA_CT_DIRECTION] != NULL)
 397                        return -EINVAL;
 398                len = sizeof(u8);
 399                break;
 400        case NFT_CT_STATE:
 401        case NFT_CT_STATUS:
 402#ifdef CONFIG_NF_CONNTRACK_MARK
 403        case NFT_CT_MARK:
 404#endif
 405#ifdef CONFIG_NF_CONNTRACK_SECMARK
 406        case NFT_CT_SECMARK:
 407#endif
 408        case NFT_CT_EXPIRATION:
 409                if (tb[NFTA_CT_DIRECTION] != NULL)
 410                        return -EINVAL;
 411                len = sizeof(u32);
 412                break;
 413#ifdef CONFIG_NF_CONNTRACK_LABELS
 414        case NFT_CT_LABELS:
 415                if (tb[NFTA_CT_DIRECTION] != NULL)
 416                        return -EINVAL;
 417                len = NF_CT_LABELS_MAX_SIZE;
 418                break;
 419#endif
 420        case NFT_CT_HELPER:
 421                if (tb[NFTA_CT_DIRECTION] != NULL)
 422                        return -EINVAL;
 423                len = NF_CT_HELPER_NAME_LEN;
 424                break;
 425
 426        case NFT_CT_L3PROTOCOL:
 427        case NFT_CT_PROTOCOL:
 428                /* For compatibility, do not report error if NFTA_CT_DIRECTION
 429                 * attribute is specified.
 430                 */
 431                len = sizeof(u8);
 432                break;
 433        case NFT_CT_SRC:
 434        case NFT_CT_DST:
 435                if (tb[NFTA_CT_DIRECTION] == NULL)
 436                        return -EINVAL;
 437
 438                switch (ctx->family) {
 439                case NFPROTO_IPV4:
 440                        len = FIELD_SIZEOF(struct nf_conntrack_tuple,
 441                                           src.u3.ip);
 442                        break;
 443                case NFPROTO_IPV6:
 444                case NFPROTO_INET:
 445                        len = FIELD_SIZEOF(struct nf_conntrack_tuple,
 446                                           src.u3.ip6);
 447                        break;
 448                default:
 449                        return -EAFNOSUPPORT;
 450                }
 451                break;
 452        case NFT_CT_SRC_IP:
 453        case NFT_CT_DST_IP:
 454                if (tb[NFTA_CT_DIRECTION] == NULL)
 455                        return -EINVAL;
 456
 457                len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
 458                break;
 459        case NFT_CT_SRC_IP6:
 460        case NFT_CT_DST_IP6:
 461                if (tb[NFTA_CT_DIRECTION] == NULL)
 462                        return -EINVAL;
 463
 464                len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
 465                break;
 466        case NFT_CT_PROTO_SRC:
 467        case NFT_CT_PROTO_DST:
 468                if (tb[NFTA_CT_DIRECTION] == NULL)
 469                        return -EINVAL;
 470                len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
 471                break;
 472        case NFT_CT_BYTES:
 473        case NFT_CT_PKTS:
 474        case NFT_CT_AVGPKT:
 475                len = sizeof(u64);
 476                break;
 477#ifdef CONFIG_NF_CONNTRACK_ZONES
 478        case NFT_CT_ZONE:
 479                len = sizeof(u16);
 480                break;
 481#endif
 482        default:
 483                return -EOPNOTSUPP;
 484        }
 485
 486        if (tb[NFTA_CT_DIRECTION] != NULL) {
 487                priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
 488                switch (priv->dir) {
 489                case IP_CT_DIR_ORIGINAL:
 490                case IP_CT_DIR_REPLY:
 491                        break;
 492                default:
 493                        return -EINVAL;
 494                }
 495        }
 496
 497        priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
 498        err = nft_validate_register_store(ctx, priv->dreg, NULL,
 499                                          NFT_DATA_VALUE, len);
 500        if (err < 0)
 501                return err;
 502
 503        err = nf_ct_netns_get(ctx->net, ctx->family);
 504        if (err < 0)
 505                return err;
 506
 507        if (priv->key == NFT_CT_BYTES ||
 508            priv->key == NFT_CT_PKTS  ||
 509            priv->key == NFT_CT_AVGPKT)
 510                nf_ct_set_acct(ctx->net, true);
 511
 512        return 0;
 513}
 514
 515static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
 516{
 517        switch (priv->key) {
 518#ifdef CONFIG_NF_CONNTRACK_LABELS
 519        case NFT_CT_LABELS:
 520                nf_connlabels_put(ctx->net);
 521                break;
 522#endif
 523#ifdef CONFIG_NF_CONNTRACK_ZONES
 524        case NFT_CT_ZONE:
 525                if (--nft_ct_pcpu_template_refcnt == 0)
 526                        nft_ct_tmpl_put_pcpu();
 527#endif
 528        default:
 529                break;
 530        }
 531}
 532
 533static int nft_ct_set_init(const struct nft_ctx *ctx,
 534                           const struct nft_expr *expr,
 535                           const struct nlattr * const tb[])
 536{
 537        struct nft_ct *priv = nft_expr_priv(expr);
 538        unsigned int len;
 539        int err;
 540
 541        priv->dir = IP_CT_DIR_MAX;
 542        priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
 543        switch (priv->key) {
 544#ifdef CONFIG_NF_CONNTRACK_MARK
 545        case NFT_CT_MARK:
 546                if (tb[NFTA_CT_DIRECTION])
 547                        return -EINVAL;
 548                len = FIELD_SIZEOF(struct nf_conn, mark);
 549                break;
 550#endif
 551#ifdef CONFIG_NF_CONNTRACK_LABELS
 552        case NFT_CT_LABELS:
 553                if (tb[NFTA_CT_DIRECTION])
 554                        return -EINVAL;
 555                len = NF_CT_LABELS_MAX_SIZE;
 556                err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
 557                if (err)
 558                        return err;
 559                break;
 560#endif
 561#ifdef CONFIG_NF_CONNTRACK_ZONES
 562        case NFT_CT_ZONE:
 563                if (!nft_ct_tmpl_alloc_pcpu())
 564                        return -ENOMEM;
 565                nft_ct_pcpu_template_refcnt++;
 566                len = sizeof(u16);
 567                break;
 568#endif
 569#ifdef CONFIG_NF_CONNTRACK_EVENTS
 570        case NFT_CT_EVENTMASK:
 571                if (tb[NFTA_CT_DIRECTION])
 572                        return -EINVAL;
 573                len = sizeof(u32);
 574                break;
 575#endif
 576#ifdef CONFIG_NF_CONNTRACK_SECMARK
 577        case NFT_CT_SECMARK:
 578                if (tb[NFTA_CT_DIRECTION])
 579                        return -EINVAL;
 580                len = sizeof(u32);
 581                break;
 582#endif
 583        default:
 584                return -EOPNOTSUPP;
 585        }
 586
 587        if (tb[NFTA_CT_DIRECTION]) {
 588                priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
 589                switch (priv->dir) {
 590                case IP_CT_DIR_ORIGINAL:
 591                case IP_CT_DIR_REPLY:
 592                        break;
 593                default:
 594                        err = -EINVAL;
 595                        goto err1;
 596                }
 597        }
 598
 599        priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
 600        err = nft_validate_register_load(priv->sreg, len);
 601        if (err < 0)
 602                goto err1;
 603
 604        err = nf_ct_netns_get(ctx->net, ctx->family);
 605        if (err < 0)
 606                goto err1;
 607
 608        return 0;
 609
 610err1:
 611        __nft_ct_set_destroy(ctx, priv);
 612        return err;
 613}
 614
 615static void nft_ct_get_destroy(const struct nft_ctx *ctx,
 616                               const struct nft_expr *expr)
 617{
 618        nf_ct_netns_put(ctx->net, ctx->family);
 619}
 620
 621static void nft_ct_set_destroy(const struct nft_ctx *ctx,
 622                               const struct nft_expr *expr)
 623{
 624        struct nft_ct *priv = nft_expr_priv(expr);
 625
 626        __nft_ct_set_destroy(ctx, priv);
 627        nf_ct_netns_put(ctx->net, ctx->family);
 628}
 629
 630static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
 631{
 632        const struct nft_ct *priv = nft_expr_priv(expr);
 633
 634        if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
 635                goto nla_put_failure;
 636        if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
 637                goto nla_put_failure;
 638
 639        switch (priv->key) {
 640        case NFT_CT_SRC:
 641        case NFT_CT_DST:
 642        case NFT_CT_SRC_IP:
 643        case NFT_CT_DST_IP:
 644        case NFT_CT_SRC_IP6:
 645        case NFT_CT_DST_IP6:
 646        case NFT_CT_PROTO_SRC:
 647        case NFT_CT_PROTO_DST:
 648                if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
 649                        goto nla_put_failure;
 650                break;
 651        case NFT_CT_BYTES:
 652        case NFT_CT_PKTS:
 653        case NFT_CT_AVGPKT:
 654        case NFT_CT_ZONE:
 655                if (priv->dir < IP_CT_DIR_MAX &&
 656                    nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
 657                        goto nla_put_failure;
 658                break;
 659        default:
 660                break;
 661        }
 662
 663        return 0;
 664
 665nla_put_failure:
 666        return -1;
 667}
 668
 669static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
 670{
 671        const struct nft_ct *priv = nft_expr_priv(expr);
 672
 673        if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
 674                goto nla_put_failure;
 675        if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
 676                goto nla_put_failure;
 677
 678        switch (priv->key) {
 679        case NFT_CT_ZONE:
 680                if (priv->dir < IP_CT_DIR_MAX &&
 681                    nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
 682                        goto nla_put_failure;
 683                break;
 684        default:
 685                break;
 686        }
 687
 688        return 0;
 689
 690nla_put_failure:
 691        return -1;
 692}
 693
 694static struct nft_expr_type nft_ct_type;
 695static const struct nft_expr_ops nft_ct_get_ops = {
 696        .type           = &nft_ct_type,
 697        .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
 698        .eval           = nft_ct_get_eval,
 699        .init           = nft_ct_get_init,
 700        .destroy        = nft_ct_get_destroy,
 701        .dump           = nft_ct_get_dump,
 702};
 703
 704static const struct nft_expr_ops nft_ct_set_ops = {
 705        .type           = &nft_ct_type,
 706        .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
 707        .eval           = nft_ct_set_eval,
 708        .init           = nft_ct_set_init,
 709        .destroy        = nft_ct_set_destroy,
 710        .dump           = nft_ct_set_dump,
 711};
 712
 713#ifdef CONFIG_NF_CONNTRACK_ZONES
 714static const struct nft_expr_ops nft_ct_set_zone_ops = {
 715        .type           = &nft_ct_type,
 716        .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
 717        .eval           = nft_ct_set_zone_eval,
 718        .init           = nft_ct_set_init,
 719        .destroy        = nft_ct_set_destroy,
 720        .dump           = nft_ct_set_dump,
 721};
 722#endif
 723
 724static const struct nft_expr_ops *
 725nft_ct_select_ops(const struct nft_ctx *ctx,
 726                    const struct nlattr * const tb[])
 727{
 728        if (tb[NFTA_CT_KEY] == NULL)
 729                return ERR_PTR(-EINVAL);
 730
 731        if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
 732                return ERR_PTR(-EINVAL);
 733
 734        if (tb[NFTA_CT_DREG])
 735                return &nft_ct_get_ops;
 736
 737        if (tb[NFTA_CT_SREG]) {
 738#ifdef CONFIG_NF_CONNTRACK_ZONES
 739                if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
 740                        return &nft_ct_set_zone_ops;
 741#endif
 742                return &nft_ct_set_ops;
 743        }
 744
 745        return ERR_PTR(-EINVAL);
 746}
 747
 748static struct nft_expr_type nft_ct_type __read_mostly = {
 749        .name           = "ct",
 750        .select_ops     = nft_ct_select_ops,
 751        .policy         = nft_ct_policy,
 752        .maxattr        = NFTA_CT_MAX,
 753        .owner          = THIS_MODULE,
 754};
 755
 756static void nft_notrack_eval(const struct nft_expr *expr,
 757                             struct nft_regs *regs,
 758                             const struct nft_pktinfo *pkt)
 759{
 760        struct sk_buff *skb = pkt->skb;
 761        enum ip_conntrack_info ctinfo;
 762        struct nf_conn *ct;
 763
 764        ct = nf_ct_get(pkt->skb, &ctinfo);
 765        /* Previously seen (loopback or untracked)?  Ignore. */
 766        if (ct || ctinfo == IP_CT_UNTRACKED)
 767                return;
 768
 769        nf_ct_set(skb, ct, IP_CT_UNTRACKED);
 770}
 771
 772static struct nft_expr_type nft_notrack_type;
 773static const struct nft_expr_ops nft_notrack_ops = {
 774        .type           = &nft_notrack_type,
 775        .size           = NFT_EXPR_SIZE(0),
 776        .eval           = nft_notrack_eval,
 777};
 778
 779static struct nft_expr_type nft_notrack_type __read_mostly = {
 780        .name           = "notrack",
 781        .ops            = &nft_notrack_ops,
 782        .owner          = THIS_MODULE,
 783};
 784
 785#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 786static int
 787nft_ct_timeout_parse_policy(void *timeouts,
 788                            const struct nf_conntrack_l4proto *l4proto,
 789                            struct net *net, const struct nlattr *attr)
 790{
 791        struct nlattr **tb;
 792        int ret = 0;
 793
 794        tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
 795                     GFP_KERNEL);
 796
 797        if (!tb)
 798                return -ENOMEM;
 799
 800        ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
 801                               attr, l4proto->ctnl_timeout.nla_policy,
 802                               NULL);
 803        if (ret < 0)
 804                goto err;
 805
 806        ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
 807
 808err:
 809        kfree(tb);
 810        return ret;
 811}
 812
 813struct nft_ct_timeout_obj {
 814        struct nf_ct_timeout    *timeout;
 815        u8                      l4proto;
 816};
 817
 818static void nft_ct_timeout_obj_eval(struct nft_object *obj,
 819                                    struct nft_regs *regs,
 820                                    const struct nft_pktinfo *pkt)
 821{
 822        const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 823        struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
 824        struct nf_conn_timeout *timeout;
 825        const unsigned int *values;
 826
 827        if (priv->l4proto != pkt->tprot)
 828                return;
 829
 830        if (!ct || nf_ct_is_template(ct) || nf_ct_is_confirmed(ct))
 831                return;
 832
 833        timeout = nf_ct_timeout_find(ct);
 834        if (!timeout) {
 835                timeout = nf_ct_timeout_ext_add(ct, priv->timeout, GFP_ATOMIC);
 836                if (!timeout) {
 837                        regs->verdict.code = NF_DROP;
 838                        return;
 839                }
 840        }
 841
 842        rcu_assign_pointer(timeout->timeout, priv->timeout);
 843
 844        /* adjust the timeout as per 'new' state. ct is unconfirmed,
 845         * so the current timestamp must not be added.
 846         */
 847        values = nf_ct_timeout_data(timeout);
 848        if (values)
 849                nf_ct_refresh(ct, pkt->skb, values[0]);
 850}
 851
 852static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
 853                                   const struct nlattr * const tb[],
 854                                   struct nft_object *obj)
 855{
 856        struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 857        const struct nf_conntrack_l4proto *l4proto;
 858        struct nf_ct_timeout *timeout;
 859        int l3num = ctx->family;
 860        __u8 l4num;
 861        int ret;
 862
 863        if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
 864            !tb[NFTA_CT_TIMEOUT_DATA])
 865                return -EINVAL;
 866
 867        if (tb[NFTA_CT_TIMEOUT_L3PROTO])
 868                l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
 869
 870        l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
 871        priv->l4proto = l4num;
 872
 873        l4proto = nf_ct_l4proto_find_get(l4num);
 874
 875        if (l4proto->l4proto != l4num) {
 876                ret = -EOPNOTSUPP;
 877                goto err_proto_put;
 878        }
 879
 880        timeout = kzalloc(sizeof(struct nf_ct_timeout) +
 881                          l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
 882        if (timeout == NULL) {
 883                ret = -ENOMEM;
 884                goto err_proto_put;
 885        }
 886
 887        ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net,
 888                                          tb[NFTA_CT_TIMEOUT_DATA]);
 889        if (ret < 0)
 890                goto err_free_timeout;
 891
 892        timeout->l3num = l3num;
 893        timeout->l4proto = l4proto;
 894
 895        ret = nf_ct_netns_get(ctx->net, ctx->family);
 896        if (ret < 0)
 897                goto err_free_timeout;
 898
 899        priv->timeout = timeout;
 900        return 0;
 901
 902err_free_timeout:
 903        kfree(timeout);
 904err_proto_put:
 905        nf_ct_l4proto_put(l4proto);
 906        return ret;
 907}
 908
 909static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
 910                                       struct nft_object *obj)
 911{
 912        struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 913        struct nf_ct_timeout *timeout = priv->timeout;
 914
 915        nf_ct_untimeout(ctx->net, timeout);
 916        nf_ct_l4proto_put(timeout->l4proto);
 917        nf_ct_netns_put(ctx->net, ctx->family);
 918        kfree(priv->timeout);
 919}
 920
 921static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
 922                                   struct nft_object *obj, bool reset)
 923{
 924        const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 925        const struct nf_ct_timeout *timeout = priv->timeout;
 926        struct nlattr *nest_params;
 927        int ret;
 928
 929        if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
 930            nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
 931                return -1;
 932
 933        nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED);
 934        if (!nest_params)
 935                return -1;
 936
 937        ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
 938        if (ret < 0)
 939                return -1;
 940        nla_nest_end(skb, nest_params);
 941        return 0;
 942}
 943
 944static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = {
 945        [NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 },
 946        [NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 },
 947        [NFTA_CT_TIMEOUT_DATA]    = {.type = NLA_NESTED },
 948};
 949
 950static struct nft_object_type nft_ct_timeout_obj_type;
 951
 952static const struct nft_object_ops nft_ct_timeout_obj_ops = {
 953        .type           = &nft_ct_timeout_obj_type,
 954        .size           = sizeof(struct nft_ct_timeout_obj),
 955        .eval           = nft_ct_timeout_obj_eval,
 956        .init           = nft_ct_timeout_obj_init,
 957        .destroy        = nft_ct_timeout_obj_destroy,
 958        .dump           = nft_ct_timeout_obj_dump,
 959};
 960
 961static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = {
 962        .type           = NFT_OBJECT_CT_TIMEOUT,
 963        .ops            = &nft_ct_timeout_obj_ops,
 964        .maxattr        = NFTA_CT_TIMEOUT_MAX,
 965        .policy         = nft_ct_timeout_policy,
 966        .owner          = THIS_MODULE,
 967};
 968#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 969
 970static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
 971                                  const struct nlattr * const tb[],
 972                                  struct nft_object *obj)
 973{
 974        struct nft_ct_helper_obj *priv = nft_obj_data(obj);
 975        struct nf_conntrack_helper *help4, *help6;
 976        char name[NF_CT_HELPER_NAME_LEN];
 977        int family = ctx->family;
 978        int err;
 979
 980        if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
 981                return -EINVAL;
 982
 983        priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
 984        if (!priv->l4proto)
 985                return -ENOENT;
 986
 987        nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
 988
 989        if (tb[NFTA_CT_HELPER_L3PROTO])
 990                family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
 991
 992        help4 = NULL;
 993        help6 = NULL;
 994
 995        switch (family) {
 996        case NFPROTO_IPV4:
 997                if (ctx->family == NFPROTO_IPV6)
 998                        return -EINVAL;
 999
1000                help4 = nf_conntrack_helper_try_module_get(name, family,
1001                                                           priv->l4proto);
1002                break;
1003        case NFPROTO_IPV6:
1004                if (ctx->family == NFPROTO_IPV4)
1005                        return -EINVAL;
1006
1007                help6 = nf_conntrack_helper_try_module_get(name, family,
1008                                                           priv->l4proto);
1009                break;
1010        case NFPROTO_NETDEV: /* fallthrough */
1011        case NFPROTO_BRIDGE: /* same */
1012        case NFPROTO_INET:
1013                help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
1014                                                           priv->l4proto);
1015                help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
1016                                                           priv->l4proto);
1017                break;
1018        default:
1019                return -EAFNOSUPPORT;
1020        }
1021
1022        /* && is intentional; only error if INET found neither ipv4 or ipv6 */
1023        if (!help4 && !help6)
1024                return -ENOENT;
1025
1026        priv->helper4 = help4;
1027        priv->helper6 = help6;
1028
1029        err = nf_ct_netns_get(ctx->net, ctx->family);
1030        if (err < 0)
1031                goto err_put_helper;
1032
1033        return 0;
1034
1035err_put_helper:
1036        if (priv->helper4)
1037                nf_conntrack_helper_put(priv->helper4);
1038        if (priv->helper6)
1039                nf_conntrack_helper_put(priv->helper6);
1040        return err;
1041}
1042
1043static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
1044                                      struct nft_object *obj)
1045{
1046        struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1047
1048        if (priv->helper4)
1049                nf_conntrack_helper_put(priv->helper4);
1050        if (priv->helper6)
1051                nf_conntrack_helper_put(priv->helper6);
1052
1053        nf_ct_netns_put(ctx->net, ctx->family);
1054}
1055
1056static void nft_ct_helper_obj_eval(struct nft_object *obj,
1057                                   struct nft_regs *regs,
1058                                   const struct nft_pktinfo *pkt)
1059{
1060        const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1061        struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
1062        struct nf_conntrack_helper *to_assign = NULL;
1063        struct nf_conn_help *help;
1064
1065        if (!ct ||
1066            nf_ct_is_confirmed(ct) ||
1067            nf_ct_is_template(ct) ||
1068            priv->l4proto != nf_ct_protonum(ct))
1069                return;
1070
1071        switch (nf_ct_l3num(ct)) {
1072        case NFPROTO_IPV4:
1073                to_assign = priv->helper4;
1074                break;
1075        case NFPROTO_IPV6:
1076                to_assign = priv->helper6;
1077                break;
1078        default:
1079                WARN_ON_ONCE(1);
1080                return;
1081        }
1082
1083        if (!to_assign)
1084                return;
1085
1086        if (test_bit(IPS_HELPER_BIT, &ct->status))
1087                return;
1088
1089        help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
1090        if (help) {
1091                rcu_assign_pointer(help->helper, to_assign);
1092                set_bit(IPS_HELPER_BIT, &ct->status);
1093        }
1094}
1095
1096static int nft_ct_helper_obj_dump(struct sk_buff *skb,
1097                                  struct nft_object *obj, bool reset)
1098{
1099        const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1100        const struct nf_conntrack_helper *helper;
1101        u16 family;
1102
1103        if (priv->helper4 && priv->helper6) {
1104                family = NFPROTO_INET;
1105                helper = priv->helper4;
1106        } else if (priv->helper6) {
1107                family = NFPROTO_IPV6;
1108                helper = priv->helper6;
1109        } else {
1110                family = NFPROTO_IPV4;
1111                helper = priv->helper4;
1112        }
1113
1114        if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
1115                return -1;
1116
1117        if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
1118                return -1;
1119
1120        if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
1121                return -1;
1122
1123        return 0;
1124}
1125
1126static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
1127        [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
1128                                  .len = NF_CT_HELPER_NAME_LEN - 1 },
1129        [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
1130        [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
1131};
1132
1133static struct nft_object_type nft_ct_helper_obj_type;
1134static const struct nft_object_ops nft_ct_helper_obj_ops = {
1135        .type           = &nft_ct_helper_obj_type,
1136        .size           = sizeof(struct nft_ct_helper_obj),
1137        .eval           = nft_ct_helper_obj_eval,
1138        .init           = nft_ct_helper_obj_init,
1139        .destroy        = nft_ct_helper_obj_destroy,
1140        .dump           = nft_ct_helper_obj_dump,
1141};
1142
1143static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
1144        .type           = NFT_OBJECT_CT_HELPER,
1145        .ops            = &nft_ct_helper_obj_ops,
1146        .maxattr        = NFTA_CT_HELPER_MAX,
1147        .policy         = nft_ct_helper_policy,
1148        .owner          = THIS_MODULE,
1149};
1150
1151static int __init nft_ct_module_init(void)
1152{
1153        int err;
1154
1155        BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
1156
1157        err = nft_register_expr(&nft_ct_type);
1158        if (err < 0)
1159                return err;
1160
1161        err = nft_register_expr(&nft_notrack_type);
1162        if (err < 0)
1163                goto err1;
1164
1165        err = nft_register_obj(&nft_ct_helper_obj_type);
1166        if (err < 0)
1167                goto err2;
1168#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1169        err = nft_register_obj(&nft_ct_timeout_obj_type);
1170        if (err < 0)
1171                goto err3;
1172#endif
1173        return 0;
1174
1175#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1176err3:
1177        nft_unregister_obj(&nft_ct_helper_obj_type);
1178#endif
1179err2:
1180        nft_unregister_expr(&nft_notrack_type);
1181err1:
1182        nft_unregister_expr(&nft_ct_type);
1183        return err;
1184}
1185
1186static void __exit nft_ct_module_exit(void)
1187{
1188#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1189        nft_unregister_obj(&nft_ct_timeout_obj_type);
1190#endif
1191        nft_unregister_obj(&nft_ct_helper_obj_type);
1192        nft_unregister_expr(&nft_notrack_type);
1193        nft_unregister_expr(&nft_ct_type);
1194}
1195
1196module_init(nft_ct_module_init);
1197module_exit(nft_ct_module_exit);
1198
1199MODULE_LICENSE("GPL");
1200MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
1201MODULE_ALIAS_NFT_EXPR("ct");
1202MODULE_ALIAS_NFT_EXPR("notrack");
1203MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
1204MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
1205