linux/net/openvswitch/conntrack.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2015 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 */
  13
  14#include <linux/module.h>
  15#include <linux/openvswitch.h>
  16#include <linux/tcp.h>
  17#include <linux/udp.h>
  18#include <linux/sctp.h>
  19#include <net/ip.h>
  20#include <net/netfilter/nf_conntrack_core.h>
  21#include <net/netfilter/nf_conntrack_helper.h>
  22#include <net/netfilter/nf_conntrack_labels.h>
  23#include <net/netfilter/nf_conntrack_seqadj.h>
  24#include <net/netfilter/nf_conntrack_zones.h>
  25#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
  26
  27#ifdef CONFIG_NF_NAT_NEEDED
  28#include <linux/netfilter/nf_nat.h>
  29#include <net/netfilter/nf_nat_core.h>
  30#include <net/netfilter/nf_nat_l3proto.h>
  31#endif
  32
  33#include "datapath.h"
  34#include "conntrack.h"
  35#include "flow.h"
  36#include "flow_netlink.h"
  37
  38struct ovs_ct_len_tbl {
  39        int maxlen;
  40        int minlen;
  41};
  42
  43/* Metadata mark for masked write to conntrack mark */
  44struct md_mark {
  45        u32 value;
  46        u32 mask;
  47};
  48
  49/* Metadata label for masked write to conntrack label. */
  50struct md_labels {
  51        struct ovs_key_ct_labels value;
  52        struct ovs_key_ct_labels mask;
  53};
  54
  55enum ovs_ct_nat {
  56        OVS_CT_NAT = 1 << 0,     /* NAT for committed connections only. */
  57        OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
  58        OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
  59};
  60
  61/* Conntrack action context for execution. */
  62struct ovs_conntrack_info {
  63        struct nf_conntrack_helper *helper;
  64        struct nf_conntrack_zone zone;
  65        struct nf_conn *ct;
  66        u8 commit : 1;
  67        u8 nat : 3;                 /* enum ovs_ct_nat */
  68        u16 family;
  69        struct md_mark mark;
  70        struct md_labels labels;
  71#ifdef CONFIG_NF_NAT_NEEDED
  72        struct nf_nat_range range;  /* Only present for SRC NAT and DST NAT. */
  73#endif
  74};
  75
  76static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
  77
  78static u16 key_to_nfproto(const struct sw_flow_key *key)
  79{
  80        switch (ntohs(key->eth.type)) {
  81        case ETH_P_IP:
  82                return NFPROTO_IPV4;
  83        case ETH_P_IPV6:
  84                return NFPROTO_IPV6;
  85        default:
  86                return NFPROTO_UNSPEC;
  87        }
  88}
  89
  90/* Map SKB connection state into the values used by flow definition. */
  91static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
  92{
  93        u8 ct_state = OVS_CS_F_TRACKED;
  94
  95        switch (ctinfo) {
  96        case IP_CT_ESTABLISHED_REPLY:
  97        case IP_CT_RELATED_REPLY:
  98                ct_state |= OVS_CS_F_REPLY_DIR;
  99                break;
 100        default:
 101                break;
 102        }
 103
 104        switch (ctinfo) {
 105        case IP_CT_ESTABLISHED:
 106        case IP_CT_ESTABLISHED_REPLY:
 107                ct_state |= OVS_CS_F_ESTABLISHED;
 108                break;
 109        case IP_CT_RELATED:
 110        case IP_CT_RELATED_REPLY:
 111                ct_state |= OVS_CS_F_RELATED;
 112                break;
 113        case IP_CT_NEW:
 114                ct_state |= OVS_CS_F_NEW;
 115                break;
 116        default:
 117                break;
 118        }
 119
 120        return ct_state;
 121}
 122
 123static u32 ovs_ct_get_mark(const struct nf_conn *ct)
 124{
 125#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
 126        return ct ? ct->mark : 0;
 127#else
 128        return 0;
 129#endif
 130}
 131
 132static void ovs_ct_get_labels(const struct nf_conn *ct,
 133                              struct ovs_key_ct_labels *labels)
 134{
 135        struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
 136
 137        if (cl) {
 138                size_t len = sizeof(cl->bits);
 139
 140                if (len > OVS_CT_LABELS_LEN)
 141                        len = OVS_CT_LABELS_LEN;
 142                else if (len < OVS_CT_LABELS_LEN)
 143                        memset(labels, 0, OVS_CT_LABELS_LEN);
 144                memcpy(labels, cl->bits, len);
 145        } else {
 146                memset(labels, 0, OVS_CT_LABELS_LEN);
 147        }
 148}
 149
 150static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
 151                                const struct nf_conntrack_zone *zone,
 152                                const struct nf_conn *ct)
 153{
 154        key->ct.state = state;
 155        key->ct.zone = zone->id;
 156        key->ct.mark = ovs_ct_get_mark(ct);
 157        ovs_ct_get_labels(ct, &key->ct.labels);
 158}
 159
 160/* Update 'key' based on skb->nfct.  If 'post_ct' is true, then OVS has
 161 * previously sent the packet to conntrack via the ct action.  If
 162 * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
 163 * initialized from the connection status.
 164 */
 165static void ovs_ct_update_key(const struct sk_buff *skb,
 166                              const struct ovs_conntrack_info *info,
 167                              struct sw_flow_key *key, bool post_ct,
 168                              bool keep_nat_flags)
 169{
 170        const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
 171        enum ip_conntrack_info ctinfo;
 172        struct nf_conn *ct;
 173        u8 state = 0;
 174
 175        ct = nf_ct_get(skb, &ctinfo);
 176        if (ct) {
 177                state = ovs_ct_get_state(ctinfo);
 178                /* All unconfirmed entries are NEW connections. */
 179                if (!nf_ct_is_confirmed(ct))
 180                        state |= OVS_CS_F_NEW;
 181                /* OVS persists the related flag for the duration of the
 182                 * connection.
 183                 */
 184                if (ct->master)
 185                        state |= OVS_CS_F_RELATED;
 186                if (keep_nat_flags) {
 187                        state |= key->ct.state & OVS_CS_F_NAT_MASK;
 188                } else {
 189                        if (ct->status & IPS_SRC_NAT)
 190                                state |= OVS_CS_F_SRC_NAT;
 191                        if (ct->status & IPS_DST_NAT)
 192                                state |= OVS_CS_F_DST_NAT;
 193                }
 194                zone = nf_ct_zone(ct);
 195        } else if (post_ct) {
 196                state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
 197                if (info)
 198                        zone = &info->zone;
 199        }
 200        __ovs_ct_update_key(key, state, zone, ct);
 201}
 202
 203/* This is called to initialize CT key fields possibly coming in from the local
 204 * stack.
 205 */
 206void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
 207{
 208        ovs_ct_update_key(skb, NULL, key, false, false);
 209}
 210
 211int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
 212{
 213        if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
 214                return -EMSGSIZE;
 215
 216        if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
 217            nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
 218                return -EMSGSIZE;
 219
 220        if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
 221            nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
 222                return -EMSGSIZE;
 223
 224        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
 225            nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
 226                    &key->ct.labels))
 227                return -EMSGSIZE;
 228
 229        return 0;
 230}
 231
 232static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
 233                           u32 ct_mark, u32 mask)
 234{
 235#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
 236        enum ip_conntrack_info ctinfo;
 237        struct nf_conn *ct;
 238        u32 new_mark;
 239
 240        /* The connection could be invalid, in which case set_mark is no-op. */
 241        ct = nf_ct_get(skb, &ctinfo);
 242        if (!ct)
 243                return 0;
 244
 245        new_mark = ct_mark | (ct->mark & ~(mask));
 246        if (ct->mark != new_mark) {
 247                ct->mark = new_mark;
 248                nf_conntrack_event_cache(IPCT_MARK, ct);
 249                key->ct.mark = new_mark;
 250        }
 251
 252        return 0;
 253#else
 254        return -ENOTSUPP;
 255#endif
 256}
 257
 258static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
 259                             const struct ovs_key_ct_labels *labels,
 260                             const struct ovs_key_ct_labels *mask)
 261{
 262        enum ip_conntrack_info ctinfo;
 263        struct nf_conn_labels *cl;
 264        struct nf_conn *ct;
 265        int err;
 266
 267        /* The connection could be invalid, in which case set_label is no-op.*/
 268        ct = nf_ct_get(skb, &ctinfo);
 269        if (!ct)
 270                return 0;
 271
 272        cl = nf_ct_labels_find(ct);
 273        if (!cl) {
 274                nf_ct_labels_ext_add(ct);
 275                cl = nf_ct_labels_find(ct);
 276        }
 277        if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN)
 278                return -ENOSPC;
 279
 280        err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
 281                                    OVS_CT_LABELS_LEN / sizeof(u32));
 282        if (err)
 283                return err;
 284
 285        ovs_ct_get_labels(ct, &key->ct.labels);
 286        return 0;
 287}
 288
 289/* 'skb' should already be pulled to nh_ofs. */
 290static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
 291{
 292        const struct nf_conntrack_helper *helper;
 293        const struct nf_conn_help *help;
 294        enum ip_conntrack_info ctinfo;
 295        unsigned int protoff;
 296        struct nf_conn *ct;
 297        int err;
 298
 299        ct = nf_ct_get(skb, &ctinfo);
 300        if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 301                return NF_ACCEPT;
 302
 303        help = nfct_help(ct);
 304        if (!help)
 305                return NF_ACCEPT;
 306
 307        helper = rcu_dereference(help->helper);
 308        if (!helper)
 309                return NF_ACCEPT;
 310
 311        switch (proto) {
 312        case NFPROTO_IPV4:
 313                protoff = ip_hdrlen(skb);
 314                break;
 315        case NFPROTO_IPV6: {
 316                u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 317                __be16 frag_off;
 318                int ofs;
 319
 320                ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
 321                                       &frag_off);
 322                if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
 323                        pr_debug("proto header not found\n");
 324                        return NF_ACCEPT;
 325                }
 326                protoff = ofs;
 327                break;
 328        }
 329        default:
 330                WARN_ONCE(1, "helper invoked on non-IP family!");
 331                return NF_DROP;
 332        }
 333
 334        err = helper->help(skb, protoff, ct, ctinfo);
 335        if (err != NF_ACCEPT)
 336                return err;
 337
 338        /* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
 339         * FTP with NAT) adusting the TCP payload size when mangling IP
 340         * addresses and/or port numbers in the text-based control connection.
 341         */
 342        if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
 343            !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
 344                return NF_DROP;
 345        return NF_ACCEPT;
 346}
 347
 348/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 349 * value if 'skb' is freed.
 350 */
 351static int handle_fragments(struct net *net, struct sw_flow_key *key,
 352                            u16 zone, struct sk_buff *skb)
 353{
 354        struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
 355        int err;
 356
 357        if (key->eth.type == htons(ETH_P_IP)) {
 358                enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
 359
 360                memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 361                err = ip_defrag(net, skb, user);
 362                if (err)
 363                        return err;
 364
 365                ovs_cb.mru = IPCB(skb)->frag_max_size;
 366#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 367        } else if (key->eth.type == htons(ETH_P_IPV6)) {
 368                enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
 369
 370                skb_orphan(skb);
 371                memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
 372                err = nf_ct_frag6_gather(net, skb, user);
 373                if (err) {
 374                        if (err != -EINPROGRESS)
 375                                kfree_skb(skb);
 376                        return err;
 377                }
 378
 379                key->ip.proto = ipv6_hdr(skb)->nexthdr;
 380                ovs_cb.mru = IP6CB(skb)->frag_max_size;
 381#endif
 382        } else {
 383                kfree_skb(skb);
 384                return -EPFNOSUPPORT;
 385        }
 386
 387        key->ip.frag = OVS_FRAG_TYPE_NONE;
 388        skb_clear_hash(skb);
 389        skb->ignore_df = 1;
 390        *OVS_CB(skb) = ovs_cb;
 391
 392        return 0;
 393}
 394
 395static struct nf_conntrack_expect *
 396ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
 397                   u16 proto, const struct sk_buff *skb)
 398{
 399        struct nf_conntrack_tuple tuple;
 400
 401        if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
 402                return NULL;
 403        return __nf_ct_expect_find(net, zone, &tuple);
 404}
 405
 406/* This replicates logic from nf_conntrack_core.c that is not exported. */
 407static enum ip_conntrack_info
 408ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
 409{
 410        const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 411
 412        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
 413                return IP_CT_ESTABLISHED_REPLY;
 414        /* Once we've had two way comms, always ESTABLISHED. */
 415        if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
 416                return IP_CT_ESTABLISHED;
 417        if (test_bit(IPS_EXPECTED_BIT, &ct->status))
 418                return IP_CT_RELATED;
 419        return IP_CT_NEW;
 420}
 421
 422/* Find an existing connection which this packet belongs to without
 423 * re-attributing statistics or modifying the connection state.  This allows an
 424 * skb->nfct lost due to an upcall to be recovered during actions execution.
 425 *
 426 * Must be called with rcu_read_lock.
 427 *
 428 * On success, populates skb->nfct and skb->nfctinfo, and returns the
 429 * connection.  Returns NULL if there is no existing entry.
 430 */
 431static struct nf_conn *
 432ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 433                     u8 l3num, struct sk_buff *skb)
 434{
 435        struct nf_conntrack_l3proto *l3proto;
 436        struct nf_conntrack_l4proto *l4proto;
 437        struct nf_conntrack_tuple tuple;
 438        struct nf_conntrack_tuple_hash *h;
 439        struct nf_conn *ct;
 440        unsigned int dataoff;
 441        u8 protonum;
 442
 443        l3proto = __nf_ct_l3proto_find(l3num);
 444        if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
 445                                 &protonum) <= 0) {
 446                pr_debug("ovs_ct_find_existing: Can't get protonum\n");
 447                return NULL;
 448        }
 449        l4proto = __nf_ct_l4proto_find(l3num, protonum);
 450        if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
 451                             protonum, net, &tuple, l3proto, l4proto)) {
 452                pr_debug("ovs_ct_find_existing: Can't get tuple\n");
 453                return NULL;
 454        }
 455
 456        /* look for tuple match */
 457        h = nf_conntrack_find_get(net, zone, &tuple);
 458        if (!h)
 459                return NULL;   /* Not found. */
 460
 461        ct = nf_ct_tuplehash_to_ctrack(h);
 462
 463        skb->nfct = &ct->ct_general;
 464        skb->nfctinfo = ovs_ct_get_info(h);
 465        return ct;
 466}
 467
 468/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
 469static bool skb_nfct_cached(struct net *net,
 470                            const struct sw_flow_key *key,
 471                            const struct ovs_conntrack_info *info,
 472                            struct sk_buff *skb)
 473{
 474        enum ip_conntrack_info ctinfo;
 475        struct nf_conn *ct;
 476
 477        ct = nf_ct_get(skb, &ctinfo);
 478        /* If no ct, check if we have evidence that an existing conntrack entry
 479         * might be found for this skb.  This happens when we lose a skb->nfct
 480         * due to an upcall.  If the connection was not confirmed, it is not
 481         * cached and needs to be run through conntrack again.
 482         */
 483        if (!ct && key->ct.state & OVS_CS_F_TRACKED &&
 484            !(key->ct.state & OVS_CS_F_INVALID) &&
 485            key->ct.zone == info->zone.id)
 486                ct = ovs_ct_find_existing(net, &info->zone, info->family, skb);
 487        if (!ct)
 488                return false;
 489        if (!net_eq(net, read_pnet(&ct->ct_net)))
 490                return false;
 491        if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
 492                return false;
 493        if (info->helper) {
 494                struct nf_conn_help *help;
 495
 496                help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
 497                if (help && rcu_access_pointer(help->helper) != info->helper)
 498                        return false;
 499        }
 500
 501        return true;
 502}
 503
 504#ifdef CONFIG_NF_NAT_NEEDED
 505/* Modelled after nf_nat_ipv[46]_fn().
 506 * range is only used for new, uninitialized NAT state.
 507 * Returns either NF_ACCEPT or NF_DROP.
 508 */
 509static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 510                              enum ip_conntrack_info ctinfo,
 511                              const struct nf_nat_range *range,
 512                              enum nf_nat_manip_type maniptype)
 513{
 514        int hooknum, nh_off, err = NF_ACCEPT;
 515
 516        nh_off = skb_network_offset(skb);
 517        skb_pull(skb, nh_off);
 518
 519        /* See HOOK2MANIP(). */
 520        if (maniptype == NF_NAT_MANIP_SRC)
 521                hooknum = NF_INET_LOCAL_IN; /* Source NAT */
 522        else
 523                hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
 524
 525        switch (ctinfo) {
 526        case IP_CT_RELATED:
 527        case IP_CT_RELATED_REPLY:
 528                if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
 529                    skb->protocol == htons(ETH_P_IP) &&
 530                    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 531                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 532                                                           hooknum))
 533                                err = NF_DROP;
 534                        goto push;
 535                } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
 536                           skb->protocol == htons(ETH_P_IPV6)) {
 537                        __be16 frag_off;
 538                        u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 539                        int hdrlen = ipv6_skip_exthdr(skb,
 540                                                      sizeof(struct ipv6hdr),
 541                                                      &nexthdr, &frag_off);
 542
 543                        if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
 544                                if (!nf_nat_icmpv6_reply_translation(skb, ct,
 545                                                                     ctinfo,
 546                                                                     hooknum,
 547                                                                     hdrlen))
 548                                        err = NF_DROP;
 549                                goto push;
 550                        }
 551                }
 552                /* Non-ICMP, fall thru to initialize if needed. */
 553        case IP_CT_NEW:
 554                /* Seen it before?  This can happen for loopback, retrans,
 555                 * or local packets.
 556                 */
 557                if (!nf_nat_initialized(ct, maniptype)) {
 558                        /* Initialize according to the NAT action. */
 559                        err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
 560                                /* Action is set up to establish a new
 561                                 * mapping.
 562                                 */
 563                                ? nf_nat_setup_info(ct, range, maniptype)
 564                                : nf_nat_alloc_null_binding(ct, hooknum);
 565                        if (err != NF_ACCEPT)
 566                                goto push;
 567                }
 568                break;
 569
 570        case IP_CT_ESTABLISHED:
 571        case IP_CT_ESTABLISHED_REPLY:
 572                break;
 573
 574        default:
 575                err = NF_DROP;
 576                goto push;
 577        }
 578
 579        err = nf_nat_packet(ct, ctinfo, hooknum, skb);
 580push:
 581        skb_push(skb, nh_off);
 582
 583        return err;
 584}
 585
 586static void ovs_nat_update_key(struct sw_flow_key *key,
 587                               const struct sk_buff *skb,
 588                               enum nf_nat_manip_type maniptype)
 589{
 590        if (maniptype == NF_NAT_MANIP_SRC) {
 591                __be16 src;
 592
 593                key->ct.state |= OVS_CS_F_SRC_NAT;
 594                if (key->eth.type == htons(ETH_P_IP))
 595                        key->ipv4.addr.src = ip_hdr(skb)->saddr;
 596                else if (key->eth.type == htons(ETH_P_IPV6))
 597                        memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
 598                               sizeof(key->ipv6.addr.src));
 599                else
 600                        return;
 601
 602                if (key->ip.proto == IPPROTO_UDP)
 603                        src = udp_hdr(skb)->source;
 604                else if (key->ip.proto == IPPROTO_TCP)
 605                        src = tcp_hdr(skb)->source;
 606                else if (key->ip.proto == IPPROTO_SCTP)
 607                        src = sctp_hdr(skb)->source;
 608                else
 609                        return;
 610
 611                key->tp.src = src;
 612        } else {
 613                __be16 dst;
 614
 615                key->ct.state |= OVS_CS_F_DST_NAT;
 616                if (key->eth.type == htons(ETH_P_IP))
 617                        key->ipv4.addr.dst = ip_hdr(skb)->daddr;
 618                else if (key->eth.type == htons(ETH_P_IPV6))
 619                        memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
 620                               sizeof(key->ipv6.addr.dst));
 621                else
 622                        return;
 623
 624                if (key->ip.proto == IPPROTO_UDP)
 625                        dst = udp_hdr(skb)->dest;
 626                else if (key->ip.proto == IPPROTO_TCP)
 627                        dst = tcp_hdr(skb)->dest;
 628                else if (key->ip.proto == IPPROTO_SCTP)
 629                        dst = sctp_hdr(skb)->dest;
 630                else
 631                        return;
 632
 633                key->tp.dst = dst;
 634        }
 635}
 636
 637/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
 638static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
 639                      const struct ovs_conntrack_info *info,
 640                      struct sk_buff *skb, struct nf_conn *ct,
 641                      enum ip_conntrack_info ctinfo)
 642{
 643        enum nf_nat_manip_type maniptype;
 644        int err;
 645
 646        if (nf_ct_is_untracked(ct)) {
 647                /* A NAT action may only be performed on tracked packets. */
 648                return NF_ACCEPT;
 649        }
 650
 651        /* Add NAT extension if not confirmed yet. */
 652        if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
 653                return NF_ACCEPT;   /* Can't NAT. */
 654
 655        /* Determine NAT type.
 656         * Check if the NAT type can be deduced from the tracked connection.
 657         * Make sure new expected connections (IP_CT_RELATED) are NATted only
 658         * when committing.
 659         */
 660        if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
 661            ct->status & IPS_NAT_MASK &&
 662            (ctinfo != IP_CT_RELATED || info->commit)) {
 663                /* NAT an established or related connection like before. */
 664                if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
 665                        /* This is the REPLY direction for a connection
 666                         * for which NAT was applied in the forward
 667                         * direction.  Do the reverse NAT.
 668                         */
 669                        maniptype = ct->status & IPS_SRC_NAT
 670                                ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
 671                else
 672                        maniptype = ct->status & IPS_SRC_NAT
 673                                ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
 674        } else if (info->nat & OVS_CT_SRC_NAT) {
 675                maniptype = NF_NAT_MANIP_SRC;
 676        } else if (info->nat & OVS_CT_DST_NAT) {
 677                maniptype = NF_NAT_MANIP_DST;
 678        } else {
 679                return NF_ACCEPT; /* Connection is not NATed. */
 680        }
 681        err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
 682
 683        /* Mark NAT done if successful and update the flow key. */
 684        if (err == NF_ACCEPT)
 685                ovs_nat_update_key(key, skb, maniptype);
 686
 687        return err;
 688}
 689#else /* !CONFIG_NF_NAT_NEEDED */
 690static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
 691                      const struct ovs_conntrack_info *info,
 692                      struct sk_buff *skb, struct nf_conn *ct,
 693                      enum ip_conntrack_info ctinfo)
 694{
 695        return NF_ACCEPT;
 696}
 697#endif
 698
 699/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
 700 * not done already.  Update key with new CT state after passing the packet
 701 * through conntrack.
 702 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
 703 * set to NULL and 0 will be returned.
 704 */
 705static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 706                           const struct ovs_conntrack_info *info,
 707                           struct sk_buff *skb)
 708{
 709        /* If we are recirculating packets to match on conntrack fields and
 710         * committing with a separate conntrack action,  then we don't need to
 711         * actually run the packet through conntrack twice unless it's for a
 712         * different zone.
 713         */
 714        bool cached = skb_nfct_cached(net, key, info, skb);
 715        enum ip_conntrack_info ctinfo;
 716        struct nf_conn *ct;
 717
 718        if (!cached) {
 719                struct nf_conn *tmpl = info->ct;
 720                int err;
 721
 722                /* Associate skb with specified zone. */
 723                if (tmpl) {
 724                        if (skb->nfct)
 725                                nf_conntrack_put(skb->nfct);
 726                        nf_conntrack_get(&tmpl->ct_general);
 727                        skb->nfct = &tmpl->ct_general;
 728                        skb->nfctinfo = IP_CT_NEW;
 729                }
 730
 731                /* Repeat if requested, see nf_iterate(). */
 732                do {
 733                        err = nf_conntrack_in(net, info->family,
 734                                              NF_INET_PRE_ROUTING, skb);
 735                } while (err == NF_REPEAT);
 736
 737                if (err != NF_ACCEPT)
 738                        return -ENOENT;
 739
 740                /* Clear CT state NAT flags to mark that we have not yet done
 741                 * NAT after the nf_conntrack_in() call.  We can actually clear
 742                 * the whole state, as it will be re-initialized below.
 743                 */
 744                key->ct.state = 0;
 745
 746                /* Update the key, but keep the NAT flags. */
 747                ovs_ct_update_key(skb, info, key, true, true);
 748        }
 749
 750        ct = nf_ct_get(skb, &ctinfo);
 751        if (ct) {
 752                /* Packets starting a new connection must be NATted before the
 753                 * helper, so that the helper knows about the NAT.  We enforce
 754                 * this by delaying both NAT and helper calls for unconfirmed
 755                 * connections until the committing CT action.  For later
 756                 * packets NAT and Helper may be called in either order.
 757                 *
 758                 * NAT will be done only if the CT action has NAT, and only
 759                 * once per packet (per zone), as guarded by the NAT bits in
 760                 * the key->ct.state.
 761                 */
 762                if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) &&
 763                    (nf_ct_is_confirmed(ct) || info->commit) &&
 764                    ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
 765                        return -EINVAL;
 766                }
 767
 768                /* Userspace may decide to perform a ct lookup without a helper
 769                 * specified followed by a (recirculate and) commit with one.
 770                 * Therefore, for unconfirmed connections which we will commit,
 771                 * we need to attach the helper here.
 772                 */
 773                if (!nf_ct_is_confirmed(ct) && info->commit &&
 774                    info->helper && !nfct_help(ct)) {
 775                        int err = __nf_ct_try_assign_helper(ct, info->ct,
 776                                                            GFP_ATOMIC);
 777                        if (err)
 778                                return err;
 779                }
 780
 781                /* Call the helper only if:
 782                 * - nf_conntrack_in() was executed above ("!cached") for a
 783                 *   confirmed connection, or
 784                 * - When committing an unconfirmed connection.
 785                 */
 786                if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
 787                    ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
 788                        return -EINVAL;
 789                }
 790        }
 791
 792        return 0;
 793}
 794
 795/* Lookup connection and read fields into key. */
 796static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 797                         const struct ovs_conntrack_info *info,
 798                         struct sk_buff *skb)
 799{
 800        struct nf_conntrack_expect *exp;
 801
 802        /* If we pass an expected packet through nf_conntrack_in() the
 803         * expectation is typically removed, but the packet could still be
 804         * lost in upcall processing.  To prevent this from happening we
 805         * perform an explicit expectation lookup.  Expected connections are
 806         * always new, and will be passed through conntrack only when they are
 807         * committed, as it is OK to remove the expectation at that time.
 808         */
 809        exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
 810        if (exp) {
 811                u8 state;
 812
 813                /* NOTE: New connections are NATted and Helped only when
 814                 * committed, so we are not calling into NAT here.
 815                 */
 816                state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
 817                __ovs_ct_update_key(key, state, &info->zone, exp->master);
 818        } else {
 819                struct nf_conn *ct;
 820                int err;
 821
 822                err = __ovs_ct_lookup(net, key, info, skb);
 823                if (err)
 824                        return err;
 825
 826                ct = (struct nf_conn *)skb->nfct;
 827                if (ct)
 828                        nf_ct_deliver_cached_events(ct);
 829        }
 830
 831        return 0;
 832}
 833
 834static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
 835{
 836        size_t i;
 837
 838        for (i = 0; i < sizeof(*labels); i++)
 839                if (labels->ct_labels[i])
 840                        return true;
 841
 842        return false;
 843}
 844
 845/* Lookup connection and confirm if unconfirmed. */
 846static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 847                         const struct ovs_conntrack_info *info,
 848                         struct sk_buff *skb)
 849{
 850        int err;
 851
 852        err = __ovs_ct_lookup(net, key, info, skb);
 853        if (err)
 854                return err;
 855
 856        /* Apply changes before confirming the connection so that the initial
 857         * conntrack NEW netlink event carries the values given in the CT
 858         * action.
 859         */
 860        if (info->mark.mask) {
 861                err = ovs_ct_set_mark(skb, key, info->mark.value,
 862                                      info->mark.mask);
 863                if (err)
 864                        return err;
 865        }
 866        if (labels_nonzero(&info->labels.mask)) {
 867                err = ovs_ct_set_labels(skb, key, &info->labels.value,
 868                                        &info->labels.mask);
 869                if (err)
 870                        return err;
 871        }
 872        /* This will take care of sending queued events even if the connection
 873         * is already confirmed.
 874         */
 875        if (nf_conntrack_confirm(skb) != NF_ACCEPT)
 876                return -EINVAL;
 877
 878        return 0;
 879}
 880
 881/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 882 * value if 'skb' is freed.
 883 */
 884int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 885                   struct sw_flow_key *key,
 886                   const struct ovs_conntrack_info *info)
 887{
 888        int nh_ofs;
 889        int err;
 890
 891        /* The conntrack module expects to be working at L3. */
 892        nh_ofs = skb_network_offset(skb);
 893        skb_pull(skb, nh_ofs);
 894
 895        if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
 896                err = handle_fragments(net, key, info->zone.id, skb);
 897                if (err)
 898                        return err;
 899        }
 900
 901        if (info->commit)
 902                err = ovs_ct_commit(net, key, info, skb);
 903        else
 904                err = ovs_ct_lookup(net, key, info, skb);
 905
 906        skb_push(skb, nh_ofs);
 907        if (err)
 908                kfree_skb(skb);
 909        return err;
 910}
 911
 912static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 913                             const struct sw_flow_key *key, bool log)
 914{
 915        struct nf_conntrack_helper *helper;
 916        struct nf_conn_help *help;
 917
 918        helper = nf_conntrack_helper_try_module_get(name, info->family,
 919                                                    key->ip.proto);
 920        if (!helper) {
 921                OVS_NLERR(log, "Unknown helper \"%s\"", name);
 922                return -EINVAL;
 923        }
 924
 925        help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
 926        if (!help) {
 927                module_put(helper->me);
 928                return -ENOMEM;
 929        }
 930
 931        rcu_assign_pointer(help->helper, helper);
 932        info->helper = helper;
 933        return 0;
 934}
 935
 936#ifdef CONFIG_NF_NAT_NEEDED
 937static int parse_nat(const struct nlattr *attr,
 938                     struct ovs_conntrack_info *info, bool log)
 939{
 940        struct nlattr *a;
 941        int rem;
 942        bool have_ip_max = false;
 943        bool have_proto_max = false;
 944        bool ip_vers = (info->family == NFPROTO_IPV6);
 945
 946        nla_for_each_nested(a, attr, rem) {
 947                static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
 948                        [OVS_NAT_ATTR_SRC] = {0, 0},
 949                        [OVS_NAT_ATTR_DST] = {0, 0},
 950                        [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
 951                                                 sizeof(struct in6_addr)},
 952                        [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
 953                                                 sizeof(struct in6_addr)},
 954                        [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
 955                        [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
 956                        [OVS_NAT_ATTR_PERSISTENT] = {0, 0},
 957                        [OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
 958                        [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
 959                };
 960                int type = nla_type(a);
 961
 962                if (type > OVS_NAT_ATTR_MAX) {
 963                        OVS_NLERR(log,
 964                                  "Unknown NAT attribute (type=%d, max=%d).\n",
 965                                  type, OVS_NAT_ATTR_MAX);
 966                        return -EINVAL;
 967                }
 968
 969                if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
 970                        OVS_NLERR(log,
 971                                  "NAT attribute type %d has unexpected length (%d != %d).\n",
 972                                  type, nla_len(a),
 973                                  ovs_nat_attr_lens[type][ip_vers]);
 974                        return -EINVAL;
 975                }
 976
 977                switch (type) {
 978                case OVS_NAT_ATTR_SRC:
 979                case OVS_NAT_ATTR_DST:
 980                        if (info->nat) {
 981                                OVS_NLERR(log,
 982                                          "Only one type of NAT may be specified.\n"
 983                                          );
 984                                return -ERANGE;
 985                        }
 986                        info->nat |= OVS_CT_NAT;
 987                        info->nat |= ((type == OVS_NAT_ATTR_SRC)
 988                                        ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
 989                        break;
 990
 991                case OVS_NAT_ATTR_IP_MIN:
 992                        nla_memcpy(&info->range.min_addr, a,
 993                                   sizeof(info->range.min_addr));
 994                        info->range.flags |= NF_NAT_RANGE_MAP_IPS;
 995                        break;
 996
 997                case OVS_NAT_ATTR_IP_MAX:
 998                        have_ip_max = true;
 999                        nla_memcpy(&info->range.max_addr, a,
1000                                   sizeof(info->range.max_addr));
1001                        info->range.flags |= NF_NAT_RANGE_MAP_IPS;
1002                        break;
1003
1004                case OVS_NAT_ATTR_PROTO_MIN:
1005                        info->range.min_proto.all = htons(nla_get_u16(a));
1006                        info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1007                        break;
1008
1009                case OVS_NAT_ATTR_PROTO_MAX:
1010                        have_proto_max = true;
1011                        info->range.max_proto.all = htons(nla_get_u16(a));
1012                        info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1013                        break;
1014
1015                case OVS_NAT_ATTR_PERSISTENT:
1016                        info->range.flags |= NF_NAT_RANGE_PERSISTENT;
1017                        break;
1018
1019                case OVS_NAT_ATTR_PROTO_HASH:
1020                        info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
1021                        break;
1022
1023                case OVS_NAT_ATTR_PROTO_RANDOM:
1024                        info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
1025                        break;
1026
1027                default:
1028                        OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
1029                        return -EINVAL;
1030                }
1031        }
1032
1033        if (rem > 0) {
1034                OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
1035                return -EINVAL;
1036        }
1037        if (!info->nat) {
1038                /* Do not allow flags if no type is given. */
1039                if (info->range.flags) {
1040                        OVS_NLERR(log,
1041                                  "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n"
1042                                  );
1043                        return -EINVAL;
1044                }
1045                info->nat = OVS_CT_NAT;   /* NAT existing connections. */
1046        } else if (!info->commit) {
1047                OVS_NLERR(log,
1048                          "NAT attributes may be specified only when CT COMMIT flag is also specified.\n"
1049                          );
1050                return -EINVAL;
1051        }
1052        /* Allow missing IP_MAX. */
1053        if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
1054                memcpy(&info->range.max_addr, &info->range.min_addr,
1055                       sizeof(info->range.max_addr));
1056        }
1057        /* Allow missing PROTO_MAX. */
1058        if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1059            !have_proto_max) {
1060                info->range.max_proto.all = info->range.min_proto.all;
1061        }
1062        return 0;
1063}
1064#endif
1065
1066static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
1067        [OVS_CT_ATTR_COMMIT]    = { .minlen = 0, .maxlen = 0 },
1068        [OVS_CT_ATTR_ZONE]      = { .minlen = sizeof(u16),
1069                                    .maxlen = sizeof(u16) },
1070        [OVS_CT_ATTR_MARK]      = { .minlen = sizeof(struct md_mark),
1071                                    .maxlen = sizeof(struct md_mark) },
1072        [OVS_CT_ATTR_LABELS]    = { .minlen = sizeof(struct md_labels),
1073                                    .maxlen = sizeof(struct md_labels) },
1074        [OVS_CT_ATTR_HELPER]    = { .minlen = 1,
1075                                    .maxlen = NF_CT_HELPER_NAME_LEN },
1076#ifdef CONFIG_NF_NAT_NEEDED
1077        /* NAT length is checked when parsing the nested attributes. */
1078        [OVS_CT_ATTR_NAT]       = { .minlen = 0, .maxlen = INT_MAX },
1079#endif
1080};
1081
1082static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
1083                    const char **helper, bool log)
1084{
1085        struct nlattr *a;
1086        int rem;
1087
1088        nla_for_each_nested(a, attr, rem) {
1089                int type = nla_type(a);
1090                int maxlen = ovs_ct_attr_lens[type].maxlen;
1091                int minlen = ovs_ct_attr_lens[type].minlen;
1092
1093                if (type > OVS_CT_ATTR_MAX) {
1094                        OVS_NLERR(log,
1095                                  "Unknown conntrack attr (type=%d, max=%d)",
1096                                  type, OVS_CT_ATTR_MAX);
1097                        return -EINVAL;
1098                }
1099                if (nla_len(a) < minlen || nla_len(a) > maxlen) {
1100                        OVS_NLERR(log,
1101                                  "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
1102                                  type, nla_len(a), maxlen);
1103                        return -EINVAL;
1104                }
1105
1106                switch (type) {
1107                case OVS_CT_ATTR_COMMIT:
1108                        info->commit = true;
1109                        break;
1110#ifdef CONFIG_NF_CONNTRACK_ZONES
1111                case OVS_CT_ATTR_ZONE:
1112                        info->zone.id = nla_get_u16(a);
1113                        break;
1114#endif
1115#ifdef CONFIG_NF_CONNTRACK_MARK
1116                case OVS_CT_ATTR_MARK: {
1117                        struct md_mark *mark = nla_data(a);
1118
1119                        if (!mark->mask) {
1120                                OVS_NLERR(log, "ct_mark mask cannot be 0");
1121                                return -EINVAL;
1122                        }
1123                        info->mark = *mark;
1124                        break;
1125                }
1126#endif
1127#ifdef CONFIG_NF_CONNTRACK_LABELS
1128                case OVS_CT_ATTR_LABELS: {
1129                        struct md_labels *labels = nla_data(a);
1130
1131                        if (!labels_nonzero(&labels->mask)) {
1132                                OVS_NLERR(log, "ct_labels mask cannot be 0");
1133                                return -EINVAL;
1134                        }
1135                        info->labels = *labels;
1136                        break;
1137                }
1138#endif
1139                case OVS_CT_ATTR_HELPER:
1140                        *helper = nla_data(a);
1141                        if (!memchr(*helper, '\0', nla_len(a))) {
1142                                OVS_NLERR(log, "Invalid conntrack helper");
1143                                return -EINVAL;
1144                        }
1145                        break;
1146#ifdef CONFIG_NF_NAT_NEEDED
1147                case OVS_CT_ATTR_NAT: {
1148                        int err = parse_nat(a, info, log);
1149
1150                        if (err)
1151                                return err;
1152                        break;
1153                }
1154#endif
1155                default:
1156                        OVS_NLERR(log, "Unknown conntrack attr (%d)",
1157                                  type);
1158                        return -EINVAL;
1159                }
1160        }
1161
1162#ifdef CONFIG_NF_CONNTRACK_MARK
1163        if (!info->commit && info->mark.mask) {
1164                OVS_NLERR(log,
1165                          "Setting conntrack mark requires 'commit' flag.");
1166                return -EINVAL;
1167        }
1168#endif
1169#ifdef CONFIG_NF_CONNTRACK_LABELS
1170        if (!info->commit && labels_nonzero(&info->labels.mask)) {
1171                OVS_NLERR(log,
1172                          "Setting conntrack labels requires 'commit' flag.");
1173                return -EINVAL;
1174        }
1175#endif
1176        if (rem > 0) {
1177                OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
1178                return -EINVAL;
1179        }
1180
1181        return 0;
1182}
1183
1184bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
1185{
1186        if (attr == OVS_KEY_ATTR_CT_STATE)
1187                return true;
1188        if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
1189            attr == OVS_KEY_ATTR_CT_ZONE)
1190                return true;
1191        if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
1192            attr == OVS_KEY_ATTR_CT_MARK)
1193                return true;
1194        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
1195            attr == OVS_KEY_ATTR_CT_LABELS) {
1196                struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1197
1198                return ovs_net->xt_label;
1199        }
1200
1201        return false;
1202}
1203
1204int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
1205                       const struct sw_flow_key *key,
1206                       struct sw_flow_actions **sfa,  bool log)
1207{
1208        struct ovs_conntrack_info ct_info;
1209        const char *helper = NULL;
1210        u16 family;
1211        int err;
1212
1213        family = key_to_nfproto(key);
1214        if (family == NFPROTO_UNSPEC) {
1215                OVS_NLERR(log, "ct family unspecified");
1216                return -EINVAL;
1217        }
1218
1219        memset(&ct_info, 0, sizeof(ct_info));
1220        ct_info.family = family;
1221
1222        nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
1223                        NF_CT_DEFAULT_ZONE_DIR, 0);
1224
1225        err = parse_ct(attr, &ct_info, &helper, log);
1226        if (err)
1227                return err;
1228
1229        /* Set up template for tracking connections in specific zones. */
1230        ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
1231        if (!ct_info.ct) {
1232                OVS_NLERR(log, "Failed to allocate conntrack template");
1233                return -ENOMEM;
1234        }
1235
1236        __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
1237        nf_conntrack_get(&ct_info.ct->ct_general);
1238
1239        if (helper) {
1240                err = ovs_ct_add_helper(&ct_info, helper, key, log);
1241                if (err)
1242                        goto err_free_ct;
1243        }
1244
1245        err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
1246                                 sizeof(ct_info), log);
1247        if (err)
1248                goto err_free_ct;
1249
1250        return 0;
1251err_free_ct:
1252        __ovs_ct_free_action(&ct_info);
1253        return err;
1254}
1255
1256#ifdef CONFIG_NF_NAT_NEEDED
1257static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
1258                               struct sk_buff *skb)
1259{
1260        struct nlattr *start;
1261
1262        start = nla_nest_start(skb, OVS_CT_ATTR_NAT);
1263        if (!start)
1264                return false;
1265
1266        if (info->nat & OVS_CT_SRC_NAT) {
1267                if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
1268                        return false;
1269        } else if (info->nat & OVS_CT_DST_NAT) {
1270                if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
1271                        return false;
1272        } else {
1273                goto out;
1274        }
1275
1276        if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
1277                if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
1278                    info->family == NFPROTO_IPV4) {
1279                        if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
1280                                            info->range.min_addr.ip) ||
1281                            (info->range.max_addr.ip
1282                             != info->range.min_addr.ip &&
1283                             (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
1284                                              info->range.max_addr.ip))))
1285                                return false;
1286                } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
1287                           info->family == NFPROTO_IPV6) {
1288                        if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
1289                                             &info->range.min_addr.in6) ||
1290                            (memcmp(&info->range.max_addr.in6,
1291                                    &info->range.min_addr.in6,
1292                                    sizeof(info->range.max_addr.in6)) &&
1293                             (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
1294                                               &info->range.max_addr.in6))))
1295                                return false;
1296                } else {
1297                        return false;
1298                }
1299        }
1300        if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1301            (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
1302                         ntohs(info->range.min_proto.all)) ||
1303             (info->range.max_proto.all != info->range.min_proto.all &&
1304              nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
1305                          ntohs(info->range.max_proto.all)))))
1306                return false;
1307
1308        if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
1309            nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
1310                return false;
1311        if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
1312            nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
1313                return false;
1314        if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
1315            nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
1316                return false;
1317out:
1318        nla_nest_end(skb, start);
1319
1320        return true;
1321}
1322#endif
1323
1324int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
1325                          struct sk_buff *skb)
1326{
1327        struct nlattr *start;
1328
1329        start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
1330        if (!start)
1331                return -EMSGSIZE;
1332
1333        if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
1334                return -EMSGSIZE;
1335        if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
1336            nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
1337                return -EMSGSIZE;
1338        if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
1339            nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
1340                    &ct_info->mark))
1341                return -EMSGSIZE;
1342        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
1343            labels_nonzero(&ct_info->labels.mask) &&
1344            nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
1345                    &ct_info->labels))
1346                return -EMSGSIZE;
1347        if (ct_info->helper) {
1348                if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
1349                                   ct_info->helper->name))
1350                        return -EMSGSIZE;
1351        }
1352#ifdef CONFIG_NF_NAT_NEEDED
1353        if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
1354                return -EMSGSIZE;
1355#endif
1356        nla_nest_end(skb, start);
1357
1358        return 0;
1359}
1360
/* Release the resources held by the conntrack action whose payload is the
 * struct ovs_conntrack_info stored in netlink attribute 'a'.
 */
void ovs_ct_free_action(const struct nlattr *a)
{
        __ovs_ct_free_action(nla_data(a));
}
1367
1368static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
1369{
1370        if (ct_info->helper)
1371                module_put(ct_info->helper->me);
1372        if (ct_info->ct)
1373                nf_ct_tmpl_free(ct_info->ct);
1374}
1375
1376void ovs_ct_init(struct net *net)
1377{
1378        unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
1379        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1380
1381        if (nf_connlabels_get(net, n_bits - 1)) {
1382                ovs_net->xt_label = false;
1383                OVS_NLERR(true, "Failed to set connlabel length");
1384        } else {
1385                ovs_net->xt_label = true;
1386        }
1387}
1388
1389void ovs_ct_exit(struct net *net)
1390{
1391        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1392
1393        if (ovs_net->xt_label)
1394                nf_connlabels_put(net);
1395}
1396