linux/net/netfilter/ipvs/ip_vs_nfct.c
<<
>>
Prefs
   1/*
   2 * ip_vs_nfct.c:        Netfilter connection tracking support for IPVS
   3 *
   4 * Portions Copyright (C) 2001-2002
   5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
   6 *
   7 * Portions Copyright (C) 2003-2010
   8 * Julian Anastasov
   9 *
  10 *
  11 * This code is free software; you can redistribute it and/or modify
  12 * it under the terms of the GNU General Public License as published by
  13 * the Free Software Foundation; either version 2 of the License, or
  14 * (at your option) any later version.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 * GNU General Public License for more details.
  20 *
  21 * You should have received a copy of the GNU General Public License
  22 * along with this program; if not, write to the Free Software
  23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24 *
  25 *
  26 * Authors:
  27 * Ben North <ben@redfrontdoor.org>
  28 * Julian Anastasov <ja@ssi.bg>         Reorganize and sync with latest kernels
  29 * Hannes Eder <heder@google.com>       Extend NFCT support for FTP, ipvs match
  30 *
  31 *
  32 * Current status:
  33 *
  34 * - provide conntrack confirmation for new and related connections, so
  35 * that we can see their proper conntrack state in all hooks
  36 * - support for all forwarding methods, not only NAT
  37 * - FTP support (NAT), ability to support other NAT apps with expectations
  38 * - to correctly create expectations for related NAT connections the proper
  39 * NF conntrack support must be already installed, eg. ip_vs_ftp requires
  40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
  41 * NAT rules are needed)
  42 * - alter reply for NAT when forwarding packet in original direction:
  43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
  44 * when RELATED conntrack is created from real server (Active FTP DATA)
  45 * - if iptables_nat is not loaded the Passive FTP will not work (the
  46 * PASV response can not be NAT-ed) but Active FTP should work
  47 *
  48 */
  49
/*
 * Message prefix for this file: pr_fmt() prepends "IPVS: " to the format
 * string it is given.  Both macros are defined before the #include lines
 * below (presumably so the kernel printk helpers pick up pr_fmt - confirm
 * against linux/printk.h).
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  52
  53#include <linux/module.h>
  54#include <linux/types.h>
  55#include <linux/kernel.h>
  56#include <linux/errno.h>
  57#include <linux/compiler.h>
  58#include <linux/vmalloc.h>
  59#include <linux/skbuff.h>
  60#include <net/ip.h>
  61#include <linux/netfilter.h>
  62#include <linux/netfilter_ipv4.h>
  63#include <net/ip_vs.h>
  64#include <net/netfilter/nf_conntrack_core.h>
  65#include <net/netfilter/nf_conntrack_expect.h>
  66#include <net/netfilter/nf_conntrack_helper.h>
  67#include <net/netfilter/nf_conntrack_zones.h>
  68
  69
/*
 * Debug helpers for printing a struct nf_conntrack_tuple.
 * FMT_TUPLE is the format fragment, ARG_TUPLE(T) expands to the matching
 * argument list for a tuple pointer T:
 *      src-addr:src-port->dst-addr:dst-port/protonum
 * Only the IPv4 view (.u3.ip, printed with %pI4) of each address is used,
 * and T is evaluated several times, so pass an expression without side
 * effects.
 */
#define FMT_TUPLE       "%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(T)    &(T)->src.u3.ip, ntohs((T)->src.u.all), \
                        &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
                        (T)->dst.protonum

/*
 * Debug helpers for printing a struct ip_vs_conn.
 * FMT_CONN is the format fragment, ARG_CONN(C) expands to the matching
 * argument list for a connection pointer C:
 *      caddr:cport->vaddr:vport->daddr:dport/protocol:state
 * As above, only the IPv4 view (.ip) of each address is printed and C is
 * evaluated several times.
 */
#define FMT_CONN        "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(C)     &((C)->caddr.ip), ntohs((C)->cport), \
                        &((C)->vaddr.ip), ntohs((C)->vport), \
                        &((C)->daddr.ip), ntohs((C)->dport), \
                        (C)->protocol, (C)->state
  80
  81void
  82ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
  83{
  84        enum ip_conntrack_info ctinfo;
  85        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
  86        struct nf_conntrack_tuple new_tuple;
  87
  88        if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
  89            nf_ct_is_dying(ct))
  90                return;
  91
  92        /* Never alter conntrack for non-NAT conns */
  93        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  94                return;
  95
  96        /* Alter reply only in original direction */
  97        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
  98                return;
  99
 100        /*
 101         * The connection is not yet in the hashtable, so we update it.
 102         * CIP->VIP will remain the same, so leave the tuple in
 103         * IP_CT_DIR_ORIGINAL untouched.  When the reply comes back from the
 104         * real-server we will see RIP->DIP.
 105         */
 106        new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 107        /*
 108         * This will also take care of UDP and other protocols.
 109         */
 110        if (outin) {
 111                new_tuple.src.u3 = cp->daddr;
 112                if (new_tuple.dst.protonum != IPPROTO_ICMP &&
 113                    new_tuple.dst.protonum != IPPROTO_ICMPV6)
 114                        new_tuple.src.u.tcp.port = cp->dport;
 115        } else {
 116                new_tuple.dst.u3 = cp->vaddr;
 117                if (new_tuple.dst.protonum != IPPROTO_ICMP &&
 118                    new_tuple.dst.protonum != IPPROTO_ICMPV6)
 119                        new_tuple.dst.u.tcp.port = cp->vport;
 120        }
 121        IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
 122                  "ctinfo=%d, old reply=" FMT_TUPLE
 123                  ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
 124                  __func__, ct, ct->status, ctinfo,
 125                  ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
 126                  ARG_TUPLE(&new_tuple), ARG_CONN(cp));
 127        nf_conntrack_alter_reply(ct, &new_tuple);
 128}
 129
 130int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
 131{
 132        return nf_conntrack_confirm(skb);
 133}
 134
 135/*
 136 * Called from init_conntrack() as expectfn handler.
 137 */
 138static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 139        struct nf_conntrack_expect *exp)
 140{
 141        struct nf_conntrack_tuple *orig, new_reply;
 142        struct ip_vs_conn *cp;
 143        struct ip_vs_conn_param p;
 144        struct net *net = nf_ct_net(ct);
 145
 146        if (exp->tuple.src.l3num != PF_INET)
 147                return;
 148
 149        /*
 150         * We assume that no NF locks are held before this callback.
 151         * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
 152         * expectations even if they use wildcard values, now we provide the
 153         * actual values from the newly created original conntrack direction.
 154         * The conntrack is confirmed when packet reaches IPVS hooks.
 155         */
 156
 157        /* RS->CLIENT */
 158        orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 159        ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
 160                              &orig->src.u3, orig->src.u.tcp.port,
 161                              &orig->dst.u3, orig->dst.u.tcp.port, &p);
 162        cp = ip_vs_conn_out_get(&p);
 163        if (cp) {
 164                /* Change reply CLIENT->RS to CLIENT->VS */
 165                new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 166                IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
 167                          FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
 168                          __func__, ct, ct->status,
 169                          ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
 170                          ARG_CONN(cp));
 171                new_reply.dst.u3 = cp->vaddr;
 172                new_reply.dst.u.tcp.port = cp->vport;
 173                IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
 174                          ", inout cp=" FMT_CONN "\n",
 175                          __func__, ct,
 176                          ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
 177                          ARG_CONN(cp));
 178                goto alter;
 179        }
 180
 181        /* CLIENT->VS */
 182        cp = ip_vs_conn_in_get(&p);
 183        if (cp) {
 184                /* Change reply VS->CLIENT to RS->CLIENT */
 185                new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 186                IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
 187                          FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
 188                          __func__, ct, ct->status,
 189                          ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
 190                          ARG_CONN(cp));
 191                new_reply.src.u3 = cp->daddr;
 192                new_reply.src.u.tcp.port = cp->dport;
 193                IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
 194                          FMT_TUPLE ", outin cp=" FMT_CONN "\n",
 195                          __func__, ct,
 196                          ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
 197                          ARG_CONN(cp));
 198                goto alter;
 199        }
 200
 201        IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
 202                  " - unknown expect\n",
 203                  __func__, ct, ct->status, ARG_TUPLE(orig));
 204        return;
 205
 206alter:
 207        /* Never alter conntrack for non-NAT conns */
 208        if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
 209                nf_conntrack_alter_reply(ct, &new_reply);
 210        ip_vs_conn_put(cp);
 211        return;
 212}
 213
 214/*
 215 * Create NF conntrack expectation with wildcard (optional) source port.
 216 * Then the default callback function will alter the reply and will confirm
 217 * the conntrack entry when the first packet comes.
 218 * Use port 0 to expect connection from any port.
 219 */
 220void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
 221                               struct ip_vs_conn *cp, u_int8_t proto,
 222                               const __be16 port, int from_rs)
 223{
 224        struct nf_conntrack_expect *exp;
 225
 226        if (ct == NULL || nf_ct_is_untracked(ct))
 227                return;
 228
 229        exp = nf_ct_expect_alloc(ct);
 230        if (!exp)
 231                return;
 232
 233        nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 234                        from_rs ? &cp->daddr : &cp->caddr,
 235                        from_rs ? &cp->caddr : &cp->vaddr,
 236                        proto, port ? &port : NULL,
 237                        from_rs ? &cp->cport : &cp->vport);
 238
 239        exp->expectfn = ip_vs_nfct_expect_callback;
 240
 241        IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
 242                __func__, ct, ARG_TUPLE(&exp->tuple));
 243        nf_ct_expect_related(exp);
 244        nf_ct_expect_put(exp);
 245}
 246EXPORT_SYMBOL(ip_vs_nfct_expect_related);
 247
 248/*
 249 * Our connection was terminated, try to drop the conntrack immediately
 250 */
 251void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 252{
 253        struct nf_conntrack_tuple_hash *h;
 254        struct nf_conn *ct;
 255        struct nf_conntrack_tuple tuple;
 256
 257        if (!cp->cport)
 258                return;
 259
 260        tuple = (struct nf_conntrack_tuple) {
 261                .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
 262        tuple.src.u3 = cp->caddr;
 263        tuple.src.u.all = cp->cport;
 264        tuple.src.l3num = cp->af;
 265        tuple.dst.u3 = cp->vaddr;
 266        tuple.dst.u.all = cp->vport;
 267
 268        IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
 269                " for conn " FMT_CONN "\n",
 270                __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 271
 272        h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
 273                                  &tuple);
 274        if (h) {
 275                ct = nf_ct_tuplehash_to_ctrack(h);
 276                /* Show what happens instead of calling nf_ct_kill() */
 277                if (del_timer(&ct->timeout)) {
 278                        IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
 279                                FMT_TUPLE "\n",
 280                                __func__, ct, ARG_TUPLE(&tuple));
 281                        if (ct->timeout.function)
 282                                ct->timeout.function(ct->timeout.data);
 283                } else {
 284                        IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
 285                                FMT_TUPLE "\n",
 286                                __func__, ct, ARG_TUPLE(&tuple));
 287                }
 288                nf_ct_put(ct);
 289        } else {
 290                IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
 291                        __func__, ARG_TUPLE(&tuple));
 292        }
 293}
 294
 295