linux/net/netfilter/ipvs/ip_vs_app.c
<<
>>
Prefs
   1/*
   2 * ip_vs_app.c: Application module support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
  12 * is that ip_vs_app module handles the reverse direction (incoming requests
  13 * and outgoing responses).
  14 *
  15 *              IP_MASQ_APP application masquerading module
  16 *
  17 * Author:      Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
  18 *
  19 */
  20
  21#define KMSG_COMPONENT "IPVS"
  22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  23
  24#include <linux/module.h>
  25#include <linux/kernel.h>
  26#include <linux/skbuff.h>
  27#include <linux/in.h>
  28#include <linux/ip.h>
  29#include <linux/netfilter.h>
  30#include <linux/slab.h>
  31#include <net/net_namespace.h>
  32#include <net/protocol.h>
  33#include <net/tcp.h>
  34#include <linux/stat.h>
  35#include <linux/proc_fs.h>
  36#include <linux/seq_file.h>
  37#include <linux/mutex.h>
  38
  39#include <net/ip_vs.h>
  40
  /*
   * Single mutex taken by the register/unregister paths and held across
   * the /proc seq_file walk (locked in ->start, released in ->stop).
   */
  static DEFINE_MUTEX(__ip_vs_app_mutex);

  /* Entry points exported for use by other kernel modules. */
  EXPORT_SYMBOL(register_ip_vs_app_inc);
  EXPORT_SYMBOL(register_ip_vs_app);
  EXPORT_SYMBOL(unregister_ip_vs_app);
  46
  47/*
  48 *      Get an ip_vs_app object
  49 */
  50static inline int ip_vs_app_get(struct ip_vs_app *app)
  51{
  52        return try_module_get(app->module);
  53}
  54
  55
  56static inline void ip_vs_app_put(struct ip_vs_app *app)
  57{
  58        module_put(app->module);
  59}
  60
  61static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
  62{
  63        kfree(inc->timeout_table);
  64        kfree(inc);
  65}
  66
  67static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
  68{
  69        struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
  70
  71        ip_vs_app_inc_destroy(inc);
  72}
  73
  74/*
  75 *      Allocate/initialize app incarnation and register it in proto apps.
  76 */
  77static int
  78ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
  79                  __u16 port)
  80{
  81        struct ip_vs_protocol *pp;
  82        struct ip_vs_app *inc;
  83        int ret;
  84
  85        if (!(pp = ip_vs_proto_get(proto)))
  86                return -EPROTONOSUPPORT;
  87
  88        if (!pp->unregister_app)
  89                return -EOPNOTSUPP;
  90
  91        inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
  92        if (!inc)
  93                return -ENOMEM;
  94        INIT_LIST_HEAD(&inc->p_list);
  95        INIT_LIST_HEAD(&inc->incs_list);
  96        inc->app = app;
  97        inc->port = htons(port);
  98        atomic_set(&inc->usecnt, 0);
  99
 100        if (app->timeouts) {
 101                inc->timeout_table =
 102                        ip_vs_create_timeout_table(app->timeouts,
 103                                                   app->timeouts_size);
 104                if (!inc->timeout_table) {
 105                        ret = -ENOMEM;
 106                        goto out;
 107                }
 108        }
 109
 110        ret = pp->register_app(ipvs, inc);
 111        if (ret)
 112                goto out;
 113
 114        list_add(&inc->a_list, &app->incs_list);
 115        IP_VS_DBG(9, "%s App %s:%u registered\n",
 116                  pp->name, inc->name, ntohs(inc->port));
 117
 118        return 0;
 119
 120  out:
 121        ip_vs_app_inc_destroy(inc);
 122        return ret;
 123}
 124
 125
 126/*
 127 *      Release app incarnation
 128 */
 129static void
 130ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 131{
 132        struct ip_vs_protocol *pp;
 133
 134        if (!(pp = ip_vs_proto_get(inc->protocol)))
 135                return;
 136
 137        if (pp->unregister_app)
 138                pp->unregister_app(ipvs, inc);
 139
 140        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 141                  pp->name, inc->name, ntohs(inc->port));
 142
 143        list_del(&inc->a_list);
 144
 145        call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
 146}
 147
 148
 149/*
 150 *      Get reference to app inc (only called from softirq)
 151 *
 152 */
 153int ip_vs_app_inc_get(struct ip_vs_app *inc)
 154{
 155        int result;
 156
 157        result = ip_vs_app_get(inc->app);
 158        if (result)
 159                atomic_inc(&inc->usecnt);
 160        return result;
 161}
 162
 163
 164/*
 165 *      Put the app inc (only called from timer or net softirq)
 166 */
 167void ip_vs_app_inc_put(struct ip_vs_app *inc)
 168{
 169        atomic_dec(&inc->usecnt);
 170        ip_vs_app_put(inc->app);
 171}
 172
 173
 174/*
 175 *      Register an application incarnation in protocol applications
 176 */
 177int
 178register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
 179                       __u16 port)
 180{
 181        int result;
 182
 183        mutex_lock(&__ip_vs_app_mutex);
 184
 185        result = ip_vs_app_inc_new(ipvs, app, proto, port);
 186
 187        mutex_unlock(&__ip_vs_app_mutex);
 188
 189        return result;
 190}
 191
 192
 193/* Register application for netns */
 194struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 195{
 196        struct ip_vs_app *a;
 197        int err = 0;
 198
 199        mutex_lock(&__ip_vs_app_mutex);
 200
 201        list_for_each_entry(a, &ipvs->app_list, a_list) {
 202                if (!strcmp(app->name, a->name)) {
 203                        err = -EEXIST;
 204                        goto out_unlock;
 205                }
 206        }
 207        a = kmemdup(app, sizeof(*app), GFP_KERNEL);
 208        if (!a) {
 209                err = -ENOMEM;
 210                goto out_unlock;
 211        }
 212        INIT_LIST_HEAD(&a->incs_list);
 213        list_add(&a->a_list, &ipvs->app_list);
 214        /* increase the module use count */
 215        ip_vs_use_count_inc();
 216
 217out_unlock:
 218        mutex_unlock(&__ip_vs_app_mutex);
 219
 220        return err ? ERR_PTR(err) : a;
 221}
 222
 223
 224/*
 225 *      ip_vs_app unregistration routine
 226 *      We are sure there are no app incarnations attached to services
 227 *      Caller should use synchronize_rcu() or rcu_barrier()
 228 */
 229void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 230{
 231        struct ip_vs_app *a, *anxt, *inc, *nxt;
 232
 233        mutex_lock(&__ip_vs_app_mutex);
 234
 235        list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
 236                if (app && strcmp(app->name, a->name))
 237                        continue;
 238                list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
 239                        ip_vs_app_inc_release(ipvs, inc);
 240                }
 241
 242                list_del(&a->a_list);
 243                kfree(a);
 244
 245                /* decrease the module use count */
 246                ip_vs_use_count_dec();
 247        }
 248
 249        mutex_unlock(&__ip_vs_app_mutex);
 250}
 251
 252
 253/*
 254 *      Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 255 */
 256int ip_vs_bind_app(struct ip_vs_conn *cp,
 257                   struct ip_vs_protocol *pp)
 258{
 259        return pp->app_conn_bind(cp);
 260}
 261
 262
 263/*
 264 *      Unbind cp from application incarnation (called by cp destructor)
 265 */
 266void ip_vs_unbind_app(struct ip_vs_conn *cp)
 267{
 268        struct ip_vs_app *inc = cp->app;
 269
 270        if (!inc)
 271                return;
 272
 273        if (inc->unbind_conn)
 274                inc->unbind_conn(inc, cp);
 275        if (inc->done_conn)
 276                inc->done_conn(inc, cp);
 277        ip_vs_app_inc_put(inc);
 278        cp->app = NULL;
 279}
 280
 281
 282/*
 283 *      Fixes th->seq based on ip_vs_seq info.
 284 */
 285static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 286{
 287        __u32 seq = ntohl(th->seq);
 288
 289        /*
 290         *      Adjust seq with delta-offset for all packets after
 291         *      the most recent resized pkt seq and with previous_delta offset
 292         *      for all packets before most recent resized pkt seq.
 293         */
 294        if (vseq->delta || vseq->previous_delta) {
 295                if(after(seq, vseq->init_seq)) {
 296                        th->seq = htonl(seq + vseq->delta);
 297                        IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
 298                                  __func__, vseq->delta);
 299                } else {
 300                        th->seq = htonl(seq + vseq->previous_delta);
 301                        IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
 302                                  __func__, vseq->previous_delta);
 303                }
 304        }
 305}
 306
 307
 308/*
 309 *      Fixes th->ack_seq based on ip_vs_seq info.
 310 */
 311static inline void
 312vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 313{
 314        __u32 ack_seq = ntohl(th->ack_seq);
 315
 316        /*
 317         * Adjust ack_seq with delta-offset for
 318         * the packets AFTER most recent resized pkt has caused a shift
 319         * for packets before most recent resized pkt, use previous_delta
 320         */
 321        if (vseq->delta || vseq->previous_delta) {
 322                /* since ack_seq is the number of octet that is expected
 323                   to receive next, so compare it with init_seq+delta */
 324                if(after(ack_seq, vseq->init_seq+vseq->delta)) {
 325                        th->ack_seq = htonl(ack_seq - vseq->delta);
 326                        IP_VS_DBG(9, "%s(): subtracted delta "
 327                                  "(%d) from ack_seq\n", __func__, vseq->delta);
 328
 329                } else {
 330                        th->ack_seq = htonl(ack_seq - vseq->previous_delta);
 331                        IP_VS_DBG(9, "%s(): subtracted "
 332                                  "previous_delta (%d) from ack_seq\n",
 333                                  __func__, vseq->previous_delta);
 334                }
 335        }
 336}
 337
 338
 339/*
 340 *      Updates ip_vs_seq if pkt has been resized
 341 *      Assumes already checked proto==IPPROTO_TCP and diff!=0.
 342 */
 343static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 344                                 unsigned int flag, __u32 seq, int diff)
 345{
 346        /* spinlock is to keep updating cp->flags atomic */
 347        spin_lock_bh(&cp->lock);
 348        if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
 349                vseq->previous_delta = vseq->delta;
 350                vseq->delta += diff;
 351                vseq->init_seq = seq;
 352                cp->flags |= flag;
 353        }
 354        spin_unlock_bh(&cp->lock);
 355}
 356
 357static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 358                                  struct ip_vs_app *app)
 359{
 360        int diff;
 361        const unsigned int tcp_offset = ip_hdrlen(skb);
 362        struct tcphdr *th;
 363        __u32 seq;
 364
 365        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 366                return 0;
 367
 368        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 369
 370        /*
 371         *      Remember seq number in case this pkt gets resized
 372         */
 373        seq = ntohl(th->seq);
 374
 375        /*
 376         *      Fix seq stuff if flagged as so.
 377         */
 378        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 379                vs_fix_seq(&cp->out_seq, th);
 380        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 381                vs_fix_ack_seq(&cp->in_seq, th);
 382
 383        /*
 384         *      Call private output hook function
 385         */
 386        if (app->pkt_out == NULL)
 387                return 1;
 388
 389        if (!app->pkt_out(app, cp, skb, &diff))
 390                return 0;
 391
 392        /*
 393         *      Update ip_vs seq stuff if len has changed.
 394         */
 395        if (diff != 0)
 396                vs_seq_update(cp, &cp->out_seq,
 397                              IP_VS_CONN_F_OUT_SEQ, seq, diff);
 398
 399        return 1;
 400}
 401
 402/*
 403 *      Output pkt hook. Will call bound ip_vs_app specific function
 404 *      called by ipvs packet handler, assumes previously checked cp!=NULL
 405 *      returns false if it can't handle packet (oom)
 406 */
 407int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
 408{
 409        struct ip_vs_app *app;
 410
 411        /*
 412         *      check if application module is bound to
 413         *      this ip_vs_conn.
 414         */
 415        if ((app = cp->app) == NULL)
 416                return 1;
 417
 418        /* TCP is complicated */
 419        if (cp->protocol == IPPROTO_TCP)
 420                return app_tcp_pkt_out(cp, skb, app);
 421
 422        /*
 423         *      Call private output hook function
 424         */
 425        if (app->pkt_out == NULL)
 426                return 1;
 427
 428        return app->pkt_out(app, cp, skb, NULL);
 429}
 430
 431
 432static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 433                                 struct ip_vs_app *app)
 434{
 435        int diff;
 436        const unsigned int tcp_offset = ip_hdrlen(skb);
 437        struct tcphdr *th;
 438        __u32 seq;
 439
 440        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 441                return 0;
 442
 443        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 444
 445        /*
 446         *      Remember seq number in case this pkt gets resized
 447         */
 448        seq = ntohl(th->seq);
 449
 450        /*
 451         *      Fix seq stuff if flagged as so.
 452         */
 453        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 454                vs_fix_seq(&cp->in_seq, th);
 455        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 456                vs_fix_ack_seq(&cp->out_seq, th);
 457
 458        /*
 459         *      Call private input hook function
 460         */
 461        if (app->pkt_in == NULL)
 462                return 1;
 463
 464        if (!app->pkt_in(app, cp, skb, &diff))
 465                return 0;
 466
 467        /*
 468         *      Update ip_vs seq stuff if len has changed.
 469         */
 470        if (diff != 0)
 471                vs_seq_update(cp, &cp->in_seq,
 472                              IP_VS_CONN_F_IN_SEQ, seq, diff);
 473
 474        return 1;
 475}
 476
 477/*
 478 *      Input pkt hook. Will call bound ip_vs_app specific function
 479 *      called by ipvs packet handler, assumes previously checked cp!=NULL.
 480 *      returns false if can't handle packet (oom).
 481 */
 482int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
 483{
 484        struct ip_vs_app *app;
 485
 486        /*
 487         *      check if application module is bound to
 488         *      this ip_vs_conn.
 489         */
 490        if ((app = cp->app) == NULL)
 491                return 1;
 492
 493        /* TCP is complicated */
 494        if (cp->protocol == IPPROTO_TCP)
 495                return app_tcp_pkt_in(cp, skb, app);
 496
 497        /*
 498         *      Call private input hook function
 499         */
 500        if (app->pkt_in == NULL)
 501                return 1;
 502
 503        return app->pkt_in(app, cp, skb, NULL);
 504}
 505
 506
 507#ifdef CONFIG_PROC_FS
 508/*
 509 *      /proc/net/ip_vs_app entry function
 510 */
 511
 512static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 513{
 514        struct ip_vs_app *app, *inc;
 515
 516        list_for_each_entry(app, &ipvs->app_list, a_list) {
 517                list_for_each_entry(inc, &app->incs_list, a_list) {
 518                        if (pos-- == 0)
 519                                return inc;
 520                }
 521        }
 522        return NULL;
 523
 524}
 525
 526static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 527{
 528        struct net *net = seq_file_net(seq);
 529        struct netns_ipvs *ipvs = net_ipvs(net);
 530
 531        mutex_lock(&__ip_vs_app_mutex);
 532
 533        return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 534}
 535
 536static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 537{
 538        struct ip_vs_app *inc, *app;
 539        struct list_head *e;
 540        struct net *net = seq_file_net(seq);
 541        struct netns_ipvs *ipvs = net_ipvs(net);
 542
 543        ++*pos;
 544        if (v == SEQ_START_TOKEN)
 545                return ip_vs_app_idx(ipvs, 0);
 546
 547        inc = v;
 548        app = inc->app;
 549
 550        if ((e = inc->a_list.next) != &app->incs_list)
 551                return list_entry(e, struct ip_vs_app, a_list);
 552
 553        /* go on to next application */
 554        for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 555                app = list_entry(e, struct ip_vs_app, a_list);
 556                list_for_each_entry(inc, &app->incs_list, a_list) {
 557                        return inc;
 558                }
 559        }
 560        return NULL;
 561}
 562
 563static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 564{
 565        mutex_unlock(&__ip_vs_app_mutex);
 566}
 567
 568static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
 569{
 570        if (v == SEQ_START_TOKEN)
 571                seq_puts(seq, "prot port    usecnt name\n");
 572        else {
 573                const struct ip_vs_app *inc = v;
 574
 575                seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
 576                           ip_vs_proto_name(inc->protocol),
 577                           ntohs(inc->port),
 578                           atomic_read(&inc->usecnt),
 579                           inc->name);
 580        }
 581        return 0;
 582}
 583
 584static const struct seq_operations ip_vs_app_seq_ops = {
 585        .start = ip_vs_app_seq_start,
 586        .next  = ip_vs_app_seq_next,
 587        .stop  = ip_vs_app_seq_stop,
 588        .show  = ip_vs_app_seq_show,
 589};
 590
 591static int ip_vs_app_open(struct inode *inode, struct file *file)
 592{
 593        return seq_open_net(inode, file, &ip_vs_app_seq_ops,
 594                            sizeof(struct seq_net_private));
 595}
 596
 597static const struct file_operations ip_vs_app_fops = {
 598        .owner   = THIS_MODULE,
 599        .open    = ip_vs_app_open,
 600        .read    = seq_read,
 601        .llseek  = seq_lseek,
 602        .release = seq_release_net,
 603};
 604#endif
 605
 606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
 607{
 608        INIT_LIST_HEAD(&ipvs->app_list);
 609        proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
 610        return 0;
 611}
 612
 613void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
 614{
 615        unregister_ip_vs_app(ipvs, NULL /* all */);
 616        remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
 617}
 618