linux/net/netfilter/ipvs/ip_vs_app.c
/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
 *
 *              IP_MASQ_APP application masquerading module
 *
 * Author:      Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <linux/stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <net/ip_vs.h>

EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

static DEFINE_MUTEX(__ip_vs_app_mutex);

/*
 *      Get an ip_vs_app object
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
        return try_module_get(app->module);
}


static inline void ip_vs_app_put(struct ip_vs_app *app)
{
        module_put(app->module);
}

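/*
 *      Free an app incarnation: release its private timeout table and the
 *      incarnation itself.  Called directly on registration failure, and
 *      via call_rcu() once readers are done with a released incarnation.
 */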
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
        kfree(inc->timeout_table);
        kfree(inc);
}

static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
        struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

        ip_vs_app_inc_destroy(inc);
}

/*
 *      Allocate/initialize app incarnation and register it in proto apps.
 */
static int
ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
                  __u16 port)
{
        struct ip_vs_protocol *pp;
        struct ip_vs_app *inc;
        int ret;

        if (!(pp = ip_vs_proto_get(proto)))
                return -EPROTONOSUPPORT;

        if (!pp->unregister_app)
                return -EOPNOTSUPP;

        inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
        if (!inc)
                return -ENOMEM;
        INIT_LIST_HEAD(&inc->p_list);
        INIT_LIST_HEAD(&inc->incs_list);
        inc->app = app;
        inc->port = htons(port);
        atomic_set(&inc->usecnt, 0);

        if (app->timeouts) {
                inc->timeout_table =
                        ip_vs_create_timeout_table(app->timeouts,
                                                   app->timeouts_size);
                if (!inc->timeout_table) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        ret = pp->register_app(net, inc);
        if (ret)
                goto out;

        list_add(&inc->a_list, &app->incs_list);
        IP_VS_DBG(9, "%s App %s:%u registered\n",
                  pp->name, inc->name, ntohs(inc->port));

        return 0;

  out:
        ip_vs_app_inc_destroy(inc);
        return ret;
}


/*
 *      Release app incarnation
 */
static void
ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
        struct ip_vs_protocol *pp;

        if (!(pp = ip_vs_proto_get(inc->protocol)))
                return;

        if (pp->unregister_app)
                pp->unregister_app(net, inc);

        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
                  pp->name, inc->name, ntohs(inc->port));

        list_del(&inc->a_list);

        call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}


/*
 *      Get reference to app inc (only called from softirq)
 */
int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
        int result;

        result = ip_vs_app_get(inc->app);
        if (result)
                atomic_inc(&inc->usecnt);
        return result;
}


/*
 *      Put the app inc (only called from timer or net softirq)
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
        atomic_dec(&inc->usecnt);
        ip_vs_app_put(inc->app);
}


/*
 *      Register an application incarnation in protocol applications
 */
int
register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
                       __u16 port)
{
        int result;

        mutex_lock(&__ip_vs_app_mutex);

        result = ip_vs_app_inc_new(net, app, proto, port);

        mutex_unlock(&__ip_vs_app_mutex);

        return result;
}


/* Register application for netns */
struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
        struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_app *a;
        int err = 0;

        if (!ipvs)
                return ERR_PTR(-ENOENT);

        mutex_lock(&__ip_vs_app_mutex);

        list_for_each_entry(a, &ipvs->app_list, a_list) {
                if (!strcmp(app->name, a->name)) {
                        err = -EEXIST;
                        goto out_unlock;
                }
        }
        a = kmemdup(app, sizeof(*app), GFP_KERNEL);
        if (!a) {
                err = -ENOMEM;
                goto out_unlock;
        }
        INIT_LIST_HEAD(&a->incs_list);
        list_add(&a->a_list, &ipvs->app_list);
        /* increase the module use count */
        ip_vs_use_count_inc();

out_unlock:
        mutex_unlock(&__ip_vs_app_mutex);

        return err ? ERR_PTR(err) : a;
}

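/*
 *      A minimal usage sketch for the registration API above, modelled on
 *      application helpers such as ip_vs_ftp (the helper structure and the
 *      port number below are illustrative, not taken from this file):
 *
 *              struct ip_vs_app *app;
 *              int ret;
 *
 *              app = register_ip_vs_app(net, &my_vs_app);
 *              if (IS_ERR(app))
 *                      return PTR_ERR(app);
 *              ret = register_ip_vs_app_inc(net, app, IPPROTO_TCP, 21);
 *              if (ret)
 *                      unregister_ip_vs_app(net, &my_vs_app);
 *              return ret;
 */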

/*
 *      ip_vs_app unregistration routine
 *      We are sure there are no app incarnations attached to services
 *      Caller should use synchronize_rcu() or rcu_barrier()
 */
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
        struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_app *a, *anxt, *inc, *nxt;

        if (!ipvs)
                return;

        mutex_lock(&__ip_vs_app_mutex);

        list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
                if (app && strcmp(app->name, a->name))
                        continue;
                list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
                        ip_vs_app_inc_release(net, inc);
                }

                list_del(&a->a_list);
                kfree(a);

                /* decrease the module use count */
                ip_vs_use_count_dec();
        }

        mutex_unlock(&__ip_vs_app_mutex);
}


/*
 *      Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
                   struct ip_vs_protocol *pp)
{
        return pp->app_conn_bind(cp);
}


/*
 *      Unbind cp from application incarnation (called by cp destructor)
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
        struct ip_vs_app *inc = cp->app;

        if (!inc)
                return;

        if (inc->unbind_conn)
                inc->unbind_conn(inc, cp);
        if (inc->done_conn)
                inc->done_conn(inc, cp);
        ip_vs_app_inc_put(inc);
        cp->app = NULL;
}


/*
 *      Fixes th->seq based on ip_vs_seq info.
 */
static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
        __u32 seq = ntohl(th->seq);

        /*
         *      Adjust seq with delta-offset for all packets after
         *      the most recent resized pkt seq and with previous_delta offset
         *      for all packets before most recent resized pkt seq.
         */
        if (vseq->delta || vseq->previous_delta) {
                if (after(seq, vseq->init_seq)) {
                        th->seq = htonl(seq + vseq->delta);
                        IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
                                  __func__, vseq->delta);
                } else {
                        th->seq = htonl(seq + vseq->previous_delta);
                        IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
                                  __func__, vseq->previous_delta);
                }
        }
}


/*
 *      Fixes th->ack_seq based on ip_vs_seq info.
 */
static inline void
vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
        __u32 ack_seq = ntohl(th->ack_seq);

        /*
         * Adjust ack_seq with delta-offset for packets after the most
         * recent resized pkt; for packets before it, use previous_delta.
         */
        if (vseq->delta || vseq->previous_delta) {
                /* ack_seq is the sequence number of the next octet the
                 * sender of this packet expects to receive, so compare
                 * it with init_seq + delta */
                if (after(ack_seq, vseq->init_seq + vseq->delta)) {
                        th->ack_seq = htonl(ack_seq - vseq->delta);
                        IP_VS_DBG(9, "%s(): subtracted delta "
                                  "(%d) from ack_seq\n", __func__, vseq->delta);

                } else {
                        th->ack_seq = htonl(ack_seq - vseq->previous_delta);
                        IP_VS_DBG(9, "%s(): subtracted "
                                  "previous_delta (%d) from ack_seq\n",
                                  __func__, vseq->previous_delta);
                }
        }
}


/*
 *      Updates ip_vs_seq if pkt has been resized
 *      Assumes already checked proto==IPPROTO_TCP and diff!=0.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
                                 unsigned int flag, __u32 seq, int diff)
{
        /* spinlock is to keep updating cp->flags atomic */
        spin_lock_bh(&cp->lock);
        if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
                vseq->previous_delta = vseq->delta;
                vseq->delta += diff;
                vseq->init_seq = seq;
                cp->flags |= flag;
        }
        spin_unlock_bh(&cp->lock);
}

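/*
 *      Common TCP handling for the output direction: fix up seq/ack_seq
 *      using the recorded deltas, run the app's pkt_out hook and, if the
 *      payload length changed, record the new delta for this connection.
 */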
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
                                  struct ip_vs_app *app)
{
        int diff;
        const unsigned int tcp_offset = ip_hdrlen(skb);
        struct tcphdr *th;
        __u32 seq;

        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
                return 0;

        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

        /*
         *      Remember seq number in case this pkt gets resized
         */
        seq = ntohl(th->seq);

        /*
         *      Fix seq stuff if flagged as so.
         */
        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
                vs_fix_seq(&cp->out_seq, th);
        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
                vs_fix_ack_seq(&cp->in_seq, th);

        /*
         *      Call private output hook function
         */
        if (app->pkt_out == NULL)
                return 1;

        if (!app->pkt_out(app, cp, skb, &diff))
                return 0;

        /*
         *      Update ip_vs seq stuff if len has changed.
         */
        if (diff != 0)
                vs_seq_update(cp, &cp->out_seq,
                              IP_VS_CONN_F_OUT_SEQ, seq, diff);

        return 1;
}

/*
 *      Output pkt hook. Calls the bound ip_vs_app specific function;
 *      called by the ipvs packet handler. Assumes cp != NULL has already
 *      been checked; returns false if it can't handle the packet (oom).
 */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
        struct ip_vs_app *app;

        /*
         *      check if application module is bound to
         *      this ip_vs_conn.
         */
        if ((app = cp->app) == NULL)
                return 1;

        /* TCP is complicated */
        if (cp->protocol == IPPROTO_TCP)
                return app_tcp_pkt_out(cp, skb, app);

        /*
         *      Call private output hook function
         */
        if (app->pkt_out == NULL)
                return 1;

        return app->pkt_out(app, cp, skb, NULL);
}


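/*
 *      Common TCP handling for the input direction: the mirror image of
 *      app_tcp_pkt_out(), using in_seq for the sequence fixups and the
 *      app's pkt_in hook.
 */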
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
                                 struct ip_vs_app *app)
{
        int diff;
        const unsigned int tcp_offset = ip_hdrlen(skb);
        struct tcphdr *th;
        __u32 seq;

        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
                return 0;

        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

        /*
         *      Remember seq number in case this pkt gets resized
         */
        seq = ntohl(th->seq);

        /*
         *      Fix seq stuff if flagged as so.
         */
        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
                vs_fix_seq(&cp->in_seq, th);
        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
                vs_fix_ack_seq(&cp->out_seq, th);

        /*
         *      Call private input hook function
         */
        if (app->pkt_in == NULL)
                return 1;

        if (!app->pkt_in(app, cp, skb, &diff))
                return 0;

        /*
         *      Update ip_vs seq stuff if len has changed.
         */
        if (diff != 0)
                vs_seq_update(cp, &cp->in_seq,
                              IP_VS_CONN_F_IN_SEQ, seq, diff);

        return 1;
}

/*
 *      Input pkt hook. Calls the bound ip_vs_app specific function;
 *      called by the ipvs packet handler. Assumes cp != NULL has already
 *      been checked; returns false if it can't handle the packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
        struct ip_vs_app *app;

        /*
         *      check if application module is bound to
         *      this ip_vs_conn.
         */
        if ((app = cp->app) == NULL)
                return 1;

        /* TCP is complicated */
        if (cp->protocol == IPPROTO_TCP)
                return app_tcp_pkt_in(cp, skb, app);

        /*
         *      Call private input hook function
         */
        if (app->pkt_in == NULL)
                return 1;

        return app->pkt_in(app, cp, skb, NULL);
}


#ifdef CONFIG_PROC_FS
/*
 *      /proc/net/ip_vs_app entry function
 */

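/*
 *      Return the pos-th incarnation over all registered applications,
 *      or NULL when pos runs past the end of the lists.
 */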
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
        struct ip_vs_app *app, *inc;

        list_for_each_entry(app, &ipvs->app_list, a_list) {
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        if (pos-- == 0)
                                return inc;
                }
        }
        return NULL;
}

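/*
 *      The seq_file walk runs under __ip_vs_app_mutex: taken in ->start()
 *      and released in ->stop(), so the application/incarnation lists
 *      cannot change while /proc/net/ip_vs_app is being read.
 */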
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct net *net = seq_file_net(seq);
        struct netns_ipvs *ipvs = net_ipvs(net);

        mutex_lock(&__ip_vs_app_mutex);

        return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}

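/*
 *      Advance to the next incarnation of the current application, or to
 *      the first incarnation of the next application that has one.
 */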
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct ip_vs_app *inc, *app;
        struct list_head *e;
        struct net *net = seq_file_net(seq);
        struct netns_ipvs *ipvs = net_ipvs(net);

        ++*pos;
        if (v == SEQ_START_TOKEN)
                return ip_vs_app_idx(ipvs, 0);

        inc = v;
        app = inc->app;

        if ((e = inc->a_list.next) != &app->incs_list)
                return list_entry(e, struct ip_vs_app, a_list);

        /* go on to next application */
        for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
                app = list_entry(e, struct ip_vs_app, a_list);
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        return inc;
                }
        }
        return NULL;
}

static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
        mutex_unlock(&__ip_vs_app_mutex);
}

static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "prot port    usecnt name\n");
        else {
                const struct ip_vs_app *inc = v;

                seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
                           ip_vs_proto_name(inc->protocol),
                           ntohs(inc->port),
                           atomic_read(&inc->usecnt),
                           inc->name);
        }
        return 0;
}

static const struct seq_operations ip_vs_app_seq_ops = {
        .start = ip_vs_app_seq_start,
        .next  = ip_vs_app_seq_next,
        .stop  = ip_vs_app_seq_stop,
        .show  = ip_vs_app_seq_show,
};

static int ip_vs_app_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ip_vs_app_seq_ops,
                            sizeof(struct seq_net_private));
}

static const struct file_operations ip_vs_app_fops = {
        .owner   = THIS_MODULE,
        .open    = ip_vs_app_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif

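/*
 *      Per-netns init/cleanup: set up the application list and the
 *      /proc/net/ip_vs_app entry, and on netns exit release any
 *      applications still registered and remove the proc entry.
 */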
int __net_init ip_vs_app_net_init(struct net *net)
{
        struct netns_ipvs *ipvs = net_ipvs(net);

        INIT_LIST_HEAD(&ipvs->app_list);
        proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
        return 0;
}

void __net_exit ip_vs_app_net_cleanup(struct net *net)
{
        unregister_ip_vs_app(net, NULL /* all */);
        remove_proc_entry("ip_vs_app", net->proc_net);
}