linux/net/netfilter/ipvs/ip_vs_app.c
<<
>>
Prefs
   1/*
   2 * ip_vs_app.c: Application module support for IPVS
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
  12 * is that ip_vs_app module handles the reverse direction (incoming requests
  13 * and outgoing responses).
  14 *
  15 *              IP_MASQ_APP application masquerading module
  16 *
  17 * Author:      Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
  18 *
  19 */
  20
  21#define KMSG_COMPONENT "IPVS"
  22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  23
  24#include <linux/module.h>
  25#include <linux/kernel.h>
  26#include <linux/skbuff.h>
  27#include <linux/in.h>
  28#include <linux/ip.h>
  29#include <linux/netfilter.h>
  30#include <linux/slab.h>
  31#include <net/net_namespace.h>
  32#include <net/protocol.h>
  33#include <net/tcp.h>
  34#include <linux/stat.h>
  35#include <linux/proc_fs.h>
  36#include <linux/seq_file.h>
  37#include <linux/mutex.h>
  38
  39#include <net/ip_vs.h>
  40
  41EXPORT_SYMBOL(register_ip_vs_app);
  42EXPORT_SYMBOL(unregister_ip_vs_app);
  43EXPORT_SYMBOL(register_ip_vs_app_inc);
  44
  45static DEFINE_MUTEX(__ip_vs_app_mutex);
  46
  47/*
  48 *      Get an ip_vs_app object
  49 */
  50static inline int ip_vs_app_get(struct ip_vs_app *app)
  51{
  52        return try_module_get(app->module);
  53}
  54
  55
  56static inline void ip_vs_app_put(struct ip_vs_app *app)
  57{
  58        module_put(app->module);
  59}
  60
  61static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
  62{
  63        kfree(inc->timeout_table);
  64        kfree(inc);
  65}
  66
  67static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
  68{
  69        struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
  70
  71        ip_vs_app_inc_destroy(inc);
  72}
  73
  74/*
  75 *      Allocate/initialize app incarnation and register it in proto apps.
  76 */
  77static int
  78ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
  79                  __u16 port)
  80{
  81        struct ip_vs_protocol *pp;
  82        struct ip_vs_app *inc;
  83        int ret;
  84
  85        if (!(pp = ip_vs_proto_get(proto)))
  86                return -EPROTONOSUPPORT;
  87
  88        if (!pp->unregister_app)
  89                return -EOPNOTSUPP;
  90
  91        inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
  92        if (!inc)
  93                return -ENOMEM;
  94        INIT_LIST_HEAD(&inc->p_list);
  95        INIT_LIST_HEAD(&inc->incs_list);
  96        inc->app = app;
  97        inc->port = htons(port);
  98        atomic_set(&inc->usecnt, 0);
  99
 100        if (app->timeouts) {
 101                inc->timeout_table =
 102                        ip_vs_create_timeout_table(app->timeouts,
 103                                                   app->timeouts_size);
 104                if (!inc->timeout_table) {
 105                        ret = -ENOMEM;
 106                        goto out;
 107                }
 108        }
 109
 110        ret = pp->register_app(ipvs, inc);
 111        if (ret)
 112                goto out;
 113
 114        list_add(&inc->a_list, &app->incs_list);
 115        IP_VS_DBG(9, "%s App %s:%u registered\n",
 116                  pp->name, inc->name, ntohs(inc->port));
 117
 118        return 0;
 119
 120  out:
 121        ip_vs_app_inc_destroy(inc);
 122        return ret;
 123}
 124
 125
 126/*
 127 *      Release app incarnation
 128 */
 129static void
 130ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 131{
 132        struct ip_vs_protocol *pp;
 133
 134        if (!(pp = ip_vs_proto_get(inc->protocol)))
 135                return;
 136
 137        if (pp->unregister_app)
 138                pp->unregister_app(ipvs, inc);
 139
 140        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 141                  pp->name, inc->name, ntohs(inc->port));
 142
 143        list_del(&inc->a_list);
 144
 145        call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
 146}
 147
 148
 149/*
 150 *      Get reference to app inc (only called from softirq)
 151 *
 152 */
 153int ip_vs_app_inc_get(struct ip_vs_app *inc)
 154{
 155        int result;
 156
 157        result = ip_vs_app_get(inc->app);
 158        if (result)
 159                atomic_inc(&inc->usecnt);
 160        return result;
 161}
 162
 163
 164/*
 165 *      Put the app inc (only called from timer or net softirq)
 166 */
 167void ip_vs_app_inc_put(struct ip_vs_app *inc)
 168{
 169        atomic_dec(&inc->usecnt);
 170        ip_vs_app_put(inc->app);
 171}
 172
 173
 174/*
 175 *      Register an application incarnation in protocol applications
 176 */
 177int
 178register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
 179                       __u16 port)
 180{
 181        int result;
 182
 183        mutex_lock(&__ip_vs_app_mutex);
 184
 185        result = ip_vs_app_inc_new(ipvs, app, proto, port);
 186
 187        mutex_unlock(&__ip_vs_app_mutex);
 188
 189        return result;
 190}
 191
 192
 193/* Register application for netns */
 194struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 195{
 196        struct ip_vs_app *a;
 197        int err = 0;
 198
 199        mutex_lock(&__ip_vs_app_mutex);
 200
 201        list_for_each_entry(a, &ipvs->app_list, a_list) {
 202                if (!strcmp(app->name, a->name)) {
 203                        err = -EEXIST;
 204                        goto out_unlock;
 205                }
 206        }
 207        a = kmemdup(app, sizeof(*app), GFP_KERNEL);
 208        if (!a) {
 209                err = -ENOMEM;
 210                goto out_unlock;
 211        }
 212        INIT_LIST_HEAD(&a->incs_list);
 213        list_add(&a->a_list, &ipvs->app_list);
 214        /* increase the module use count */
 215        ip_vs_use_count_inc();
 216
 217out_unlock:
 218        mutex_unlock(&__ip_vs_app_mutex);
 219
 220        return err ? ERR_PTR(err) : a;
 221}
 222
 223
 224/*
 225 *      ip_vs_app unregistration routine
 226 *      We are sure there are no app incarnations attached to services
 227 *      Caller should use synchronize_rcu() or rcu_barrier()
 228 */
 229void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 230{
 231        struct ip_vs_app *a, *anxt, *inc, *nxt;
 232
 233        mutex_lock(&__ip_vs_app_mutex);
 234
 235        list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
 236                if (app && strcmp(app->name, a->name))
 237                        continue;
 238                list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
 239                        ip_vs_app_inc_release(ipvs, inc);
 240                }
 241
 242                list_del(&a->a_list);
 243                kfree(a);
 244
 245                /* decrease the module use count */
 246                ip_vs_use_count_dec();
 247        }
 248
 249        mutex_unlock(&__ip_vs_app_mutex);
 250}
 251
 252
 253/*
 254 *      Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 255 */
 256int ip_vs_bind_app(struct ip_vs_conn *cp,
 257                   struct ip_vs_protocol *pp)
 258{
 259        return pp->app_conn_bind(cp);
 260}
 261
 262
 263/*
 264 *      Unbind cp from application incarnation (called by cp destructor)
 265 */
 266void ip_vs_unbind_app(struct ip_vs_conn *cp)
 267{
 268        struct ip_vs_app *inc = cp->app;
 269
 270        if (!inc)
 271                return;
 272
 273        if (inc->unbind_conn)
 274                inc->unbind_conn(inc, cp);
 275        if (inc->done_conn)
 276                inc->done_conn(inc, cp);
 277        ip_vs_app_inc_put(inc);
 278        cp->app = NULL;
 279}
 280
 281
 282/*
 283 *      Fixes th->seq based on ip_vs_seq info.
 284 */
 285static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 286{
 287        __u32 seq = ntohl(th->seq);
 288
 289        /*
 290         *      Adjust seq with delta-offset for all packets after
 291         *      the most recent resized pkt seq and with previous_delta offset
 292         *      for all packets before most recent resized pkt seq.
 293         */
 294        if (vseq->delta || vseq->previous_delta) {
 295                if(after(seq, vseq->init_seq)) {
 296                        th->seq = htonl(seq + vseq->delta);
 297                        IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
 298                                  __func__, vseq->delta);
 299                } else {
 300                        th->seq = htonl(seq + vseq->previous_delta);
 301                        IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
 302                                  __func__, vseq->previous_delta);
 303                }
 304        }
 305}
 306
 307
 308/*
 309 *      Fixes th->ack_seq based on ip_vs_seq info.
 310 */
 311static inline void
 312vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 313{
 314        __u32 ack_seq = ntohl(th->ack_seq);
 315
 316        /*
 317         * Adjust ack_seq with delta-offset for
 318         * the packets AFTER most recent resized pkt has caused a shift
 319         * for packets before most recent resized pkt, use previous_delta
 320         */
 321        if (vseq->delta || vseq->previous_delta) {
 322                /* since ack_seq is the number of octet that is expected
 323                   to receive next, so compare it with init_seq+delta */
 324                if(after(ack_seq, vseq->init_seq+vseq->delta)) {
 325                        th->ack_seq = htonl(ack_seq - vseq->delta);
 326                        IP_VS_DBG(9, "%s(): subtracted delta "
 327                                  "(%d) from ack_seq\n", __func__, vseq->delta);
 328
 329                } else {
 330                        th->ack_seq = htonl(ack_seq - vseq->previous_delta);
 331                        IP_VS_DBG(9, "%s(): subtracted "
 332                                  "previous_delta (%d) from ack_seq\n",
 333                                  __func__, vseq->previous_delta);
 334                }
 335        }
 336}
 337
 338
 339/*
 340 *      Updates ip_vs_seq if pkt has been resized
 341 *      Assumes already checked proto==IPPROTO_TCP and diff!=0.
 342 */
 343static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 344                                 unsigned int flag, __u32 seq, int diff)
 345{
 346        /* spinlock is to keep updating cp->flags atomic */
 347        spin_lock_bh(&cp->lock);
 348        if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
 349                vseq->previous_delta = vseq->delta;
 350                vseq->delta += diff;
 351                vseq->init_seq = seq;
 352                cp->flags |= flag;
 353        }
 354        spin_unlock_bh(&cp->lock);
 355}
 356
 357static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 358                                  struct ip_vs_app *app,
 359                                  struct ip_vs_iphdr *ipvsh)
 360{
 361        int diff;
 362        const unsigned int tcp_offset = ip_hdrlen(skb);
 363        struct tcphdr *th;
 364        __u32 seq;
 365
 366        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 367                return 0;
 368
 369        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 370
 371        /*
 372         *      Remember seq number in case this pkt gets resized
 373         */
 374        seq = ntohl(th->seq);
 375
 376        /*
 377         *      Fix seq stuff if flagged as so.
 378         */
 379        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 380                vs_fix_seq(&cp->out_seq, th);
 381        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 382                vs_fix_ack_seq(&cp->in_seq, th);
 383
 384        /*
 385         *      Call private output hook function
 386         */
 387        if (app->pkt_out == NULL)
 388                return 1;
 389
 390        if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
 391                return 0;
 392
 393        /*
 394         *      Update ip_vs seq stuff if len has changed.
 395         */
 396        if (diff != 0)
 397                vs_seq_update(cp, &cp->out_seq,
 398                              IP_VS_CONN_F_OUT_SEQ, seq, diff);
 399
 400        return 1;
 401}
 402
 403/*
 404 *      Output pkt hook. Will call bound ip_vs_app specific function
 405 *      called by ipvs packet handler, assumes previously checked cp!=NULL
 406 *      returns false if it can't handle packet (oom)
 407 */
 408int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 409                      struct ip_vs_iphdr *ipvsh)
 410{
 411        struct ip_vs_app *app;
 412
 413        /*
 414         *      check if application module is bound to
 415         *      this ip_vs_conn.
 416         */
 417        if ((app = cp->app) == NULL)
 418                return 1;
 419
 420        /* TCP is complicated */
 421        if (cp->protocol == IPPROTO_TCP)
 422                return app_tcp_pkt_out(cp, skb, app, ipvsh);
 423
 424        /*
 425         *      Call private output hook function
 426         */
 427        if (app->pkt_out == NULL)
 428                return 1;
 429
 430        return app->pkt_out(app, cp, skb, NULL, ipvsh);
 431}
 432
 433
 434static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 435                                 struct ip_vs_app *app,
 436                                 struct ip_vs_iphdr *ipvsh)
 437{
 438        int diff;
 439        const unsigned int tcp_offset = ip_hdrlen(skb);
 440        struct tcphdr *th;
 441        __u32 seq;
 442
 443        if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 444                return 0;
 445
 446        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 447
 448        /*
 449         *      Remember seq number in case this pkt gets resized
 450         */
 451        seq = ntohl(th->seq);
 452
 453        /*
 454         *      Fix seq stuff if flagged as so.
 455         */
 456        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 457                vs_fix_seq(&cp->in_seq, th);
 458        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 459                vs_fix_ack_seq(&cp->out_seq, th);
 460
 461        /*
 462         *      Call private input hook function
 463         */
 464        if (app->pkt_in == NULL)
 465                return 1;
 466
 467        if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
 468                return 0;
 469
 470        /*
 471         *      Update ip_vs seq stuff if len has changed.
 472         */
 473        if (diff != 0)
 474                vs_seq_update(cp, &cp->in_seq,
 475                              IP_VS_CONN_F_IN_SEQ, seq, diff);
 476
 477        return 1;
 478}
 479
 480/*
 481 *      Input pkt hook. Will call bound ip_vs_app specific function
 482 *      called by ipvs packet handler, assumes previously checked cp!=NULL.
 483 *      returns false if can't handle packet (oom).
 484 */
 485int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 486                     struct ip_vs_iphdr *ipvsh)
 487{
 488        struct ip_vs_app *app;
 489
 490        /*
 491         *      check if application module is bound to
 492         *      this ip_vs_conn.
 493         */
 494        if ((app = cp->app) == NULL)
 495                return 1;
 496
 497        /* TCP is complicated */
 498        if (cp->protocol == IPPROTO_TCP)
 499                return app_tcp_pkt_in(cp, skb, app, ipvsh);
 500
 501        /*
 502         *      Call private input hook function
 503         */
 504        if (app->pkt_in == NULL)
 505                return 1;
 506
 507        return app->pkt_in(app, cp, skb, NULL, ipvsh);
 508}
 509
 510
 511#ifdef CONFIG_PROC_FS
 512/*
 513 *      /proc/net/ip_vs_app entry function
 514 */
 515
 516static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 517{
 518        struct ip_vs_app *app, *inc;
 519
 520        list_for_each_entry(app, &ipvs->app_list, a_list) {
 521                list_for_each_entry(inc, &app->incs_list, a_list) {
 522                        if (pos-- == 0)
 523                                return inc;
 524                }
 525        }
 526        return NULL;
 527
 528}
 529
 530static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 531{
 532        struct net *net = seq_file_net(seq);
 533        struct netns_ipvs *ipvs = net_ipvs(net);
 534
 535        mutex_lock(&__ip_vs_app_mutex);
 536
 537        return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 538}
 539
 540static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 541{
 542        struct ip_vs_app *inc, *app;
 543        struct list_head *e;
 544        struct net *net = seq_file_net(seq);
 545        struct netns_ipvs *ipvs = net_ipvs(net);
 546
 547        ++*pos;
 548        if (v == SEQ_START_TOKEN)
 549                return ip_vs_app_idx(ipvs, 0);
 550
 551        inc = v;
 552        app = inc->app;
 553
 554        if ((e = inc->a_list.next) != &app->incs_list)
 555                return list_entry(e, struct ip_vs_app, a_list);
 556
 557        /* go on to next application */
 558        for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 559                app = list_entry(e, struct ip_vs_app, a_list);
 560                list_for_each_entry(inc, &app->incs_list, a_list) {
 561                        return inc;
 562                }
 563        }
 564        return NULL;
 565}
 566
 567static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 568{
 569        mutex_unlock(&__ip_vs_app_mutex);
 570}
 571
 572static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
 573{
 574        if (v == SEQ_START_TOKEN)
 575                seq_puts(seq, "prot port    usecnt name\n");
 576        else {
 577                const struct ip_vs_app *inc = v;
 578
 579                seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
 580                           ip_vs_proto_name(inc->protocol),
 581                           ntohs(inc->port),
 582                           atomic_read(&inc->usecnt),
 583                           inc->name);
 584        }
 585        return 0;
 586}
 587
 588static const struct seq_operations ip_vs_app_seq_ops = {
 589        .start = ip_vs_app_seq_start,
 590        .next  = ip_vs_app_seq_next,
 591        .stop  = ip_vs_app_seq_stop,
 592        .show  = ip_vs_app_seq_show,
 593};
 594#endif
 595
 596int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
 597{
 598        INIT_LIST_HEAD(&ipvs->app_list);
 599        proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
 600                        sizeof(struct seq_net_private));
 601        return 0;
 602}
 603
 604void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
 605{
 606        unregister_ip_vs_app(ipvs, NULL /* all */);
 607        remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
 608}
 609