linux/net/netfilter/ipvs/ip_vs_app.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * ip_vs_app.c: Application module support for IPVS
   4 *
   5 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   6 *
   7 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
   8 * is that ip_vs_app module handles the reverse direction (incoming requests
   9 * and outgoing responses).
  10 *
  11 *              IP_MASQ_APP application masquerading module
  12 *
  13 * Author:      Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
  14 */
  15
/* Prefix used for every message printed through pr_fmt() in this file. */
#define KMSG_COMPONENT "IPVS"
/* Prepend "IPVS: " to the format string handed to the pr_*() helpers. */
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  18
  19#include <linux/module.h>
  20#include <linux/kernel.h>
  21#include <linux/skbuff.h>
  22#include <linux/in.h>
  23#include <linux/ip.h>
  24#include <linux/netfilter.h>
  25#include <linux/slab.h>
  26#include <net/net_namespace.h>
  27#include <net/protocol.h>
  28#include <net/tcp.h>
  29#include <linux/stat.h>
  30#include <linux/proc_fs.h>
  31#include <linux/seq_file.h>
  32#include <linux/mutex.h>
  33
  34#include <net/ip_vs.h>
  35
/* Entry points made available to other modules; defined further below. */
EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/*
 * Held while applications/incarnations are registered or unregistered and
 * for the duration of a /proc seq_file walk (taken in ->start, dropped in
 * ->stop).
 */
static DEFINE_MUTEX(__ip_vs_app_mutex);
  41
  42/*
  43 *      Get an ip_vs_app object
  44 */
  45static inline int ip_vs_app_get(struct ip_vs_app *app)
  46{
  47        return try_module_get(app->module);
  48}
  49
  50
  51static inline void ip_vs_app_put(struct ip_vs_app *app)
  52{
  53        module_put(app->module);
  54}
  55
  56static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
  57{
  58        kfree(inc->timeout_table);
  59        kfree(inc);
  60}
  61
  62static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
  63{
  64        struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
  65
  66        ip_vs_app_inc_destroy(inc);
  67}
  68
  69/*
  70 *      Allocate/initialize app incarnation and register it in proto apps.
  71 */
  72static int
  73ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
  74                  __u16 port)
  75{
  76        struct ip_vs_protocol *pp;
  77        struct ip_vs_app *inc;
  78        int ret;
  79
  80        if (!(pp = ip_vs_proto_get(proto)))
  81                return -EPROTONOSUPPORT;
  82
  83        if (!pp->unregister_app)
  84                return -EOPNOTSUPP;
  85
  86        inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
  87        if (!inc)
  88                return -ENOMEM;
  89        INIT_LIST_HEAD(&inc->p_list);
  90        INIT_LIST_HEAD(&inc->incs_list);
  91        inc->app = app;
  92        inc->port = htons(port);
  93        atomic_set(&inc->usecnt, 0);
  94
  95        if (app->timeouts) {
  96                inc->timeout_table =
  97                        ip_vs_create_timeout_table(app->timeouts,
  98                                                   app->timeouts_size);
  99                if (!inc->timeout_table) {
 100                        ret = -ENOMEM;
 101                        goto out;
 102                }
 103        }
 104
 105        ret = pp->register_app(ipvs, inc);
 106        if (ret)
 107                goto out;
 108
 109        list_add(&inc->a_list, &app->incs_list);
 110        IP_VS_DBG(9, "%s App %s:%u registered\n",
 111                  pp->name, inc->name, ntohs(inc->port));
 112
 113        return 0;
 114
 115  out:
 116        ip_vs_app_inc_destroy(inc);
 117        return ret;
 118}
 119
 120
 121/*
 122 *      Release app incarnation
 123 */
 124static void
 125ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
 126{
 127        struct ip_vs_protocol *pp;
 128
 129        if (!(pp = ip_vs_proto_get(inc->protocol)))
 130                return;
 131
 132        if (pp->unregister_app)
 133                pp->unregister_app(ipvs, inc);
 134
 135        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 136                  pp->name, inc->name, ntohs(inc->port));
 137
 138        list_del(&inc->a_list);
 139
 140        call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
 141}
 142
 143
 144/*
 145 *      Get reference to app inc (only called from softirq)
 146 *
 147 */
 148int ip_vs_app_inc_get(struct ip_vs_app *inc)
 149{
 150        int result;
 151
 152        result = ip_vs_app_get(inc->app);
 153        if (result)
 154                atomic_inc(&inc->usecnt);
 155        return result;
 156}
 157
 158
 159/*
 160 *      Put the app inc (only called from timer or net softirq)
 161 */
 162void ip_vs_app_inc_put(struct ip_vs_app *inc)
 163{
 164        atomic_dec(&inc->usecnt);
 165        ip_vs_app_put(inc->app);
 166}
 167
 168
 169/*
 170 *      Register an application incarnation in protocol applications
 171 */
 172int
 173register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
 174                       __u16 port)
 175{
 176        int result;
 177
 178        mutex_lock(&__ip_vs_app_mutex);
 179
 180        result = ip_vs_app_inc_new(ipvs, app, proto, port);
 181
 182        mutex_unlock(&__ip_vs_app_mutex);
 183
 184        return result;
 185}
 186
 187
 188/* Register application for netns */
 189struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 190{
 191        struct ip_vs_app *a;
 192        int err = 0;
 193
 194        mutex_lock(&__ip_vs_app_mutex);
 195
 196        /* increase the module use count */
 197        if (!ip_vs_use_count_inc()) {
 198                err = -ENOENT;
 199                goto out_unlock;
 200        }
 201
 202        list_for_each_entry(a, &ipvs->app_list, a_list) {
 203                if (!strcmp(app->name, a->name)) {
 204                        err = -EEXIST;
 205                        /* decrease the module use count */
 206                        ip_vs_use_count_dec();
 207                        goto out_unlock;
 208                }
 209        }
 210        a = kmemdup(app, sizeof(*app), GFP_KERNEL);
 211        if (!a) {
 212                err = -ENOMEM;
 213                /* decrease the module use count */
 214                ip_vs_use_count_dec();
 215                goto out_unlock;
 216        }
 217        INIT_LIST_HEAD(&a->incs_list);
 218        list_add(&a->a_list, &ipvs->app_list);
 219
 220out_unlock:
 221        mutex_unlock(&__ip_vs_app_mutex);
 222
 223        return err ? ERR_PTR(err) : a;
 224}
 225
 226
 227/*
 228 *      ip_vs_app unregistration routine
 229 *      We are sure there are no app incarnations attached to services
 230 *      Caller should use synchronize_rcu() or rcu_barrier()
 231 */
 232void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
 233{
 234        struct ip_vs_app *a, *anxt, *inc, *nxt;
 235
 236        mutex_lock(&__ip_vs_app_mutex);
 237
 238        list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
 239                if (app && strcmp(app->name, a->name))
 240                        continue;
 241                list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
 242                        ip_vs_app_inc_release(ipvs, inc);
 243                }
 244
 245                list_del(&a->a_list);
 246                kfree(a);
 247
 248                /* decrease the module use count */
 249                ip_vs_use_count_dec();
 250        }
 251
 252        mutex_unlock(&__ip_vs_app_mutex);
 253}
 254
 255
 256/*
 257 *      Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 258 */
 259int ip_vs_bind_app(struct ip_vs_conn *cp,
 260                   struct ip_vs_protocol *pp)
 261{
 262        return pp->app_conn_bind(cp);
 263}
 264
 265
 266/*
 267 *      Unbind cp from application incarnation (called by cp destructor)
 268 */
 269void ip_vs_unbind_app(struct ip_vs_conn *cp)
 270{
 271        struct ip_vs_app *inc = cp->app;
 272
 273        if (!inc)
 274                return;
 275
 276        if (inc->unbind_conn)
 277                inc->unbind_conn(inc, cp);
 278        if (inc->done_conn)
 279                inc->done_conn(inc, cp);
 280        ip_vs_app_inc_put(inc);
 281        cp->app = NULL;
 282}
 283
 284
 285/*
 286 *      Fixes th->seq based on ip_vs_seq info.
 287 */
 288static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 289{
 290        __u32 seq = ntohl(th->seq);
 291
 292        /*
 293         *      Adjust seq with delta-offset for all packets after
 294         *      the most recent resized pkt seq and with previous_delta offset
 295         *      for all packets before most recent resized pkt seq.
 296         */
 297        if (vseq->delta || vseq->previous_delta) {
 298                if(after(seq, vseq->init_seq)) {
 299                        th->seq = htonl(seq + vseq->delta);
 300                        IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
 301                                  __func__, vseq->delta);
 302                } else {
 303                        th->seq = htonl(seq + vseq->previous_delta);
 304                        IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
 305                                  __func__, vseq->previous_delta);
 306                }
 307        }
 308}
 309
 310
 311/*
 312 *      Fixes th->ack_seq based on ip_vs_seq info.
 313 */
 314static inline void
 315vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
 316{
 317        __u32 ack_seq = ntohl(th->ack_seq);
 318
 319        /*
 320         * Adjust ack_seq with delta-offset for
 321         * the packets AFTER most recent resized pkt has caused a shift
 322         * for packets before most recent resized pkt, use previous_delta
 323         */
 324        if (vseq->delta || vseq->previous_delta) {
 325                /* since ack_seq is the number of octet that is expected
 326                   to receive next, so compare it with init_seq+delta */
 327                if(after(ack_seq, vseq->init_seq+vseq->delta)) {
 328                        th->ack_seq = htonl(ack_seq - vseq->delta);
 329                        IP_VS_DBG(9, "%s(): subtracted delta "
 330                                  "(%d) from ack_seq\n", __func__, vseq->delta);
 331
 332                } else {
 333                        th->ack_seq = htonl(ack_seq - vseq->previous_delta);
 334                        IP_VS_DBG(9, "%s(): subtracted "
 335                                  "previous_delta (%d) from ack_seq\n",
 336                                  __func__, vseq->previous_delta);
 337                }
 338        }
 339}
 340
 341
 342/*
 343 *      Updates ip_vs_seq if pkt has been resized
 344 *      Assumes already checked proto==IPPROTO_TCP and diff!=0.
 345 */
 346static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 347                                 unsigned int flag, __u32 seq, int diff)
 348{
 349        /* spinlock is to keep updating cp->flags atomic */
 350        spin_lock_bh(&cp->lock);
 351        if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
 352                vseq->previous_delta = vseq->delta;
 353                vseq->delta += diff;
 354                vseq->init_seq = seq;
 355                cp->flags |= flag;
 356        }
 357        spin_unlock_bh(&cp->lock);
 358}
 359
 360static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 361                                  struct ip_vs_app *app,
 362                                  struct ip_vs_iphdr *ipvsh)
 363{
 364        int diff;
 365        const unsigned int tcp_offset = ip_hdrlen(skb);
 366        struct tcphdr *th;
 367        __u32 seq;
 368
 369        if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
 370                return 0;
 371
 372        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 373
 374        /*
 375         *      Remember seq number in case this pkt gets resized
 376         */
 377        seq = ntohl(th->seq);
 378
 379        /*
 380         *      Fix seq stuff if flagged as so.
 381         */
 382        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 383                vs_fix_seq(&cp->out_seq, th);
 384        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 385                vs_fix_ack_seq(&cp->in_seq, th);
 386
 387        /*
 388         *      Call private output hook function
 389         */
 390        if (app->pkt_out == NULL)
 391                return 1;
 392
 393        if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
 394                return 0;
 395
 396        /*
 397         *      Update ip_vs seq stuff if len has changed.
 398         */
 399        if (diff != 0)
 400                vs_seq_update(cp, &cp->out_seq,
 401                              IP_VS_CONN_F_OUT_SEQ, seq, diff);
 402
 403        return 1;
 404}
 405
 406/*
 407 *      Output pkt hook. Will call bound ip_vs_app specific function
 408 *      called by ipvs packet handler, assumes previously checked cp!=NULL
 409 *      returns false if it can't handle packet (oom)
 410 */
 411int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 412                      struct ip_vs_iphdr *ipvsh)
 413{
 414        struct ip_vs_app *app;
 415
 416        /*
 417         *      check if application module is bound to
 418         *      this ip_vs_conn.
 419         */
 420        if ((app = cp->app) == NULL)
 421                return 1;
 422
 423        /* TCP is complicated */
 424        if (cp->protocol == IPPROTO_TCP)
 425                return app_tcp_pkt_out(cp, skb, app, ipvsh);
 426
 427        /*
 428         *      Call private output hook function
 429         */
 430        if (app->pkt_out == NULL)
 431                return 1;
 432
 433        return app->pkt_out(app, cp, skb, NULL, ipvsh);
 434}
 435
 436
 437static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 438                                 struct ip_vs_app *app,
 439                                 struct ip_vs_iphdr *ipvsh)
 440{
 441        int diff;
 442        const unsigned int tcp_offset = ip_hdrlen(skb);
 443        struct tcphdr *th;
 444        __u32 seq;
 445
 446        if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
 447                return 0;
 448
 449        th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 450
 451        /*
 452         *      Remember seq number in case this pkt gets resized
 453         */
 454        seq = ntohl(th->seq);
 455
 456        /*
 457         *      Fix seq stuff if flagged as so.
 458         */
 459        if (cp->flags & IP_VS_CONN_F_IN_SEQ)
 460                vs_fix_seq(&cp->in_seq, th);
 461        if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
 462                vs_fix_ack_seq(&cp->out_seq, th);
 463
 464        /*
 465         *      Call private input hook function
 466         */
 467        if (app->pkt_in == NULL)
 468                return 1;
 469
 470        if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
 471                return 0;
 472
 473        /*
 474         *      Update ip_vs seq stuff if len has changed.
 475         */
 476        if (diff != 0)
 477                vs_seq_update(cp, &cp->in_seq,
 478                              IP_VS_CONN_F_IN_SEQ, seq, diff);
 479
 480        return 1;
 481}
 482
 483/*
 484 *      Input pkt hook. Will call bound ip_vs_app specific function
 485 *      called by ipvs packet handler, assumes previously checked cp!=NULL.
 486 *      returns false if can't handle packet (oom).
 487 */
 488int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 489                     struct ip_vs_iphdr *ipvsh)
 490{
 491        struct ip_vs_app *app;
 492
 493        /*
 494         *      check if application module is bound to
 495         *      this ip_vs_conn.
 496         */
 497        if ((app = cp->app) == NULL)
 498                return 1;
 499
 500        /* TCP is complicated */
 501        if (cp->protocol == IPPROTO_TCP)
 502                return app_tcp_pkt_in(cp, skb, app, ipvsh);
 503
 504        /*
 505         *      Call private input hook function
 506         */
 507        if (app->pkt_in == NULL)
 508                return 1;
 509
 510        return app->pkt_in(app, cp, skb, NULL, ipvsh);
 511}
 512
 513
 514#ifdef CONFIG_PROC_FS
 515/*
 516 *      /proc/net/ip_vs_app entry function
 517 */
 518
 519static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 520{
 521        struct ip_vs_app *app, *inc;
 522
 523        list_for_each_entry(app, &ipvs->app_list, a_list) {
 524                list_for_each_entry(inc, &app->incs_list, a_list) {
 525                        if (pos-- == 0)
 526                                return inc;
 527                }
 528        }
 529        return NULL;
 530
 531}
 532
 533static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 534{
 535        struct net *net = seq_file_net(seq);
 536        struct netns_ipvs *ipvs = net_ipvs(net);
 537
 538        mutex_lock(&__ip_vs_app_mutex);
 539
 540        return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 541}
 542
 543static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 544{
 545        struct ip_vs_app *inc, *app;
 546        struct list_head *e;
 547        struct net *net = seq_file_net(seq);
 548        struct netns_ipvs *ipvs = net_ipvs(net);
 549
 550        ++*pos;
 551        if (v == SEQ_START_TOKEN)
 552                return ip_vs_app_idx(ipvs, 0);
 553
 554        inc = v;
 555        app = inc->app;
 556
 557        if ((e = inc->a_list.next) != &app->incs_list)
 558                return list_entry(e, struct ip_vs_app, a_list);
 559
 560        /* go on to next application */
 561        for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 562                app = list_entry(e, struct ip_vs_app, a_list);
 563                list_for_each_entry(inc, &app->incs_list, a_list) {
 564                        return inc;
 565                }
 566        }
 567        return NULL;
 568}
 569
 570static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 571{
 572        mutex_unlock(&__ip_vs_app_mutex);
 573}
 574
 575static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
 576{
 577        if (v == SEQ_START_TOKEN)
 578                seq_puts(seq, "prot port    usecnt name\n");
 579        else {
 580                const struct ip_vs_app *inc = v;
 581
 582                seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
 583                           ip_vs_proto_name(inc->protocol),
 584                           ntohs(inc->port),
 585                           atomic_read(&inc->usecnt),
 586                           inc->name);
 587        }
 588        return 0;
 589}
 590
/*
 * seq_file iterator for the "ip_vs_app" proc entry. ->start takes
 * __ip_vs_app_mutex and ->stop releases it, so ->next and ->show run
 * with the mutex held.
 */
static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};
 597#endif
 598
 599int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
 600{
 601        INIT_LIST_HEAD(&ipvs->app_list);
 602        proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
 603                        sizeof(struct seq_net_private));
 604        return 0;
 605}
 606
 607void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
 608{
 609        unregister_ip_vs_app(ipvs, NULL /* all */);
 610        remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
 611}
 612