linux/net/netfilter/ipvs/ip_vs_lblcr.c
/*
 * IPVS:        Locality-Based Least-Connection with Replication scheduler
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *     Julian Anastasov        :    Added the missing (dest->weight>0)
 *                                  condition in the ip_vs_dest_set_max.
 *
 */

/*
 * The lblc/r algorithm is as follows (pseudo code):
 *
 *       if serverSet[dest_ip] is null then
 *               n, serverSet[dest_ip] <- {weighted least-conn node};
 *       else
 *               n <- {least-conn (alive) node in serverSet[dest_ip]};
 *               if (n is null) OR
 *                  (n.conns>n.weight AND
 *                   there is a node m with m.conns<m.weight/2) then
 *                   n <- {weighted least-conn node};
 *                   add n to serverSet[dest_ip];
 *               if |serverSet[dest_ip]| > 1 AND
 *                   now - serverSet[dest_ip].lastMod > T then
 *                   m <- {most conn node in serverSet[dest_ip]};
 *                   remove m from serverSet[dest_ip];
 *       if serverSet[dest_ip] changed then
 *               serverSet[dest_ip].lastMod <- now;
 *
 *       return n;
 *
 */
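
/*
 * Note on "conns" in the pseudo code above: the least/most-conn
 * selections below use the weighted overhead estimate
 * activeconns*50 + inactconns (see __ip_vs_lblcr_schedule()), while
 * the overload test in is_overloaded() compares raw activeconns
 * against the weight.
 */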

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

#include <net/ip_vs.h>


/*
 *    Garbage collection of stale IPVS lblcr entries is triggered when
 *    the table is full.
 */
#define CHECK_EXPIRE_INTERVAL   (60*HZ)
#define ENTRY_TIMEOUT           (6*60*HZ)
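
/*
 * In other words: with HZ ticks per second, the collection timer fires
 * every 60 seconds, and an entry becomes eligible for collection
 * 6 minutes after its last use.
 */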

/*
 *    Full expiration check: when no partial expiration check (garbage
 *    collection) has run in half an hour, do a full expiration check to
 *    collect stale entries that haven't been touched for a day.
 */
#define COUNT_FOR_FULL_EXPIRATION   30
static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;


/*
 *     for IPVS lblcr entry hash table
 */
#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
#endif
#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
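
/*
 * With the default of 10 bits this gives 1024 buckets (mask 0x3ff);
 * ip_vs_lblcr_init_svc() below sets the table's soft limit (max_size)
 * to 16 * IP_VS_LBLCR_TAB_SIZE entries, i.e. 16 per bucket on average.
 */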


/*
 *      IPVS destination set structure and operations
 */
struct ip_vs_dest_list {
        struct ip_vs_dest_list  *next;          /* list link */
        struct ip_vs_dest       *dest;          /* destination server */
};

struct ip_vs_dest_set {
        atomic_t                size;           /* set size */
        unsigned long           lastmod;        /* last modified time */
        struct ip_vs_dest_list  *list;          /* destination list */
        rwlock_t                lock;           /* lock for this list */
};
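
/*
 * A dest set is a small unsorted singly-linked list protected by its
 * own rwlock.  Insert and erase scan the whole list; that is acceptable
 * because a set normally holds only a handful of replicated servers.
 */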


static struct ip_vs_dest_list *
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_list *e;

        for (e=set->list; e!=NULL; e=e->next) {
                if (e->dest == dest)
                        /* already exists */
                        return NULL;
        }

        e = kmalloc(sizeof(*e), GFP_ATOMIC);
        if (e == NULL) {
                pr_err("%s(): no memory\n", __func__);
                return NULL;
        }

        atomic_inc(&dest->refcnt);
        e->dest = dest;

        /* link it to the list */
        e->next = set->list;
        set->list = e;
        atomic_inc(&set->size);

        set->lastmod = jiffies;
        return e;
}

static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_list *e, **ep;

        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
                if (e->dest == dest) {
                        /* HIT */
                        *ep = e->next;
                        atomic_dec(&set->size);
                        set->lastmod = jiffies;
                        atomic_dec(&e->dest->refcnt);
                        kfree(e);
                        break;
                }
                ep = &e->next;
        }
}

static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
        struct ip_vs_dest_list *e, **ep;

        write_lock(&set->lock);
        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
                *ep = e->next;
                /*
                 * We don't kfree dest because it is referred to either
                 * by its service or by the trash dest list.
                 */
                atomic_dec(&e->dest->refcnt);
                kfree(e);
        }
        write_unlock(&set->lock);
}

/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_list *e;
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server, whose weight > 0 */
        for (e=set->list; e!=NULL; e=e->next) {
                least = e->dest;
                if (least->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if ((atomic_read(&least->weight) > 0)
                    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted least load */
  nextstage:
        for (e=e->next; e!=NULL; e=e->next) {
                dest = e->dest;
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
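                /* loh/lw > doh/dw ==> loh*dw > doh*lw, where lw,dw>0 */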
                if ((loh * atomic_read(&dest->weight) >
                     doh * atomic_read(&least->weight))
                    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "%s(): server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      __func__,
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
                      atomic_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
        return least;
}


/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_list *e;
        struct ip_vs_dest *dest, *most;
        int moh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server, whose weight > 0 */
        for (e=set->list; e!=NULL; e=e->next) {
                most = e->dest;
                if (atomic_read(&most->weight) > 0) {
                        moh = atomic_read(&most->activeconns) * 50
                                + atomic_read(&most->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted most load */
  nextstage:
        for (e=e->next; e!=NULL; e=e->next) {
                dest = e->dest;
                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
                if ((moh * atomic_read(&dest->weight) <
                     doh * atomic_read(&most->weight))
                    && (atomic_read(&dest->weight) > 0)) {
                        most = dest;
                        moh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "%s(): server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      __func__,
                      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
                      atomic_read(&most->activeconns),
                      atomic_read(&most->refcnt),
                      atomic_read(&most->weight), moh);
        return most;
}


/*
 *      IPVS lblcr entry represents an association between destination
 *      IP address and its destination server set
 */
struct ip_vs_lblcr_entry {
        struct list_head        list;
        int                     af;             /* address family */
        union nf_inet_addr      addr;           /* destination IP address */
        struct ip_vs_dest_set   set;            /* destination server set */
        unsigned long           lastuse;        /* last used time */
};


/*
 *      IPVS lblcr hash table
 */
struct ip_vs_lblcr_table {
        struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
        atomic_t                entries;        /* number of entries */
        int                     max_size;       /* maximum size of entries */
        struct timer_list       periodic_timer; /* collect stale entries */
        int                     rover;          /* rover for expire check */
        int                     counter;        /* counter for no expire */
};


/*
 *      IPVS LBLCR sysctl table
 */

static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblcr_expiration",
                .data           = &sysctl_ip_vs_lblcr_expiration,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        { .ctl_name = 0 }
};
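
/*
 * The knob above appears as /proc/sys/net/ipv4/vs/lblcr_expiration;
 * proc_dointvec_jiffies lets user space read and write it in seconds
 * while sysctl_ip_vs_lblcr_expiration itself is kept in jiffies.
 */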

static struct ctl_table_header * sysctl_header;

static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
        list_del(&en->list);
        ip_vs_dest_set_eraseall(&en->set);
        kfree(en);
}


/*
 *      Returns hash value for IPVS LBLCR entry
 */
static inline unsigned
ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
{
        __be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
#endif
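        /* multiplicative hash: 2654435761 is a prime near 2^32/phi */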
        return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
}


/*
 *      Hash an entry in the ip_vs_lblcr_table.
 */
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
        unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr);

        list_add(&en->list, &tbl->bucket[hash]);
        atomic_inc(&tbl->entries);
}


/*
 *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
 *  read lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
                const union nf_inet_addr *addr)
{
        unsigned hash = ip_vs_lblcr_hashkey(af, addr);
        struct ip_vs_lblcr_entry *en;

        list_for_each_entry(en, &tbl->bucket[hash], list)
                if (ip_vs_addr_equal(af, &en->addr, addr))
                        return en;

        return NULL;
}


/*
 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
 * IP address to a set of servers. Called under write lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
                struct ip_vs_dest *dest)
{
        struct ip_vs_lblcr_entry *en;

        en = ip_vs_lblcr_get(dest->af, tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
                if (!en) {
                        pr_err("%s(): no memory\n", __func__);
                        return NULL;
                }

                en->af = dest->af;
                ip_vs_addr_copy(dest->af, &en->addr, daddr);
                en->lastuse = jiffies;

                /* initialize its dest set */
                atomic_set(&(en->set.size), 0);
                en->set.list = NULL;
                rwlock_init(&en->set.lock);

                ip_vs_lblcr_hash(tbl, en);
        }

        write_lock(&en->set.lock);
        ip_vs_dest_set_insert(&en->set, dest);
        write_unlock(&en->set.lock);

        return en;
}


/*
 *      Flush all the entries of the specified table.
 */
static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
{
        int i;
        struct ip_vs_lblcr_entry *en, *nxt;

        /* No locking required, only called during cleanup. */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
                        ip_vs_lblcr_free(en);
                }
        }
}


static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
                                       now))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                }
                write_unlock(&svc->sched_lock);
        }
        tbl->rover = j;
}


/*
 *      Periodic timer handler for IPVS lblcr table.
 *      It is used to collect stale entries when the number of entries
 *      exceeds the maximum size of the table.
 *
 *      Fixme: we probably need a more complicated algorithm to collect
 *             entries that have not been used for a long time even
 *             if the number of entries doesn't exceed the maximum size
 *             of the table.
 *      The full expiration check is for this purpose now.
 */
static void ip_vs_lblcr_check_expire(unsigned long data)
{
        struct ip_vs_service *svc = (struct ip_vs_service *) data;
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int goal;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;

        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
                /* do full expiration check */
                ip_vs_lblcr_full_check(svc);
                tbl->counter = 1;
                goto out;
        }

        if (atomic_read(&tbl->entries) <= tbl->max_size) {
                tbl->counter++;
                goto out;
        }

        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
        if (goal > tbl->max_size/2)
                goal = tbl->max_size/2;

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                        goal--;
                }
                write_unlock(&svc->sched_lock);
                if (goal <= 0)
                        break;
        }
        tbl->rover = j;

  out:
        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}

static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
        int i;
        struct ip_vs_lblcr_table *tbl;

        /*
         *    Allocate the ip_vs_lblcr_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
        if (tbl == NULL) {
                pr_err("%s(): no memory\n", __func__);
                return -ENOMEM;
        }
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));

        /*
         *    Initialize the hash buckets
         */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                INIT_LIST_HEAD(&tbl->bucket[i]);
        }
        tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
        tbl->rover = 0;
        tbl->counter = 1;

        /*
         *    Hook periodic timer for garbage collection
         */
        setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
                        (unsigned long)svc);
        mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

        return 0;
}


static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;

        /* remove periodic timer */
        del_timer_sync(&tbl->periodic_timer);

        /* got to clean up table entries here */
        ip_vs_lblcr_flush(tbl);

        /* release the table itself */
        kfree(tbl);
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
                  sizeof(*tbl));

        return 0;
}


static inline struct ip_vs_dest *
__ip_vs_lblcr_schedule(struct ip_vs_service *svc)
{
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        /*
         * We think the overhead of processing active connections is fifty
         * times higher than that of inactive connections on average. (This
         * fifty times might not be accurate, we will change it later.) We
         * use the following formula to estimate the overhead:
         *                dest->activeconns*50 + dest->inactconns
         * and the load:
         *                (dest overhead) / dest->weight
         *
         * Remember -- no floats in kernel mode!!!
         * The comparison of h1*w2 > h2*w1 is equivalent to that of
         *                h1/w1 > h2/w2
         * if every weight is larger than zero.
         *
         * A server with weight=0 is quiesced and will not receive any
         * new connection.
         */
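        /*
         * Illustrative numbers: a server A with 3 active and 10 inactive
         * connections has overhead 3*50+10 = 160; a server B with 1
         * active and 5 inactive has overhead 55.  With weights wA=2 and
         * wB=1 we compare 160*1 > 55*2, i.e. 160 > 110, so B carries
         * less load per unit of weight and is preferred.
         */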
        list_for_each_entry(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if (atomic_read(&dest->weight) > 0) {
                        least = dest;
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /*
         *    Find the destination with the least load.
         */
  nextstage:
        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
                      atomic_read(&least->refcnt),
                      atomic_read(&least->weight), loh);

        return least;
}


/*
 *   If this destination server is overloaded and there is a less loaded
 *   server, then return true.
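 *
 *   This implements the "n.conns > n.weight AND there is a node m with
 *   m.conns < m.weight/2" test from the pseudo code at the top of this
 *   file, taking conns to be the number of active connections.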
 */
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
        if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
                struct ip_vs_dest *d;

                list_for_each_entry(d, &svc->destinations, n_list) {
                        if (atomic_read(&d->activeconns)*2
                            < atomic_read(&d->weight)) {
                                return 1;
                        }
                }
        }
        return 0;
}


/*
 *    Locality-Based (weighted) Least-Connection with Replication scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest = NULL;
        struct ip_vs_lblcr_entry *en;

        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

        /* First look in our cache */
        read_lock(&svc->sched_lock);
        en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
        if (en) {
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;

                /* Get the least loaded destination */
                read_lock(&en->set.lock);
                dest = ip_vs_dest_set_min(&en->set);
                read_unlock(&en->set.lock);

                /* More than one destination + enough time passed by, cleanup */
                if (atomic_read(&en->set.size) > 1 &&
                                time_after(jiffies, en->set.lastmod +
                                sysctl_ip_vs_lblcr_expiration)) {
                        struct ip_vs_dest *m;

                        write_lock(&en->set.lock);
                        m = ip_vs_dest_set_max(&en->set);
                        if (m)
                                ip_vs_dest_set_erase(&en->set, m);
                        write_unlock(&en->set.lock);
                }

                /* If the destination is not overloaded, use it */
                if (dest && !is_overloaded(dest, svc)) {
                        read_unlock(&svc->sched_lock);
                        goto out;
                }

                /* The cache entry is invalid, time to schedule */
                dest = __ip_vs_lblcr_schedule(svc);
                if (!dest) {
                        IP_VS_ERR_RL("LBLCR: no destination available\n");
                        read_unlock(&svc->sched_lock);
                        return NULL;
                }

                /* Update our cache entry */
                write_lock(&en->set.lock);
                ip_vs_dest_set_insert(&en->set, dest);
                write_unlock(&en->set.lock);
        }
        read_unlock(&svc->sched_lock);

        if (dest)
                goto out;

        /* No cache entry, time to schedule */
        dest = __ip_vs_lblcr_schedule(svc);
        if (!dest) {
                IP_VS_DBG(1, "no destination available\n");
                return NULL;
        }

        /* If we fail to create a cache entry, we'll just use the valid dest */
        write_lock(&svc->sched_lock);
        ip_vs_lblcr_new(tbl, &iph.daddr, dest);
        write_unlock(&svc->sched_lock);

out:
        IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
                      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));

        return dest;
}


/*
 *      IPVS LBLCR Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
{
        .name =                 "lblcr",
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
        .init_service =         ip_vs_lblcr_init_svc,
        .done_service =         ip_vs_lblcr_done_svc,
        .schedule =             ip_vs_lblcr_schedule,
};


static int __init ip_vs_lblcr_init(void)
{
        int ret;

        sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
        ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        if (ret)
                unregister_sysctl_table(sysctl_header);
        return ret;
}


static void __exit ip_vs_lblcr_cleanup(void)
{
        unregister_sysctl_table(sysctl_header);
        unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
}


module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");

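/*
 * Example use from user space (illustrative addresses; assumes the
 * ipvsadm utility is installed):
 *
 *      modprobe ip_vs_lblcr
 *      ipvsadm -A -t 192.168.0.1:80 -s lblcr
 *      ipvsadm -a -t 192.168.0.1:80 -r 192.168.0.10:80 -m
 */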