linux/net/netfilter/ipvs/ip_vs_sed.c
<<
>>
Prefs
   1/*
   2 * IPVS:        Shortest Expected Delay scheduling module
   3 *
   4 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Changes:
  12 *
  13 */
  14
  15/*
  16 * The SED algorithm attempts to minimize each job's expected delay until
  17 * completion. The expected delay that the job will experience is
  18 * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
  19 * jobs on the ith server and Ui is the fixed service rate (weight) of
  20 * the ith server. The SED algorithm adopts a greedy policy that each does
  21 * what is in its own best interest, i.e. to join the queue which would
  22 * minimize its expected delay of completion.
  23 *
  24 * See the following paper for more information:
  25 * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
  26 * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
  27 * pages 986-994, 1988.
  28 *
  29 * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
  30 *
  31 * The difference between SED and WLC is that SED includes the incoming
  32 * job in the cost function (the increment of 1). SED may outperform
  33 * WLC, while scheduling big jobs under larger heterogeneous systems
  34 * (the server weight varies a lot).
  35 *
  36 */
  37
  38#define KMSG_COMPONENT "IPVS"
  39#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  40
  41#include <linux/module.h>
  42#include <linux/kernel.h>
  43
  44#include <net/ip_vs.h>
  45
  46
  47static inline unsigned int
  48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
  49{
  50        /*
  51         * We only use the active connection number in the cost
  52         * calculation here.
  53         */
  54        return atomic_read(&dest->activeconns) + 1;
  55}
  56
  57
  58/*
  59 *      Weighted Least Connection scheduling
  60 */
  61static struct ip_vs_dest *
  62ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
  63{
  64        struct ip_vs_dest *dest, *least;
  65        unsigned int loh, doh;
  66
  67        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
  68
  69        /*
  70         * We calculate the load of each dest server as follows:
  71         *      (server expected overhead) / dest->weight
  72         *
  73         * Remember -- no floats in kernel mode!!!
  74         * The comparison of h1*w2 > h2*w1 is equivalent to that of
  75         *                h1/w1 > h2/w2
  76         * if every weight is larger than zero.
  77         *
  78         * The server with weight=0 is quiesced and will not receive any
  79         * new connections.
  80         */
  81
  82        list_for_each_entry(dest, &svc->destinations, n_list) {
  83                if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
  84                    atomic_read(&dest->weight) > 0) {
  85                        least = dest;
  86                        loh = ip_vs_sed_dest_overhead(least);
  87                        goto nextstage;
  88                }
  89        }
  90        ip_vs_scheduler_err(svc, "no destination available");
  91        return NULL;
  92
  93        /*
  94         *    Find the destination with the least load.
  95         */
  96  nextstage:
  97        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
  98                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
  99                        continue;
 100                doh = ip_vs_sed_dest_overhead(dest);
 101                if (loh * atomic_read(&dest->weight) >
 102                    doh * atomic_read(&least->weight)) {
 103                        least = dest;
 104                        loh = doh;
 105                }
 106        }
 107
 108        IP_VS_DBG_BUF(6, "SED: server %s:%u "
 109                      "activeconns %d refcnt %d weight %d overhead %d\n",
 110                      IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
 111                      atomic_read(&least->activeconns),
 112                      atomic_read(&least->refcnt),
 113                      atomic_read(&least->weight), loh);
 114
 115        return least;
 116}
 117
 118
 119static struct ip_vs_scheduler ip_vs_sed_scheduler =
 120{
 121        .name =                 "sed",
 122        .refcnt =               ATOMIC_INIT(0),
 123        .module =               THIS_MODULE,
 124        .n_list =               LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
 125        .schedule =             ip_vs_sed_schedule,
 126};
 127
 128
 129static int __init ip_vs_sed_init(void)
 130{
 131        return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
 132}
 133
 134static void __exit ip_vs_sed_cleanup(void)
 135{
 136        unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
 137}
 138
 139module_init(ip_vs_sed_init);
 140module_exit(ip_vs_sed_cleanup);
 141MODULE_LICENSE("GPL");
 142