LXR linux/net/netfilter/ipvs/ip_vs

   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * IPVS:        Shortest Expected Delay scheduling module
   4 *
   5 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   6 *
   7 * Changes:
   8 */
   9
  10/*
  11 * The SED algorithm attempts to minimize each job's expected delay until
  12 * completion. The expected delay that the job will experience is
  13 * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
  14 * jobs on the ith server and Ui is the fixed service rate (weight) of
  15 * the ith server. The SED algorithm adopts a greedy policy that each does
  16 * what is in its own best interest, i.e. to join the queue which would
  17 * minimize its expected delay of completion.
  18 *
  19 * See the following paper for more information:
  20 * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
  21 * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
  22 * pages 986-994, 1988.
  23 *
  24 * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
  25 *
  26 * The difference between SED and WLC is that SED includes the incoming
  27 * job in the cost function (the increment of 1). SED may outperform
  28 * WLC, while scheduling big jobs under larger heterogeneous systems
  29 * (the server weight varies a lot).
  30 *
  31 */
  32
  33#define KMSG_COMPONENT "IPVS"
  34#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  35
  36#include <linux/module.h>
  37#include <linux/kernel.h>
  38
  39#include <net/ip_vs.h>
  40
  41
  42static inline int
  43ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
  44{
  45        /*
  46         * We only use the active connection number in the cost
  47         * calculation here.
  48         */
  49        return atomic_read(&dest->activeconns) + 1;
  50}
  51
  52
  53/*
  54 *      Weighted Least Connection scheduling
  55 */
  56static struct ip_vs_dest *
  57ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  58                   struct ip_vs_iphdr *iph)
  59{
  60        struct ip_vs_dest *dest, *least;
  61        int loh, doh;
  62
  63        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
  64
  65        /*
  66         * We calculate the load of each dest server as follows:
  67         *      (server expected overhead) / dest->weight
  68         *
  69         * Remember -- no floats in kernel mode!!!
  70         * The comparison of h1*w2 > h2*w1 is equivalent to that of
  71         *                h1/w1 > h2/w2
  72         * if every weight is larger than zero.
  73         *
  74         * The server with weight=0 is quiesced and will not receive any
  75         * new connections.
  76         */
  77
  78        list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
  79                if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
  80                    atomic_read(&dest->weight) > 0) {
  81                        least = dest;
  82                        loh = ip_vs_sed_dest_overhead(least);
  83                        goto nextstage;
  84                }
  85        }
  86        ip_vs_scheduler_err(svc, "no destination available");
  87        return NULL;
  88
  89        /*
  90         *    Find the destination with the least load.
  91         */
  92  nextstage:
  93        list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
  94                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
  95                        continue;
  96                doh = ip_vs_sed_dest_overhead(dest);
  97                if ((__s64)loh * atomic_read(&dest->weight) >
  98                    (__s64)doh * atomic_read(&least->weight)) {
  99                        least = dest;
 100                        loh = doh;
 101                }
 102        }
 103
 104        IP_VS_DBG_BUF(6, "SED: server %s:%u "
 105                      "activeconns %d refcnt %d weight %d overhead %d\n",
 106                      IP_VS_DBG_ADDR(least->af, &least->addr),
 107                      ntohs(least->port),
 108                      atomic_read(&least->activeconns),
 109                      refcount_read(&least->refcnt),
 110                      atomic_read(&least->weight), loh);
 111
 112        return least;
 113}
 114
 115
 116static struct ip_vs_scheduler ip_vs_sed_scheduler =
 117{
 118        .name =                 "sed",
 119        .refcnt =               ATOMIC_INIT(0),
 120        .module =               THIS_MODULE,
 121        .n_list =               LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
 122        .schedule =             ip_vs_sed_schedule,
 123};
 124
 125
 126static int __init ip_vs_sed_init(void)
 127{
 128        return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
 129}
 130
 131static void __exit ip_vs_sed_cleanup(void)
 132{
 133        unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
 134        synchronize_rcu();
 135}
 136
 137module_init(ip_vs_sed_init);
 138module_exit(ip_vs_sed_cleanup);
 139MODULE_LICENSE("GPL");
 140