linux/net/netfilter/ipvs/ip_vs_nq.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * IPVS:        Never Queue scheduling module
   4 *
   5 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   6 *
   7 * Changes:
   8 */
   9
  10/*
  11 * The NQ algorithm adopts a two-speed model. When there is an idle server
  12 * available, the job will be sent to the idle server, instead of waiting
 * for a fast one. When there is no idle server available, the job will be
 * sent to the server that minimizes its expected delay (the Shortest
 * Expected Delay scheduling algorithm).
  16 *
  17 * See the following paper for more information:
  18 * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
  19 * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
  20 * pages 986-994, 1988.
  21 *
  22 * Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ to me.
  23 *
  24 * The difference between NQ and SED is that NQ can improve overall
  25 * system utilization.
  26 *
  27 */
  28
  29#define KMSG_COMPONENT "IPVS"
  30#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  31
  32#include <linux/module.h>
  33#include <linux/kernel.h>
  34
  35#include <net/ip_vs.h>
  36
  37
  38static inline int
  39ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
  40{
  41        /*
  42         * We only use the active connection number in the cost
  43         * calculation here.
  44         */
  45        return atomic_read(&dest->activeconns) + 1;
  46}
  47
  48
  49/*
  50 *      Weighted Least Connection scheduling
  51 */
  52static struct ip_vs_dest *
  53ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
  54                  struct ip_vs_iphdr *iph)
  55{
  56        struct ip_vs_dest *dest, *least = NULL;
  57        int loh = 0, doh;
  58
  59        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
  60
  61        /*
  62         * We calculate the load of each dest server as follows:
  63         *      (server expected overhead) / dest->weight
  64         *
  65         * Remember -- no floats in kernel mode!!!
  66         * The comparison of h1*w2 > h2*w1 is equivalent to that of
  67         *                h1/w1 > h2/w2
  68         * if every weight is larger than zero.
  69         *
  70         * The server with weight=0 is quiesced and will not receive any
  71         * new connections.
  72         */
  73
  74        list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
  75
  76                if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
  77                    !atomic_read(&dest->weight))
  78                        continue;
  79
  80                doh = ip_vs_nq_dest_overhead(dest);
  81
  82                /* return the server directly if it is idle */
  83                if (atomic_read(&dest->activeconns) == 0) {
  84                        least = dest;
  85                        loh = doh;
  86                        goto out;
  87                }
  88
  89                if (!least ||
  90                    ((__s64)loh * atomic_read(&dest->weight) >
  91                     (__s64)doh * atomic_read(&least->weight))) {
  92                        least = dest;
  93                        loh = doh;
  94                }
  95        }
  96
  97        if (!least) {
  98                ip_vs_scheduler_err(svc, "no destination available");
  99                return NULL;
 100        }
 101
 102  out:
 103        IP_VS_DBG_BUF(6, "NQ: server %s:%u "
 104                      "activeconns %d refcnt %d weight %d overhead %d\n",
 105                      IP_VS_DBG_ADDR(least->af, &least->addr),
 106                      ntohs(least->port),
 107                      atomic_read(&least->activeconns),
 108                      refcount_read(&least->refcnt),
 109                      atomic_read(&least->weight), loh);
 110
 111        return least;
 112}
 113
 114
 115static struct ip_vs_scheduler ip_vs_nq_scheduler =
 116{
 117        .name =                 "nq",
 118        .refcnt =               ATOMIC_INIT(0),
 119        .module =               THIS_MODULE,
 120        .n_list =               LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
 121        .schedule =             ip_vs_nq_schedule,
 122};
 123
 124
 125static int __init ip_vs_nq_init(void)
 126{
 127        return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
 128}
 129
 130static void __exit ip_vs_nq_cleanup(void)
 131{
 132        unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
 133        synchronize_rcu();
 134}
 135
 136module_init(ip_vs_nq_init);
 137module_exit(ip_vs_nq_cleanup);
 138MODULE_LICENSE("GPL");
 139