linux/net/netfilter/ipvs/ip_vs_dh.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IPVS:        Destination Hashing scheduling module
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              Inspired by the consistent hashing scheduler patch from
 *              Thomas Proell <proellt@gmx.de>
 *
 * Changes:
 */
  12
/*
 * The dh algorithm selects a server by the hash key of the destination IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[dest_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a 256-bucket hash table that maps the hash
 * index derived from the packet destination IP address to the current
 * server array. If the dh scheduler is used in a cache cluster, it is good
 * to combine it with the cache_bypass feature. When the statically
 * assigned server is dead or overloaded, the load balancer can bypass the
 * cache server and send requests to the original server directly.
 *
 */
  32
  33#define KMSG_COMPONENT "IPVS"
  34#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  35
  36#include <linux/ip.h>
  37#include <linux/slab.h>
  38#include <linux/module.h>
  39#include <linux/kernel.h>
  40#include <linux/skbuff.h>
  41#include <linux/hash.h>
  42
  43#include <net/ip_vs.h>
  44
  45
  46/*
  47 *      IPVS DH bucket
  48 */
  49struct ip_vs_dh_bucket {
  50        struct ip_vs_dest __rcu *dest;  /* real server (cache) */
  51};
  52
  53/*
  54 *     for IPVS DH entry hash table
  55 */
  56#ifndef CONFIG_IP_VS_DH_TAB_BITS
  57#define CONFIG_IP_VS_DH_TAB_BITS        8
  58#endif
  59#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
  60#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
  61#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
  62
  63struct ip_vs_dh_state {
  64        struct ip_vs_dh_bucket          buckets[IP_VS_DH_TAB_SIZE];
  65        struct rcu_head                 rcu_head;
  66};
  67
  68/*
  69 *      Returns hash value for IPVS DH entry
  70 */
  71static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
  72{
  73        __be32 addr_fold = addr->ip;
  74
  75#ifdef CONFIG_IP_VS_IPV6
  76        if (af == AF_INET6)
  77                addr_fold = addr->ip6[0]^addr->ip6[1]^
  78                            addr->ip6[2]^addr->ip6[3];
  79#endif
  80        return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
  81}
  82
  83
  84/*
  85 *      Get ip_vs_dest associated with supplied parameters.
  86 */
  87static inline struct ip_vs_dest *
  88ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
  89{
  90        return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
  91}
  92
  93
  94/*
  95 *      Assign all the hash buckets of the specified table with the service.
  96 */
  97static int
  98ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
  99{
 100        int i;
 101        struct ip_vs_dh_bucket *b;
 102        struct list_head *p;
 103        struct ip_vs_dest *dest;
 104        bool empty;
 105
 106        b = &s->buckets[0];
 107        p = &svc->destinations;
 108        empty = list_empty(p);
 109        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 110                dest = rcu_dereference_protected(b->dest, 1);
 111                if (dest)
 112                        ip_vs_dest_put(dest);
 113                if (empty)
 114                        RCU_INIT_POINTER(b->dest, NULL);
 115                else {
 116                        if (p == &svc->destinations)
 117                                p = p->next;
 118
 119                        dest = list_entry(p, struct ip_vs_dest, n_list);
 120                        ip_vs_dest_hold(dest);
 121                        RCU_INIT_POINTER(b->dest, dest);
 122
 123                        p = p->next;
 124                }
 125                b++;
 126        }
 127        return 0;
 128}
 129
 130
 131/*
 132 *      Flush all the hash buckets of the specified table.
 133 */
 134static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
 135{
 136        int i;
 137        struct ip_vs_dh_bucket *b;
 138        struct ip_vs_dest *dest;
 139
 140        b = &s->buckets[0];
 141        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 142                dest = rcu_dereference_protected(b->dest, 1);
 143                if (dest) {
 144                        ip_vs_dest_put(dest);
 145                        RCU_INIT_POINTER(b->dest, NULL);
 146                }
 147                b++;
 148        }
 149}
 150
 151
 152static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
 153{
 154        struct ip_vs_dh_state *s;
 155
 156        /* allocate the DH table for this service */
 157        s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
 158        if (s == NULL)
 159                return -ENOMEM;
 160
 161        svc->sched_data = s;
 162        IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
 163                  "current service\n",
 164                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 165
 166        /* assign the hash buckets with current dests */
 167        ip_vs_dh_reassign(s, svc);
 168
 169        return 0;
 170}
 171
 172
 173static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
 174{
 175        struct ip_vs_dh_state *s = svc->sched_data;
 176
 177        /* got to clean up hash buckets here */
 178        ip_vs_dh_flush(s);
 179
 180        /* release the table itself */
 181        kfree_rcu(s, rcu_head);
 182        IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
 183                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 184}
 185
 186
 187static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
 188                                 struct ip_vs_dest *dest)
 189{
 190        struct ip_vs_dh_state *s = svc->sched_data;
 191
 192        /* assign the hash buckets with the updated service */
 193        ip_vs_dh_reassign(s, svc);
 194
 195        return 0;
 196}
 197
 198
 199/*
 200 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 201 *      consider that the server is overloaded here.
 202 */
 203static inline int is_overloaded(struct ip_vs_dest *dest)
 204{
 205        return dest->flags & IP_VS_DEST_F_OVERLOAD;
 206}
 207
 208
 209/*
 210 *      Destination hashing scheduling
 211 */
 212static struct ip_vs_dest *
 213ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 214                  struct ip_vs_iphdr *iph)
 215{
 216        struct ip_vs_dest *dest;
 217        struct ip_vs_dh_state *s;
 218
 219        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 220
 221        s = (struct ip_vs_dh_state *) svc->sched_data;
 222        dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
 223        if (!dest
 224            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 225            || atomic_read(&dest->weight) <= 0
 226            || is_overloaded(dest)) {
 227                ip_vs_scheduler_err(svc, "no destination available");
 228                return NULL;
 229        }
 230
 231        IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
 232                      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 233                      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 234                      ntohs(dest->port));
 235
 236        return dest;
 237}
 238
 239
 240/*
 241 *      IPVS DH Scheduler structure
 242 */
 243static struct ip_vs_scheduler ip_vs_dh_scheduler =
 244{
 245        .name =                 "dh",
 246        .refcnt =               ATOMIC_INIT(0),
 247        .module =               THIS_MODULE,
 248        .n_list =               LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
 249        .init_service =         ip_vs_dh_init_svc,
 250        .done_service =         ip_vs_dh_done_svc,
 251        .add_dest =             ip_vs_dh_dest_changed,
 252        .del_dest =             ip_vs_dh_dest_changed,
 253        .schedule =             ip_vs_dh_schedule,
 254};
 255
 256
 257static int __init ip_vs_dh_init(void)
 258{
 259        return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
 260}
 261
 262
 263static void __exit ip_vs_dh_cleanup(void)
 264{
 265        unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
 266        synchronize_rcu();
 267}
 268
 269
 270module_init(ip_vs_dh_init);
 271module_exit(ip_vs_dh_cleanup);
 272MODULE_LICENSE("GPL");
 273