linux/net/netfilter/ipvs/ip_vs_dh.c
<<
>>
Prefs
   1/*
   2 * IPVS:        Destination Hashing scheduling module
   3 *
   4 * Authors:     Wensong Zhang <wensong@gnuchina.org>
   5 *
   6 *              Inspired by the consistent hashing scheduler patch from
   7 *              Thomas Proell <proellt@gmx.de>
   8 *
   9 *              This program is free software; you can redistribute it and/or
  10 *              modify it under the terms of the GNU General Public License
  11 *              as published by the Free Software Foundation; either version
  12 *              2 of the License, or (at your option) any later version.
  13 *
  14 * Changes:
  15 *
  16 */
  17
  18/*
  19 * The dh algorithm is to select server by the hash key of destination IP
  20 * address. The pseudo code is as follows:
  21 *
  22 *       n <- servernode[dest_ip];
  23 *       if (n is dead) OR
  24 *          (n is overloaded) OR (n.weight <= 0) then
  25 *                 return NULL;
  26 *
  27 *       return n;
  28 *
  29 * Note that servernode is a 256-bucket hash table that maps the hash
  30 * index derived from packet destination IP address to the current server
  31 * array. If the dh scheduler is used in cache cluster, it is good to
  32 * combine it with cache_bypass feature. When the statically assigned
  33 * server is dead or overloaded, the load balancer can bypass the cache
  34 * server and send requests to the original server directly.
  35 *
  36 */
  37
  38#define KMSG_COMPONENT "IPVS"
  39#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  40
  41#include <linux/ip.h>
  42#include <linux/slab.h>
  43#include <linux/module.h>
  44#include <linux/kernel.h>
  45#include <linux/skbuff.h>
  46#include <linux/hash.h>
  47
  48#include <net/ip_vs.h>
  49
  50
  51/*
  52 *      IPVS DH bucket
  53 */
  54struct ip_vs_dh_bucket {
  55        struct ip_vs_dest __rcu *dest;  /* real server (cache) */
  56};
  57
  58/*
  59 *     for IPVS DH entry hash table
  60 */
  61#ifndef CONFIG_IP_VS_DH_TAB_BITS
  62#define CONFIG_IP_VS_DH_TAB_BITS        8
  63#endif
  64#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
  65#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
  66#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
  67
  68struct ip_vs_dh_state {
  69        struct ip_vs_dh_bucket          buckets[IP_VS_DH_TAB_SIZE];
  70        struct rcu_head                 rcu_head;
  71};
  72
  73/*
  74 *      Returns hash value for IPVS DH entry
  75 */
  76static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
  77{
  78        __be32 addr_fold = addr->ip;
  79
  80#ifdef CONFIG_IP_VS_IPV6
  81        if (af == AF_INET6)
  82                addr_fold = addr->ip6[0]^addr->ip6[1]^
  83                            addr->ip6[2]^addr->ip6[3];
  84#endif
  85        return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
  86}
  87
  88
  89/*
  90 *      Get ip_vs_dest associated with supplied parameters.
  91 */
  92static inline struct ip_vs_dest *
  93ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
  94{
  95        return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
  96}
  97
  98
  99/*
 100 *      Assign all the hash buckets of the specified table with the service.
 101 */
 102static int
 103ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
 104{
 105        int i;
 106        struct ip_vs_dh_bucket *b;
 107        struct list_head *p;
 108        struct ip_vs_dest *dest;
 109        bool empty;
 110
 111        b = &s->buckets[0];
 112        p = &svc->destinations;
 113        empty = list_empty(p);
 114        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 115                dest = rcu_dereference_protected(b->dest, 1);
 116                if (dest)
 117                        ip_vs_dest_put(dest);
 118                if (empty)
 119                        RCU_INIT_POINTER(b->dest, NULL);
 120                else {
 121                        if (p == &svc->destinations)
 122                                p = p->next;
 123
 124                        dest = list_entry(p, struct ip_vs_dest, n_list);
 125                        ip_vs_dest_hold(dest);
 126                        RCU_INIT_POINTER(b->dest, dest);
 127
 128                        p = p->next;
 129                }
 130                b++;
 131        }
 132        return 0;
 133}
 134
 135
 136/*
 137 *      Flush all the hash buckets of the specified table.
 138 */
 139static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
 140{
 141        int i;
 142        struct ip_vs_dh_bucket *b;
 143        struct ip_vs_dest *dest;
 144
 145        b = &s->buckets[0];
 146        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 147                dest = rcu_dereference_protected(b->dest, 1);
 148                if (dest) {
 149                        ip_vs_dest_put(dest);
 150                        RCU_INIT_POINTER(b->dest, NULL);
 151                }
 152                b++;
 153        }
 154}
 155
 156
 157static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
 158{
 159        struct ip_vs_dh_state *s;
 160
 161        /* allocate the DH table for this service */
 162        s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
 163        if (s == NULL)
 164                return -ENOMEM;
 165
 166        svc->sched_data = s;
 167        IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
 168                  "current service\n",
 169                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 170
 171        /* assign the hash buckets with current dests */
 172        ip_vs_dh_reassign(s, svc);
 173
 174        return 0;
 175}
 176
 177
 178static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
 179{
 180        struct ip_vs_dh_state *s = svc->sched_data;
 181
 182        /* got to clean up hash buckets here */
 183        ip_vs_dh_flush(s);
 184
 185        /* release the table itself */
 186        kfree_rcu(s, rcu_head);
 187        IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
 188                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 189}
 190
 191
 192static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
 193                                 struct ip_vs_dest *dest)
 194{
 195        struct ip_vs_dh_state *s = svc->sched_data;
 196
 197        /* assign the hash buckets with the updated service */
 198        ip_vs_dh_reassign(s, svc);
 199
 200        return 0;
 201}
 202
 203
 204/*
 205 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 206 *      consider that the server is overloaded here.
 207 */
 208static inline int is_overloaded(struct ip_vs_dest *dest)
 209{
 210        return dest->flags & IP_VS_DEST_F_OVERLOAD;
 211}
 212
 213
 214/*
 215 *      Destination hashing scheduling
 216 */
 217static struct ip_vs_dest *
 218ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 219                  struct ip_vs_iphdr *iph)
 220{
 221        struct ip_vs_dest *dest;
 222        struct ip_vs_dh_state *s;
 223
 224        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 225
 226        s = (struct ip_vs_dh_state *) svc->sched_data;
 227        dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
 228        if (!dest
 229            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 230            || atomic_read(&dest->weight) <= 0
 231            || is_overloaded(dest)) {
 232                ip_vs_scheduler_err(svc, "no destination available");
 233                return NULL;
 234        }
 235
 236        IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
 237                      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 238                      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 239                      ntohs(dest->port));
 240
 241        return dest;
 242}
 243
 244
 245/*
 246 *      IPVS DH Scheduler structure
 247 */
 248static struct ip_vs_scheduler ip_vs_dh_scheduler =
 249{
 250        .name =                 "dh",
 251        .refcnt =               ATOMIC_INIT(0),
 252        .module =               THIS_MODULE,
 253        .n_list =               LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
 254        .init_service =         ip_vs_dh_init_svc,
 255        .done_service =         ip_vs_dh_done_svc,
 256        .add_dest =             ip_vs_dh_dest_changed,
 257        .del_dest =             ip_vs_dh_dest_changed,
 258        .schedule =             ip_vs_dh_schedule,
 259};
 260
 261
 262static int __init ip_vs_dh_init(void)
 263{
 264        return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
 265}
 266
 267
 268static void __exit ip_vs_dh_cleanup(void)
 269{
 270        unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
 271        synchronize_rcu();
 272}
 273
 274
 275module_init(ip_vs_dh_init);
 276module_exit(ip_vs_dh_cleanup);
 277MODULE_LICENSE("GPL");
 278