linux/net/netfilter/ipvs/ip_vs_dh.c
<<
>>
Prefs
   1/*
   2 * IPVS:        Destination Hashing scheduling module
   3 *
   4 * Authors:     Wensong Zhang <wensong@gnuchina.org>
   5 *
   6 *              Inspired by the consistent hashing scheduler patch from
   7 *              Thomas Proell <proellt@gmx.de>
   8 *
   9 *              This program is free software; you can redistribute it and/or
  10 *              modify it under the terms of the GNU General Public License
  11 *              as published by the Free Software Foundation; either version
  12 *              2 of the License, or (at your option) any later version.
  13 *
  14 * Changes:
  15 *
  16 */
  17
  18/*
  19 * The dh algorithm is to select server by the hash key of destination IP
  20 * address. The pseudo code is as follows:
  21 *
  22 *       n <- servernode[dest_ip];
  23 *       if (n is dead) OR
  24 *          (n is overloaded) OR (n.weight <= 0) then
  25 *                 return NULL;
  26 *
  27 *       return n;
  28 *
  29 * Note that servernode is a 256-bucket hash table that maps the hash
  30 * index derived from the packet's destination IP address to the current
  31 * server array. If the dh scheduler is used in a cache cluster, it is good
  32 * to combine it with the cache_bypass feature. When the statically assigned
  33 * server is dead or overloaded, the load balancer can bypass the cache
  34 * server and send requests to the original server directly.
  35 *
  36 */
  37
  38#define KMSG_COMPONENT "IPVS"
  39#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  40
  41#include <linux/ip.h>
  42#include <linux/slab.h>
  43#include <linux/module.h>
  44#include <linux/kernel.h>
  45#include <linux/skbuff.h>
  46
  47#include <net/ip_vs.h>
  48
  49
  50/*
  51 *      IPVS DH bucket
  52 */
  53struct ip_vs_dh_bucket {
  54        struct ip_vs_dest __rcu *dest;  /* real server (cache) */
  55};
  56
  57/*
  58 *     for IPVS DH entry hash table
  59 */
  60#ifndef CONFIG_IP_VS_DH_TAB_BITS
  61#define CONFIG_IP_VS_DH_TAB_BITS        8
  62#endif
  63#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
  64#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
  65#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
  66
  67struct ip_vs_dh_state {
  68        struct ip_vs_dh_bucket          buckets[IP_VS_DH_TAB_SIZE];
  69        struct rcu_head                 rcu_head;
  70};
  71
  72/*
  73 *      Returns hash value for IPVS DH entry
  74 */
  75static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
  76{
  77        __be32 addr_fold = addr->ip;
  78
  79#ifdef CONFIG_IP_VS_IPV6
  80        if (af == AF_INET6)
  81                addr_fold = addr->ip6[0]^addr->ip6[1]^
  82                            addr->ip6[2]^addr->ip6[3];
  83#endif
  84        return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
  85}
  86
  87
  88/*
  89 *      Get ip_vs_dest associated with supplied parameters.
  90 */
  91static inline struct ip_vs_dest *
  92ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
  93{
  94        return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
  95}
  96
  97
  98/*
  99 *      Assign all the hash buckets of the specified table with the service.
 100 */
 101static int
 102ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
 103{
 104        int i;
 105        struct ip_vs_dh_bucket *b;
 106        struct list_head *p;
 107        struct ip_vs_dest *dest;
 108        bool empty;
 109
 110        b = &s->buckets[0];
 111        p = &svc->destinations;
 112        empty = list_empty(p);
 113        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 114                dest = rcu_dereference_protected(b->dest, 1);
 115                if (dest)
 116                        ip_vs_dest_put(dest);
 117                if (empty)
 118                        RCU_INIT_POINTER(b->dest, NULL);
 119                else {
 120                        if (p == &svc->destinations)
 121                                p = p->next;
 122
 123                        dest = list_entry(p, struct ip_vs_dest, n_list);
 124                        ip_vs_dest_hold(dest);
 125                        RCU_INIT_POINTER(b->dest, dest);
 126
 127                        p = p->next;
 128                }
 129                b++;
 130        }
 131        return 0;
 132}
 133
 134
 135/*
 136 *      Flush all the hash buckets of the specified table.
 137 */
 138static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
 139{
 140        int i;
 141        struct ip_vs_dh_bucket *b;
 142        struct ip_vs_dest *dest;
 143
 144        b = &s->buckets[0];
 145        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 146                dest = rcu_dereference_protected(b->dest, 1);
 147                if (dest) {
 148                        ip_vs_dest_put(dest);
 149                        RCU_INIT_POINTER(b->dest, NULL);
 150                }
 151                b++;
 152        }
 153}
 154
 155
 156static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
 157{
 158        struct ip_vs_dh_state *s;
 159
 160        /* allocate the DH table for this service */
 161        s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
 162        if (s == NULL)
 163                return -ENOMEM;
 164
 165        svc->sched_data = s;
 166        IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
 167                  "current service\n",
 168                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 169
 170        /* assign the hash buckets with current dests */
 171        ip_vs_dh_reassign(s, svc);
 172
 173        return 0;
 174}
 175
 176
 177static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
 178{
 179        struct ip_vs_dh_state *s = svc->sched_data;
 180
 181        /* got to clean up hash buckets here */
 182        ip_vs_dh_flush(s);
 183
 184        /* release the table itself */
 185        kfree_rcu(s, rcu_head);
 186        IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
 187                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 188}
 189
 190
 191static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
 192                                 struct ip_vs_dest *dest)
 193{
 194        struct ip_vs_dh_state *s = svc->sched_data;
 195
 196        /* assign the hash buckets with the updated service */
 197        ip_vs_dh_reassign(s, svc);
 198
 199        return 0;
 200}
 201
 202
 203/*
 204 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 205 *      consider that the server is overloaded here.
 206 */
 207static inline int is_overloaded(struct ip_vs_dest *dest)
 208{
 209        return dest->flags & IP_VS_DEST_F_OVERLOAD;
 210}
 211
 212
 213/*
 214 *      Destination hashing scheduling
 215 */
 216static struct ip_vs_dest *
 217ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 218                  struct ip_vs_iphdr *iph)
 219{
 220        struct ip_vs_dest *dest;
 221        struct ip_vs_dh_state *s;
 222
 223        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 224
 225        s = (struct ip_vs_dh_state *) svc->sched_data;
 226        dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
 227        if (!dest
 228            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 229            || atomic_read(&dest->weight) <= 0
 230            || is_overloaded(dest)) {
 231                ip_vs_scheduler_err(svc, "no destination available");
 232                return NULL;
 233        }
 234
 235        IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
 236                      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
 237                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 238                      ntohs(dest->port));
 239
 240        return dest;
 241}
 242
 243
 244/*
 245 *      IPVS DH Scheduler structure
 246 */
 247static struct ip_vs_scheduler ip_vs_dh_scheduler =
 248{
 249        .name =                 "dh",
 250        .refcnt =               ATOMIC_INIT(0),
 251        .module =               THIS_MODULE,
 252        .n_list =               LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
 253        .init_service =         ip_vs_dh_init_svc,
 254        .done_service =         ip_vs_dh_done_svc,
 255        .add_dest =             ip_vs_dh_dest_changed,
 256        .del_dest =             ip_vs_dh_dest_changed,
 257        .schedule =             ip_vs_dh_schedule,
 258};
 259
 260
 261static int __init ip_vs_dh_init(void)
 262{
 263        return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
 264}
 265
 266
 267static void __exit ip_vs_dh_cleanup(void)
 268{
 269        unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
 270        synchronize_rcu();
 271}
 272
 273
 274module_init(ip_vs_dh_init);
 275module_exit(ip_vs_dh_cleanup);
 276MODULE_LICENSE("GPL");
 277