linux/net/netfilter/ipvs/ip_vs_dh.c
<<
>>
Prefs
   1/*
   2 * IPVS:        Destination Hashing scheduling module
   3 *
   4 * Authors:     Wensong Zhang <wensong@gnuchina.org>
   5 *
   6 *              Inspired by the consistent hashing scheduler patch from
   7 *              Thomas Proell <proellt@gmx.de>
   8 *
   9 *              This program is free software; you can redistribute it and/or
  10 *              modify it under the terms of the GNU General Public License
  11 *              as published by the Free Software Foundation; either version
  12 *              2 of the License, or (at your option) any later version.
  13 *
  14 * Changes:
  15 *
  16 */
  17
/*
 * The dh algorithm selects a server by the hash key of the destination IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[dest_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a 256-bucket hash table that maps the hash
 * index derived from the packet destination IP address to the current
 * server array. If the dh scheduler is used in a cache cluster, it is good
 * to combine it with the cache_bypass feature. When the statically assigned
 * server is dead or overloaded, the load balancer can bypass the cache
 * server and send requests to the original server directly.
 *
 */
  37
  38#define KMSG_COMPONENT "IPVS"
  39#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  40
  41#include <linux/ip.h>
  42#include <linux/module.h>
  43#include <linux/kernel.h>
  44#include <linux/skbuff.h>
  45
  46#include <net/ip_vs.h>
  47
  48
  49/*
  50 *      IPVS DH bucket
  51 */
  52struct ip_vs_dh_bucket {
  53        struct ip_vs_dest       *dest;          /* real server (cache) */
  54};
  55
  56/*
  57 *     for IPVS DH entry hash table
  58 */
  59#ifndef CONFIG_IP_VS_DH_TAB_BITS
  60#define CONFIG_IP_VS_DH_TAB_BITS        8
  61#endif
  62#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
  63#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
  64#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
  65
  66
  67/*
  68 *      Returns hash value for IPVS DH entry
  69 */
  70static inline unsigned ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
  71{
  72        __be32 addr_fold = addr->ip;
  73
  74#ifdef CONFIG_IP_VS_IPV6
  75        if (af == AF_INET6)
  76                addr_fold = addr->ip6[0]^addr->ip6[1]^
  77                            addr->ip6[2]^addr->ip6[3];
  78#endif
  79        return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
  80}
  81
  82
  83/*
  84 *      Get ip_vs_dest associated with supplied parameters.
  85 */
  86static inline struct ip_vs_dest *
  87ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
  88             const union nf_inet_addr *addr)
  89{
  90        return (tbl[ip_vs_dh_hashkey(af, addr)]).dest;
  91}
  92
  93
  94/*
  95 *      Assign all the hash buckets of the specified table with the service.
  96 */
  97static int
  98ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
  99{
 100        int i;
 101        struct ip_vs_dh_bucket *b;
 102        struct list_head *p;
 103        struct ip_vs_dest *dest;
 104
 105        b = tbl;
 106        p = &svc->destinations;
 107        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 108                if (list_empty(p)) {
 109                        b->dest = NULL;
 110                } else {
 111                        if (p == &svc->destinations)
 112                                p = p->next;
 113
 114                        dest = list_entry(p, struct ip_vs_dest, n_list);
 115                        atomic_inc(&dest->refcnt);
 116                        b->dest = dest;
 117
 118                        p = p->next;
 119                }
 120                b++;
 121        }
 122        return 0;
 123}
 124
 125
 126/*
 127 *      Flush all the hash buckets of the specified table.
 128 */
 129static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
 130{
 131        int i;
 132        struct ip_vs_dh_bucket *b;
 133
 134        b = tbl;
 135        for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
 136                if (b->dest) {
 137                        atomic_dec(&b->dest->refcnt);
 138                        b->dest = NULL;
 139                }
 140                b++;
 141        }
 142}
 143
 144
 145static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
 146{
 147        struct ip_vs_dh_bucket *tbl;
 148
 149        /* allocate the DH table for this service */
 150        tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
 151                      GFP_ATOMIC);
 152        if (tbl == NULL) {
 153                pr_err("%s(): no memory\n", __func__);
 154                return -ENOMEM;
 155        }
 156        svc->sched_data = tbl;
 157        IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
 158                  "current service\n",
 159                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 160
 161        /* assign the hash buckets with the updated service */
 162        ip_vs_dh_assign(tbl, svc);
 163
 164        return 0;
 165}
 166
 167
 168static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
 169{
 170        struct ip_vs_dh_bucket *tbl = svc->sched_data;
 171
 172        /* got to clean up hash buckets here */
 173        ip_vs_dh_flush(tbl);
 174
 175        /* release the table itself */
 176        kfree(svc->sched_data);
 177        IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
 178                  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
 179
 180        return 0;
 181}
 182
 183
 184static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
 185{
 186        struct ip_vs_dh_bucket *tbl = svc->sched_data;
 187
 188        /* got to clean up hash buckets here */
 189        ip_vs_dh_flush(tbl);
 190
 191        /* assign the hash buckets with the updated service */
 192        ip_vs_dh_assign(tbl, svc);
 193
 194        return 0;
 195}
 196
 197
 198/*
 199 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
 200 *      consider that the server is overloaded here.
 201 */
 202static inline int is_overloaded(struct ip_vs_dest *dest)
 203{
 204        return dest->flags & IP_VS_DEST_F_OVERLOAD;
 205}
 206
 207
 208/*
 209 *      Destination hashing scheduling
 210 */
 211static struct ip_vs_dest *
 212ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 213{
 214        struct ip_vs_dest *dest;
 215        struct ip_vs_dh_bucket *tbl;
 216        struct ip_vs_iphdr iph;
 217
 218        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 219
 220        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 221
 222        tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
 223        dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr);
 224        if (!dest
 225            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 226            || atomic_read(&dest->weight) <= 0
 227            || is_overloaded(dest)) {
 228                return NULL;
 229        }
 230
 231        IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
 232                      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
 233                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 234                      ntohs(dest->port));
 235
 236        return dest;
 237}
 238
 239
 240/*
 241 *      IPVS DH Scheduler structure
 242 */
 243static struct ip_vs_scheduler ip_vs_dh_scheduler =
 244{
 245        .name =                 "dh",
 246        .refcnt =               ATOMIC_INIT(0),
 247        .module =               THIS_MODULE,
 248        .n_list =               LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
 249        .init_service =         ip_vs_dh_init_svc,
 250        .done_service =         ip_vs_dh_done_svc,
 251        .update_service =       ip_vs_dh_update_svc,
 252        .schedule =             ip_vs_dh_schedule,
 253};
 254
 255
 256static int __init ip_vs_dh_init(void)
 257{
 258        return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
 259}
 260
 261
 262static void __exit ip_vs_dh_cleanup(void)
 263{
 264        unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
 265}
 266
 267
 268module_init(ip_vs_dh_init);
 269module_exit(ip_vs_dh_cleanup);
 270MODULE_LICENSE("GPL");
 271