/* linux/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c */
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * lnet/klnds/o2iblnd/o2iblnd_modparams.c
  37 *
  38 * Author: Eric Barton <eric@bartonsoftware.com>
  39 */
  40
  41#include "o2iblnd.h"
  42
  43static int service = 987;
  44CFS_MODULE_PARM(service, "i", int, 0444,
  45                "service number (within RDMA_PS_TCP)");
  46
  47static int cksum = 0;
  48CFS_MODULE_PARM(cksum, "i", int, 0644,
  49                "set non-zero to enable message (not RDMA) checksums");
  50
  51static int timeout = 50;
  52CFS_MODULE_PARM(timeout, "i", int, 0644,
  53                "timeout (seconds)");
  54
  55/* Number of threads in each scheduler pool which is percpt,
  56 * we will estimate reasonable value based on CPUs if it's set to zero. */
  57static int nscheds;
  58CFS_MODULE_PARM(nscheds, "i", int, 0444,
  59                "number of threads in each scheduler pool");
  60
  61/* NB: this value is shared by all CPTs, it can grow at runtime */
  62static int ntx = 512;
  63CFS_MODULE_PARM(ntx, "i", int, 0444,
  64                "# of message descriptors allocated for each pool");
  65
  66/* NB: this value is shared by all CPTs */
  67static int credits = 256;
  68CFS_MODULE_PARM(credits, "i", int, 0444,
  69                "# concurrent sends");
  70
  71static int peer_credits = 8;
  72CFS_MODULE_PARM(peer_credits, "i", int, 0444,
  73                "# concurrent sends to 1 peer");
  74
  75static int peer_credits_hiw = 0;
  76CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
  77                "when eagerly to return credits");
  78
  79static int peer_buffer_credits = 0;
  80CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
  81                "# per-peer router buffer credits");
  82
  83static int peer_timeout = 180;
  84CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
  85                "Seconds without aliveness news to declare peer dead (<=0 to disable)");
  86
  87static char *ipif_name = "ib0";
  88CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
  89                "IPoIB interface name");
  90
  91static int retry_count = 5;
  92CFS_MODULE_PARM(retry_count, "i", int, 0644,
  93                "Retransmissions when no ACK received");
  94
  95static int rnr_retry_count = 6;
  96CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
  97                "RNR retransmissions");
  98
  99static int keepalive = 100;
 100CFS_MODULE_PARM(keepalive, "i", int, 0644,
 101                "Idle time in seconds before sending a keepalive");
 102
 103static int ib_mtu = 0;
 104CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
 105                "IB MTU 256/512/1024/2048/4096");
 106
 107static int concurrent_sends = 0;
 108CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
 109                "send work-queue sizing");
 110
 111static int map_on_demand = 0;
 112CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
 113                "map on demand");
 114
 115/* NB: this value is shared by all CPTs, it can grow at runtime */
 116static int fmr_pool_size = 512;
 117CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
 118                "size of fmr pool on each CPT (>= ntx / 4)");
 119
 120/* NB: this value is shared by all CPTs, it can grow at runtime */
 121static int fmr_flush_trigger = 384;
 122CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
 123                "# dirty FMRs that triggers pool flush");
 124
 125static int fmr_cache = 1;
 126CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
 127                "non-zero to enable FMR caching");
 128
 129/* NB: this value is shared by all CPTs, it can grow at runtime */
 130static int pmr_pool_size = 512;
 131CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
 132                "size of MR cache pmr pool on each CPT");
 133
 134/*
 135 * 0: disable failover
 136 * 1: enable failover if necessary
 137 * 2: force to failover (for debug)
 138 */
 139static int dev_failover = 0;
 140CFS_MODULE_PARM(dev_failover, "i", int, 0444,
 141               "HCA failover for bonding (0 off, 1 on, other values reserved)");
 142
 143
 144static int require_privileged_port = 0;
 145CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
 146                "require privileged port when accepting connection");
 147
 148static int use_privileged_port = 1;
 149CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
 150                "use privileged port when initiating connection");
 151
 152kib_tunables_t kiblnd_tunables = {
 153        .kib_dev_failover          = &dev_failover,
 154        .kib_service            = &service,
 155        .kib_cksum                = &cksum,
 156        .kib_timeout            = &timeout,
 157        .kib_keepalive        = &keepalive,
 158        .kib_ntx                    = &ntx,
 159        .kib_credits            = &credits,
 160        .kib_peertxcredits        = &peer_credits,
 161        .kib_peercredits_hiw    = &peer_credits_hiw,
 162        .kib_peerrtrcredits      = &peer_buffer_credits,
 163        .kib_peertimeout            = &peer_timeout,
 164        .kib_default_ipif          = &ipif_name,
 165        .kib_retry_count            = &retry_count,
 166        .kib_rnr_retry_count    = &rnr_retry_count,
 167        .kib_concurrent_sends       = &concurrent_sends,
 168        .kib_ib_mtu              = &ib_mtu,
 169        .kib_map_on_demand        = &map_on_demand,
 170        .kib_fmr_pool_size        = &fmr_pool_size,
 171        .kib_fmr_flush_trigger      = &fmr_flush_trigger,
 172        .kib_fmr_cache        = &fmr_cache,
 173        .kib_pmr_pool_size        = &pmr_pool_size,
 174        .kib_require_priv_port      = &require_privileged_port,
 175        .kib_use_priv_port          = &use_privileged_port,
 176        .kib_nscheds                = &nscheds
 177};
 178
 179#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
 180
 181static char ipif_basename_space[32];
 182
 183
 184enum {
 185        O2IBLND_SERVICE  = 1,
 186        O2IBLND_CKSUM,
 187        O2IBLND_TIMEOUT,
 188        O2IBLND_NTX,
 189        O2IBLND_CREDITS,
 190        O2IBLND_PEER_TXCREDITS,
 191        O2IBLND_PEER_CREDITS_HIW,
 192        O2IBLND_PEER_RTRCREDITS,
 193        O2IBLND_PEER_TIMEOUT,
 194        O2IBLND_IPIF_BASENAME,
 195        O2IBLND_RETRY_COUNT,
 196        O2IBLND_RNR_RETRY_COUNT,
 197        O2IBLND_KEEPALIVE,
 198        O2IBLND_CONCURRENT_SENDS,
 199        O2IBLND_IB_MTU,
 200        O2IBLND_MAP_ON_DEMAND,
 201        O2IBLND_FMR_POOL_SIZE,
 202        O2IBLND_FMR_FLUSH_TRIGGER,
 203        O2IBLND_FMR_CACHE,
 204        O2IBLND_PMR_POOL_SIZE,
 205        O2IBLND_DEV_FAILOVER
 206};
 207
 208static ctl_table_t kiblnd_ctl_table[] = {
 209        {
 210                .ctl_name = O2IBLND_SERVICE,
 211                .procname = "service",
 212                .data     = &service,
 213                .maxlen   = sizeof(int),
 214                .mode     = 0444,
 215                .proc_handler = &proc_dointvec
 216        },
 217        {
 218                .ctl_name = O2IBLND_CKSUM,
 219                .procname = "cksum",
 220                .data     = &cksum,
 221                .maxlen   = sizeof(int),
 222                .mode     = 0644,
 223                .proc_handler = &proc_dointvec
 224        },
 225        {
 226                .ctl_name = O2IBLND_TIMEOUT,
 227                .procname = "timeout",
 228                .data     = &timeout,
 229                .maxlen   = sizeof(int),
 230                .mode     = 0644,
 231                .proc_handler = &proc_dointvec
 232        },
 233        {
 234                .ctl_name = O2IBLND_NTX,
 235                .procname = "ntx",
 236                .data     = &ntx,
 237                .maxlen   = sizeof(int),
 238                .mode     = 0444,
 239                .proc_handler = &proc_dointvec
 240        },
 241        {
 242                .ctl_name = O2IBLND_CREDITS,
 243                .procname = "credits",
 244                .data     = &credits,
 245                .maxlen   = sizeof(int),
 246                .mode     = 0444,
 247                .proc_handler = &proc_dointvec
 248        },
 249        {
 250                .ctl_name = O2IBLND_PEER_TXCREDITS,
 251                .procname = "peer_credits",
 252                .data     = &peer_credits,
 253                .maxlen   = sizeof(int),
 254                .mode     = 0444,
 255                .proc_handler = &proc_dointvec
 256        },
 257        {
 258                .ctl_name = O2IBLND_PEER_CREDITS_HIW,
 259                .procname = "peer_credits_hiw",
 260                .data     = &peer_credits_hiw,
 261                .maxlen   = sizeof(int),
 262                .mode     = 0444,
 263                .proc_handler = &proc_dointvec
 264        },
 265        {
 266                .ctl_name = O2IBLND_PEER_RTRCREDITS,
 267                .procname = "peer_buffer_credits",
 268                .data     = &peer_buffer_credits,
 269                .maxlen   = sizeof(int),
 270                .mode     = 0444,
 271                .proc_handler = &proc_dointvec
 272        },
 273        {
 274                .ctl_name = O2IBLND_PEER_TIMEOUT,
 275                .procname = "peer_timeout",
 276                .data     = &peer_timeout,
 277                .maxlen   = sizeof(int),
 278                .mode     = 0444,
 279                .proc_handler = &proc_dointvec
 280        },
 281        {
 282                .ctl_name = O2IBLND_IPIF_BASENAME,
 283                .procname = "ipif_name",
 284                .data     = ipif_basename_space,
 285                .maxlen   = sizeof(ipif_basename_space),
 286                .mode     = 0444,
 287                .proc_handler = &proc_dostring
 288        },
 289        {
 290                .ctl_name = O2IBLND_RETRY_COUNT,
 291                .procname = "retry_count",
 292                .data     = &retry_count,
 293                .maxlen   = sizeof(int),
 294                .mode     = 0644,
 295                .proc_handler = &proc_dointvec
 296        },
 297        {
 298                .ctl_name = O2IBLND_RNR_RETRY_COUNT,
 299                .procname = "rnr_retry_count",
 300                .data     = &rnr_retry_count,
 301                .maxlen   = sizeof(int),
 302                .mode     = 0644,
 303                .proc_handler = &proc_dointvec
 304        },
 305        {
 306                .ctl_name = O2IBLND_KEEPALIVE,
 307                .procname = "keepalive",
 308                .data     = &keepalive,
 309                .maxlen   = sizeof(int),
 310                .mode     = 0644,
 311                .proc_handler = &proc_dointvec
 312        },
 313        {
 314                .ctl_name = O2IBLND_CONCURRENT_SENDS,
 315                .procname = "concurrent_sends",
 316                .data     = &concurrent_sends,
 317                .maxlen   = sizeof(int),
 318                .mode     = 0444,
 319                .proc_handler = &proc_dointvec
 320        },
 321        {
 322                .ctl_name = O2IBLND_IB_MTU,
 323                .procname = "ib_mtu",
 324                .data     = &ib_mtu,
 325                .maxlen   = sizeof(int),
 326                .mode     = 0444,
 327                .proc_handler = &proc_dointvec
 328        },
 329        {
 330                .ctl_name = O2IBLND_MAP_ON_DEMAND,
 331                .procname = "map_on_demand",
 332                .data     = &map_on_demand,
 333                .maxlen   = sizeof(int),
 334                .mode     = 0444,
 335                .proc_handler = &proc_dointvec
 336        },
 337
 338        {
 339                .ctl_name = O2IBLND_FMR_POOL_SIZE,
 340                .procname = "fmr_pool_size",
 341                .data     = &fmr_pool_size,
 342                .maxlen   = sizeof(int),
 343                .mode     = 0444,
 344                .proc_handler = &proc_dointvec
 345        },
 346        {
 347                .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
 348                .procname = "fmr_flush_trigger",
 349                .data     = &fmr_flush_trigger,
 350                .maxlen   = sizeof(int),
 351                .mode     = 0444,
 352                .proc_handler = &proc_dointvec
 353        },
 354        {
 355                .ctl_name = O2IBLND_FMR_CACHE,
 356                .procname = "fmr_cache",
 357                .data     = &fmr_cache,
 358                .maxlen   = sizeof(int),
 359                .mode     = 0444,
 360                .proc_handler = &proc_dointvec
 361        },
 362        {
 363                .ctl_name = O2IBLND_PMR_POOL_SIZE,
 364                .procname = "pmr_pool_size",
 365                .data     = &pmr_pool_size,
 366                .maxlen   = sizeof(int),
 367                .mode     = 0444,
 368                .proc_handler = &proc_dointvec
 369        },
 370        {
 371                .ctl_name = O2IBLND_DEV_FAILOVER,
 372                .procname = "dev_failover",
 373                .data     = &dev_failover,
 374                .maxlen   = sizeof(int),
 375                .mode     = 0444,
 376                .proc_handler = &proc_dointvec
 377        },
 378        {0}
 379};
 380
 381static ctl_table_t kiblnd_top_ctl_table[] = {
 382        {
 383                .ctl_name = CTL_O2IBLND,
 384                .procname = "o2iblnd",
 385                .data     = NULL,
 386                .maxlen   = 0,
 387                .mode     = 0555,
 388                .child    = kiblnd_ctl_table
 389        },
 390        {0}
 391};
 392
 393void
 394kiblnd_initstrtunable(char *space, char *str, int size)
 395{
 396        strncpy(space, str, size);
 397        space[size-1] = 0;
 398}
 399
 400void
 401kiblnd_sysctl_init (void)
 402{
 403        kiblnd_initstrtunable(ipif_basename_space, ipif_name,
 404                              sizeof(ipif_basename_space));
 405
 406        kiblnd_tunables.kib_sysctl =
 407                register_sysctl_table(kiblnd_top_ctl_table);
 408
 409        if (kiblnd_tunables.kib_sysctl == NULL)
 410                CWARN("Can't setup /proc tunables\n");
 411}
 412
 413void
 414kiblnd_sysctl_fini (void)
 415{
 416        if (kiblnd_tunables.kib_sysctl != NULL)
 417                unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
 418}
 419
 420#else
 421
 422void
 423kiblnd_sysctl_init (void)
 424{
 425}
 426
 427void
 428kiblnd_sysctl_fini (void)
 429{
 430}
 431
 432#endif
 433
 434int
 435kiblnd_tunables_init (void)
 436{
 437        if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
 438                CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
 439                       *kiblnd_tunables.kib_ib_mtu);
 440                return -EINVAL;
 441        }
 442
 443        if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
 444                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
 445
 446        if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
 447                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
 448
 449        if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
 450                *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
 451
 452        if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
 453                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
 454
 455        if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
 456                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
 457
 458        if (*kiblnd_tunables.kib_map_on_demand < 0 ||
 459            *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
 460                *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
 461
 462        if (*kiblnd_tunables.kib_map_on_demand == 1)
 463                *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
 464
 465        if (*kiblnd_tunables.kib_concurrent_sends == 0) {
 466                if (*kiblnd_tunables.kib_map_on_demand > 0 &&
 467                    *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
 468                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
 469                else
 470                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
 471        }
 472
 473        if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
 474                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
 475
 476        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
 477                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
 478
 479        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
 480                CWARN("Concurrent sends %d is lower than message queue size: %d, "
 481                      "performance may drop slightly.\n",
 482                      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
 483        }
 484
 485        kiblnd_sysctl_init();
 486        return 0;
 487}
 488
 489void
 490kiblnd_tunables_fini (void)
 491{
 492        kiblnd_sysctl_fini();
 493}
 494