linux/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/sec_bulk.c
 *
 * Author: Eric Mei <ericm@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_SEC

#include "../../include/linux/libcfs/libcfs.h"

#include "../include/obd.h"
#include "../include/obd_cksum.h"
#include "../include/obd_class.h"
#include "../include/obd_support.h"
#include "../include/lustre_net.h"
#include "../include/lustre_import.h"
#include "../include/lustre_dlm.h"
#include "../include/lustre_sec.h"

#include "ptlrpc_internal.h"

/****************************************
 * bulk encryption page pools           *
 ****************************************/

#define POINTERS_PER_PAGE       (PAGE_SIZE / sizeof(void *))
#define PAGES_PER_POOL          (POINTERS_PER_PAGE)
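
/*
 * Worked example, assuming 4 KB pages and 8-byte pointers: each pool is
 * one page holding POINTERS_PER_PAGE = 4096 / 8 = 512 page pointers, so
 * a full pool tracks 512 pages, i.e. 2 MB of bulk data.
 */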

#define IDLE_IDX_MAX     (100)
#define IDLE_IDX_WEIGHT  (3)

#define CACHE_QUIESCENT_PERIOD  (20)

static struct ptlrpc_enc_page_pool {
        /*
         * constants
         */
        unsigned long    epp_max_pages;   /* max # of pages we can hold, const */
        unsigned int     epp_max_pools;   /* number of pools, const */

        /*
         * wait queue in case of not enough free pages.
         */
        wait_queue_head_t epp_waitq;      /* waiting threads */
        unsigned int     epp_waitqlen;    /* wait queue length */
        unsigned long    epp_pages_short; /* # of pages wanted by queued users */
        unsigned int     epp_growing:1;   /* during adding pages */

        /*
         * indicates how idle the pools are, from 0 to IDLE_IDX_MAX.
         * it is updated on each pool access rather than by a timer, so
         * after the system has been idle for a while the index may
         * still read low until the next access (or until the
         * quiescent-period check resets it).
         */
        unsigned long    epp_idle_idx;

        /* last shrink time due to mem tight */
        time64_t         epp_last_shrink;
        time64_t         epp_last_access;

        /*
         * in-pool pages bookkeeping
         */
        spinlock_t       epp_lock;        /* protect following fields */
        unsigned long    epp_total_pages; /* total pages in pools */
        unsigned long    epp_free_pages;  /* current pages available */

        /*
         * statistics
         */
        unsigned long    epp_st_max_pages;  /* max # of pages ever reached */
        unsigned int     epp_st_grows;      /* # of grows */
        unsigned int     epp_st_grow_fails; /* # of add pages failures */
        unsigned int     epp_st_shrinks;    /* # of shrinks */
        unsigned long    epp_st_access;     /* # of accesses */
        unsigned long    epp_st_missings;   /* # of cache misses */
        unsigned long    epp_st_lowfree;    /* lowest free pages reached */
        unsigned int     epp_st_max_wqlen;  /* highest waitqueue length */
        unsigned long    epp_st_max_wait;   /* in jiffies */
        unsigned long    epp_st_outofmem;   /* # of out of mem requests */
        /*
         * pointers to pools
         */
        struct page    ***epp_pools;
} page_pools;
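
/*
 * Layout note (follows from the index arithmetic used throughout this
 * file): the pools form a two-level table, so epp_pools[p_idx][g_idx]
 * is global page slot p_idx * PAGES_PER_POOL + g_idx.  Free pages
 * always occupy slots [0, epp_free_pages), which is why producers and
 * consumers both locate the frontier with:
 *
 *      p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
 *      g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
 */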

/*
 * /sys/kernel/debug/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
{
        spin_lock(&page_pools.epp_lock);

        seq_printf(m,
                   "physical pages:          %lu\n"
                   "pages per pool:          %lu\n"
                   "max pages:               %lu\n"
                   "max pools:               %u\n"
                   "total pages:             %lu\n"
                   "total free:              %lu\n"
                   "idle index:              %lu/100\n"
                   "last shrink:             %lds\n"
                   "last access:             %lds\n"
                   "max pages reached:       %lu\n"
                   "grows:                   %u\n"
                   "grows failure:           %u\n"
                   "shrinks:                 %u\n"
                   "cache access:            %lu\n"
                   "cache missing:           %lu\n"
                   "low free mark:           %lu\n"
                   "max waitqueue depth:     %u\n"
                   "max wait time:           %ld/%lu\n"
                   "out of mem:              %lu\n",
                   totalram_pages,
                   PAGES_PER_POOL,
                   page_pools.epp_max_pages,
                   page_pools.epp_max_pools,
                   page_pools.epp_total_pages,
                   page_pools.epp_free_pages,
                   page_pools.epp_idle_idx,
                   (long)(ktime_get_seconds() - page_pools.epp_last_shrink),
                   (long)(ktime_get_seconds() - page_pools.epp_last_access),
                   page_pools.epp_st_max_pages,
                   page_pools.epp_st_grows,
                   page_pools.epp_st_grow_fails,
                   page_pools.epp_st_shrinks,
                   page_pools.epp_st_access,
                   page_pools.epp_st_missings,
                   page_pools.epp_st_lowfree,
                   page_pools.epp_st_max_wqlen,
                   page_pools.epp_st_max_wait,
                   msecs_to_jiffies(MSEC_PER_SEC),
                   page_pools.epp_st_outofmem);

        spin_unlock(&page_pools.epp_lock);

        return 0;
}

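/*
 * Free the last @npages pages of the free range back to the kernel and
 * drop any pool arrays that become entirely unused as a result; the
 * caller must hold epp_lock.
 */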
static void enc_pools_release_free_pages(long npages)
{
        int p_idx, g_idx;
        int p_idx_max1, p_idx_max2;

        LASSERT(npages > 0);
        LASSERT(npages <= page_pools.epp_free_pages);
        LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);

        /* max pool index before the release */
        p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;

        page_pools.epp_free_pages -= npages;
        page_pools.epp_total_pages -= npages;

        /* max pool index after the release */
        p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
                     ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
        LASSERT(page_pools.epp_pools[p_idx]);

        while (npages--) {
                LASSERT(page_pools.epp_pools[p_idx]);
                LASSERT(page_pools.epp_pools[p_idx][g_idx]);

                __free_page(page_pools.epp_pools[p_idx][g_idx]);
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        /* free unused pools */
        while (p_idx_max1 < p_idx_max2) {
                LASSERT(page_pools.epp_pools[p_idx_max2]);
                kfree(page_pools.epp_pools[p_idx_max2]);
                page_pools.epp_pools[p_idx_max2] = NULL;
                p_idx_max2--;
        }
}

/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_count(struct shrinker *s,
                                            struct shrink_control *sc)
{
        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
                (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}

/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_scan(struct shrinker *s,
                                           struct shrink_control *sc)
{
        spin_lock(&page_pools.epp_lock);
        /*
         * guard against unsigned underflow: with fewer than
         * PTLRPC_MAX_BRW_PAGES free pages the subtraction below would
         * wrap around and defeat the min_t() clamp.
         */
        if (page_pools.epp_free_pages <= PTLRPC_MAX_BRW_PAGES)
                sc->nr_to_scan = 0;
        else
                sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
                                       page_pools.epp_free_pages -
                                       PTLRPC_MAX_BRW_PAGES);
        if (sc->nr_to_scan > 0) {
                enc_pools_release_free_pages(sc->nr_to_scan);
                CDEBUG(D_SEC, "released %ld pages, %ld left\n",
                       (long)sc->nr_to_scan, page_pools.epp_free_pages);

                page_pools.epp_st_shrinks++;
                page_pools.epp_last_shrink = ktime_get_seconds();
        }
        spin_unlock(&page_pools.epp_lock);

        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return sc->nr_to_scan;
}

static inline
int npages_to_npools(unsigned long npages)
{
        return (int)DIV_ROUND_UP(npages, PAGES_PER_POOL);
}
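
/*
 * Example, assuming 4 KB pages (PAGES_PER_POOL == 512):
 * npages_to_npools(1000) rounds up to 2 pools.
 */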

/*
 * return how many pages cleaned up.
 */
static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
{
        unsigned long cleaned = 0;
        int i, j;

        for (i = 0; i < npools; i++) {
                if (pools[i]) {
                        for (j = 0; j < PAGES_PER_POOL; j++) {
                                if (pools[i][j]) {
                                        __free_page(pools[i][j]);
                                        cleaned++;
                                }
                        }
                        kfree(pools[i]);
                        pools[i] = NULL;
                }
        }

        return cleaned;
}

static inline void enc_pools_wakeup(void)
{
        assert_spin_locked(&page_pools.epp_lock);

        if (unlikely(page_pools.epp_waitqlen)) {
                LASSERT(waitqueue_active(&page_pools.epp_waitq));
                wake_up_all(&page_pools.epp_waitq);
        }
}

/*
 * Export the number of free pages in the pool
 */
int get_free_pages_in_pool(void)
{
        return page_pools.epp_free_pages;
}

/*
 * Let the outside world know whether enc_pool has reached full capacity
 */
int pool_is_at_full_capacity(void)
{
        return (page_pools.epp_total_pages == page_pools.epp_max_pages);
}

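/*
 * Return the encryption pages attached to @desc to the pools: each page
 * is stored at the current free-page frontier, the free count is bumped
 * by bd_iov_count, and any threads waiting for pages are woken.
 */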
void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
{
        int p_idx, g_idx;
        int i;

        LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));

        if (!GET_ENC_KIOV(desc))
                return;

        LASSERT(desc->bd_iov_count > 0);

        spin_lock(&page_pools.epp_lock);

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
                page_pools.epp_total_pages);
        LASSERT(page_pools.epp_pools[p_idx]);

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(BD_GET_ENC_KIOV(desc, i).bv_page);
                LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
                LASSERT(!page_pools.epp_pools[p_idx][g_idx]);

                page_pools.epp_pools[p_idx][g_idx] =
                        BD_GET_ENC_KIOV(desc, i).bv_page;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        page_pools.epp_free_pages += desc->bd_iov_count;

        enc_pools_wakeup();

        spin_unlock(&page_pools.epp_lock);

        kfree(GET_ENC_KIOV(desc));
        GET_ENC_KIOV(desc) = NULL;
}

static inline void enc_pools_alloc(void)
{
        LASSERT(page_pools.epp_max_pools);
        page_pools.epp_pools =
                libcfs_kvzalloc(page_pools.epp_max_pools *
                                sizeof(*page_pools.epp_pools),
                                GFP_NOFS);
}

static inline void enc_pools_free(void)
{
        LASSERT(page_pools.epp_max_pools);
        LASSERT(page_pools.epp_pools);

        kvfree(page_pools.epp_pools);
}

static struct shrinker pools_shrinker = {
        .count_objects  = enc_pools_shrink_count,
        .scan_objects   = enc_pools_shrink_scan,
        .seeks          = DEFAULT_SEEKS,
};
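
/*
 * Shrinker contract recap: ->count_objects reports how many pages the
 * pools could give back (scaled down by the idle index), while
 * ->scan_objects releases up to sc->nr_to_scan of them and returns the
 * number actually freed.
 */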

int sptlrpc_enc_pool_init(void)
{
        /*
         * maximum capacity is 1/8 of total physical memory.
         * is the 1/8 a good number?
         */
        page_pools.epp_max_pages = totalram_pages / 8;
        page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
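        /*
         * Illustrative sizing (hypothetical numbers): a 16 GB machine
         * with 4 KB pages has totalram_pages ~= 4M, so epp_max_pages is
         * ~512K (at most 2 GB of encryption pages) spread over ~1024
         * page-sized pool arrays.
         */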

        init_waitqueue_head(&page_pools.epp_waitq);
        page_pools.epp_waitqlen = 0;
        page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = ktime_get_seconds();
        page_pools.epp_last_access = ktime_get_seconds();

        spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;
        page_pools.epp_st_outofmem = 0;

        enc_pools_alloc();
        if (!page_pools.epp_pools)
                return -ENOMEM;

        register_shrinker(&pools_shrinker);

        return 0;
}

void sptlrpc_enc_pool_fini(void)
{
        unsigned long cleaned, npools;

        LASSERT(page_pools.epp_pools);
        LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);

        unregister_shrinker(&pools_shrinker);

        npools = npages_to_npools(page_pools.epp_total_pages);
        cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
        LASSERT(cleaned == page_pools.epp_total_pages);

        enc_pools_free();

        if (page_pools.epp_st_access > 0) {
                CDEBUG(D_SEC,
                       "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld, out of mem %lu\n",
                       page_pools.epp_st_max_pages, page_pools.epp_st_grows,
                       page_pools.epp_st_grow_fails,
                       page_pools.epp_st_shrinks, page_pools.epp_st_access,
                       page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
                       page_pools.epp_st_max_wait,
                       msecs_to_jiffies(MSEC_PER_SEC),
                       page_pools.epp_st_outofmem);
        }
}

static int cfs_hash_alg_id[] = {
        [BULK_HASH_ALG_NULL]    = CFS_HASH_ALG_NULL,
        [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
        [BULK_HASH_ALG_CRC32]   = CFS_HASH_ALG_CRC32,
        [BULK_HASH_ALG_MD5]     = CFS_HASH_ALG_MD5,
        [BULK_HASH_ALG_SHA1]    = CFS_HASH_ALG_SHA1,
        [BULK_HASH_ALG_SHA256]  = CFS_HASH_ALG_SHA256,
        [BULK_HASH_ALG_SHA384]  = CFS_HASH_ALG_SHA384,
        [BULK_HASH_ALG_SHA512]  = CFS_HASH_ALG_SHA512,
};

const char *sptlrpc_get_hash_name(__u8 hash_alg)
{
        return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
}

__u8 sptlrpc_get_hash_alg(const char *algname)
{
        return cfs_crypto_hash_alg(algname);
}

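/*
 * Sanity-check the bulk security descriptor carried in @msg at @offset.
 * @swabbed is nonzero when the message arrived in the opposite
 * endianness; only bsd_nob is byte-swapped here (the remaining fields
 * checked below are single-byte values).
 */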
int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
{
        struct ptlrpc_bulk_sec_desc *bsd;
        int size = msg->lm_buflens[offset];

        bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
        if (!bsd) {
                CERROR("Invalid bulk sec desc: size %d\n", size);
                return -EINVAL;
        }

        if (swabbed)
                __swab32s(&bsd->bsd_nob);

        if (unlikely(bsd->bsd_version != 0)) {
                CERROR("Unexpected version %u\n", bsd->bsd_version);
                return -EPROTO;
        }

        if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
                CERROR("Invalid type %u\n", bsd->bsd_type);
                return -EPROTO;
        }

        /* FIXME more sanity check here */

        if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
                     bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
                     bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
                CERROR("Invalid svc %u\n", bsd->bsd_svc);
                return -EPROTO;
        }

        return 0;
}
EXPORT_SYMBOL(bulk_sec_desc_unpack);

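/*
 * Compute an @alg digest over all kiov pages of @desc into @buf.  When
 * the digest is wider than @buflen, it is finalized into a stack buffer
 * and truncated to @buflen on copy-out.
 */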
int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
                              void *buf, int buflen)
{
        struct cfs_crypto_hash_desc *hdesc;
        int hashsize;
        unsigned int bufsize;
        int i, err;

        LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
        LASSERT(buflen >= 4);

        hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
        if (IS_ERR(hdesc)) {
                CERROR("Unable to initialize checksum hash %s\n",
                       cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
                return PTR_ERR(hdesc);
        }

        hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);

        for (i = 0; i < desc->bd_iov_count; i++) {
                cfs_crypto_hash_update_page(hdesc,
                                            BD_GET_KIOV(desc, i).bv_page,
                                            BD_GET_KIOV(desc, i).bv_offset &
                                            ~PAGE_MASK,
                                            BD_GET_KIOV(desc, i).bv_len);
        }

        if (hashsize > buflen) {
                unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];

                bufsize = sizeof(hashbuf);
                LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
                         bufsize, hashsize);
                err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
                memcpy(buf, hashbuf, buflen);
        } else {
                bufsize = buflen;
                err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
        }

        return err;
}