/* linux/drivers/staging/lustre/lustre/llite/vvp_dev.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * GPL HEADER START
   4 *
   5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 only,
   9 * as published by the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope that it will be useful, but
  12 * WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * General Public License version 2 for more details (a copy is included
  15 * in the LICENSE file that accompanied this code).
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * version 2 along with this program; If not, see
  19 * http://www.gnu.org/licenses/gpl-2.0.html
  20 *
  21 * GPL HEADER END
  22 */
  23/*
  24 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  25 * Use is subject to license terms.
  26 *
  27 * Copyright (c) 2012, 2015, Intel Corporation.
  28 */
  29/*
  30 * This file is part of Lustre, http://www.lustre.org/
  31 * Lustre is a trademark of Sun Microsystems, Inc.
  32 *
  33 * cl_device and cl_device_type implementation for VVP layer.
  34 *
  35 *   Author: Nikita Danilov <nikita.danilov@sun.com>
  36 *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  37 */
  38
  39#define DEBUG_SUBSYSTEM S_LLITE
  40
  41#include <obd.h>
  42#include "llite_internal.h"
  43#include "vvp_internal.h"
  44
  45/*****************************************************************************
  46 *
  47 * Vvp device and device type functions.
  48 *
  49 */
  50
  51/*
  52 * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
  53 * "llite_" (var. "ll_") prefix.
  54 */
  55
/* Slab caches used by the VVP layer; registered through vvp_caches below. */
static struct kmem_cache *ll_thread_kmem;
struct kmem_cache *vvp_lock_kmem;
struct kmem_cache *vvp_object_kmem;
static struct kmem_cache *vvp_session_kmem;
static struct kmem_cache *vvp_thread_kmem;
  61
/*
 * Cache descriptors handed to lu_kmem_init()/lu_kmem_fini(); the array is
 * terminated by an entry with a NULL ckd_cache.
 */
static struct lu_kmem_descr vvp_caches[] = {
	{
		.ckd_cache = &ll_thread_kmem,
		.ckd_name  = "ll_thread_kmem",
		.ckd_size  = sizeof(struct ll_thread_info),
	},
	{
		.ckd_cache = &vvp_lock_kmem,
		.ckd_name  = "vvp_lock_kmem",
		.ckd_size  = sizeof(struct vvp_lock),
	},
	{
		.ckd_cache = &vvp_object_kmem,
		.ckd_name  = "vvp_object_kmem",
		.ckd_size  = sizeof(struct vvp_object),
	},
	{
		.ckd_cache = &vvp_session_kmem,
		.ckd_name  = "vvp_session_kmem",
		.ckd_size  = sizeof(struct vvp_session)
	},
	{
		.ckd_cache = &vvp_thread_kmem,
		.ckd_name  = "vvp_thread_kmem",
		.ckd_size  = sizeof(struct vvp_thread_info),
	},
	{
		.ckd_cache = NULL	/* sentinel */
	}
};
  92
  93static void *ll_thread_key_init(const struct lu_context *ctx,
  94                                struct lu_context_key *key)
  95{
  96        struct vvp_thread_info *info;
  97
  98        info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
  99        if (!info)
 100                info = ERR_PTR(-ENOMEM);
 101        return info;
 102}
 103
 104static void ll_thread_key_fini(const struct lu_context *ctx,
 105                               struct lu_context_key *key, void *data)
 106{
 107        struct vvp_thread_info *info = data;
 108
 109        kmem_cache_free(ll_thread_kmem, info);
 110}
 111
/* Context key providing struct ll_thread_info in CL thread contexts. */
struct lu_context_key ll_thread_key = {
	.lct_tags = LCT_CL_THREAD,
	.lct_init = ll_thread_key_init,
	.lct_fini = ll_thread_key_fini
};
 117
 118static void *vvp_session_key_init(const struct lu_context *ctx,
 119                                  struct lu_context_key *key)
 120{
 121        struct vvp_session *session;
 122
 123        session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS);
 124        if (!session)
 125                session = ERR_PTR(-ENOMEM);
 126        return session;
 127}
 128
 129static void vvp_session_key_fini(const struct lu_context *ctx,
 130                                 struct lu_context_key *key, void *data)
 131{
 132        struct vvp_session *session = data;
 133
 134        kmem_cache_free(vvp_session_kmem, session);
 135}
 136
/* Context key providing struct vvp_session in LCT_SESSION contexts. */
struct lu_context_key vvp_session_key = {
	.lct_tags = LCT_SESSION,
	.lct_init = vvp_session_key_init,
	.lct_fini = vvp_session_key_fini
};
 142
 143static void *vvp_thread_key_init(const struct lu_context *ctx,
 144                                 struct lu_context_key *key)
 145{
 146        struct vvp_thread_info *vti;
 147
 148        vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
 149        if (!vti)
 150                vti = ERR_PTR(-ENOMEM);
 151        return vti;
 152}
 153
 154static void vvp_thread_key_fini(const struct lu_context *ctx,
 155                                struct lu_context_key *key, void *data)
 156{
 157        struct vvp_thread_info *vti = data;
 158
 159        kmem_cache_free(vvp_thread_kmem, vti);
 160}
 161
/* Context key providing struct vvp_thread_info in CL thread contexts. */
struct lu_context_key vvp_thread_key = {
	.lct_tags = LCT_CL_THREAD,
	.lct_init = vvp_thread_key_init,
	.lct_fini = vvp_thread_key_fini
};
 167
/*
 * Type constructor/destructor: vvp_type_{init,fini,start,stop}().
 * The macro generates these from the context keys declared above.
 */
LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
 170
/* lu_device operations: VVP only needs to allocate objects. */
static const struct lu_device_operations vvp_lu_ops = {
	.ldo_object_alloc      = vvp_object_alloc
};
 174
/**
 * Free a VVP device, returning the next (lower) device in the stack so
 * the caller can continue tearing the stack down.
 *
 * Also used as the error-path cleanup for vvp_device_alloc(), where
 * d->ld_site may still be NULL (site setup failed); in that case only the
 * device itself is released.
 */
static struct lu_device *vvp_device_free(const struct lu_env *env,
					 struct lu_device *d)
{
	struct vvp_device *vdv  = lu2vvp_dev(d);
	struct cl_site    *site = lu2cl_site(d->ld_site);
	struct lu_device  *next = cl2lu_dev(vdv->vdv_next);

	/* Only tear down the cl_site if one was attached to this device. */
	if (d->ld_site) {
		cl_site_fini(site);
		kfree(site);
	}
	cl_device_fini(lu2cl_dev(d));
	kfree(vdv);
	return next;
}
 190
 191static struct lu_device *vvp_device_alloc(const struct lu_env *env,
 192                                          struct lu_device_type *t,
 193                                          struct lustre_cfg *cfg)
 194{
 195        struct vvp_device *vdv;
 196        struct lu_device  *lud;
 197        struct cl_site    *site;
 198        int rc;
 199
 200        vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
 201        if (!vdv)
 202                return ERR_PTR(-ENOMEM);
 203
 204        lud = &vdv->vdv_cl.cd_lu_dev;
 205        cl_device_init(&vdv->vdv_cl, t);
 206        vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
 207
 208        site = kzalloc(sizeof(*site), GFP_NOFS);
 209        if (site) {
 210                rc = cl_site_init(site, &vdv->vdv_cl);
 211                if (rc == 0) {
 212                        rc = lu_site_init_finish(&site->cs_lu);
 213                } else {
 214                        LASSERT(!lud->ld_site);
 215                        CERROR("Cannot init lu_site, rc %d.\n", rc);
 216                        kfree(site);
 217                }
 218        } else {
 219                rc = -ENOMEM;
 220        }
 221        if (rc != 0) {
 222                vvp_device_free(env, lud);
 223                lud = ERR_PTR(rc);
 224        }
 225        return lud;
 226}
 227
/**
 * Connect this VVP device to the next (lower) device in the client stack
 * and initialize that device.
 *
 * On success a reference on @next is taken on behalf of the lu stack
 * (lu_device_get() + lu_ref_add()).
 *
 * \retval 0 on success, negative errno from the lower device otherwise.
 */
static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
			   const char *name, struct lu_device *next)
{
	struct vvp_device  *vdv;
	int rc;

	vdv = lu2vvp_dev(d);
	vdv->vdv_next = lu2cl_dev(next);

	LASSERT(d->ld_site && next->ld_type);
	/* Lower device shares our site. */
	next->ld_site = d->ld_site;
	rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
						      next->ld_type->ldt_name,
						      NULL);
	if (rc == 0) {
		lu_device_get(next);
		lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
	}
	return rc;
}
 248
 249static struct lu_device *vvp_device_fini(const struct lu_env *env,
 250                                         struct lu_device *d)
 251{
 252        return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
 253}
 254
/* Device type operations wiring the vvp_* hooks defined above. */
static const struct lu_device_type_operations vvp_device_type_ops = {
	.ldto_init = vvp_type_init,
	.ldto_fini = vvp_type_fini,

	.ldto_start = vvp_type_start,
	.ldto_stop  = vvp_type_stop,

	.ldto_device_alloc = vvp_device_alloc,
	.ldto_device_free  = vvp_device_free,
	.ldto_device_init  = vvp_device_init,
	.ldto_device_fini  = vvp_device_fini,
};
 267
/* The VVP device type, registered in vvp_global_init(). */
struct lu_device_type vvp_device_type = {
	.ldt_tags     = LU_DEVICE_CL,
	.ldt_name     = LUSTRE_VVP_NAME,
	.ldt_ops      = &vvp_device_type_ops,
	.ldt_ctx_tags = LCT_CL_THREAD
};
 274
 275/**
 276 * A mutex serializing calls to vvp_inode_fini() under extreme memory
 277 * pressure, when environments cannot be allocated.
 278 */
 279int vvp_global_init(void)
 280{
 281        int rc;
 282
 283        rc = lu_kmem_init(vvp_caches);
 284        if (rc != 0)
 285                return rc;
 286
 287        rc = lu_device_type_init(&vvp_device_type);
 288        if (rc != 0)
 289                goto out_kmem;
 290
 291        return 0;
 292
 293out_kmem:
 294        lu_kmem_fini(vvp_caches);
 295
 296        return rc;
 297}
 298
/*
 * Undo vvp_global_init(): unregister the device type, then release the
 * kmem caches, in reverse order of initialization.
 */
void vvp_global_fini(void)
{
	lu_device_type_fini(&vvp_device_type);
	lu_kmem_fini(vvp_caches);
}
 304
 305/*****************************************************************************
 306 *
 307 * mirror obd-devices into cl devices.
 308 *
 309 */
 310
 311int cl_sb_init(struct super_block *sb)
 312{
 313        struct ll_sb_info *sbi;
 314        struct cl_device  *cl;
 315        struct lu_env     *env;
 316        int rc = 0;
 317        u16 refcheck;
 318
 319        sbi  = ll_s2sbi(sb);
 320        env = cl_env_get(&refcheck);
 321        if (!IS_ERR(env)) {
 322                cl = cl_type_setup(env, NULL, &vvp_device_type,
 323                                   sbi->ll_dt_exp->exp_obd->obd_lu_dev);
 324                if (!IS_ERR(cl)) {
 325                        sbi->ll_cl = cl;
 326                        sbi->ll_site = cl2lu_dev(cl)->ld_site;
 327                }
 328                cl_env_put(env, &refcheck);
 329        } else {
 330                rc = PTR_ERR(env);
 331        }
 332        return rc;
 333}
 334
 335int cl_sb_fini(struct super_block *sb)
 336{
 337        struct ll_sb_info *sbi;
 338        struct lu_env     *env;
 339        struct cl_device  *cld;
 340        u16 refcheck;
 341        int             result;
 342
 343        sbi = ll_s2sbi(sb);
 344        env = cl_env_get(&refcheck);
 345        if (!IS_ERR(env)) {
 346                cld = sbi->ll_cl;
 347
 348                if (cld) {
 349                        cl_stack_fini(env, cld);
 350                        sbi->ll_cl = NULL;
 351                        sbi->ll_site = NULL;
 352                }
 353                cl_env_put(env, &refcheck);
 354                result = 0;
 355        } else {
 356                CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
 357                result = PTR_ERR(env);
 358        }
 359        return result;
 360}
 361
 362/****************************************************************************
 363 *
 364 * debugfs/lustre/llite/$MNT/dump_page_cache
 365 *
 366 ****************************************************************************/
 367
/*
 * To represent the contents of a page cache as a byte stream, the
 * following information is encoded in a 64-bit offset:
 *
 *       - file hash bucket in lu_site::ls_hash[]       28bits
 *
 *       - how far file is from bucket head              4bits
 *
 *       - page index                                   32bits
 *
 * The first two fields identify a file in the cache uniquely.
 */
 380
/* Bit layout of a packed position: bucket | depth(4 bits) | index(32 bits). */
#define PGC_OBJ_SHIFT (32 + 4)
#define PGC_DEPTH_SHIFT (32)
 383
/* Cursor identifying one page in the client page cache (see the encoding
 * comment above).
 */
struct vvp_pgcache_id {
	unsigned int             vpi_bucket;	/* lu_site hash bucket */
	unsigned int             vpi_depth;	/* position within bucket chain */
	u32                      vpi_index;	/* page index within the file */

	unsigned int             vpi_curdep;	/* walk state: objects left to skip */
	struct lu_object_header *vpi_obj;	/* object found by the hash walk */
};
 392
 393static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
 394{
 395        BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
 396
 397        id->vpi_index  = pos & 0xffffffff;
 398        id->vpi_depth  = (pos >> PGC_DEPTH_SHIFT) & 0xf;
 399        id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT;
 400}
 401
 402static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
 403{
 404        return
 405                ((__u64)id->vpi_index) |
 406                ((__u64)id->vpi_depth  << PGC_DEPTH_SHIFT) |
 407                ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
 408}
 409
/*
 * Hash-walk callback: skip vpi_curdep entries, then take a reference on
 * the next live object and record it in id->vpi_obj.
 *
 * Returns 0 to continue the walk, 1 to stop it.
 */
static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			       struct hlist_node *hnode, void *data)
{
	struct vvp_pgcache_id   *id  = data;
	struct lu_object_header *hdr = cfs_hash_object(hs, hnode);

	if (id->vpi_curdep-- > 0)
		return 0; /* continue */

	/* Stop at a dying object without taking a reference on it. */
	if (lu_object_is_dying(hdr))
		return 1;

	cfs_hash_get(hs, hnode);
	id->vpi_obj = hdr;
	return 1;
}
 426
/**
 * Look up the cl_object at position @id in @dev's site hash.
 *
 * On success returns the object with a "dump" lu_ref added; the caller
 * must drop it with lu_object_ref_del() + cl_object_put().  Returns NULL
 * when no object is found at that position; if the walk ran past the end
 * of the bucket chain (vpi_curdep still positive) vpi_depth is set to 0xf
 * so the caller advances to the next bucket.
 */
static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
					 struct lu_device *dev,
					 struct vvp_pgcache_id *id)
{
	LASSERT(lu_device_is_cl(dev));

	id->vpi_depth &= 0xf;
	id->vpi_obj    = NULL;
	id->vpi_curdep = id->vpi_depth;

	cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
				vvp_pgcache_obj_get, id);
	if (id->vpi_obj) {
		struct lu_object *lu_obj;

		/* Locate this device's slice within the found object. */
		lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
		if (lu_obj) {
			lu_object_ref_add(lu_obj, "dump", current);
			return lu2cl(lu_obj);
		}
		/* No slice for this device: drop the walk's reference. */
		lu_object_put(env, lu_object_top(id->vpi_obj));

	} else if (id->vpi_curdep > 0) {
		/* Bucket exhausted: force advance to the next bucket. */
		id->vpi_depth = 0xf;
	}
	return NULL;
}
 454
/**
 * Find the next resident page-cache page at or after position @pos,
 * scanning the site's object hash bucket by bucket.
 *
 * \retval packed id of the page found, or ~0ULL when the scan is done.
 */
static loff_t vvp_pgcache_find(const struct lu_env *env,
			       struct lu_device *dev, loff_t pos)
{
	struct cl_object     *clob;
	struct lu_site       *site;
	struct vvp_pgcache_id id;

	site = dev->ld_site;
	vvp_pgcache_id_unpack(pos, &id);

	while (1) {
		if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
			return ~0ULL;
		clob = vvp_pgcache_obj(env, dev, &id);
		if (clob) {
			struct inode *inode = vvp_object_inode(clob);
			struct page *vmpage;
			int nr;

			/* Grab the first cached page at or after vpi_index. */
			nr = find_get_pages_contig(inode->i_mapping,
						   id.vpi_index, 1, &vmpage);
			if (nr > 0) {
				id.vpi_index = vmpage->index;
				/* Can't support over 16T file */
				nr = !(vmpage->index > 0xffffffff);
				put_page(vmpage);
			}

			lu_object_ref_del(&clob->co_lu, "dump", current);
			cl_object_put(env, clob);
			if (nr > 0)
				return vvp_pgcache_id_pack(&id);
		}
		/* to the next object. */
		++id.vpi_depth;
		id.vpi_depth &= 0xf;
		/* Depth wrapped: move to next bucket; bucket wrap means end. */
		if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
			return ~0ULL;
		id.vpi_index = 0;
	}
}
 496
/*
 * Append "|flag" (or "flag" for the first one) to @seq when @page has
 * PG_##flag set; updates @has_flags so subsequent flags get a separator.
 */
#define seq_page_flag(seq, page, flag, has_flags) do {		  \
	if (test_bit(PG_##flag, &(page)->flags)) {		  \
		seq_printf(seq, "%s"#flag, has_flags ? "|" : "");	\
		has_flags = 1;					  \
	}							       \
} while (0)
 503
/*
 * Print one dump_page_cache line describing @page: the vvp/cl page
 * pointers, state markers, owning inode FID and the VM page flags.
 */
static void vvp_pgcache_page_show(const struct lu_env *env,
				  struct seq_file *seq, struct cl_page *page)
{
	struct vvp_page *vpg;
	struct page     *vmpage;
	int              has_flags;

	vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
	vmpage = vpg->vpg_page;
	seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [",
		   0 /* gen */,
		   vpg, page,
		   "none",
		   vpg->vpg_defer_uptodate ? "du" : "- ",
		   PageWriteback(vmpage) ? "wb" : "-",
		   vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
		   vmpage->mapping->host, vmpage->index,
		   page_count(vmpage));
	has_flags = 0;
	seq_page_flag(seq, vmpage, locked, has_flags);
	seq_page_flag(seq, vmpage, error, has_flags);
	seq_page_flag(seq, vmpage, referenced, has_flags);
	seq_page_flag(seq, vmpage, uptodate, has_flags);
	seq_page_flag(seq, vmpage, dirty, has_flags);
	seq_page_flag(seq, vmpage, writeback, has_flags);
	seq_printf(seq, "%s]\n", has_flags ? "" : "-");
}
 531
/*
 * seq_file ->show(): print one line for the page-cache position *v.
 * The position is unpacked, the owning object and VM page are looked up
 * again, and the page (or "missing") is printed.
 */
static int vvp_pgcache_show(struct seq_file *f, void *v)
{
	loff_t                 pos;
	struct ll_sb_info     *sbi;
	struct cl_object      *clob;
	struct lu_env         *env;
	struct vvp_pgcache_id  id;
	u16 refcheck;
	int                    result;

	env = cl_env_get(&refcheck);
	if (!IS_ERR(env)) {
		pos = *(loff_t *)v;
		vvp_pgcache_id_unpack(pos, &id);
		sbi = f->private;
		clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
		if (clob) {
			struct inode *inode = vvp_object_inode(clob);
			struct cl_page *page = NULL;
			struct page *vmpage;

			/* Re-find the VM page and its cl_page under lock. */
			result = find_get_pages_contig(inode->i_mapping,
						       id.vpi_index, 1,
						       &vmpage);
			if (result > 0) {
				lock_page(vmpage);
				page = cl_vmpage_page(vmpage, clob);
				unlock_page(vmpage);
				put_page(vmpage);
			}

			seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
				   PFID(lu_object_fid(&clob->co_lu)));
			if (page) {
				vvp_pgcache_page_show(env, f, page);
				cl_page_put(env, page);
			} else {
				seq_puts(f, "missing\n");
			}
			lu_object_ref_del(&clob->co_lu, "dump", current);
			cl_object_put(env, clob);
		} else {
			/* Object vanished between ->start/->next and here. */
			seq_printf(f, "%llx missing\n", pos);
		}
		cl_env_put(env, &refcheck);
		result = 0;
	} else {
		result = PTR_ERR(env);
	}
	return result;
}
 583
 584static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
 585{
 586        struct ll_sb_info *sbi;
 587        struct lu_env     *env;
 588        u16 refcheck;
 589
 590        sbi = f->private;
 591
 592        env = cl_env_get(&refcheck);
 593        if (!IS_ERR(env)) {
 594                sbi = f->private;
 595                if (sbi->ll_site->ls_obj_hash->hs_cur_bits >
 596                    64 - PGC_OBJ_SHIFT) {
 597                        pos = ERR_PTR(-EFBIG);
 598                } else {
 599                        *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
 600                                                *pos);
 601                        if (*pos == ~0ULL)
 602                                pos = NULL;
 603                }
 604                cl_env_put(env, &refcheck);
 605        }
 606        return pos;
 607}
 608
 609static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
 610{
 611        struct ll_sb_info *sbi;
 612        struct lu_env     *env;
 613        u16 refcheck;
 614
 615        env = cl_env_get(&refcheck);
 616        if (!IS_ERR(env)) {
 617                sbi = f->private;
 618                *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
 619                if (*pos == ~0ULL)
 620                        pos = NULL;
 621                cl_env_put(env, &refcheck);
 622        }
 623        return pos;
 624}
 625
/* seq_file ->stop(): no per-iteration state to release. */
static void vvp_pgcache_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
 630
/* seq_file iterator for the dump_page_cache debugfs entry. */
static const struct seq_operations vvp_pgcache_ops = {
	.start = vvp_pgcache_start,
	.next  = vvp_pgcache_next,
	.stop  = vvp_pgcache_stop,
	.show  = vvp_pgcache_show
};
 637
 638static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
 639{
 640        struct seq_file *seq;
 641        int rc;
 642
 643        rc = seq_open(filp, &vvp_pgcache_ops);
 644        if (rc)
 645                return rc;
 646
 647        seq = filp->private_data;
 648        seq->private = inode->i_private;
 649
 650        return 0;
 651}
 652
/* File operations for debugfs/lustre/llite/$MNT/dump_page_cache. */
const struct file_operations vvp_dump_pgcache_file_ops = {
	.owner   = THIS_MODULE,
	.open    = vvp_dump_pgcache_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
 660