linux/fs/notify/mark.c
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking, lifetime and refcounting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for their creation and one
 * for the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked in via its g_list member.  It also protects the group's
 * private data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes, like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked in via its obj_list member.
 *
 * A list of notification marks relating to an inode / mount is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark
 * is dropped.  Thus holding a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove the mark from g_list before dropping the
 * mark reference associated with it, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive from when they are added to an inode until their
 * refcnt reaches 0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * Each of these teardown paths has the very interesting property of being
 * able to run concurrently with any (or all) of the others.
 */
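
/*
 * Illustrative sketch (not code from this file): a hypothetical writer that
 * wants to modify a mark attached to an object takes the three locks in the
 * documented order; "group" and "mark" stand for references the caller
 * already holds:
 *
 *      mutex_lock(&group->mark_mutex);
 *      spin_lock(&mark->lock);
 *      spin_lock(&mark->connector->lock);
 *      ...update mark->mask and the connector's cached mask...
 *      spin_unlock(&mark->connector->lock);
 *      spin_unlock(&mark->lock);
 *      mutex_unlock(&group->mark_mutex);
 *
 * Taking them in any other order risks an ABBA deadlock against the
 * functions below.
 */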

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY   (1)     /* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

/* Take a reference on a mark that is known to be alive. */
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
        WARN_ON_ONCE(!refcount_read(&mark->refcnt));
        refcount_inc(&mark->refcnt);
}

/* Recalculate the object's cached event mask from its list of marks. */
static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        u32 new_mask = 0;
        struct fsnotify_mark *mark;

        assert_spin_locked(&conn->lock);
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
                        new_mask |= mark->mask;
        }
        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
                conn->inode->i_fsnotify_mask = new_mask;
        else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT)
                real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
}

/*
 * Calculate the mask of events for a list of marks. The caller must make sure
 * that the connector and connector->inode cannot disappear under us.  Callers
 * achieve this by holding mark->lock or mark->group->mark_mutex for a mark on
 * this list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        if (!conn)
                return;

        spin_lock(&conn->lock);
        __fsnotify_recalc_mask(conn);
        spin_unlock(&conn->lock);
        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
                __fsnotify_update_child_dentry_flags(conn->inode);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark_connector *conn, *free;

        spin_lock(&destroy_lock);
        conn = connector_destroy_list;
        connector_destroy_list = NULL;
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);
        while (conn) {
                free = conn;
                conn = conn->destroy_next;
                kmem_cache_free(fsnotify_mark_connector_cachep, free);
        }
}

/*
 * Detach the connector from the object it watches and clear the object's
 * cached mask. Returns the inode whose reference should be dropped via
 * iput() once it is safe to do so, or NULL.
 */
static struct inode *fsnotify_detach_connector_from_object(
                                        struct fsnotify_mark_connector *conn)
{
        struct inode *inode = NULL;

        if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
                inode = conn->inode;
                rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
                inode->i_fsnotify_mask = 0;
                conn->inode = NULL;
                conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
        } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
                rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
                                   NULL);
                real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
                conn->mnt = NULL;
                conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
        }

        return inode;
}

/*
 * Free the mark via the group's free_mark() op and drop the mark's reference
 * on the group.
 */
static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        if (WARN_ON_ONCE(!group))
                return;
        group->ops->free_mark(mark);
        fsnotify_put_group(group);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_mark_connector *conn;
        struct inode *inode = NULL;
        bool free_conn = false;

        /* Catch marks that were actually never attached to an object */
        if (!mark->connector) {
                if (refcount_dec_and_test(&mark->refcnt))
                        fsnotify_final_mark_destroy(mark);
                return;
        }

        /*
         * We have to be careful so that traversals of obj_list under lock can
         * safely grab a mark reference.
         */
        if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
                return;

        conn = mark->connector;
        hlist_del_init_rcu(&mark->obj_list);
        if (hlist_empty(&conn->list)) {
                inode = fsnotify_detach_connector_from_object(conn);
                free_conn = true;
        } else {
                __fsnotify_recalc_mask(conn);
        }
        mark->connector = NULL;
        spin_unlock(&conn->lock);

        iput(inode);

        if (free_conn) {
                spin_lock(&destroy_lock);
                conn->destroy_next = connector_destroy_list;
                connector_destroy_list = conn;
                spin_unlock(&destroy_lock);
                queue_work(system_unbound_wq, &connector_reaper_work);
        }
        /*
         * Note that we didn't update flags telling whether inode cares about
         * what's happening with children. We update these flags from
         * __fsnotify_parent() lazily when next event happens on one of our
         * children.
         */
        spin_lock(&destroy_lock);
        list_add(&mark->g_list, &destroy_list);
        spin_unlock(&destroy_lock);
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
}

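/*
 * Illustrative refcount lifecycle of a mark, under the rules above (a sketch,
 * not a guarantee for every backend): fsnotify_init_mark() starts the mark at
 * refcnt 1 (the creator's reference), fsnotify_add_mark() takes a second
 * reference for the group's g_list, fsnotify_detach_mark() drops the g_list
 * reference again, and the final fsnotify_put_mark() unhooks the mark from
 * obj_list and queues it on destroy_list, where fsnotify_mark_destroy_workfn()
 * frees it after an SRCU grace period.
 */
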
/*
 * Get a mark reference when we found the mark via lockless traversal of the
 * object list. The mark may already have been removed from the list by now
 * and may be on its way to destruction once the SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
        if (!mark)
                return true;

        if (refcount_inc_not_zero(&mark->refcnt)) {
                spin_lock(&mark->lock);
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
                        /* mark is attached, group is still alive then */
                        atomic_inc(&mark->group->user_waits);
                        spin_unlock(&mark->lock);
                        return true;
                }
                spin_unlock(&mark->lock);
                fsnotify_put_mark(mark);
        }
        return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
        if (mark) {
                struct fsnotify_group *group = mark->group;

                fsnotify_put_mark(mark);
                /*
                 * We abuse notification_waitq on group shutdown for waiting for
                 * all marks pinned when waiting for userspace.
                 */
                if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
                        wake_up(&group->notification_waitq);
        }
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
        /* This can fail if mark is being removed */
        if (!fsnotify_get_mark_safe(iter_info->inode_mark))
                return false;
        if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
                fsnotify_put_mark_wake(iter_info->inode_mark);
                return false;
        }

        /*
         * Now that both marks are pinned by refcount in the inode / vfsmount
         * lists, we can drop SRCU lock, and safely resume the list iteration
         * once userspace returns.
         */
        srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

        return true;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
        iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
        fsnotify_put_mark_wake(iter_info->inode_mark);
        fsnotify_put_mark_wake(iter_info->vfsmount_mark);
}

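/*
 * Sketch of how a permission-event backend is expected to pair the two
 * helpers above while waiting for a userspace verdict; the handler and
 * wait_for_userspace_response() are hypothetical names, not code from this
 * file:
 *
 *      if (fsnotify_prepare_user_wait(iter_info)) {
 *              ret = wait_for_userspace_response(event);
 *              fsnotify_finish_user_wait(iter_info);
 *      }
 *
 * The SRCU read lock is dropped across the wait so that mark destruction,
 * which calls synchronize_srcu(), is not blocked behind a sleeping listener.
 */
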
/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold a
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
        WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
                     refcount_read(&mark->refcnt) < 1 +
                        !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);

        atomic_dec(&group->num_marks);

        /* Drop mark reference acquired in fsnotify_add_mark_locked() */
        fsnotify_put_mark(mark);
}

/*
 * Free the fsnotify mark. The mark is actually only marked as being freed;
 * the freeing itself happens only once the last reference to the mark is
 * dropped, from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
        spin_unlock(&mark->lock);

        /*
         * Some groups like to know that marks are being freed.  This is a
         * callback to the group function to let it know that this mark
         * is being freed.
         */
        if (group->ops->freeing_mark)
                group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
                           struct fsnotify_group *group)
{
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        fsnotify_detach_mark(mark);
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this, marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore, correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as a
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
        if (a == b)
                return 0;
        if (!a)
                return 1;
        if (!b)
                return -1;
        if (a->priority < b->priority)
                return 1;
        if (a->priority > b->priority)
                return -1;
        if (a < b)
                return 1;
        return -1;
}

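/*
 * For illustration, a merge of the sorted inode and vfsmount mark lists (as
 * the event delivery code does elsewhere in fsnotify) can advance through
 * both lists with this comparator; inode_mark and vfsmount_mark are
 * hypothetical cursors into the two lists:
 *
 *      cmp = fsnotify_compare_groups(inode_mark->group, vfsmount_mark->group);
 *      if (cmp > 0)
 *              deliver to vfsmount_mark only, then advance it;
 *      else if (cmp < 0)
 *              deliver to inode_mark only, then advance it;
 *      else
 *              deliver to both marks together (same group);
 */
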
static int fsnotify_attach_connector_to_object(
                                struct fsnotify_mark_connector __rcu **connp,
                                struct inode *inode,
                                struct vfsmount *mnt)
{
        struct fsnotify_mark_connector *conn;

        conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
        if (!conn)
                return -ENOMEM;
        spin_lock_init(&conn->lock);
        INIT_HLIST_HEAD(&conn->list);
        if (inode) {
                conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
                conn->inode = igrab(inode);
        } else {
                conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
                conn->mnt = mnt;
        }
        /*
         * cmpxchg() provides the barrier so that readers of *connp can see
         * only initialized structure
         */
        if (cmpxchg(connp, NULL, conn)) {
                /* Someone else created list structure for us */
                if (inode)
                        iput(inode);
                kmem_cache_free(fsnotify_mark_connector_cachep, conn);
        }

        return 0;
}

/*
 * Get the mark connector, make sure it is alive and return with its lock
 * held. This is for users that get the connector pointer from an inode or
 * mount. Users that hold a reference to a mark on the list may directly lock
 * connector->lock as they are sure the list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
                                struct fsnotify_mark_connector __rcu **connp)
{
        struct fsnotify_mark_connector *conn;
        int idx;

        idx = srcu_read_lock(&fsnotify_mark_srcu);
        conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
        if (!conn)
                goto out;
        spin_lock(&conn->lock);
        if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE |
                             FSNOTIFY_OBJ_TYPE_VFSMOUNT))) {
                spin_unlock(&conn->lock);
                srcu_read_unlock(&fsnotify_mark_srcu, idx);
                return NULL;
        }
out:
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
        return conn;
}

/*
 * Add a mark into the proper place in the given list of marks. These marks
 * may be used by the fsnotify backend to determine which event types should
 * be delivered to which group and for which inodes. The marks are ordered
 * according to priority, highest number first, and then by the group's
 * location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
                                  struct inode *inode, struct vfsmount *mnt,
                                  int allow_dups)
{
        struct fsnotify_mark *lmark, *last = NULL;
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark_connector __rcu **connp;
        int cmp;
        int err = 0;

        if (WARN_ON(!inode && !mnt))
                return -EINVAL;
        if (inode)
                connp = &inode->i_fsnotify_marks;
        else
                connp = &real_mount(mnt)->mnt_fsnotify_marks;
restart:
        spin_lock(&mark->lock);
        conn = fsnotify_grab_connector(connp);
        if (!conn) {
                spin_unlock(&mark->lock);
                err = fsnotify_attach_connector_to_object(connp, inode, mnt);
                if (err)
                        return err;
                goto restart;
        }

        /* is mark the first mark? */
        if (hlist_empty(&conn->list)) {
                hlist_add_head_rcu(&mark->obj_list, &conn->list);
                goto added;
        }

        /* should mark be in the middle of the current list? */
        hlist_for_each_entry(lmark, &conn->list, obj_list) {
                last = lmark;

                if ((lmark->group == mark->group) &&
                    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
                    !allow_dups) {
                        err = -EEXIST;
                        goto out_err;
                }

                cmp = fsnotify_compare_groups(lmark->group, mark->group);
                if (cmp >= 0) {
                        hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
                        goto added;
                }
        }

        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
        hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
        mark->connector = conn;
out_err:
        spin_unlock(&conn->lock);
        spin_unlock(&mark->lock);
        return err;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
                             struct vfsmount *mnt, int allow_dups)
{
        struct fsnotify_group *group = mark->group;
        int ret = 0;

        BUG_ON(inode && mnt);
        BUG_ON(!inode && !mnt);
        BUG_ON(!mutex_is_locked(&group->mark_mutex));

        /*
         * LOCKING ORDER!!!!
         * group->mark_mutex
         * mark->lock
         * mark->connector->lock
         */
        spin_lock(&mark->lock);
        mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

        list_add(&mark->g_list, &group->marks_list);
        atomic_inc(&group->num_marks);
        fsnotify_get_mark(mark); /* for g_list */
        spin_unlock(&mark->lock);

        ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
        if (ret)
                goto err;

        if (mark->mask)
                fsnotify_recalc_mask(mark->connector);

        return ret;
err:
        spin_lock(&mark->lock);
        mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
                         FSNOTIFY_MARK_FLAG_ATTACHED);
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);
        atomic_dec(&group->num_marks);

        fsnotify_put_mark(mark);
        return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
                      struct vfsmount *mnt, int allow_dups)
{
        int ret;
        struct fsnotify_group *group = mark->group;

        mutex_lock(&group->mark_mutex);
        ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
        mutex_unlock(&group->mark_mutex);
        return ret;
}

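/*
 * Typical backend usage of the two calls above, as a hedged sketch; the
 * my_mark_cache allocation is hypothetical and not defined in this file:
 *
 *      struct fsnotify_mark *mark;
 *
 *      mark = kmem_cache_alloc(my_mark_cache, GFP_KERNEL);
 *      if (!mark)
 *              return -ENOMEM;
 *      fsnotify_init_mark(mark, group);
 *      mark->mask = FS_MODIFY;
 *      ret = fsnotify_add_mark(mark, inode, NULL, 0);
 *      if (ret)
 *              fsnotify_put_mark(mark);
 *
 * On failure the creator's reference (taken by fsnotify_init_mark()) is the
 * only one left, so the final fsnotify_put_mark() frees the mark again.
 */
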
/*
 * Given a list of marks, find the mark associated with the given group. If
 * found, take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(
                                struct fsnotify_mark_connector __rcu **connp,
                                struct fsnotify_group *group)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return NULL;

        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->group == group &&
                    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                        fsnotify_get_mark(mark);
                        spin_unlock(&conn->lock);
                        return mark;
                }
        }
        spin_unlock(&conn->lock);
        return NULL;
}

/* Clear any marks in a group with given type */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
                                   unsigned int type)
{
        struct fsnotify_mark *lmark, *mark;
        LIST_HEAD(to_free);
        struct list_head *head = &to_free;

        /* Skip selection step if we want to clear all marks. */
        if (type == FSNOTIFY_OBJ_ALL_TYPES) {
                head = &group->marks_list;
                goto clear;
        }
        /*
         * We have to be really careful here. Anytime we drop mark_mutex, e.g.
         * fsnotify_clear_marks_by_inode() can come and free marks, even in
         * our private to_free list, so we have to hold mark_mutex even when
         * accessing that list. And freeing a mark requires us to drop
         * mark_mutex. So we can reliably free only the first mark in the
         * list. That's why we first move the marks to be freed to the to_free
         * list in one go and then free the marks in that list one by one.
         */
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
                if (mark->connector->flags & type)
                        list_move(&mark->g_list, &to_free);
        }
        mutex_unlock(&group->mark_mutex);

clear:
        while (1) {
                mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
                if (list_empty(head)) {
                        mutex_unlock(&group->mark_mutex);
                        break;
                }
                mark = list_first_entry(head, struct fsnotify_mark, g_list);
                fsnotify_get_mark(mark);
                fsnotify_detach_mark(mark);
                mutex_unlock(&group->mark_mutex);
                fsnotify_free_mark(mark);
                fsnotify_put_mark(mark);
        }
}

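/*
 * Roughly how group teardown is expected to drive the helpers above (compare
 * fsnotify_destroy_group() in group.c; this is a sketch, not a verbatim
 * copy):
 *
 *      fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES);
 *      ...
 *      fsnotify_wait_marks_destroyed();
 *      fsnotify_put_group(group);
 */
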
/* Destroy all marks attached to an inode / vfsmount */
void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark, *old_mark = NULL;
        struct inode *inode;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return;
        /*
         * We have to be careful since we can race with e.g.
         * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
         * list can get modified. However we are holding a mark reference and
         * thus our mark cannot be removed from obj_list so we can continue
         * iteration after regaining conn->lock.
         */
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                fsnotify_get_mark(mark);
                spin_unlock(&conn->lock);
                if (old_mark)
                        fsnotify_put_mark(old_mark);
                old_mark = mark;
                fsnotify_destroy_mark(mark, mark->group);
                spin_lock(&conn->lock);
        }
        /*
         * Detach the list from the object now so that we don't pin the inode
         * until all mark references get dropped. That would lead to strange
         * results such as delaying inode deletion or blocking unmount.
         */
        inode = fsnotify_detach_connector_from_object(conn);
        spin_unlock(&conn->lock);
        if (old_mark)
                fsnotify_put_mark(old_mark);
        iput(inode);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
                        struct fsnotify_group *group)
{
        memset(mark, 0, sizeof(*mark));
        spin_lock_init(&mark->lock);
        refcount_set(&mark->refcnt, 1);
        fsnotify_get_group(group);
        mark->group = group;
}

/*
 * Destroy all marks in destroy_list, waiting for the SRCU period to finish
 * before actually freeing the marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark *mark, *next;
        struct list_head private_destroy_list;

        spin_lock(&destroy_lock);
        /* exchange the list head */
        list_replace_init(&destroy_list, &private_destroy_list);
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);

        list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
                list_del_init(&mark->g_list);
                fsnotify_final_mark_destroy(mark);
        }
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
        flush_delayed_work(&reaper_work);
}