linux/fs/notify/mark.c
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list.  It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to an inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark
 * is dropped.  Thus having a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove a mark from g_list before dropping the
 * mark reference associated with that, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property of being able to run concurrently
 * with any (or all) of the other directions.
 */
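
/*
 * Illustrative sketch (not part of the original file): a backend that wants
 * to update a mark's mask must respect the lock order above.  "my_mark" is a
 * hypothetical mark already attached under "group"; the helpers are the ones
 * defined below in this file.
 *
 *	mutex_lock(&group->mark_mutex);
 *	spin_lock(&my_mark->lock);
 *	my_mark->mask |= FS_MODIFY;
 *	spin_unlock(&my_mark->lock);
 *	fsnotify_recalc_mask(my_mark->connector);  (takes connector->lock)
 *	mutex_unlock(&group->mark_mutex);
 */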

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY   (1)     /* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
        WARN_ON_ONCE(!refcount_read(&mark->refcnt));
        refcount_inc(&mark->refcnt);
}

static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
        if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
                return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
        else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
                return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
        return NULL;
}

__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
{
        if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
                return 0;

        return *fsnotify_conn_mask_p(conn);
}

static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        u32 new_mask = 0;
        struct fsnotify_mark *mark;

        assert_spin_locked(&conn->lock);
        /* We can get detached connector here when inode is getting unlinked. */
        if (!fsnotify_valid_obj_type(conn->type))
                return;
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
                        new_mask |= mark->mask;
        }
        *fsnotify_conn_mask_p(conn) = new_mask;
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->obj cannot disappear under us.  Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
        if (!conn)
                return;

        spin_lock(&conn->lock);
        __fsnotify_recalc_mask(conn);
        spin_unlock(&conn->lock);
        if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
                __fsnotify_update_child_dentry_flags(
                                        fsnotify_conn_inode(conn));
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark_connector *conn, *free;

        spin_lock(&destroy_lock);
        conn = connector_destroy_list;
        connector_destroy_list = NULL;
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);
        while (conn) {
                free = conn;
                conn = conn->destroy_next;
                kmem_cache_free(fsnotify_mark_connector_cachep, free);
        }
}

static void *fsnotify_detach_connector_from_object(
                                        struct fsnotify_mark_connector *conn,
                                        unsigned int *type)
{
        struct inode *inode = NULL;

        *type = conn->type;
        if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
                return NULL;

        if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
                inode = fsnotify_conn_inode(conn);
                inode->i_fsnotify_mask = 0;
                atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
        } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
                fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
        }

        rcu_assign_pointer(*(conn->obj), NULL);
        conn->obj = NULL;
        conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;

        return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        if (WARN_ON_ONCE(!group))
                return;
        group->ops->free_mark(mark);
        fsnotify_put_group(group);
}

/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
        struct inode *inode;
        struct super_block *sb;

        if (!objp)
                return;
        /* Currently only inode references are passed to be dropped */
        if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
                return;
        inode = objp;
        sb = inode->i_sb;
        iput(inode);
        if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
                wake_up_var(&sb->s_fsnotify_inode_refs);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_mark_connector *conn;
        void *objp = NULL;
        unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
        bool free_conn = false;

        /* Catch marks that were actually never attached to object */
        if (!mark->connector) {
                if (refcount_dec_and_test(&mark->refcnt))
                        fsnotify_final_mark_destroy(mark);
                return;
        }

        /*
         * We have to be careful so that traversals of obj_list under lock can
         * safely grab mark reference.
         */
        if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
                return;

        conn = mark->connector;
        hlist_del_init_rcu(&mark->obj_list);
        if (hlist_empty(&conn->list)) {
                objp = fsnotify_detach_connector_from_object(conn, &type);
                free_conn = true;
        } else {
                __fsnotify_recalc_mask(conn);
        }
        mark->connector = NULL;
        spin_unlock(&conn->lock);

        fsnotify_drop_object(type, objp);

        if (free_conn) {
                spin_lock(&destroy_lock);
                conn->destroy_next = connector_destroy_list;
                connector_destroy_list = conn;
                spin_unlock(&destroy_lock);
                queue_work(system_unbound_wq, &connector_reaper_work);
        }
        /*
         * Note that we didn't update flags telling whether inode cares about
         * what's happening with children. We update these flags from
         * __fsnotify_parent() lazily when next event happens on one of our
         * children.
         */
        spin_lock(&destroy_lock);
        list_add(&mark->g_list, &destroy_list);
        spin_unlock(&destroy_lock);
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
}

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
        if (!mark)
                return true;

        if (refcount_inc_not_zero(&mark->refcnt)) {
                spin_lock(&mark->lock);
                if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
                        /* mark is attached, group is still alive then */
                        atomic_inc(&mark->group->user_waits);
                        spin_unlock(&mark->lock);
                        return true;
                }
                spin_unlock(&mark->lock);
                fsnotify_put_mark(mark);
        }
        return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
        if (mark) {
                struct fsnotify_group *group = mark->group;

                fsnotify_put_mark(mark);
                /*
                 * We abuse notification_waitq on group shutdown for waiting for
                 * all marks pinned when waiting for userspace.
                 */
                if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
                        wake_up(&group->notification_waitq);
        }
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
        int type;

        fsnotify_foreach_obj_type(type) {
                /* This can fail if mark is being removed */
                if (!fsnotify_get_mark_safe(iter_info->marks[type]))
                        goto fail;
        }

        /*
         * Now that both marks are pinned by refcount in the inode / vfsmount
         * lists, we can drop SRCU lock, and safely resume the list iteration
         * once userspace returns.
         */
        srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

        return true;

fail:
        for (type--; type >= 0; type--)
                fsnotify_put_mark_wake(iter_info->marks[type]);
        return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
        int type;

        iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
        fsnotify_foreach_obj_type(type)
                fsnotify_put_mark_wake(iter_info->marks[type]);
}
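
/*
 * Illustrative sketch (not part of the original file): a permission-event
 * backend such as fanotify pairs the two helpers above around a sleep for a
 * userspace response.  "wait_for_response()" is a hypothetical stand-in for
 * the group's own wait logic.
 *
 *	if (!fsnotify_prepare_user_wait(iter_info))
 *		bail out;  (a mark was concurrently removed)
 *	ret = wait_for_response(group, event);  (SRCU is not held while sleeping)
 *	fsnotify_finish_user_wait(iter_info);  (re-acquires SRCU)
 */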

/*
 * Mark mark as detached, remove it from group list. Mark still stays in object
 * list until its last reference is dropped. Note that we rely on mark being
 * removed from group list before corresponding reference to it is dropped. In
 * particular we rely on mark->connector being valid while we hold
 * group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold a
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
        WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
                     refcount_read(&mark->refcnt) < 1 +
                        !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);

        atomic_dec(&group->num_marks);

        /* Drop mark reference acquired in fsnotify_add_mark_locked() */
        fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed.  The
 * freeing actually happens only once the last reference to the mark is
 * dropped, from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
        struct fsnotify_group *group = mark->group;

        spin_lock(&mark->lock);
        /* something else already called this function on this mark */
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
                spin_unlock(&mark->lock);
                return;
        }
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
        spin_unlock(&mark->lock);

        /*
         * Some groups like to know that marks are being freed.  This is a
         * callback to the group function to let it know that this mark
         * is being freed.
         */
        if (group->ops->freeing_mark)
                group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
                           struct fsnotify_group *group)
{
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        fsnotify_detach_mark(mark);
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
        if (a == b)
                return 0;
        if (!a)
                return 1;
        if (!b)
                return -1;
        if (a->priority < b->priority)
                return 1;
        if (a->priority > b->priority)
                return -1;
        if (a < b)
                return 1;
        return -1;
}
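
/*
 * Illustrative sketch (not part of the original file): given inode and
 * vfsmount mark lists each sorted by the criteria above, a caller can merge
 * them in a single linear pass; "imark" and "vmark" are hypothetical cursors
 * into the two lists.
 *
 *	cmp = fsnotify_compare_groups(imark->group, vmark->group);
 *	if (cmp < 0)
 *		handle the inode mark alone, advance imark;
 *	else if (cmp > 0)
 *		handle the vfsmount mark alone, advance vmark;
 *	else
 *		handle both together (needed for the ignore mask), advance both;
 */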

static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
                                               unsigned int type)
{
        struct inode *inode = NULL;
        struct fsnotify_mark_connector *conn;

        conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
        if (!conn)
                return -ENOMEM;
        spin_lock_init(&conn->lock);
        INIT_HLIST_HEAD(&conn->list);
        conn->type = type;
        conn->obj = connp;
        if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
                inode = igrab(fsnotify_conn_inode(conn));
        /*
         * cmpxchg() provides the barrier so that readers of *connp can see
         * only initialized structure
         */
        if (cmpxchg(connp, NULL, conn)) {
                /* Someone else created list structure for us */
                if (inode)
                        iput(inode);
                kmem_cache_free(fsnotify_mark_connector_cachep, conn);
        }

        return 0;
}

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get connector pointer from inode or mount. Users that
 * hold reference to a mark on the list may directly lock connector->lock as
 * they are sure list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
                                                fsnotify_connp_t *connp)
{
        struct fsnotify_mark_connector *conn;
        int idx;

        idx = srcu_read_lock(&fsnotify_mark_srcu);
        conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
        if (!conn)
                goto out;
        spin_lock(&conn->lock);
        if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) {
                spin_unlock(&conn->lock);
                srcu_read_unlock(&fsnotify_mark_srcu, idx);
                return NULL;
        }
out:
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
        return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
                                  fsnotify_connp_t *connp, unsigned int type,
                                  int allow_dups)
{
        struct fsnotify_mark *lmark, *last = NULL;
        struct fsnotify_mark_connector *conn;
        int cmp;
        int err = 0;

        if (WARN_ON(!fsnotify_valid_obj_type(type)))
                return -EINVAL;
restart:
        spin_lock(&mark->lock);
        conn = fsnotify_grab_connector(connp);
        if (!conn) {
                spin_unlock(&mark->lock);
                err = fsnotify_attach_connector_to_object(connp, type);
                if (err)
                        return err;
                goto restart;
        }

        /* is mark the first mark? */
        if (hlist_empty(&conn->list)) {
                hlist_add_head_rcu(&mark->obj_list, &conn->list);
                goto added;
        }

        /* should mark be in the middle of the current list? */
        hlist_for_each_entry(lmark, &conn->list, obj_list) {
                last = lmark;

                if ((lmark->group == mark->group) &&
                    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
                    !allow_dups) {
                        err = -EEXIST;
                        goto out_err;
                }

                cmp = fsnotify_compare_groups(lmark->group, mark->group);
                if (cmp >= 0) {
                        hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
                        goto added;
                }
        }

        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
        hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
        mark->connector = conn;
out_err:
        spin_unlock(&conn->lock);
        spin_unlock(&mark->lock);
        return err;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
                             fsnotify_connp_t *connp, unsigned int type,
                             int allow_dups)
{
        struct fsnotify_group *group = mark->group;
        int ret = 0;

        BUG_ON(!mutex_is_locked(&group->mark_mutex));

        /*
         * LOCKING ORDER!!!!
         * group->mark_mutex
         * mark->lock
         * mark->connector->lock
         */
        spin_lock(&mark->lock);
        mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

        list_add(&mark->g_list, &group->marks_list);
        atomic_inc(&group->num_marks);
        fsnotify_get_mark(mark); /* for g_list */
        spin_unlock(&mark->lock);

        ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
        if (ret)
                goto err;

        if (mark->mask)
                fsnotify_recalc_mask(mark->connector);

        return ret;
err:
        spin_lock(&mark->lock);
        mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
                         FSNOTIFY_MARK_FLAG_ATTACHED);
        list_del_init(&mark->g_list);
        spin_unlock(&mark->lock);
        atomic_dec(&group->num_marks);

        fsnotify_put_mark(mark);
        return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
                      unsigned int type, int allow_dups)
{
        int ret;
        struct fsnotify_group *group = mark->group;

        mutex_lock(&group->mark_mutex);
        ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
        mutex_unlock(&group->mark_mutex);
        return ret;
}
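
/*
 * Illustrative sketch (not part of the original file): a typical backend
 * allocates and initializes a mark, then attaches it to an inode's connector
 * pointer; "my_mark" is hypothetical.
 *
 *	fsnotify_init_mark(my_mark, group);
 *	my_mark->mask = FS_MODIFY | FS_DELETE;
 *	ret = fsnotify_add_mark(my_mark, &inode->i_fsnotify_marks,
 *				FSNOTIFY_OBJ_TYPE_INODE, 0);
 *	if (ret)
 *		fsnotify_put_mark(my_mark);  (drops the creation reference)
 */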

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
                                         struct fsnotify_group *group)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return NULL;

        hlist_for_each_entry(mark, &conn->list, obj_list) {
                if (mark->group == group &&
                    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
                        fsnotify_get_mark(mark);
                        spin_unlock(&conn->lock);
                        return mark;
                }
        }
        spin_unlock(&conn->lock);
        return NULL;
}
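
/*
 * Illustrative sketch (not part of the original file): the returned mark
 * carries a reference that the caller must drop, e.g. when extending an
 * existing watch:
 *
 *	mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 *	if (mark) {
 *		spin_lock(&mark->lock);
 *		mark->mask |= FS_CREATE;
 *		spin_unlock(&mark->lock);
 *		fsnotify_recalc_mask(mark->connector);
 *		fsnotify_put_mark(mark);
 *	}
 */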

/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
                                   unsigned int type_mask)
{
        struct fsnotify_mark *lmark, *mark;
        LIST_HEAD(to_free);
        struct list_head *head = &to_free;

        /* Skip selection step if we want to clear all marks. */
        if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) {
                head = &group->marks_list;
                goto clear;
        }
        /*
         * We have to be really careful here. Anytime we drop mark_mutex, e.g.
         * fsnotify_clear_marks_by_inode() can come and free marks. Even in our
         * to_free list so we have to use mark_mutex even when accessing that
         * list. And freeing a mark requires us to drop mark_mutex. So we can
         * reliably free only the first mark in the list. That's why we first
         * move the marks to be freed into the to_free list in one go and then
         * free the marks in the to_free list one by one.
         */
        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
        list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
                if ((1U << mark->connector->type) & type_mask)
                        list_move(&mark->g_list, &to_free);
        }
        mutex_unlock(&group->mark_mutex);

clear:
        while (1) {
                mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
                if (list_empty(head)) {
                        mutex_unlock(&group->mark_mutex);
                        break;
                }
                mark = list_first_entry(head, struct fsnotify_mark, g_list);
                fsnotify_get_mark(mark);
                fsnotify_detach_mark(mark);
                mutex_unlock(&group->mark_mutex);
                fsnotify_free_mark(mark);
                fsnotify_put_mark(mark);
        }
}
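
/*
 * Illustrative sketch (not part of the original file): group teardown clears
 * every mark regardless of object type, which takes the fast path above:
 *
 *	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK);
 */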

/* Destroy all marks attached to an object via connector */
void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
        struct fsnotify_mark_connector *conn;
        struct fsnotify_mark *mark, *old_mark = NULL;
        void *objp;
        unsigned int type;

        conn = fsnotify_grab_connector(connp);
        if (!conn)
                return;
        /*
         * We have to be careful since we can race with e.g.
         * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
         * list can get modified. However we are holding mark reference and
         * thus our mark cannot be removed from obj_list so we can continue
         * iteration after regaining conn->lock.
         */
        hlist_for_each_entry(mark, &conn->list, obj_list) {
                fsnotify_get_mark(mark);
                spin_unlock(&conn->lock);
                if (old_mark)
                        fsnotify_put_mark(old_mark);
                old_mark = mark;
                fsnotify_destroy_mark(mark, mark->group);
                spin_lock(&conn->lock);
        }
        /*
         * Detach list from object now so that we don't pin inode until all
         * mark references get dropped. It would lead to strange results such
         * as delaying inode deletion or blocking unmount.
         */
        objp = fsnotify_detach_connector_from_object(conn, &type);
        spin_unlock(&conn->lock);
        if (old_mark)
                fsnotify_put_mark(old_mark);
        fsnotify_drop_object(type, objp);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
                        struct fsnotify_group *group)
{
        memset(mark, 0, sizeof(*mark));
        spin_lock_init(&mark->lock);
        refcount_set(&mark->refcnt, 1);
        fsnotify_get_group(group);
        mark->group = group;
}

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
        struct fsnotify_mark *mark, *next;
        struct list_head private_destroy_list;

        spin_lock(&destroy_lock);
        /* exchange the list head */
        list_replace_init(&destroy_list, &private_destroy_list);
        spin_unlock(&destroy_lock);

        synchronize_srcu(&fsnotify_mark_srcu);

        list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
                list_del_init(&mark->g_list);
                fsnotify_final_mark_destroy(mark);
        }
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
        flush_delayed_work(&reaper_work);
}