linux/fs/notify/mark.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2, or (at your option)
   7 *  any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; see the file COPYING.  If not, write to
  16 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  17 */
  18
  19/*
  20 * fsnotify inode mark locking/lifetime/and refcnting
  21 *
  22 * REFCNT:
  23 * The group->recnt and mark->refcnt tell how many "things" in the kernel
  24 * currently are referencing the objects. Both kind of objects typically will
  25 * live inside the kernel with a refcnt of 2, one for its creation and one for
  26 * the reference a group and a mark hold to each other.
  27 * If you are holding the appropriate locks, you can take a reference and the
  28 * object itself is guaranteed to survive until the reference is dropped.
  29 *
  30 * LOCKING:
  31 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
  32 * in order as follows:
  33 *
  34 * group->mark_mutex
  35 * mark->lock
  36 * inode->i_lock
  37 *
  38 * group->mark_mutex protects the marks_list anchored inside a given group and
  39 * each mark is hooked via the g_list.  It also protects the groups private
  40 * data (i.e group limits).
  41
  42 * mark->lock protects the marks attributes like its masks and flags.
  43 * Furthermore it protects the access to a reference of the group that the mark
  44 * is assigned to as well as the access to a reference of the inode/vfsmount
  45 * that is being watched by the mark.
  46 *
  47 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
  48 * given inode and each mark is hooked via the i_list. (and sorta the
  49 * free_i_list)
  50 *
  51 *
  52 * LIFETIME:
  53 * Inode marks survive between when they are added to an inode and when their
  54 * refcnt==0.
  55 *
  56 * The inode mark can be cleared for a number of different reasons including:
  57 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
  58 * - The inode is being evicted from cache. (fsnotify_inode_delete)
  59 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
  60 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
  61 * - The fsnotify_group associated with the mark is going away and all such marks
  62 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
  63 *
  64 * Worst case we are given an inode and need to clean up all the marks on that
  65 * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
  66 * mark on the list we take a reference (so the mark can't disappear under us).
  67 * We remove that mark form the inode's list of marks and we add this mark to a
  68 * private list anchored on the stack using i_free_list; we walk i_free_list
  69 * and before we destroy the mark we make sure that we dont race with a
  70 * concurrent destroy_group by getting a ref to the marks group and taking the
  71 * groups mutex.
  72
  73 * Very similarly for freeing by group, except we use free_g_list.
  74 *
  75 * This has the very interesting property of being able to run concurrently with
  76 * any (or all) other directions.
  77 */
  78
  79#include <linux/fs.h>
  80#include <linux/init.h>
  81#include <linux/kernel.h>
  82#include <linux/kthread.h>
  83#include <linux/module.h>
  84#include <linux/mutex.h>
  85#include <linux/slab.h>
  86#include <linux/spinlock.h>
  87#include <linux/srcu.h>
  88
  89#include <linux/atomic.h>
  90
  91#include <linux/fsnotify_backend.h>
  92#include "fsnotify.h"
  93
  94struct srcu_struct fsnotify_mark_srcu;
  95static DEFINE_SPINLOCK(destroy_lock);
  96static LIST_HEAD(destroy_list);
  97static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
  98
  99void fsnotify_get_mark(struct fsnotify_mark *mark)
 100{
 101        atomic_inc(&mark->refcnt);
 102}
 103
 104void fsnotify_put_mark(struct fsnotify_mark *mark)
 105{
 106        if (atomic_dec_and_test(&mark->refcnt)) {
 107                if (mark->group)
 108                        fsnotify_put_group(mark->group);
 109                mark->free_mark(mark);
 110        }
 111}
 112
 113/* Calculate mask of events for a list of marks */
 114u32 fsnotify_recalc_mask(struct hlist_head *head)
 115{
 116        u32 new_mask = 0;
 117        struct fsnotify_mark *mark;
 118
 119        hlist_for_each_entry(mark, head, obj_list)
 120                new_mask |= mark->mask;
 121        return new_mask;
 122}
 123
 124/*
 125 * Any time a mark is getting freed we end up here.
 126 * The caller had better be holding a reference to this mark so we don't actually
 127 * do the final put under the mark->lock
 128 */
 129void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 130                                  struct fsnotify_group *group)
 131{
 132        struct inode *inode = NULL;
 133
 134        BUG_ON(!mutex_is_locked(&group->mark_mutex));
 135
 136        spin_lock(&mark->lock);
 137
 138        /* something else already called this function on this mark */
 139        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 140                spin_unlock(&mark->lock);
 141                return;
 142        }
 143
 144        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 145
 146        if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
 147                inode = mark->inode;
 148                fsnotify_destroy_inode_mark(mark);
 149        } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
 150                fsnotify_destroy_vfsmount_mark(mark);
 151        else
 152                BUG();
 153
 154        list_del_init(&mark->g_list);
 155
 156        spin_unlock(&mark->lock);
 157
 158        if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
 159                iput(inode);
 160        /* release lock temporarily */
 161        mutex_unlock(&group->mark_mutex);
 162
 163        spin_lock(&destroy_lock);
 164        list_add(&mark->g_list, &destroy_list);
 165        spin_unlock(&destroy_lock);
 166        wake_up(&destroy_waitq);
 167        /*
 168         * We don't necessarily have a ref on mark from caller so the above destroy
 169         * may have actually freed it, unless this group provides a 'freeing_mark'
 170         * function which must be holding a reference.
 171         */
 172
 173        /*
 174         * Some groups like to know that marks are being freed.  This is a
 175         * callback to the group function to let it know that this mark
 176         * is being freed.
 177         */
 178        if (group->ops->freeing_mark)
 179                group->ops->freeing_mark(mark, group);
 180
 181        /*
 182         * __fsnotify_update_child_dentry_flags(inode);
 183         *
 184         * I really want to call that, but we can't, we have no idea if the inode
 185         * still exists the second we drop the mark->lock.
 186         *
 187         * The next time an event arrive to this inode from one of it's children
 188         * __fsnotify_parent will see that the inode doesn't care about it's
 189         * children and will update all of these flags then.  So really this
 190         * is just a lazy update (and could be a perf win...)
 191         */
 192
 193        atomic_dec(&group->num_marks);
 194
 195        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 196}
 197
 198void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 199                           struct fsnotify_group *group)
 200{
 201        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 202        fsnotify_destroy_mark_locked(mark, group);
 203        mutex_unlock(&group->mark_mutex);
 204}
 205
 206/*
 207 * Destroy all marks in the given list. The marks must be already detached from
 208 * the original inode / vfsmount.
 209 */
 210void fsnotify_destroy_marks(struct list_head *to_free)
 211{
 212        struct fsnotify_mark *mark, *lmark;
 213        struct fsnotify_group *group;
 214
 215        list_for_each_entry_safe(mark, lmark, to_free, free_list) {
 216                spin_lock(&mark->lock);
 217                fsnotify_get_group(mark->group);
 218                group = mark->group;
 219                spin_unlock(&mark->lock);
 220
 221                fsnotify_destroy_mark(mark, group);
 222                fsnotify_put_mark(mark);
 223                fsnotify_put_group(group);
 224        }
 225}
 226
 227void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 228{
 229        assert_spin_locked(&mark->lock);
 230
 231        mark->mask = mask;
 232
 233        if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
 234                fsnotify_set_inode_mark_mask_locked(mark, mask);
 235}
 236
 237void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 238{
 239        assert_spin_locked(&mark->lock);
 240
 241        mark->ignored_mask = mask;
 242}
 243
 244/*
 245 * Sorting function for lists of fsnotify marks.
 246 *
 247 * Fanotify supports different notification classes (reflected as priority of
 248 * notification group). Events shall be passed to notification groups in
 249 * decreasing priority order. To achieve this marks in notification lists for
 250 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 251 * are descending.
 252 *
 253 * Furthermore correct handling of the ignore mask requires processing inode
 254 * and vfsmount marks of each group together. Using the group address as
 255 * further sort criterion provides a unique sorting order and thus we can
 256 * merge inode and vfsmount lists of marks in linear time and find groups
 257 * present in both lists.
 258 *
 259 * A return value of 1 signifies that b has priority over a.
 260 * A return value of 0 signifies that the two marks have to be handled together.
 261 * A return value of -1 signifies that a has priority over b.
 262 */
 263int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 264{
 265        if (a == b)
 266                return 0;
 267        if (!a)
 268                return 1;
 269        if (!b)
 270                return -1;
 271        if (a->priority < b->priority)
 272                return 1;
 273        if (a->priority > b->priority)
 274                return -1;
 275        if (a < b)
 276                return 1;
 277        return -1;
 278}
 279
 280/* Add mark into proper place in given list of marks */
 281int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
 282                           int allow_dups)
 283{
 284        struct fsnotify_mark *lmark, *last = NULL;
 285        int cmp;
 286
 287        /* is mark the first mark? */
 288        if (hlist_empty(head)) {
 289                hlist_add_head_rcu(&mark->obj_list, head);
 290                return 0;
 291        }
 292
 293        /* should mark be in the middle of the current list? */
 294        hlist_for_each_entry(lmark, head, obj_list) {
 295                last = lmark;
 296
 297                if ((lmark->group == mark->group) && !allow_dups)
 298                        return -EEXIST;
 299
 300                cmp = fsnotify_compare_groups(lmark->group, mark->group);
 301                if (cmp >= 0) {
 302                        hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
 303                        return 0;
 304                }
 305        }
 306
 307        BUG_ON(last == NULL);
 308        /* mark should be the last entry.  last is the current last entry */
 309        hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
 310        return 0;
 311}
 312
 313/*
 314 * Attach an initialized mark to a given group and fs object.
 315 * These marks may be used for the fsnotify backend to determine which
 316 * event types should be delivered to which group.
 317 */
 318int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 319                             struct fsnotify_group *group, struct inode *inode,
 320                             struct vfsmount *mnt, int allow_dups)
 321{
 322        int ret = 0;
 323
 324        BUG_ON(inode && mnt);
 325        BUG_ON(!inode && !mnt);
 326        BUG_ON(!mutex_is_locked(&group->mark_mutex));
 327
 328        /*
 329         * LOCKING ORDER!!!!
 330         * group->mark_mutex
 331         * mark->lock
 332         * inode->i_lock
 333         */
 334        spin_lock(&mark->lock);
 335        mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
 336
 337        fsnotify_get_group(group);
 338        mark->group = group;
 339        list_add(&mark->g_list, &group->marks_list);
 340        atomic_inc(&group->num_marks);
 341        fsnotify_get_mark(mark); /* for i_list and g_list */
 342
 343        if (inode) {
 344                ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
 345                if (ret)
 346                        goto err;
 347        } else if (mnt) {
 348                ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
 349                if (ret)
 350                        goto err;
 351        } else {
 352                BUG();
 353        }
 354
 355        /* this will pin the object if appropriate */
 356        fsnotify_set_mark_mask_locked(mark, mark->mask);
 357        spin_unlock(&mark->lock);
 358
 359        if (inode)
 360                __fsnotify_update_child_dentry_flags(inode);
 361
 362        return ret;
 363err:
 364        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 365        list_del_init(&mark->g_list);
 366        fsnotify_put_group(group);
 367        mark->group = NULL;
 368        atomic_dec(&group->num_marks);
 369
 370        spin_unlock(&mark->lock);
 371
 372        spin_lock(&destroy_lock);
 373        list_add(&mark->g_list, &destroy_list);
 374        spin_unlock(&destroy_lock);
 375        wake_up(&destroy_waitq);
 376
 377        return ret;
 378}
 379
 380int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 381                      struct inode *inode, struct vfsmount *mnt, int allow_dups)
 382{
 383        int ret;
 384        mutex_lock(&group->mark_mutex);
 385        ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
 386        mutex_unlock(&group->mark_mutex);
 387        return ret;
 388}
 389
 390/*
 391 * Given a list of marks, find the mark associated with given group. If found
 392 * take a reference to that mark and return it, else return NULL.
 393 */
 394struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
 395                                         struct fsnotify_group *group)
 396{
 397        struct fsnotify_mark *mark;
 398
 399        hlist_for_each_entry(mark, head, obj_list) {
 400                if (mark->group == group) {
 401                        fsnotify_get_mark(mark);
 402                        return mark;
 403                }
 404        }
 405        return NULL;
 406}
 407
 408/*
 409 * clear any marks in a group in which mark->flags & flags is true
 410 */
 411void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 412                                         unsigned int flags)
 413{
 414        struct fsnotify_mark *lmark, *mark;
 415
 416        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 417        list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 418                if (mark->flags & flags) {
 419                        fsnotify_get_mark(mark);
 420                        fsnotify_destroy_mark_locked(mark, group);
 421                        fsnotify_put_mark(mark);
 422                }
 423        }
 424        mutex_unlock(&group->mark_mutex);
 425}
 426
 427/*
 428 * Given a group, destroy all of the marks associated with that group.
 429 */
 430void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 431{
 432        fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
 433}
 434
 435void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
 436{
 437        assert_spin_locked(&old->lock);
 438        new->inode = old->inode;
 439        new->mnt = old->mnt;
 440        if (old->group)
 441                fsnotify_get_group(old->group);
 442        new->group = old->group;
 443        new->mask = old->mask;
 444        new->free_mark = old->free_mark;
 445}
 446
 447/*
 448 * Nothing fancy, just initialize lists and locks and counters.
 449 */
 450void fsnotify_init_mark(struct fsnotify_mark *mark,
 451                        void (*free_mark)(struct fsnotify_mark *mark))
 452{
 453        memset(mark, 0, sizeof(*mark));
 454        spin_lock_init(&mark->lock);
 455        atomic_set(&mark->refcnt, 1);
 456        mark->free_mark = free_mark;
 457}
 458
 459static int fsnotify_mark_destroy(void *ignored)
 460{
 461        struct fsnotify_mark *mark, *next;
 462        struct list_head private_destroy_list;
 463
 464        for (;;) {
 465                spin_lock(&destroy_lock);
 466                /* exchange the list head */
 467                list_replace_init(&destroy_list, &private_destroy_list);
 468                spin_unlock(&destroy_lock);
 469
 470                synchronize_srcu(&fsnotify_mark_srcu);
 471
 472                list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
 473                        list_del_init(&mark->g_list);
 474                        fsnotify_put_mark(mark);
 475                }
 476
 477                wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
 478        }
 479
 480        return 0;
 481}
 482
 483static int __init fsnotify_mark_init(void)
 484{
 485        struct task_struct *thread;
 486
 487        thread = kthread_run(fsnotify_mark_destroy, NULL,
 488                             "fsnotify_mark");
 489        if (IS_ERR(thread))
 490                panic("unable to start fsnotify mark destruction thread.");
 491
 492        return 0;
 493}
 494device_initcall(fsnotify_mark_init);
 495