linux/fs/notify/mark.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2, or (at your option)
   7 *  any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; see the file COPYING.  If not, write to
  16 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  17 */
  18
  19/*
  20 * fsnotify inode mark locking/lifetime/and refcnting
  21 *
  22 * REFCNT:
  23 * The group->recnt and mark->refcnt tell how many "things" in the kernel
  24 * currently are referencing the objects. Both kind of objects typically will
  25 * live inside the kernel with a refcnt of 2, one for its creation and one for
  26 * the reference a group and a mark hold to each other.
  27 * If you are holding the appropriate locks, you can take a reference and the
  28 * object itself is guaranteed to survive until the reference is dropped.
  29 *
  30 * LOCKING:
  31 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
  32 * in order as follows:
  33 *
  34 * group->mark_mutex
  35 * mark->lock
  36 * inode->i_lock
  37 *
  38 * group->mark_mutex protects the marks_list anchored inside a given group and
  39 * each mark is hooked via the g_list.  It also protects the groups private
  40 * data (i.e group limits).
  41
  42 * mark->lock protects the marks attributes like its masks and flags.
  43 * Furthermore it protects the access to a reference of the group that the mark
  44 * is assigned to as well as the access to a reference of the inode/vfsmount
  45 * that is being watched by the mark.
  46 *
  47 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
  48 * given inode and each mark is hooked via the i_list. (and sorta the
  49 * free_i_list)
  50 *
  51 *
  52 * LIFETIME:
  53 * Inode marks survive between when they are added to an inode and when their
  54 * refcnt==0.
  55 *
  56 * The inode mark can be cleared for a number of different reasons including:
  57 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
  58 * - The inode is being evicted from cache. (fsnotify_inode_delete)
  59 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
  60 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
  61 * - The fsnotify_group associated with the mark is going away and all such marks
  62 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
  63 *
  64 * Worst case we are given an inode and need to clean up all the marks on that
  65 * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
  66 * mark on the list we take a reference (so the mark can't disappear under us).
  67 * We remove that mark form the inode's list of marks and we add this mark to a
  68 * private list anchored on the stack using i_free_list; we walk i_free_list
  69 * and before we destroy the mark we make sure that we dont race with a
  70 * concurrent destroy_group by getting a ref to the marks group and taking the
  71 * groups mutex.
  72
  73 * Very similarly for freeing by group, except we use free_g_list.
  74 *
  75 * This has the very interesting property of being able to run concurrently with
  76 * any (or all) other directions.
  77 */
  78
  79#include <linux/fs.h>
  80#include <linux/init.h>
  81#include <linux/kernel.h>
  82#include <linux/kthread.h>
  83#include <linux/module.h>
  84#include <linux/mutex.h>
  85#include <linux/slab.h>
  86#include <linux/spinlock.h>
  87#include <linux/srcu.h>
  88
  89#include <linux/atomic.h>
  90
  91#include <linux/fsnotify_backend.h>
  92#include "fsnotify.h"
  93
  94#define FSNOTIFY_REAPER_DELAY   (1)     /* 1 jiffy */
  95
  96struct srcu_struct fsnotify_mark_srcu;
  97static DEFINE_SPINLOCK(destroy_lock);
  98static LIST_HEAD(destroy_list);
  99
 100static void fsnotify_mark_destroy(struct work_struct *work);
 101static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
 102
 103void fsnotify_get_mark(struct fsnotify_mark *mark)
 104{
 105        atomic_inc(&mark->refcnt);
 106}
 107
 108void fsnotify_put_mark(struct fsnotify_mark *mark)
 109{
 110        if (atomic_dec_and_test(&mark->refcnt)) {
 111                if (mark->group)
 112                        fsnotify_put_group(mark->group);
 113                mark->free_mark(mark);
 114        }
 115}
 116
 117/* Calculate mask of events for a list of marks */
 118u32 fsnotify_recalc_mask(struct hlist_head *head)
 119{
 120        u32 new_mask = 0;
 121        struct fsnotify_mark *mark;
 122
 123        hlist_for_each_entry(mark, head, obj_list)
 124                new_mask |= mark->mask;
 125        return new_mask;
 126}
 127
 128/*
 129 * Remove mark from inode / vfsmount list, group list, drop inode reference
 130 * if we got one.
 131 *
 132 * Must be called with group->mark_mutex held.
 133 */
 134void fsnotify_detach_mark(struct fsnotify_mark *mark)
 135{
 136        struct inode *inode = NULL;
 137        struct fsnotify_group *group = mark->group;
 138
 139        BUG_ON(!mutex_is_locked(&group->mark_mutex));
 140
 141        spin_lock(&mark->lock);
 142
 143        /* something else already called this function on this mark */
 144        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 145                spin_unlock(&mark->lock);
 146                return;
 147        }
 148
 149        mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
 150
 151        if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
 152                inode = mark->inode;
 153                fsnotify_destroy_inode_mark(mark);
 154        } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
 155                fsnotify_destroy_vfsmount_mark(mark);
 156        else
 157                BUG();
 158        /*
 159         * Note that we didn't update flags telling whether inode cares about
 160         * what's happening with children. We update these flags from
 161         * __fsnotify_parent() lazily when next event happens on one of our
 162         * children.
 163         */
 164
 165        list_del_init(&mark->g_list);
 166
 167        spin_unlock(&mark->lock);
 168
 169        if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
 170                iput(inode);
 171
 172        atomic_dec(&group->num_marks);
 173}
 174
 175/*
 176 * Free fsnotify mark. The freeing is actually happening from a kthread which
 177 * first waits for srcu period end. Caller must have a reference to the mark
 178 * or be protected by fsnotify_mark_srcu.
 179 */
 180void fsnotify_free_mark(struct fsnotify_mark *mark)
 181{
 182        struct fsnotify_group *group = mark->group;
 183
 184        spin_lock(&mark->lock);
 185        /* something else already called this function on this mark */
 186        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 187                spin_unlock(&mark->lock);
 188                return;
 189        }
 190        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 191        spin_unlock(&mark->lock);
 192
 193        spin_lock(&destroy_lock);
 194        list_add(&mark->g_list, &destroy_list);
 195        spin_unlock(&destroy_lock);
 196        queue_delayed_work(system_unbound_wq, &reaper_work,
 197                                FSNOTIFY_REAPER_DELAY);
 198
 199        /*
 200         * Some groups like to know that marks are being freed.  This is a
 201         * callback to the group function to let it know that this mark
 202         * is being freed.
 203         */
 204        if (group->ops->freeing_mark)
 205                group->ops->freeing_mark(mark, group);
 206}
 207
 208void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 209                           struct fsnotify_group *group)
 210{
 211        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 212        fsnotify_detach_mark(mark);
 213        mutex_unlock(&group->mark_mutex);
 214        fsnotify_free_mark(mark);
 215}
 216
 217void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
 218{
 219        struct fsnotify_mark *mark;
 220
 221        while (1) {
 222                /*
 223                 * We have to be careful since we can race with e.g.
 224                 * fsnotify_clear_marks_by_group() and once we drop 'lock',
 225                 * mark can get removed from the obj_list and destroyed. But
 226                 * we are holding mark reference so mark cannot be freed and
 227                 * calling fsnotify_destroy_mark() more than once is fine.
 228                 */
 229                spin_lock(lock);
 230                if (hlist_empty(head)) {
 231                        spin_unlock(lock);
 232                        break;
 233                }
 234                mark = hlist_entry(head->first, struct fsnotify_mark, obj_list);
 235                /*
 236                 * We don't update i_fsnotify_mask / mnt_fsnotify_mask here
 237                 * since inode / mount is going away anyway. So just remove
 238                 * mark from the list.
 239                 */
 240                hlist_del_init_rcu(&mark->obj_list);
 241                fsnotify_get_mark(mark);
 242                spin_unlock(lock);
 243                fsnotify_destroy_mark(mark, mark->group);
 244                fsnotify_put_mark(mark);
 245        }
 246}
 247
 248void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 249{
 250        assert_spin_locked(&mark->lock);
 251
 252        mark->mask = mask;
 253
 254        if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
 255                fsnotify_set_inode_mark_mask_locked(mark, mask);
 256}
 257
 258void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 259{
 260        assert_spin_locked(&mark->lock);
 261
 262        mark->ignored_mask = mask;
 263}
 264
 265/*
 266 * Sorting function for lists of fsnotify marks.
 267 *
 268 * Fanotify supports different notification classes (reflected as priority of
 269 * notification group). Events shall be passed to notification groups in
 270 * decreasing priority order. To achieve this marks in notification lists for
 271 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 272 * are descending.
 273 *
 274 * Furthermore correct handling of the ignore mask requires processing inode
 275 * and vfsmount marks of each group together. Using the group address as
 276 * further sort criterion provides a unique sorting order and thus we can
 277 * merge inode and vfsmount lists of marks in linear time and find groups
 278 * present in both lists.
 279 *
 280 * A return value of 1 signifies that b has priority over a.
 281 * A return value of 0 signifies that the two marks have to be handled together.
 282 * A return value of -1 signifies that a has priority over b.
 283 */
 284int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 285{
 286        if (a == b)
 287                return 0;
 288        if (!a)
 289                return 1;
 290        if (!b)
 291                return -1;
 292        if (a->priority < b->priority)
 293                return 1;
 294        if (a->priority > b->priority)
 295                return -1;
 296        if (a < b)
 297                return 1;
 298        return -1;
 299}
 300
 301/* Add mark into proper place in given list of marks */
 302int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
 303                           int allow_dups)
 304{
 305        struct fsnotify_mark *lmark, *last = NULL;
 306        int cmp;
 307
 308        /* is mark the first mark? */
 309        if (hlist_empty(head)) {
 310                hlist_add_head_rcu(&mark->obj_list, head);
 311                return 0;
 312        }
 313
 314        /* should mark be in the middle of the current list? */
 315        hlist_for_each_entry(lmark, head, obj_list) {
 316                last = lmark;
 317
 318                if ((lmark->group == mark->group) && !allow_dups)
 319                        return -EEXIST;
 320
 321                cmp = fsnotify_compare_groups(lmark->group, mark->group);
 322                if (cmp >= 0) {
 323                        hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
 324                        return 0;
 325                }
 326        }
 327
 328        BUG_ON(last == NULL);
 329        /* mark should be the last entry.  last is the current last entry */
 330        hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
 331        return 0;
 332}
 333
 334/*
 335 * Attach an initialized mark to a given group and fs object.
 336 * These marks may be used for the fsnotify backend to determine which
 337 * event types should be delivered to which group.
 338 */
 339int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 340                             struct fsnotify_group *group, struct inode *inode,
 341                             struct vfsmount *mnt, int allow_dups)
 342{
 343        int ret = 0;
 344
 345        BUG_ON(inode && mnt);
 346        BUG_ON(!inode && !mnt);
 347        BUG_ON(!mutex_is_locked(&group->mark_mutex));
 348
 349        /*
 350         * LOCKING ORDER!!!!
 351         * group->mark_mutex
 352         * mark->lock
 353         * inode->i_lock
 354         */
 355        spin_lock(&mark->lock);
 356        mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
 357
 358        fsnotify_get_group(group);
 359        mark->group = group;
 360        list_add(&mark->g_list, &group->marks_list);
 361        atomic_inc(&group->num_marks);
 362        fsnotify_get_mark(mark); /* for i_list and g_list */
 363
 364        if (inode) {
 365                ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
 366                if (ret)
 367                        goto err;
 368        } else if (mnt) {
 369                ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
 370                if (ret)
 371                        goto err;
 372        } else {
 373                BUG();
 374        }
 375
 376        /* this will pin the object if appropriate */
 377        fsnotify_set_mark_mask_locked(mark, mark->mask);
 378        spin_unlock(&mark->lock);
 379
 380        if (inode)
 381                __fsnotify_update_child_dentry_flags(inode);
 382
 383        return ret;
 384err:
 385        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 386        list_del_init(&mark->g_list);
 387        fsnotify_put_group(group);
 388        mark->group = NULL;
 389        atomic_dec(&group->num_marks);
 390
 391        spin_unlock(&mark->lock);
 392
 393        spin_lock(&destroy_lock);
 394        list_add(&mark->g_list, &destroy_list);
 395        spin_unlock(&destroy_lock);
 396        queue_delayed_work(system_unbound_wq, &reaper_work,
 397                                FSNOTIFY_REAPER_DELAY);
 398
 399        return ret;
 400}
 401
 402int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 403                      struct inode *inode, struct vfsmount *mnt, int allow_dups)
 404{
 405        int ret;
 406        mutex_lock(&group->mark_mutex);
 407        ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
 408        mutex_unlock(&group->mark_mutex);
 409        return ret;
 410}
 411
 412/*
 413 * Given a list of marks, find the mark associated with given group. If found
 414 * take a reference to that mark and return it, else return NULL.
 415 */
 416struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
 417                                         struct fsnotify_group *group)
 418{
 419        struct fsnotify_mark *mark;
 420
 421        hlist_for_each_entry(mark, head, obj_list) {
 422                if (mark->group == group) {
 423                        fsnotify_get_mark(mark);
 424                        return mark;
 425                }
 426        }
 427        return NULL;
 428}
 429
 430/*
 431 * clear any marks in a group in which mark->flags & flags is true
 432 */
 433void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 434                                         unsigned int flags)
 435{
 436        struct fsnotify_mark *lmark, *mark;
 437        LIST_HEAD(to_free);
 438
 439        /*
 440         * We have to be really careful here. Anytime we drop mark_mutex, e.g.
 441         * fsnotify_clear_marks_by_inode() can come and free marks. Even in our
 442         * to_free list so we have to use mark_mutex even when accessing that
 443         * list. And freeing mark requires us to drop mark_mutex. So we can
 444         * reliably free only the first mark in the list. That's why we first
 445         * move marks to free to to_free list in one go and then free marks in
 446         * to_free list one by one.
 447         */
 448        mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 449        list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 450                if (mark->flags & flags)
 451                        list_move(&mark->g_list, &to_free);
 452        }
 453        mutex_unlock(&group->mark_mutex);
 454
 455        while (1) {
 456                mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 457                if (list_empty(&to_free)) {
 458                        mutex_unlock(&group->mark_mutex);
 459                        break;
 460                }
 461                mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
 462                fsnotify_get_mark(mark);
 463                fsnotify_detach_mark(mark);
 464                mutex_unlock(&group->mark_mutex);
 465                fsnotify_free_mark(mark);
 466                fsnotify_put_mark(mark);
 467        }
 468}
 469
 470/*
 471 * Given a group, destroy all of the marks associated with that group.
 472 */
 473void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 474{
 475        fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
 476}
 477
 478void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
 479{
 480        assert_spin_locked(&old->lock);
 481        new->inode = old->inode;
 482        new->mnt = old->mnt;
 483        if (old->group)
 484                fsnotify_get_group(old->group);
 485        new->group = old->group;
 486        new->mask = old->mask;
 487        new->free_mark = old->free_mark;
 488}
 489
 490/*
 491 * Nothing fancy, just initialize lists and locks and counters.
 492 */
 493void fsnotify_init_mark(struct fsnotify_mark *mark,
 494                        void (*free_mark)(struct fsnotify_mark *mark))
 495{
 496        memset(mark, 0, sizeof(*mark));
 497        spin_lock_init(&mark->lock);
 498        atomic_set(&mark->refcnt, 1);
 499        mark->free_mark = free_mark;
 500}
 501
 502static void fsnotify_mark_destroy(struct work_struct *work)
 503{
 504        struct fsnotify_mark *mark, *next;
 505        struct list_head private_destroy_list;
 506
 507        spin_lock(&destroy_lock);
 508        /* exchange the list head */
 509        list_replace_init(&destroy_list, &private_destroy_list);
 510        spin_unlock(&destroy_lock);
 511
 512        synchronize_srcu(&fsnotify_mark_srcu);
 513
 514        list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
 515                list_del_init(&mark->g_list);
 516                fsnotify_put_mark(mark);
 517        }
 518}
 519