linux/fs/notify/fsnotify.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
   4 */
   5
   6#include <linux/dcache.h>
   7#include <linux/fs.h>
   8#include <linux/gfp.h>
   9#include <linux/init.h>
  10#include <linux/module.h>
  11#include <linux/mount.h>
  12#include <linux/srcu.h>
  13
  14#include <linux/fsnotify_backend.h>
  15#include "fsnotify.h"
  16
  17/*
  18 * Clear all of the marks on an inode when it is being evicted from core
  19 */
  20void __fsnotify_inode_delete(struct inode *inode)
  21{
  22        fsnotify_clear_marks_by_inode(inode);
  23}
  24EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
  25
  26void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
  27{
  28        fsnotify_clear_marks_by_mount(mnt);
  29}
  30
  31/**
  32 * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  33 * @sb: superblock being unmounted.
  34 *
  35 * Called during unmount with no locks held, so needs to be safe against
  36 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
  37 */
  38static void fsnotify_unmount_inodes(struct super_block *sb)
  39{
  40        struct inode *inode, *iput_inode = NULL;
  41
  42        spin_lock(&sb->s_inode_list_lock);
  43        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
  44                /*
  45                 * We cannot __iget() an inode in state I_FREEING,
  46                 * I_WILL_FREE, or I_NEW which is fine because by that point
  47                 * the inode cannot have any associated watches.
  48                 */
  49                spin_lock(&inode->i_lock);
  50                if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
  51                        spin_unlock(&inode->i_lock);
  52                        continue;
  53                }
  54
  55                /*
  56                 * If i_count is zero, the inode cannot have any watches and
  57                 * doing an __iget/iput with SB_ACTIVE clear would actually
  58                 * evict all inodes with zero i_count from icache which is
  59                 * unnecessarily violent and may in fact be illegal to do.
  60                 */
  61                if (!atomic_read(&inode->i_count)) {
  62                        spin_unlock(&inode->i_lock);
  63                        continue;
  64                }
  65
  66                __iget(inode);
  67                spin_unlock(&inode->i_lock);
  68                spin_unlock(&sb->s_inode_list_lock);
  69
  70                if (iput_inode)
  71                        iput(iput_inode);
  72
  73                /* for each watch, send FS_UNMOUNT and then remove it */
  74                fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
  75
  76                fsnotify_inode_delete(inode);
  77
  78                iput_inode = inode;
  79
  80                spin_lock(&sb->s_inode_list_lock);
  81        }
  82        spin_unlock(&sb->s_inode_list_lock);
  83
  84        if (iput_inode)
  85                iput(iput_inode);
  86        /* Wait for outstanding inode references from connectors */
  87        wait_var_event(&sb->s_fsnotify_inode_refs,
  88                       !atomic_long_read(&sb->s_fsnotify_inode_refs));
  89}
  90
  91void fsnotify_sb_delete(struct super_block *sb)
  92{
  93        fsnotify_unmount_inodes(sb);
  94        fsnotify_clear_marks_by_sb(sb);
  95}
  96
  97/*
  98 * Given an inode, first check if we care what happens to our children.  Inotify
  99 * and dnotify both tell their parents about events.  If we care about any event
 100 * on a child we run all of our children and set a dentry flag saying that the
 101 * parent cares.  Thus when an event happens on a child it can quickly tell if
 102 * if there is a need to find a parent and send the event to the parent.
 103 */
 104void __fsnotify_update_child_dentry_flags(struct inode *inode)
 105{
 106        struct dentry *alias;
 107        int watched;
 108
 109        if (!S_ISDIR(inode->i_mode))
 110                return;
 111
 112        /* determine if the children should tell inode about their events */
 113        watched = fsnotify_inode_watches_children(inode);
 114
 115        spin_lock(&inode->i_lock);
 116        /* run all of the dentries associated with this inode.  Since this is a
 117         * directory, there damn well better only be one item on this list */
 118        hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 119                struct dentry *child;
 120
 121                /* run all of the children of the original inode and fix their
 122                 * d_flags to indicate parental interest (their parent is the
 123                 * original inode) */
 124                spin_lock(&alias->d_lock);
 125                list_for_each_entry(child, &alias->d_subdirs, d_child) {
 126                        if (!child->d_inode)
 127                                continue;
 128
 129                        spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
 130                        if (watched)
 131                                child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
 132                        else
 133                                child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
 134                        spin_unlock(&child->d_lock);
 135                }
 136                spin_unlock(&alias->d_lock);
 137        }
 138        spin_unlock(&inode->i_lock);
 139}
 140
 141/* Notify this dentry's parent about a child's events. */
 142int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
 143{
 144        struct dentry *parent;
 145        struct inode *p_inode;
 146        int ret = 0;
 147
 148        if (!dentry)
 149                dentry = path->dentry;
 150
 151        if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
 152                return 0;
 153
 154        parent = dget_parent(dentry);
 155        p_inode = parent->d_inode;
 156
 157        if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
 158                __fsnotify_update_child_dentry_flags(p_inode);
 159        } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
 160                struct name_snapshot name;
 161
 162                /* we are notifying a parent so come up with the new mask which
 163                 * specifies these are events which came from a child. */
 164                mask |= FS_EVENT_ON_CHILD;
 165
 166                take_dentry_name_snapshot(&name, dentry);
 167                if (path)
 168                        ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
 169                                       &name.name, 0);
 170                else
 171                        ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
 172                                       &name.name, 0);
 173                release_dentry_name_snapshot(&name);
 174        }
 175
 176        dput(parent);
 177
 178        return ret;
 179}
 180EXPORT_SYMBOL_GPL(__fsnotify_parent);
 181
 182static int send_to_group(struct inode *to_tell,
 183                         __u32 mask, const void *data,
 184                         int data_is, u32 cookie,
 185                         const struct qstr *file_name,
 186                         struct fsnotify_iter_info *iter_info)
 187{
 188        struct fsnotify_group *group = NULL;
 189        __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 190        __u32 marks_mask = 0;
 191        __u32 marks_ignored_mask = 0;
 192        struct fsnotify_mark *mark;
 193        int type;
 194
 195        if (WARN_ON(!iter_info->report_mask))
 196                return 0;
 197
 198        /* clear ignored on inode modification */
 199        if (mask & FS_MODIFY) {
 200                fsnotify_foreach_obj_type(type) {
 201                        if (!fsnotify_iter_should_report_type(iter_info, type))
 202                                continue;
 203                        mark = iter_info->marks[type];
 204                        if (mark &&
 205                            !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
 206                                mark->ignored_mask = 0;
 207                }
 208        }
 209
 210        fsnotify_foreach_obj_type(type) {
 211                if (!fsnotify_iter_should_report_type(iter_info, type))
 212                        continue;
 213                mark = iter_info->marks[type];
 214                /* does the object mark tell us to do something? */
 215                if (mark) {
 216                        group = mark->group;
 217                        marks_mask |= mark->mask;
 218                        marks_ignored_mask |= mark->ignored_mask;
 219                }
 220        }
 221
 222        pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x"
 223                 " data=%p data_is=%d cookie=%d\n",
 224                 __func__, group, to_tell, mask, marks_mask, marks_ignored_mask,
 225                 data, data_is, cookie);
 226
 227        if (!(test_mask & marks_mask & ~marks_ignored_mask))
 228                return 0;
 229
 230        return group->ops->handle_event(group, to_tell, mask, data, data_is,
 231                                        file_name, cookie, iter_info);
 232}
 233
 234static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
 235{
 236        struct fsnotify_mark_connector *conn;
 237        struct hlist_node *node = NULL;
 238
 239        conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
 240        if (conn)
 241                node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
 242
 243        return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
 244}
 245
 246static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
 247{
 248        struct hlist_node *node = NULL;
 249
 250        if (mark)
 251                node = srcu_dereference(mark->obj_list.next,
 252                                        &fsnotify_mark_srcu);
 253
 254        return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
 255}
 256
 257/*
 258 * iter_info is a multi head priority queue of marks.
 259 * Pick a subset of marks from queue heads, all with the
 260 * same group and set the report_mask for selected subset.
 261 * Returns the report_mask of the selected subset.
 262 */
 263static unsigned int fsnotify_iter_select_report_types(
 264                struct fsnotify_iter_info *iter_info)
 265{
 266        struct fsnotify_group *max_prio_group = NULL;
 267        struct fsnotify_mark *mark;
 268        int type;
 269
 270        /* Choose max prio group among groups of all queue heads */
 271        fsnotify_foreach_obj_type(type) {
 272                mark = iter_info->marks[type];
 273                if (mark &&
 274                    fsnotify_compare_groups(max_prio_group, mark->group) > 0)
 275                        max_prio_group = mark->group;
 276        }
 277
 278        if (!max_prio_group)
 279                return 0;
 280
 281        /* Set the report mask for marks from same group as max prio group */
 282        iter_info->report_mask = 0;
 283        fsnotify_foreach_obj_type(type) {
 284                mark = iter_info->marks[type];
 285                if (mark &&
 286                    fsnotify_compare_groups(max_prio_group, mark->group) == 0)
 287                        fsnotify_iter_set_report_type(iter_info, type);
 288        }
 289
 290        return iter_info->report_mask;
 291}
 292
 293/*
 294 * Pop from iter_info multi head queue, the marks that were iterated in the
 295 * current iteration step.
 296 */
 297static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
 298{
 299        int type;
 300
 301        fsnotify_foreach_obj_type(type) {
 302                if (fsnotify_iter_should_report_type(iter_info, type))
 303                        iter_info->marks[type] =
 304                                fsnotify_next_mark(iter_info->marks[type]);
 305        }
 306}
 307
 308/*
 309 * This is the main call to fsnotify.  The VFS calls into hook specific functions
 310 * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
 311 * out to all of the registered fsnotify_group.  Those groups can then use the
 312 * notification event in whatever means they feel necessary.
 313 */
 314int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 315             const struct qstr *file_name, u32 cookie)
 316{
 317        struct fsnotify_iter_info iter_info = {};
 318        struct super_block *sb = to_tell->i_sb;
 319        struct mount *mnt = NULL;
 320        __u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
 321        int ret = 0;
 322        __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 323
 324        if (data_is == FSNOTIFY_EVENT_PATH) {
 325                mnt = real_mount(((const struct path *)data)->mnt);
 326                mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
 327        }
 328        /* An event "on child" is not intended for a mount/sb mark */
 329        if (mask & FS_EVENT_ON_CHILD)
 330                mnt_or_sb_mask = 0;
 331
 332        /*
 333         * Optimization: srcu_read_lock() has a memory barrier which can
 334         * be expensive.  It protects walking the *_fsnotify_marks lists.
 335         * However, if we do not walk the lists, we do not have to do
 336         * SRCU because we have no references to any objects and do not
 337         * need SRCU to keep them "alive".
 338         */
 339        if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
 340            (!mnt || !mnt->mnt_fsnotify_marks))
 341                return 0;
 342        /*
 343         * if this is a modify event we may need to clear the ignored masks
 344         * otherwise return if neither the inode nor the vfsmount/sb care about
 345         * this type of event.
 346         */
 347        if (!(mask & FS_MODIFY) &&
 348            !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
 349                return 0;
 350
 351        iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
 352
 353        iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
 354                fsnotify_first_mark(&to_tell->i_fsnotify_marks);
 355        iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
 356                fsnotify_first_mark(&sb->s_fsnotify_marks);
 357        if (mnt) {
 358                iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
 359                        fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
 360        }
 361
 362        /*
 363         * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
 364         * ignore masks are properly reflected for mount/sb mark notifications.
 365         * That's why this traversal is so complicated...
 366         */
 367        while (fsnotify_iter_select_report_types(&iter_info)) {
 368                ret = send_to_group(to_tell, mask, data, data_is, cookie,
 369                                    file_name, &iter_info);
 370
 371                if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
 372                        goto out;
 373
 374                fsnotify_iter_next(&iter_info);
 375        }
 376        ret = 0;
 377out:
 378        srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
 379
 380        return ret;
 381}
 382EXPORT_SYMBOL_GPL(fsnotify);
 383
 384extern struct kmem_cache *fsnotify_mark_connector_cachep;
 385
 386static __init int fsnotify_init(void)
 387{
 388        int ret;
 389
 390        BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
 391
 392        ret = init_srcu_struct(&fsnotify_mark_srcu);
 393        if (ret)
 394                panic("initializing fsnotify_mark_srcu");
 395
 396        fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
 397                                                    SLAB_PANIC);
 398
 399        return 0;
 400}
 401core_initcall(fsnotify_init);
 402