linux/fs/notify/inotify/inotify.c
/*
 * fs/notify/inotify/inotify.c - inode-based file event notifications
 *
 * Authors:
 *      John McCutchan  <ttb@tentacle.dhs.org>
 *      Robert Love     <rml@novell.com>
 *
 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
 *
 * Copyright (C) 2005 John McCutchan
 * Copyright 2006 Hewlett-Packard Development Company, L.P.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/writeback.h>
#include <linux/inotify.h>
#include <linux/fsnotify_backend.h>

static atomic_t inotify_cookie;

/*
 * Lock ordering:
 *
 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
 * iprune_mutex (synchronize shrink_icache_memory())
 *      inode_lock (protects the super_block->s_inodes list)
 *      inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
 *              inotify_handle->mutex (protects inotify_handle and watches->h_list)
 *
 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
 * of a caller's event handler.  Thus, the caller must not hold any locks
 * taken in their event handler while calling any of the published inotify
 * interfaces.
 */

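/*
 * Illustration only, not compiled: any path that needs both mutexes must
 * take the inode's first, as the removal and destroy paths below do.
 */
#if 0
        mutex_lock(&inode->inotify_mutex);      /* inode-side lock first... */
        mutex_lock(&ih->mutex);                 /* ...then the handle's lock */
        /* ... manipulate watch->i_list and watch->h_list ... */
        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);
#endif
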
/*
 * Lifetimes of the three main data structures--inotify_handle, inode, and
 * inotify_watch--are managed by reference count.
 *
 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
 * Additional references can bump the count via get_inotify_handle() and drop
 * the count via put_inotify_handle().
 *
 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
 * to remove_watch_no_event().  Additional references can bump the count via
 * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
 * is responsible for the final put after receiving IN_IGNORED, or when using
 * IN_ONESHOT after receiving the first event.  Inotify does the final put if
 * inotify_destroy() is called.
 *
 * inode: Pinned so long as the inode is associated with a watch, from
 * inotify_add_watch() to the final put_inotify_watch().
 */

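/*
 * Illustration only, not compiled: a caller that keeps a watch alive across
 * an asynchronous handoff brackets the handoff with a temporary reference;
 * use_watch_later() is a hypothetical consumer-side helper.
 */
#if 0
        get_inotify_watch(watch);       /* pin for the handoff */
        use_watch_later(watch);
        put_inotify_watch(watch);       /* may free via in_ops->destroy_watch */
#endif
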
/*
 * struct inotify_handle - represents an inotify instance
 *
 * This structure is protected by the mutex 'mutex'.
 */
struct inotify_handle {
        struct idr              idr;            /* idr mapping wd -> watch */
        struct mutex            mutex;          /* protects this bad boy */
        struct list_head        watches;        /* list of watches */
        atomic_t                count;          /* reference count */
        u32                     last_wd;        /* the last wd allocated */
        const struct inotify_operations *in_ops; /* inotify caller operations */
};

static inline void get_inotify_handle(struct inotify_handle *ih)
{
        atomic_inc(&ih->count);
}

static inline void put_inotify_handle(struct inotify_handle *ih)
{
        if (atomic_dec_and_test(&ih->count)) {
                idr_destroy(&ih->idr);
                kfree(ih);
        }
}

/**
 * get_inotify_watch - grab a reference to an inotify_watch
 * @watch: watch to grab
 */
void get_inotify_watch(struct inotify_watch *watch)
{
        atomic_inc(&watch->count);
}
EXPORT_SYMBOL_GPL(get_inotify_watch);

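/**
 * pin_inotify_watch - grab a watch reference if its superblock is still active
 * @watch: watch to pin
 *
 * Returns 1 on success, having taken a reference on the watch and an active
 * reference on its superblock (both undone by unpin_inotify_watch()), and 0
 * if the superblock is already shutting down.
 */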
int pin_inotify_watch(struct inotify_watch *watch)
{
        struct super_block *sb = watch->inode->i_sb;
        spin_lock(&sb_lock);
        if (sb->s_count >= S_BIAS) {
                atomic_inc(&sb->s_active);
                spin_unlock(&sb_lock);
                atomic_inc(&watch->count);
                return 1;
        }
        spin_unlock(&sb_lock);
        return 0;
}

/**
 * put_inotify_watch - decrements the ref count on a given watch.  cleans up
 * watch references if the count reaches zero.  inotify_watch is freed by
 * inotify callers via the destroy_watch() op.
 * @watch: watch to release
 */
void put_inotify_watch(struct inotify_watch *watch)
{
        if (atomic_dec_and_test(&watch->count)) {
                struct inotify_handle *ih = watch->ih;

                iput(watch->inode);
                ih->in_ops->destroy_watch(watch);
                put_inotify_handle(ih);
        }
}
EXPORT_SYMBOL_GPL(put_inotify_watch);

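/**
 * unpin_inotify_watch - undo pin_inotify_watch()
 * @watch: watch to unpin
 *
 * Drops the watch reference and the active superblock reference taken by
 * pin_inotify_watch().
 */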
void unpin_inotify_watch(struct inotify_watch *watch)
{
        struct super_block *sb = watch->inode->i_sb;
        put_inotify_watch(watch);
        deactivate_super(sb);
}

/*
 * inotify_handle_get_wd - allocate the next WD for the given handle, storing
 * it in watch->wd; returns 0 on success or a negative error.
 *
 * Callers must hold ih->mutex.  This function can sleep.
 */
static int inotify_handle_get_wd(struct inotify_handle *ih,
                                 struct inotify_watch *watch)
{
        int ret;

        do {
                if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
                        return -ENOSPC;
                ret = idr_get_new_above(&ih->idr, watch, ih->last_wd + 1,
                                        &watch->wd);
        } while (ret == -EAGAIN);

        if (likely(!ret))
                ih->last_wd = watch->wd;

        return ret;
}

/*
 * inotify_inode_watched - returns nonzero if there are watches on this inode
 * and zero otherwise.  We call this lockless, we do not care if we race.
 */
static inline int inotify_inode_watched(struct inode *inode)
{
        return !list_empty(&inode->inotify_watches);
}

/*
 * Get the child dentry flags into sync with the parent inode's watched state.
 * The flag should always be clear for negative dentries.
 */
static void set_dentry_child_flags(struct inode *inode, int watched)
{
        struct dentry *alias;

        spin_lock(&dcache_lock);
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
                struct dentry *child;

                list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
                        if (!child->d_inode)
                                continue;

                        spin_lock(&child->d_lock);
                        if (watched)
                                child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
                        else
                                child->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
                        spin_unlock(&child->d_lock);
                }
        }
        spin_unlock(&dcache_lock);
}

/*
 * inode_find_handle - find the watch associated with the given inode and
 * handle
 *
 * Callers must hold inode->inotify_mutex.
 */
static struct inotify_watch *inode_find_handle(struct inode *inode,
                                               struct inotify_handle *ih)
{
        struct inotify_watch *watch;

        list_for_each_entry(watch, &inode->inotify_watches, i_list) {
                if (watch->ih == ih)
                        return watch;
        }

        return NULL;
}

/*
 * remove_watch_no_event - remove watch without the IN_IGNORED event.
 *
 * Callers must hold both inode->inotify_mutex and ih->mutex.
 */
static void remove_watch_no_event(struct inotify_watch *watch,
                                  struct inotify_handle *ih)
{
        list_del(&watch->i_list);
        list_del(&watch->h_list);

        if (!inotify_inode_watched(watch->inode))
                set_dentry_child_flags(watch->inode, 0);

        idr_remove(&ih->idr, watch->wd);
}

/**
 * inotify_remove_watch_locked - Remove a watch from both the handle and the
 * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
 * watched.  May be invoked from a caller's event handler.
 * @ih: inotify handle associated with watch
 * @watch: watch to remove
 *
 * Callers must hold both inode->inotify_mutex and ih->mutex.
 */
void inotify_remove_watch_locked(struct inotify_handle *ih,
                                 struct inotify_watch *watch)
{
        remove_watch_no_event(watch, ih);
        ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);

/* Kernel API for producing events */

/*
 * inotify_d_instantiate - instantiate dcache entry for inode
 */
void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
{
        struct dentry *parent;

        if (!inode)
                return;

        spin_lock(&entry->d_lock);
        parent = entry->d_parent;
        if (parent->d_inode && inotify_inode_watched(parent->d_inode))
                entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
        spin_unlock(&entry->d_lock);
}

/*
 * inotify_d_move - dcache entry has been moved
 */
void inotify_d_move(struct dentry *entry)
{
        struct dentry *parent;

        parent = entry->d_parent;
        if (inotify_inode_watched(parent->d_inode))
                entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
        else
                entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
}

/**
 * inotify_inode_queue_event - queue an event to all watches on this inode
 * @inode: inode event is originating from
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 * @n_inode: inode associated with name
 */
void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
                               const char *name, struct inode *n_inode)
{
        struct inotify_watch *watch, *next;

        if (!inotify_inode_watched(inode))
                return;

        mutex_lock(&inode->inotify_mutex);
        list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
                u32 watch_mask = watch->mask;
                if (watch_mask & mask) {
                        struct inotify_handle *ih = watch->ih;
                        mutex_lock(&ih->mutex);
                        if (watch_mask & IN_ONESHOT)
                                remove_watch_no_event(watch, ih);
                        ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
                                                 name, n_inode);
                        mutex_unlock(&ih->mutex);
                }
        }
        mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_queue_event);

/**
 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
 * @dentry: the dentry in question, we queue against this dentry's parent
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 */
void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
                                       u32 cookie, const char *name)
{
        struct dentry *parent;
        struct inode *inode;

        if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
                return;

        spin_lock(&dentry->d_lock);
        parent = dentry->d_parent;
        inode = parent->d_inode;

        if (inotify_inode_watched(inode)) {
                dget(parent);
                spin_unlock(&dentry->d_lock);
                inotify_inode_queue_event(inode, mask, cookie, name,
                                          dentry->d_inode);
                dput(parent);
        } else
                spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);

/**
 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
 */
u32 inotify_get_cookie(void)
{
        return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);

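/*
 * Illustration only, not compiled: the two halves of a rename are queued
 * with one shared cookie so that consumers can pair them, in the style of
 * the fsnotify_move() hook of this era.
 */
#if 0
        u32 cookie = inotify_get_cookie();

        inotify_inode_queue_event(old_dir, IN_MOVED_FROM, cookie, name, inode);
        inotify_inode_queue_event(new_dir, IN_MOVED_TO, cookie, name, inode);
#endif
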
/**
 * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
 * @list: list of inodes being unmounted (sb->s_inodes)
 *
 * Called with inode_lock held, protecting the unmounting super block's list
 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
 * We temporarily drop inode_lock, however, and CAN block.
 */
void inotify_unmount_inodes(struct list_head *list)
{
        struct inode *inode, *next_i, *need_iput = NULL;

        list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
                struct inotify_watch *watch, *next_w;
                struct inode *need_iput_tmp;
                struct list_head *watches;

                /*
                 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
                 * I_WILL_FREE, or I_NEW which is fine because by that point
                 * the inode cannot have any associated watches.
                 */
                if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
                        continue;

                /*
                 * If i_count is zero, the inode cannot have any watches and
                 * doing an __iget/iput with MS_ACTIVE clear would actually
                 * evict all inodes with zero i_count from icache which is
                 * unnecessarily violent and may in fact be illegal to do.
                 */
                if (!atomic_read(&inode->i_count))
                        continue;

                need_iput_tmp = need_iput;
                need_iput = NULL;
                /* In case inotify_remove_watch_locked() drops a reference. */
                if (inode != need_iput_tmp)
                        __iget(inode);
                else
                        need_iput_tmp = NULL;
                /* In case the dropping of a reference would nuke next_i. */
                if ((&next_i->i_sb_list != list) &&
                                atomic_read(&next_i->i_count) &&
                                !(next_i->i_state & (I_CLEAR | I_FREEING |
                                        I_WILL_FREE))) {
                        __iget(next_i);
                        need_iput = next_i;
                }

                /*
                 * We can safely drop inode_lock here because we hold
                 * references on both inode and next_i.  Also no new inodes
                 * will be added since the umount has begun.  Finally,
                 * iprune_mutex keeps shrink_icache_memory() away.
                 */
                spin_unlock(&inode_lock);

                if (need_iput_tmp)
                        iput(need_iput_tmp);

                /* for each watch, send IN_UNMOUNT and then remove it */
                mutex_lock(&inode->inotify_mutex);
                watches = &inode->inotify_watches;
                list_for_each_entry_safe(watch, next_w, watches, i_list) {
                        struct inotify_handle *ih = watch->ih;
                        get_inotify_watch(watch);
                        mutex_lock(&ih->mutex);
                        ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
                                                 NULL, NULL);
                        inotify_remove_watch_locked(ih, watch);
                        mutex_unlock(&ih->mutex);
                        put_inotify_watch(watch);
                }
                mutex_unlock(&inode->inotify_mutex);
                iput(inode);

                spin_lock(&inode_lock);
        }
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);

/**
 * inotify_inode_is_dead - an inode has been deleted, clean up any watches
 * @inode: inode that is about to be removed
 */
void inotify_inode_is_dead(struct inode *inode)
{
        struct inotify_watch *watch, *next;

        mutex_lock(&inode->inotify_mutex);
        list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
                struct inotify_handle *ih = watch->ih;
                mutex_lock(&ih->mutex);
                inotify_remove_watch_locked(ih, watch);
                mutex_unlock(&ih->mutex);
        }
        mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_is_dead);

/* Kernel Consumer API */

/**
 * inotify_init - allocate and initialize an inotify instance
 * @ops: caller's inotify operations
 */
struct inotify_handle *inotify_init(const struct inotify_operations *ops)
{
        struct inotify_handle *ih;

        ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
        if (unlikely(!ih))
                return ERR_PTR(-ENOMEM);

        idr_init(&ih->idr);
        INIT_LIST_HEAD(&ih->watches);
        mutex_init(&ih->mutex);
        ih->last_wd = 0;
        ih->in_ops = ops;
        atomic_set(&ih->count, 0);
        get_inotify_handle(ih);

        return ih;
}
EXPORT_SYMBOL_GPL(inotify_init);

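/*
 * Illustration only, not compiled: a minimal consumer.  The operation
 * signatures follow the handle_event()/destroy_watch() call sites in this
 * file; all "my_" names are hypothetical.
 */
#if 0
static void my_handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
                            u32 cookie, const char *name, struct inode *inode)
{
        /* runs with inode->inotify_mutex and watch->ih->mutex held */
}

static void my_destroy_watch(struct inotify_watch *watch)
{
        kfree(watch);           /* the final put_inotify_watch() lands here */
}

static const struct inotify_operations my_inotify_ops = {
        .handle_event   = my_handle_event,
        .destroy_watch  = my_destroy_watch,
};

static struct inotify_handle *my_ih;

static int __init my_consumer_init(void)
{
        my_ih = inotify_init(&my_inotify_ops);
        return IS_ERR(my_ih) ? PTR_ERR(my_ih) : 0;
}
#endif
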
/**
 * inotify_init_watch - initialize an inotify watch
 * @watch: watch to initialize
 */
void inotify_init_watch(struct inotify_watch *watch)
{
        INIT_LIST_HEAD(&watch->h_list);
        INIT_LIST_HEAD(&watch->i_list);
        atomic_set(&watch->count, 0);
        get_inotify_watch(watch); /* initial get */
}
EXPORT_SYMBOL_GPL(inotify_init_watch);

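/*
 * Illustration only, not compiled: consumers typically embed the watch in a
 * structure of their own (the audit tree code does this) and recover it
 * with container_of() in their destroy_watch() operation; "my_dir_watch"
 * is hypothetical.
 */
#if 0
struct my_dir_watch {
        struct inotify_watch    wdata;          /* initialized first */
        char                    *path;          /* caller-private state */
};

static void my_dir_destroy_watch(struct inotify_watch *watch)
{
        struct my_dir_watch *w = container_of(watch, struct my_dir_watch,
                                              wdata);

        kfree(w->path);
        kfree(w);
}
#endif
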
/*
 * Watch removals suck violently.  To kick the watch out we need (in this
 * order) inode->inotify_mutex and ih->mutex.  That's fine if we have
 * a hold on inode; however, for all other cases we need to make damn sure
 * we don't race with umount.  We can *NOT* just grab a reference to a
 * watch - inotify_unmount_inodes() will happily sail past it and we'll
 * end up with a reference to an inode potentially outliving its superblock.
 * Ideally we just want to grab an active reference to the superblock if we
 * can; that will make sure we won't go into inotify_unmount_inodes() until
 * we are done.  Cleanup is just deactivate_super().  However, that leaves a
 * messy case - what if we *are* racing with umount() and active references
 * to the superblock can't be acquired anymore?  We can bump ->s_count, grab
 * ->s_umount, which will almost certainly wait until the superblock is shut
 * down and the watch in question is pining for fjords.  That's fine, but
 * there is a problem - we might have hit the window between ->s_active
 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
 * is past the point of no return and is heading for shutdown) and the
 * moment when deactivate_super() acquires ->s_umount.  We could just do
 * drop_super(), yield() and retry, but that's rather antisocial and this
 * stuff is luser-triggerable.  OTOH, having grabbed ->s_umount and having
 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
 * that we won't race with inotify_unmount_inodes().  So we could grab a
 * reference to watch and do the rest as above, just with drop_super() instead
 * of deactivate_super(), right?  Wrong.  We had to drop ih->mutex before we
 * could grab ->s_umount.  So the watch could've been gone already.
 *
 * That still can be dealt with - we need to save watch->wd, do idr_find()
 * and compare its result with our pointer.  If they match, we either have
 * the damn thing still alive or we'd lost not one but two races at once,
 * the watch had been killed and a new one got created with the same ->wd
 * at the same address.  That couldn't have happened in inotify_destroy(),
 * but inotify_rm_wd() could run into that.  Still, "new one got created"
 * is not a problem - we have every right to kill it or leave it alone,
 * whatever's more convenient.
 *
 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
 * "grab it and kill it" check.  If it's been our original watch, we are
 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
 * race and kill the fscker anyway; we are safe since we know that its
 * superblock won't be going away.
 *
 * And yes, this is far beyond mere "not very pretty"; so's the entire
 * concept of inotify to start with.
 */

/**
 * pin_to_kill - pin the watch down for removal
 * @ih: inotify handle
 * @watch: watch to kill
 *
 * Called with ih->mutex held, drops it.  Possible return values:
 * 0 - nothing to do, it has died
 * 1 - remove it, drop the reference and deactivate_super()
 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
 * that variant, since it involved a lot of PITA, but that's the best that
 * could've been done.
 */
static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
{
        struct super_block *sb = watch->inode->i_sb;
        s32 wd = watch->wd;

        spin_lock(&sb_lock);
        if (sb->s_count >= S_BIAS) {
                atomic_inc(&sb->s_active);
                spin_unlock(&sb_lock);
                get_inotify_watch(watch);
                mutex_unlock(&ih->mutex);
                return 1;       /* the best outcome */
        }
        sb->s_count++;
        spin_unlock(&sb_lock);
        mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
        down_read(&sb->s_umount);
        if (likely(!sb->s_root)) {
                /* fs is already shut down; the watch is dead */
                drop_super(sb);
                return 0;
        }
        /* raced with the final deactivate_super() */
        mutex_lock(&ih->mutex);
        if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
                /* the watch is dead */
                mutex_unlock(&ih->mutex);
                drop_super(sb);
                return 0;
        }
        /* still alive or freed and reused with the same sb and wd; kill */
        get_inotify_watch(watch);
        mutex_unlock(&ih->mutex);
        return 2;
}

static void unpin_and_kill(struct inotify_watch *watch, int how)
{
        struct super_block *sb = watch->inode->i_sb;
        put_inotify_watch(watch);
        switch (how) {
        case 1:
                deactivate_super(sb);
                break;
        case 2:
                drop_super(sb);
        }
}

/**
 * inotify_destroy - clean up and destroy an inotify instance
 * @ih: inotify handle
 */
void inotify_destroy(struct inotify_handle *ih)
{
        /*
         * Destroy all of the watches for this handle. Unfortunately, not very
         * pretty.  We cannot do a simple iteration over the list, because we
         * do not know the inode until we iterate to the watch.  But we need to
         * hold inode->inotify_mutex before ih->mutex.  The following works.
         *
         * AV: it had to become even uglier to start working ;-/
         */
        while (1) {
                struct inotify_watch *watch;
                struct list_head *watches;
                struct super_block *sb;
                struct inode *inode;
                int how;

                mutex_lock(&ih->mutex);
                watches = &ih->watches;
                if (list_empty(watches)) {
                        mutex_unlock(&ih->mutex);
                        break;
                }
                watch = list_first_entry(watches, struct inotify_watch, h_list);
                sb = watch->inode->i_sb;
                how = pin_to_kill(ih, watch);
                if (!how)
                        continue;

                inode = watch->inode;
                mutex_lock(&inode->inotify_mutex);
                mutex_lock(&ih->mutex);

                /* make sure we didn't race with another list removal */
                if (likely(idr_find(&ih->idr, watch->wd))) {
                        remove_watch_no_event(watch, ih);
                        put_inotify_watch(watch);
                }

                mutex_unlock(&ih->mutex);
                mutex_unlock(&inode->inotify_mutex);
                unpin_and_kill(watch, how);
        }

        /* free this handle: the put matching the get in inotify_init() */
        put_inotify_handle(ih);
}
EXPORT_SYMBOL_GPL(inotify_destroy);

/**
 * inotify_find_watch - find an existing watch for an (ih,inode) pair
 * @ih: inotify handle
 * @inode: inode to watch
 * @watchp: pointer to existing inotify_watch
 *
 * Caller must pin given inode (via nameidata).
 */
s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
                       struct inotify_watch **watchp)
{
        struct inotify_watch *old;
        int ret = -ENOENT;

        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex);

        old = inode_find_handle(inode, ih);
        if (unlikely(old)) {
                get_inotify_watch(old); /* caller must put watch */
                *watchp = old;
                ret = old->wd;
        }

        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);

        return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_watch);

/**
 * inotify_find_update_watch - find and update the mask of an existing watch
 * @ih: inotify handle
 * @inode: inode's watch to update
 * @mask: mask of events to watch
 *
 * Caller must pin given inode (via nameidata).
 */
s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
                              u32 mask)
{
        struct inotify_watch *old;
        int mask_add = 0;
        int ret;

        if (mask & IN_MASK_ADD)
                mask_add = 1;

        /* don't allow invalid bits: we don't want flags set */
        mask &= IN_ALL_EVENTS | IN_ONESHOT;
        if (unlikely(!mask))
                return -EINVAL;

        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex);

        /*
         * Handle the case of re-adding a watch on an (inode,ih) pair that we
         * are already watching.  We just update the mask and return its wd.
         */
        old = inode_find_handle(inode, ih);
        if (unlikely(!old)) {
                ret = -ENOENT;
                goto out;
        }

        if (mask_add)
                old->mask |= mask;
        else
                old->mask = mask;
        ret = old->wd;
out:
        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_update_watch);

/**
 * inotify_add_watch - add a watch to an inotify instance
 * @ih: inotify handle
 * @watch: caller allocated watch structure
 * @inode: inode to watch
 * @mask: mask of events to watch
 *
 * Caller must pin given inode (via nameidata).
 * Caller must ensure it only calls inotify_add_watch() once per watch.
 * Calls inotify_handle_get_wd() so may sleep.
 */
s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
                      struct inode *inode, u32 mask)
{
        int ret = 0;
        int newly_watched;

        /* don't allow invalid bits: we don't want flags set */
        mask &= IN_ALL_EVENTS | IN_ONESHOT;
        if (unlikely(!mask))
                return -EINVAL;
        watch->mask = mask;

        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex);

        /* Initialize a new watch */
        ret = inotify_handle_get_wd(ih, watch);
        if (unlikely(ret))
                goto out;
        ret = watch->wd;

        /* save a reference to handle and bump the count to make it official */
        get_inotify_handle(ih);
        watch->ih = ih;

        /*
         * Save a reference to the inode and bump the ref count to make it
         * official.  We hold a reference to nameidata, which makes this safe.
         */
        watch->inode = igrab(inode);

        /* Add the watch to the handle's and the inode's list */
        newly_watched = !inotify_inode_watched(inode);
        list_add(&watch->h_list, &ih->watches);
        list_add(&watch->i_list, &inode->inotify_watches);
        /*
         * Set child flags _after_ adding the watch, so there is no race
         * window where newly instantiated children could miss their parent's
         * watched flag.
         */
        if (newly_watched)
                set_dentry_child_flags(inode, 1);

out:
        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(inotify_add_watch);

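/*
 * Illustration only, not compiled: the add/remove cycle as seen by a
 * consumer.  Per the lifetime rules at the top of this file, the final put
 * happens after IN_IGNORED is delivered to handle_event(); my_watch_dir()
 * is hypothetical.
 */
#if 0
static int my_watch_dir(struct inotify_handle *ih, struct inode *inode)
{
        struct inotify_watch *watch;
        s32 wd;

        watch = kmalloc(sizeof(*watch), GFP_KERNEL);
        if (!watch)
                return -ENOMEM;
        inotify_init_watch(watch);      /* takes the initial reference */
        wd = inotify_add_watch(ih, watch, inode, IN_MODIFY | IN_DELETE_SELF);
        if (wd < 0) {
                put_inotify_watch(watch);       /* nothing was installed */
                return wd;
        }
        /* ... later, inotify_rm_wd(ih, wd) queues IN_IGNORED ... */
        return 0;
}
#endif
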
/**
 * inotify_clone_watch - put the watch next to an existing one
 * @old: already installed watch
 * @new: new watch
 *
 * Caller must hold the inotify_mutex of the inode we are dealing with;
 * it is expected to remove the old watch before unlocking the inode.
 */
s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
{
        struct inotify_handle *ih = old->ih;
        int ret = 0;

        new->mask = old->mask;
        new->ih = ih;

        mutex_lock(&ih->mutex);

        /* Initialize a new watch */
        ret = inotify_handle_get_wd(ih, new);
        if (unlikely(ret))
                goto out;
        ret = new->wd;

        get_inotify_handle(ih);

        new->inode = igrab(old->inode);

        list_add(&new->h_list, &ih->watches);
        list_add(&new->i_list, &old->inode->inotify_watches);
out:
        mutex_unlock(&ih->mutex);
        return ret;
}

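/**
 * inotify_evict_watch - remove a watch, sending IN_IGNORED
 * @watch: watch to evict
 *
 * Takes a reference first so the watch stays valid for the caller after
 * removal.
 */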
void inotify_evict_watch(struct inotify_watch *watch)
{
        get_inotify_watch(watch);
        mutex_lock(&watch->ih->mutex);
        inotify_remove_watch_locked(watch->ih, watch);
        mutex_unlock(&watch->ih->mutex);
}

/**
 * inotify_rm_wd - remove a watch from an inotify instance
 * @ih: inotify handle
 * @wd: watch descriptor to remove
 *
 * Can sleep.
 */
int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
{
        struct inotify_watch *watch;
        struct super_block *sb;
        struct inode *inode;
        int how;

        mutex_lock(&ih->mutex);
        watch = idr_find(&ih->idr, wd);
        if (unlikely(!watch)) {
                mutex_unlock(&ih->mutex);
                return -EINVAL;
        }
        sb = watch->inode->i_sb;
        how = pin_to_kill(ih, watch);
        if (!how)
                return 0;

        inode = watch->inode;

        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex);

        /* make sure that we did not race */
        if (likely(idr_find(&ih->idr, wd) == watch))
                inotify_remove_watch_locked(ih, watch);

        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);
        unpin_and_kill(watch, how);

        return 0;
}
EXPORT_SYMBOL_GPL(inotify_rm_wd);

/**
 * inotify_rm_watch - remove a watch from an inotify instance
 * @ih: inotify handle
 * @watch: watch to remove
 *
 * Can sleep.
 */
int inotify_rm_watch(struct inotify_handle *ih,
                     struct inotify_watch *watch)
{
        return inotify_rm_wd(ih, watch->wd);
}
EXPORT_SYMBOL_GPL(inotify_rm_watch);

/*
 * inotify_setup - core initialization function
 */
static int __init inotify_setup(void)
{
        BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
        BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
        BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
        BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
        BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
        BUILD_BUG_ON(IN_OPEN != FS_OPEN);
        BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
        BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
        BUILD_BUG_ON(IN_CREATE != FS_CREATE);
        BUILD_BUG_ON(IN_DELETE != FS_DELETE);
        BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
        BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
        BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);

        BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
        BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
        BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
        BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);

        atomic_set(&inotify_cookie, 0);

        return 0;
}

module_init(inotify_setup);