linux/block/disk-events.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Disk events - monitor disk events like media change and eject request.
   4 */
   5#include <linux/export.h>
   6#include <linux/moduleparam.h>
   7#include <linux/genhd.h>
   8#include "blk.h"
   9
  10struct disk_events {
  11        struct list_head        node;           /* all disk_event's */
  12        struct gendisk          *disk;          /* the associated disk */
  13        spinlock_t              lock;
  14
  15        struct mutex            block_mutex;    /* protects blocking */
  16        int                     block;          /* event blocking depth */
  17        unsigned int            pending;        /* events already sent out */
  18        unsigned int            clearing;       /* events being cleared */
  19
  20        long                    poll_msecs;     /* interval, -1 for default */
  21        struct delayed_work     dwork;
  22};
  23
  24static const char *disk_events_strs[] = {
  25        [ilog2(DISK_EVENT_MEDIA_CHANGE)]        = "media_change",
  26        [ilog2(DISK_EVENT_EJECT_REQUEST)]       = "eject_request",
  27};
  28
  29static char *disk_uevents[] = {
  30        [ilog2(DISK_EVENT_MEDIA_CHANGE)]        = "DISK_MEDIA_CHANGE=1",
  31        [ilog2(DISK_EVENT_EJECT_REQUEST)]       = "DISK_EJECT_REQUEST=1",
  32};
  33
  34/* list of all disk_events */
  35static DEFINE_MUTEX(disk_events_mutex);
  36static LIST_HEAD(disk_events);
  37
  38/* disable in-kernel polling by default */
  39static unsigned long disk_events_dfl_poll_msecs;
  40
  41static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
  42{
  43        struct disk_events *ev = disk->ev;
  44        long intv_msecs = 0;
  45
  46        /*
  47         * If device-specific poll interval is set, always use it.  If
  48         * the default is being used, poll if the POLL flag is set.
  49         */
  50        if (ev->poll_msecs >= 0)
  51                intv_msecs = ev->poll_msecs;
  52        else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
  53                intv_msecs = disk_events_dfl_poll_msecs;
  54
  55        return msecs_to_jiffies(intv_msecs);
  56}
  57
  58/**
  59 * disk_block_events - block and flush disk event checking
  60 * @disk: disk to block events for
  61 *
  62 * On return from this function, it is guaranteed that event checking
  63 * isn't in progress and won't happen until unblocked by
  64 * disk_unblock_events().  Events blocking is counted and the actual
  65 * unblocking happens after the matching number of unblocks are done.
  66 *
  67 * Note that this intentionally does not block event checking from
  68 * disk_clear_events().
  69 *
  70 * CONTEXT:
  71 * Might sleep.
  72 */
  73void disk_block_events(struct gendisk *disk)
  74{
  75        struct disk_events *ev = disk->ev;
  76        unsigned long flags;
  77        bool cancel;
  78
  79        if (!ev)
  80                return;
  81
  82        /*
  83         * Outer mutex ensures that the first blocker completes canceling
  84         * the event work before further blockers are allowed to finish.
  85         */
  86        mutex_lock(&ev->block_mutex);
  87
  88        spin_lock_irqsave(&ev->lock, flags);
  89        cancel = !ev->block++;
  90        spin_unlock_irqrestore(&ev->lock, flags);
  91
  92        if (cancel)
  93                cancel_delayed_work_sync(&disk->ev->dwork);
  94
  95        mutex_unlock(&ev->block_mutex);
  96}
  97
  98static void __disk_unblock_events(struct gendisk *disk, bool check_now)
  99{
 100        struct disk_events *ev = disk->ev;
 101        unsigned long intv;
 102        unsigned long flags;
 103
 104        spin_lock_irqsave(&ev->lock, flags);
 105
 106        if (WARN_ON_ONCE(ev->block <= 0))
 107                goto out_unlock;
 108
 109        if (--ev->block)
 110                goto out_unlock;
 111
 112        intv = disk_events_poll_jiffies(disk);
 113        if (check_now)
 114                queue_delayed_work(system_freezable_power_efficient_wq,
 115                                &ev->dwork, 0);
 116        else if (intv)
 117                queue_delayed_work(system_freezable_power_efficient_wq,
 118                                &ev->dwork, intv);
 119out_unlock:
 120        spin_unlock_irqrestore(&ev->lock, flags);
 121}
 122
 123/**
 124 * disk_unblock_events - unblock disk event checking
 125 * @disk: disk to unblock events for
 126 *
 127 * Undo disk_block_events().  When the block count reaches zero, it
 128 * starts events polling if configured.
 129 *
 130 * CONTEXT:
 131 * Don't care.  Safe to call from irq context.
 132 */
 133void disk_unblock_events(struct gendisk *disk)
 134{
 135        if (disk->ev)
 136                __disk_unblock_events(disk, false);
 137}
 138
 139/**
 140 * disk_flush_events - schedule immediate event checking and flushing
 141 * @disk: disk to check and flush events for
 142 * @mask: events to flush
 143 *
 144 * Schedule immediate event checking on @disk if not blocked.  Events in
 145 * @mask are scheduled to be cleared from the driver.  Note that this
 146 * doesn't clear the events from @disk->ev.
 147 *
 148 * CONTEXT:
 149 * If @mask is non-zero must be called with disk->open_mutex held.
 150 */
 151void disk_flush_events(struct gendisk *disk, unsigned int mask)
 152{
 153        struct disk_events *ev = disk->ev;
 154
 155        if (!ev)
 156                return;
 157
 158        spin_lock_irq(&ev->lock);
 159        ev->clearing |= mask;
 160        if (!ev->block)
 161                mod_delayed_work(system_freezable_power_efficient_wq,
 162                                &ev->dwork, 0);
 163        spin_unlock_irq(&ev->lock);
 164}
 165
 166/*
 167 * Tell userland about new events.  Only the events listed in @disk->events are
 168 * reported, and only if DISK_EVENT_FLAG_UEVENT is set.  Otherwise, events are
 169 * processed internally but never get reported to userland.
 170 */
 171static void disk_event_uevent(struct gendisk *disk, unsigned int events)
 172{
 173        char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
 174        int nr_events = 0, i;
 175
 176        for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
 177                if (events & disk->events & (1 << i))
 178                        envp[nr_events++] = disk_uevents[i];
 179
 180        if (nr_events)
 181                kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
 182}
 183
 184static void disk_check_events(struct disk_events *ev,
 185                              unsigned int *clearing_ptr)
 186{
 187        struct gendisk *disk = ev->disk;
 188        unsigned int clearing = *clearing_ptr;
 189        unsigned int events;
 190        unsigned long intv;
 191
 192        /* check events */
 193        events = disk->fops->check_events(disk, clearing);
 194
 195        /* accumulate pending events and schedule next poll if necessary */
 196        spin_lock_irq(&ev->lock);
 197
 198        events &= ~ev->pending;
 199        ev->pending |= events;
 200        *clearing_ptr &= ~clearing;
 201
 202        intv = disk_events_poll_jiffies(disk);
 203        if (!ev->block && intv)
 204                queue_delayed_work(system_freezable_power_efficient_wq,
 205                                &ev->dwork, intv);
 206
 207        spin_unlock_irq(&ev->lock);
 208
 209        if (events & DISK_EVENT_MEDIA_CHANGE)
 210                inc_diskseq(disk);
 211
 212        if (disk->event_flags & DISK_EVENT_FLAG_UEVENT)
 213                disk_event_uevent(disk, events);
 214}
 215
 216/**
 217 * disk_clear_events - synchronously check, clear and return pending events
 218 * @disk: disk to fetch and clear events from
 219 * @mask: mask of events to be fetched and cleared
 220 *
 221 * Disk events are synchronously checked and pending events in @mask
 222 * are cleared and returned.  This ignores the block count.
 223 *
 224 * CONTEXT:
 225 * Might sleep.
 226 */
 227static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
 228{
 229        struct disk_events *ev = disk->ev;
 230        unsigned int pending;
 231        unsigned int clearing = mask;
 232
 233        if (!ev)
 234                return 0;
 235
 236        disk_block_events(disk);
 237
 238        /*
 239         * store the union of mask and ev->clearing on the stack so that the
 240         * race with disk_flush_events does not cause ambiguity (ev->clearing
 241         * can still be modified even if events are blocked).
 242         */
 243        spin_lock_irq(&ev->lock);
 244        clearing |= ev->clearing;
 245        ev->clearing = 0;
 246        spin_unlock_irq(&ev->lock);
 247
 248        disk_check_events(ev, &clearing);
 249        /*
 250         * if ev->clearing is not 0, the disk_flush_events got called in the
 251         * middle of this function, so we want to run the workfn without delay.
 252         */
 253        __disk_unblock_events(disk, ev->clearing ? true : false);
 254
 255        /* then, fetch and clear pending events */
 256        spin_lock_irq(&ev->lock);
 257        pending = ev->pending & mask;
 258        ev->pending &= ~mask;
 259        spin_unlock_irq(&ev->lock);
 260        WARN_ON_ONCE(clearing & mask);
 261
 262        return pending;
 263}
 264
 265/**
 266 * bdev_check_media_change - check if a removable media has been changed
 267 * @bdev: block device to check
 268 *
 269 * Check whether a removable media has been changed, and attempt to free all
 270 * dentries and inodes and invalidates all block device page cache entries in
 271 * that case.
 272 *
 273 * Returns %true if the block device changed, or %false if not.
 274 */
 275bool bdev_check_media_change(struct block_device *bdev)
 276{
 277        unsigned int events;
 278
 279        events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
 280                                   DISK_EVENT_EJECT_REQUEST);
 281        if (!(events & DISK_EVENT_MEDIA_CHANGE))
 282                return false;
 283
 284        if (__invalidate_device(bdev, true))
 285                pr_warn("VFS: busy inodes on changed media %s\n",
 286                        bdev->bd_disk->disk_name);
 287        set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
 288        return true;
 289}
 290EXPORT_SYMBOL(bdev_check_media_change);
 291
 292/**
 293 * disk_force_media_change - force a media change event
 294 * @disk: the disk which will raise the event
 295 * @events: the events to raise
 296 *
 297 * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
 298 * attempt to free all dentries and inodes and invalidates all block
 299 * device page cache entries in that case.
 300 *
 301 * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
 302 */
 303bool disk_force_media_change(struct gendisk *disk, unsigned int events)
 304{
 305        disk_event_uevent(disk, events);
 306
 307        if (!(events & DISK_EVENT_MEDIA_CHANGE))
 308                return false;
 309
 310        if (__invalidate_device(disk->part0, true))
 311                pr_warn("VFS: busy inodes on changed media %s\n",
 312                        disk->disk_name);
 313        set_bit(GD_NEED_PART_SCAN, &disk->state);
 314        return true;
 315}
 316EXPORT_SYMBOL_GPL(disk_force_media_change);
 317
 318/*
 319 * Separate this part out so that a different pointer for clearing_ptr can be
 320 * passed in for disk_clear_events.
 321 */
 322static void disk_events_workfn(struct work_struct *work)
 323{
 324        struct delayed_work *dwork = to_delayed_work(work);
 325        struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
 326
 327        disk_check_events(ev, &ev->clearing);
 328}
 329
 330/*
 331 * A disk events enabled device has the following sysfs nodes under
 332 * its /sys/block/X/ directory.
 333 *
 334 * events               : list of all supported events
 335 * events_async         : list of events which can be detected w/o polling
 336 *                        (always empty, only for backwards compatibility)
 337 * events_poll_msecs    : polling interval, 0: disable, -1: system default
 338 */
 339static ssize_t __disk_events_show(unsigned int events, char *buf)
 340{
 341        const char *delim = "";
 342        ssize_t pos = 0;
 343        int i;
 344
 345        for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
 346                if (events & (1 << i)) {
 347                        pos += sprintf(buf + pos, "%s%s",
 348                                       delim, disk_events_strs[i]);
 349                        delim = " ";
 350                }
 351        if (pos)
 352                pos += sprintf(buf + pos, "\n");
 353        return pos;
 354}
 355
 356static ssize_t disk_events_show(struct device *dev,
 357                                struct device_attribute *attr, char *buf)
 358{
 359        struct gendisk *disk = dev_to_disk(dev);
 360
 361        if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
 362                return 0;
 363        return __disk_events_show(disk->events, buf);
 364}
 365
 366static ssize_t disk_events_async_show(struct device *dev,
 367                                      struct device_attribute *attr, char *buf)
 368{
 369        return 0;
 370}
 371
 372static ssize_t disk_events_poll_msecs_show(struct device *dev,
 373                                           struct device_attribute *attr,
 374                                           char *buf)
 375{
 376        struct gendisk *disk = dev_to_disk(dev);
 377
 378        if (!disk->ev)
 379                return sprintf(buf, "-1\n");
 380        return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
 381}
 382
 383static ssize_t disk_events_poll_msecs_store(struct device *dev,
 384                                            struct device_attribute *attr,
 385                                            const char *buf, size_t count)
 386{
 387        struct gendisk *disk = dev_to_disk(dev);
 388        long intv;
 389
 390        if (!count || !sscanf(buf, "%ld", &intv))
 391                return -EINVAL;
 392
 393        if (intv < 0 && intv != -1)
 394                return -EINVAL;
 395
 396        if (!disk->ev)
 397                return -ENODEV;
 398
 399        disk_block_events(disk);
 400        disk->ev->poll_msecs = intv;
 401        __disk_unblock_events(disk, true);
 402        return count;
 403}
 404
 405DEVICE_ATTR(events, 0444, disk_events_show, NULL);
 406DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
 407DEVICE_ATTR(events_poll_msecs, 0644, disk_events_poll_msecs_show,
 408            disk_events_poll_msecs_store);
 409
 410/*
 411 * The default polling interval can be specified by the kernel
 412 * parameter block.events_dfl_poll_msecs which defaults to 0
 413 * (disable).  This can also be modified runtime by writing to
 414 * /sys/module/block/parameters/events_dfl_poll_msecs.
 415 */
 416static int disk_events_set_dfl_poll_msecs(const char *val,
 417                                          const struct kernel_param *kp)
 418{
 419        struct disk_events *ev;
 420        int ret;
 421
 422        ret = param_set_ulong(val, kp);
 423        if (ret < 0)
 424                return ret;
 425
 426        mutex_lock(&disk_events_mutex);
 427        list_for_each_entry(ev, &disk_events, node)
 428                disk_flush_events(ev->disk, 0);
 429        mutex_unlock(&disk_events_mutex);
 430        return 0;
 431}
 432
 433static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
 434        .set    = disk_events_set_dfl_poll_msecs,
 435        .get    = param_get_ulong,
 436};
 437
 438#undef MODULE_PARAM_PREFIX
 439#define MODULE_PARAM_PREFIX     "block."
 440
 441module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
 442                &disk_events_dfl_poll_msecs, 0644);
 443
 444/*
 445 * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
 446 */
 447int disk_alloc_events(struct gendisk *disk)
 448{
 449        struct disk_events *ev;
 450
 451        if (!disk->fops->check_events || !disk->events)
 452                return 0;
 453
 454        ev = kzalloc(sizeof(*ev), GFP_KERNEL);
 455        if (!ev) {
 456                pr_warn("%s: failed to initialize events\n", disk->disk_name);
 457                return -ENOMEM;
 458        }
 459
 460        INIT_LIST_HEAD(&ev->node);
 461        ev->disk = disk;
 462        spin_lock_init(&ev->lock);
 463        mutex_init(&ev->block_mutex);
 464        ev->block = 1;
 465        ev->poll_msecs = -1;
 466        INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
 467
 468        disk->ev = ev;
 469        return 0;
 470}
 471
 472void disk_add_events(struct gendisk *disk)
 473{
 474        if (!disk->ev)
 475                return;
 476
 477        mutex_lock(&disk_events_mutex);
 478        list_add_tail(&disk->ev->node, &disk_events);
 479        mutex_unlock(&disk_events_mutex);
 480
 481        /*
 482         * Block count is initialized to 1 and the following initial
 483         * unblock kicks it into action.
 484         */
 485        __disk_unblock_events(disk, true);
 486}
 487
 488void disk_del_events(struct gendisk *disk)
 489{
 490        if (disk->ev) {
 491                disk_block_events(disk);
 492
 493                mutex_lock(&disk_events_mutex);
 494                list_del_init(&disk->ev->node);
 495                mutex_unlock(&disk_events_mutex);
 496        }
 497}
 498
 499void disk_release_events(struct gendisk *disk)
 500{
 501        /* the block count should be 1 from disk_del_events() */
 502        WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
 503        kfree(disk->ev);
 504}
 505