// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>

#include <linux/uaccess.h>
#include <asm/ioctls.h>

#include "internal.h"

/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-size
 */
unsigned int pipe_max_size = 1048576;

/* Maximum allocatable pages per user. Hard limit is unset by default, soft
 * matches default values.
 */
unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;

/*
 * We use head and tail indices that aren't masked off, except at the point of
 * dereference, but rather they're allowed to wrap naturally.  This means there
 * isn't a dead spot in the buffer, but the ring has to be a power of two and
 * <= 2^31.
 * -- David Howells 2019-09-23.
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
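
/*
 * Illustrative sketch (editor's example, not kernel code): with unmasked
 * indices and a power-of-two ring, occupancy and slot lookup reduce to
 * plain wrapping arithmetic.  The values below are made up:
 *
 *	unsigned int head = 5, tail = 3, ring_size = 4;
 *	unsigned int mask = ring_size - 1;
 *	unsigned int occupancy = head - tail;			// 2
 *	struct pipe_buffer *slot = &pipe->bufs[tail & mask];	// slot 3
 *
 * Since the indices wrap modulo 2^32 and ring_size divides 2^32, the
 * difference (head - tail) stays correct across wrap-around, so no ring
 * slot has to be sacrificed to distinguish "full" from "empty".
 */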

static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->files)
		mutex_lock_nested(&pipe->mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
	if (pipe->files)
		mutex_unlock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_unlock);

static inline void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
}

static inline void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}

void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{
	BUG_ON(pipe1 == pipe2);

	if (pipe1 < pipe2) {
		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
	} else {
		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
	}
}

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(rdwait);
	DEFINE_WAIT(wrwait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
	pipe_unlock(pipe);
	schedule();
	finish_wait(&pipe->rd_wait, &rdwait);
	finish_wait(&pipe->wr_wait, &wrwait);
	pipe_lock(pipe);
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		put_page(page);
}

static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) != 1)
		return false;
	memcg_kmem_uncharge_page(page, 0);
	__SetPageLocked(page);
	return true;
}

/**
 * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
 *	This function attempts to steal the &struct page attached to
 *	@buf. If successful, this function returns %true and returns with
 *	the page locked. The caller may then reuse the page for whatever
 *	they wish; the typical use is insertion into a different file
 *	page cache.
 */
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * A reference of one is golden, that means that the owner of this
	 * page is the only one holding a reference to it. lock the page
	 * and return OK.
	 */
	if (page_count(page) == 1) {
		lock_page(page);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(generic_pipe_buf_try_steal);

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
	return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);

static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.release	= anon_pipe_buf_release,
	.try_steal	= anon_pipe_buf_try_steal,
	.get		= generic_pipe_buf_get,
};

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int writers = READ_ONCE(pipe->writers);

	return !pipe_empty(head, tail) || !writers;
}

static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total_len = iov_iter_count(to);
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	bool was_full, wake_next_reader = false;
	ssize_t ret;

	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	ret = 0;
	__pipe_lock(pipe);

	/*
	 * We only wake up writers if the pipe was full when we started
	 * reading in order to avoid unnecessary wakeups.
	 *
	 * But when we do wake up writers, we do so using a sync wakeup
	 * (WF_SYNC), because we want them to get going and generate more
	 * data for us.
	 */
	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
	for (;;) {
		unsigned int head = pipe->head;
		unsigned int tail = pipe->tail;
		unsigned int mask = pipe->ring_size - 1;

#ifdef CONFIG_WATCH_QUEUE
		if (pipe->note_loss) {
			struct watch_notification n;

			if (total_len < 8) {
				if (ret == 0)
					ret = -ENOBUFS;
				break;
			}

			n.type = WATCH_TYPE_META;
			n.subtype = WATCH_META_LOSS_NOTIFICATION;
			n.info = watch_sizeof(n);
			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
				if (ret == 0)
					ret = -EFAULT;
				break;
			}
			ret += sizeof(n);
			total_len -= sizeof(n);
			pipe->note_loss = false;
		}
#endif

		if (!pipe_empty(head, tail)) {
			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
			size_t chars = buf->len;
			size_t written;
			int error;

			if (chars > total_len) {
				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
					if (ret == 0)
						ret = -ENOBUFS;
					break;
				}
				chars = total_len;
			}

			error = pipe_buf_confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
			if (unlikely(written < chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;

			/* Was it a packet buffer? Clean up and exit */
			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				total_len = chars;
				buf->len = 0;
			}

			if (!buf->len) {
				pipe_buf_release(pipe, buf);
				spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
				if (buf->flags & PIPE_BUF_FLAG_LOSS)
					pipe->note_loss = true;
#endif
				tail++;
				pipe->tail = tail;
				spin_unlock_irq(&pipe->rd_wait.lock);
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
			if (!pipe_empty(head, tail))	/* More to do? */
				continue;
		}

		if (!pipe->writers)
			break;
		if (ret)
			break;
		if (filp->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}
		__pipe_unlock(pipe);

		/*
		 * We only get here if we didn't actually read anything.
		 *
		 * However, we could have seen (and removed) a zero-sized
		 * pipe buffer, and might have made space in the buffers
		 * that way.
		 *
		 * You can't make zero-sized pipe buffers by doing an empty
		 * write (not even in packet mode), but they can happen if
		 * the writer gets an EFAULT when trying to fill a buffer
		 * that already got allocated and inserted in the buffer
		 * array.
		 *
		 * So we still need to wake up any pending writers in the
		 * _very_ unlikely case that the pipe was full, but we got
		 * no data.
		 */
		if (unlikely(was_full)) {
			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}

		/*
		 * But because we didn't read anything, at this point we can
		 * just return directly with -ERESTARTSYS if we're interrupted,
		 * since we've done any required wakeups and there's no need
		 * to mark anything accessed. And we've dropped the lock.
		 */
		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
			return -ERESTARTSYS;

		__pipe_lock(pipe);
		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
		wake_next_reader = true;
	}
	if (pipe_empty(pipe->head, pipe->tail))
		wake_next_reader = false;
	__pipe_unlock(pipe);

	if (was_full) {
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (wake_next_reader)
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static inline int is_packetized(struct file *file)
{
	return (file->f_flags & O_DIRECT) != 0;
}
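
/*
 * Usage sketch (userspace, illustrative only): a pipe created with
 * O_DIRECT runs in "packet" mode, where each write() produces one packet
 * and a read() consumes at most one packet, discarding whatever does not
 * fit in the supplied buffer.
 *
 *	int fds[2];
 *	if (pipe2(fds, O_DIRECT) == 0) {
 *		write(fds[1], "abcdef", 6);	// queues one 6-byte packet
 *		char buf[4];
 *		read(fds[0], buf, 4);		// returns 4; 2 bytes dropped
 *	}
 */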

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int max_usage = READ_ONCE(pipe->max_usage);

	return !pipe_full(head, tail, max_usage) ||
		!READ_ONCE(pipe->readers);
}

static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head;
	ssize_t ret = 0;
	size_t total_len = iov_iter_count(from);
	ssize_t chars;
	bool was_empty = false;
	bool wake_next_writer = false;

	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	__pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		ret = -EXDEV;
		goto out;
	}
#endif

	/*
	 * Only wake up if the pipe started out empty, since
	 * otherwise there should be no readers waiting.
	 *
	 * If it wasn't empty we try to merge new data into
	 * the last buffer.
	 *
	 * That naturally merges small writes, but it also
	 * page-aligns the rest of the writes for large writes
	 * spanning multiple pages.
	 */
	head = pipe->head;
	was_empty = pipe_empty(head, pipe->tail);
	chars = total_len & (PAGE_SIZE-1);
	if (chars && !was_empty) {
		unsigned int mask = pipe->ring_size - 1;
		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
		int offset = buf->offset + buf->len;

		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
		    offset + chars <= PAGE_SIZE) {
			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

			ret = copy_page_from_iter(buf->page, offset, chars, from);
			if (unlikely(ret < chars)) {
				ret = -EFAULT;
				goto out;
			}

			buf->len += ret;
			if (!iov_iter_count(from))
				goto out;
		}
	}

	for (;;) {
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		head = pipe->head;
		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
			unsigned int mask = pipe->ring_size - 1;
			struct pipe_buffer *buf = &pipe->bufs[head & mask];
			struct page *page = pipe->tmp_page;
			int copied;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}

			/* Allocate a slot in the ring in advance and attach an
			 * empty buffer.  If we fault or otherwise fail to use
			 * it, either the reader will consume it or it'll still
			 * be there for the next write.
			 */
			spin_lock_irq(&pipe->rd_wait.lock);

			head = pipe->head;
			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
				spin_unlock_irq(&pipe->rd_wait.lock);
				continue;
			}

			pipe->head = head + 1;
			spin_unlock_irq(&pipe->rd_wait.lock);

			/* Insert it into the buffer array */
			buf = &pipe->bufs[head & mask];
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = 0;
			if (is_packetized(filp))
				buf->flags = PIPE_BUF_FLAG_PACKET;
			else
				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
			pipe->tmp_page = NULL;

			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += copied;
			buf->offset = 0;
			buf->len = copied;

			if (!iov_iter_count(from))
				break;
		}

		if (!pipe_full(head, pipe->tail, pipe->max_usage))
			continue;

		/* Wait for buffer space to become available. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		/*
		 * We're going to release the pipe lock and wait for more
		 * space. We wake up any readers if necessary, and then
		 * after waiting we need to re-check whether the pipe
		 * became empty while we dropped the lock.
		 */
		__pipe_unlock(pipe);
		if (was_empty) {
			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		}
		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
		__pipe_lock(pipe);
		was_empty = pipe_empty(pipe->head, pipe->tail);
		wake_next_writer = true;
	}
out:
	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
		wake_next_writer = false;
	__pipe_unlock(pipe);

	/*
	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
	 * want the reader to start processing things asap, rather than
	 * leave the data pending.
	 *
	 * This is particularly important for small writes, because of
	 * how (for example) the GNU make jobserver uses small writes to
	 * wake up pending jobs
	 */
	if (was_empty) {
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (wake_next_writer)
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
		int err = file_update_time(filp);
		if (err)
			ret = err;
		sb_end_write(file_inode(filp)->i_sb);
	}
	return ret;
}

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int count, head, tail, mask;

	switch (cmd) {
	case FIONREAD:
		__pipe_lock(pipe);
		count = 0;
		head = pipe->head;
		tail = pipe->tail;
		mask = pipe->ring_size - 1;

		while (tail != head) {
			count += pipe->bufs[tail & mask].len;
			tail++;
		}
		__pipe_unlock(pipe);

		return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
	case IOC_WATCH_QUEUE_SET_SIZE: {
		int ret;
		__pipe_lock(pipe);
		ret = watch_queue_set_size(pipe, arg);
		__pipe_unlock(pipe);
		return ret;
	}

	case IOC_WATCH_QUEUE_SET_FILTER:
		return watch_queue_set_filter(
			pipe, (struct watch_notification_filter __user *)arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
}
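
/*
 * Usage sketch (userspace, illustrative only): FIONREAD reports the number
 * of unread bytes queued in the pipe, computed above by summing the length
 * of every occupied ring slot.
 *
 *	int avail;
 *	if (ioctl(fds[0], FIONREAD, &avail) == 0)
 *		printf("%d bytes ready\n", avail);
 */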

/* No kernel lock held - fine */
static __poll_t
pipe_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head, tail;

	/*
	 * Reading pipe state only -- no need for acquiring the semaphore.
	 *
	 * But because this is racy, the code has to add the
	 * entry to the poll table _first_ ..
	 */
	if (filp->f_mode & FMODE_READ)
		poll_wait(filp, &pipe->rd_wait, wait);
	if (filp->f_mode & FMODE_WRITE)
		poll_wait(filp, &pipe->wr_wait, wait);

	/*
	 * .. and only then can you do the racy tests. That way,
	 * if something changes and you got it wrong, the poll
	 * table entry will wake you up and fix it.
	 */
	head = READ_ONCE(pipe->head);
	tail = READ_ONCE(pipe->tail);

	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		if (!pipe_empty(head, tail))
			mask |= EPOLLIN | EPOLLRDNORM;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= EPOLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (!pipe_full(head, tail, pipe->max_usage))
			mask |= EPOLLOUT | EPOLLWRNORM;
		/*
		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= EPOLLERR;
	}

	return mask;
}

static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
{
	int kill = 0;

	spin_lock(&inode->i_lock);
	if (!--pipe->files) {
		inode->i_pipe = NULL;
		kill = 1;
	}
	spin_unlock(&inode->i_lock);

	if (kill)
		free_pipe_info(pipe);
}

static int
pipe_release(struct inode *inode, struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	__pipe_lock(pipe);
	if (file->f_mode & FMODE_READ)
		pipe->readers--;
	if (file->f_mode & FMODE_WRITE)
		pipe->writers--;

	/* Was that the last reader or writer, but not the other side? */
	if (!pipe->readers != !pipe->writers) {
		wake_up_interruptible_all(&pipe->rd_wait);
		wake_up_interruptible_all(&pipe->wr_wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return 0;
}

static int
pipe_fasync(int fd, struct file *filp, int on)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int retval = 0;

	__pipe_lock(pipe);
	if (filp->f_mode & FMODE_READ)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
		if (retval < 0 && (filp->f_mode & FMODE_READ))
			/* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
	__pipe_unlock(pipe);
	return retval;
}

unsigned long account_pipe_buffers(struct user_struct *user,
				   unsigned long old, unsigned long new)
{
	return atomic_long_add_return(new - old, &user->pipe_bufs);
}

bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{
	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);

	return soft_limit && user_bufs > soft_limit;
}

bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{
	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);

	return hard_limit && user_bufs > hard_limit;
}

bool pipe_is_unprivileged_user(void)
{
	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}

struct pipe_inode_info *alloc_pipe_info(void)
{
	struct pipe_inode_info *pipe;
	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
	struct user_struct *user = get_current_user();
	unsigned long user_bufs;
	unsigned int max_size = READ_ONCE(pipe_max_size);

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
	if (pipe == NULL)
		goto out_free_uid;

	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
		pipe_bufs = max_size >> PAGE_SHIFT;

	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);

	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
		pipe_bufs = 1;
	}

	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
		goto out_revert_acct;

	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
			     GFP_KERNEL_ACCOUNT);

	if (pipe->bufs) {
		init_waitqueue_head(&pipe->rd_wait);
		init_waitqueue_head(&pipe->wr_wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->max_usage = pipe_bufs;
		pipe->ring_size = pipe_bufs;
		pipe->nr_accounted = pipe_bufs;
		pipe->user = user;
		mutex_init(&pipe->mutex);
		return pipe;
	}

out_revert_acct:
	(void) account_pipe_buffers(user, pipe_bufs, 0);
	kfree(pipe);
out_free_uid:
	free_uid(user);
	return NULL;
}

void free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		watch_queue_clear(pipe->watch_queue);
		put_watch_queue(pipe->watch_queue);
	}
#endif

	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
	free_uid(pipe->user);
	for (i = 0; i < pipe->ring_size; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			pipe_buf_release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe->bufs);
	kfree(pipe);
}

static struct vfsmount *pipe_mnt __read_mostly;

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				d_inode(dentry)->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	inode->i_ino = get_next_ino();

	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

int create_pipe_files(struct file **res, int flags)
{
	struct inode *inode = get_pipe_inode();
	struct file *f;

	if (!inode)
		return -ENFILE;

	if (flags & O_NOTIFICATION_PIPE) {
#ifdef CONFIG_WATCH_QUEUE
		if (watch_queue_init(inode->i_pipe) < 0) {
			iput(inode);
			return -ENOMEM;
		}
#else
		return -ENOPKG;
#endif
	}

	f = alloc_file_pseudo(inode, pipe_mnt, "",
				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
				&pipefifo_fops);
	if (IS_ERR(f)) {
		free_pipe_info(inode->i_pipe);
		iput(inode);
		return PTR_ERR(f);
	}

	f->private_data = inode->i_pipe;

	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
				  &pipefifo_fops);
	if (IS_ERR(res[0])) {
		put_pipe_info(inode, inode->i_pipe);
		fput(f);
		return PTR_ERR(res[0]);
	}
	res[0]->private_data = inode->i_pipe;
	res[1] = f;
	stream_open(inode, res[0]);
	stream_open(inode, res[1]);
	return 0;
}

static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
		return -EINVAL;

	error = create_pipe_files(files, flags);
	if (error)
		return error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

 err_fdr:
	put_unused_fd(fdr);
 err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}

int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		fd_install(fd[0], files[0]);
		fd_install(fd[1], files[1]);
	}
	return error;
}

/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
static int do_pipe2(int __user *fildes, int flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}

SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	return do_pipe2(fildes, flags);
}

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return do_pipe2(fildes, 0);
}
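
/*
 * Usage sketch (userspace, illustrative only): both syscalls funnel into
 * do_pipe2(); pipe(2) is simply pipe2(2) with flags == 0.
 *
 *	int fds[2];
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) < 0)
 *		perror("pipe2");
 *	// fds[0] is the read end, fds[1] the write end
 */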

static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
	int cur = *cnt;

	while (cur == *cnt) {
		pipe_wait(pipe);
		if (signal_pending(current))
			break;
	}
	return cur == *cnt ? -ERESTARTSYS : 0;
}

static void wake_up_partner(struct pipe_inode_info *pipe)
{
	wake_up_interruptible_all(&pipe->rd_wait);
	wake_up_interruptible_all(&pipe->wr_wait);
}

static int fifo_open(struct inode *inode, struct file *filp)
{
	struct pipe_inode_info *pipe;
	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
	int ret;

	filp->f_version = 0;

	spin_lock(&inode->i_lock);
	if (inode->i_pipe) {
		pipe = inode->i_pipe;
		pipe->files++;
		spin_unlock(&inode->i_lock);
	} else {
		spin_unlock(&inode->i_lock);
		pipe = alloc_pipe_info();
		if (!pipe)
			return -ENOMEM;
		pipe->files = 1;
		spin_lock(&inode->i_lock);
		if (unlikely(inode->i_pipe)) {
			inode->i_pipe->files++;
			spin_unlock(&inode->i_lock);
			free_pipe_info(pipe);
			pipe = inode->i_pipe;
		} else {
			inode->i_pipe = pipe;
			spin_unlock(&inode->i_lock);
		}
	}
	filp->private_data = pipe;
	/* OK, we have a pipe and it's pinned down */

	__pipe_lock(pipe);

	/* We can only do regular read/write on fifos */
	stream_open(inode, filp);

	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ:
	/*
	 *  O_RDONLY
	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
	 *  opened, even when there is no process writing the FIFO.
	 */
		pipe->r_counter++;
		if (pipe->readers++ == 0)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->writers) {
			if ((filp->f_flags & O_NONBLOCK)) {
				/* suppress EPOLLHUP until we have
				 * seen a writer */
				filp->f_version = pipe->w_counter;
			} else {
				if (wait_for_partner(pipe, &pipe->w_counter))
					goto err_rd;
			}
		}
		break;

	case FMODE_WRITE:
	/*
	 *  O_WRONLY
	 *  POSIX.1 says that O_NONBLOCK means return -1 with
	 *  errno=ENXIO when there is no process reading the FIFO.
	 */
		ret = -ENXIO;
		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
			goto err;

		pipe->w_counter++;
		if (!pipe->writers++)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->readers) {
			if (wait_for_partner(pipe, &pipe->r_counter))
				goto err_wr;
		}
		break;

	case FMODE_READ | FMODE_WRITE:
	/*
	 *  O_RDWR
	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
	 *  This implementation will NEVER block on an O_RDWR open, since
	 *  the process can at least talk to itself.
	 */

		pipe->readers++;
		pipe->writers++;
		pipe->r_counter++;
		pipe->w_counter++;
		if (pipe->readers == 1 || pipe->writers == 1)
			wake_up_partner(pipe);
		break;

	default:
		ret = -EINVAL;
		goto err;
	}

	/* Ok! */
	__pipe_unlock(pipe);
	return 0;

err_rd:
	if (!--pipe->readers)
		wake_up_interruptible(&pipe->wr_wait);
	ret = -ERESTARTSYS;
	goto err;

err_wr:
	if (!--pipe->writers)
		wake_up_interruptible_all(&pipe->rd_wait);
	ret = -ERESTARTSYS;
	goto err;

err:
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return ret;
}

const struct file_operations pipefifo_fops = {
	.open		= fifo_open,
	.llseek		= no_llseek,
	.read_iter	= pipe_read,
	.write_iter	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.release	= pipe_release,
	.fasync		= pipe_fasync,
};

/*
 * Currently we rely on the pipe array holding a power-of-2 number
 * of pages. Returns 0 on error.
 */
unsigned int round_pipe_size(unsigned long size)
{
	if (size > (1U << 31))
		return 0;

	/* Minimum pipe size, as required by POSIX */
	if (size < PAGE_SIZE)
		return PAGE_SIZE;

	return roundup_pow_of_two(size);
}
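
/*
 * Worked examples of the rounding rules above, assuming PAGE_SIZE == 4096:
 *
 *	round_pipe_size(0)	   -> 4096	(clamped up to one page)
 *	round_pipe_size(4097)	   -> 8192	(next power of two)
 *	round_pipe_size(1UL << 32) -> 0		(too big, caller treats as error)
 */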

/*
 * Resize the pipe ring to a number of slots.
 */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
	struct pipe_buffer *bufs;
	unsigned int head, tail, mask, n;

	/*
	 * We can shrink the pipe, if arg is greater than the ring occupancy.
	 * Since we don't expect a lot of shrink+grow operations, just free and
	 * allocate again like we would do for growing.  If the pipe currently
	 * contains more buffers than arg, then return busy.
	 */
	mask = pipe->ring_size - 1;
	head = pipe->head;
	tail = pipe->tail;
	n = pipe_occupancy(pipe->head, pipe->tail);
	if (nr_slots < n)
		return -EBUSY;

	bufs = kcalloc(nr_slots, sizeof(*bufs),
		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (unlikely(!bufs))
		return -ENOMEM;

	/*
	 * The pipe array wraps around, so just start the new one at zero
	 * and adjust the indices.
	 */
	if (n > 0) {
		unsigned int h = head & mask;
		unsigned int t = tail & mask;
		if (h > t) {
			memcpy(bufs, pipe->bufs + t,
			       n * sizeof(struct pipe_buffer));
		} else {
			unsigned int tsize = pipe->ring_size - t;
			if (h > 0)
				memcpy(bufs + tsize, pipe->bufs,
				       h * sizeof(struct pipe_buffer));
			memcpy(bufs, pipe->bufs + t,
			       tsize * sizeof(struct pipe_buffer));
		}
	}

	head = n;
	tail = 0;

	kfree(pipe->bufs);
	pipe->bufs = bufs;
	pipe->ring_size = nr_slots;
	if (pipe->max_usage > nr_slots)
		pipe->max_usage = nr_slots;
	pipe->tail = tail;
	pipe->head = head;

	/* This might have made more room for writers */
	wake_up_interruptible(&pipe->wr_wait);
	return 0;
}

/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or a negative error code on failure.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{
	unsigned long user_bufs;
	unsigned int nr_slots, size;
	long ret = 0;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		return -EBUSY;
#endif

	size = round_pipe_size(arg);
	nr_slots = size >> PAGE_SHIFT;

	if (!nr_slots)
		return -EINVAL;

	/*
	 * If trying to increase the pipe capacity, check that an
	 * unprivileged user is not trying to exceed various limits
	 * (soft limit check here, hard limit check just below).
	 * Decreasing the pipe capacity is always permitted, even
	 * if the user is currently over a limit.
	 */
	if (nr_slots > pipe->max_usage &&
			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);

	if (nr_slots > pipe->max_usage &&
			(too_many_pipe_buffers_hard(user_bufs) ||
			 too_many_pipe_buffers_soft(user_bufs)) &&
			pipe_is_unprivileged_user()) {
		ret = -EPERM;
		goto out_revert_acct;
	}

	ret = pipe_resize_ring(pipe, nr_slots);
	if (ret < 0)
		goto out_revert_acct;

	pipe->max_usage = nr_slots;
	pipe->nr_accounted = nr_slots;
	return pipe->max_usage * PAGE_SIZE;

out_revert_acct:
	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
	return ret;
}

/*
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 * location, so checking ->i_pipe is not enough to verify that this is a
 * pipe.
 */
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
	struct pipe_inode_info *pipe = file->private_data;

	if (file->f_op != &pipefifo_fops || !pipe)
		return NULL;
#ifdef CONFIG_WATCH_QUEUE
	if (for_splice && pipe->watch_queue)
		return NULL;
#endif
	return pipe;
}

long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

	pipe = get_pipe_info(file, false);
	if (!pipe)
		return -EBADF;

	__pipe_lock(pipe);

	switch (cmd) {
	case F_SETPIPE_SZ:
		ret = pipe_set_size(pipe, arg);
		break;
	case F_GETPIPE_SZ:
		ret = pipe->max_usage * PAGE_SIZE;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	__pipe_unlock(pipe);
	return ret;
}
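
/*
 * Usage sketch (userspace, illustrative only): resizing a pipe with
 * fcntl().  Because the kernel rounds the request up via
 * round_pipe_size(), the value returned by F_SETPIPE_SZ (and later by
 * F_GETPIPE_SZ) may be larger than what was asked for.
 *
 *	long sz = fcntl(fds[1], F_SETPIPE_SZ, 1024 * 1024);
 *	if (sz < 0)
 *		perror("F_SETPIPE_SZ");	// e.g. EPERM above pipe-max-size
 *	else
 *		printf("pipe capacity is now %ld bytes\n", sz);
 */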

static const struct super_operations pipefs_ops = {
	.destroy_inode = free_inode_nonrcu,
	.statfs = simple_statfs,
};

/*
 * pipefs should _never_ be mounted by userland - too much of a security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

static int pipefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &pipefs_ops;
	ctx->dops = &pipefs_dentry_operations;
	return 0;
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.init_fs_context = pipefs_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

fs_initcall(init_pipe_fs);