/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
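
/*
 * Illustrative sketch (not part of the original file): the "start+len"
 * construction above means each pipe_buffer describes an (offset, len)
 * window into its page, while the pipe itself keeps a circular window
 * of buffers via (curbuf, nrbufs).  Since PIPE_BUFFERS is a power of
 * two, the index arithmetic used throughout this file is simply:
 *
 *	next_filled = (curbuf + 1)      & (PIPE_BUFFERS - 1);
 *	next_free   = (curbuf + nrbufs) & (PIPE_BUFFERS - 1);
 *
 * so data never has to be moved or compacted within a page.
 */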

static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
        if (pipe->inode)
                mutex_lock_nested(&pipe->inode->i_mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
        /*
         * pipe_lock() nests non-pipe inode locks (for writing to a file)
         */
        pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
        if (pipe->inode)
                mutex_unlock(&pipe->inode->i_mutex);
}
EXPORT_SYMBOL(pipe_unlock);

void pipe_double_lock(struct pipe_inode_info *pipe1,
                      struct pipe_inode_info *pipe2)
{
        BUG_ON(pipe1 == pipe2);

        if (pipe1 < pipe2) {
                pipe_lock_nested(pipe1, I_MUTEX_PARENT);
                pipe_lock_nested(pipe2, I_MUTEX_CHILD);
        } else {
                pipe_lock_nested(pipe2, I_MUTEX_PARENT);
                pipe_lock_nested(pipe1, I_MUTEX_CHILD);
        }
}
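
/*
 * Usage sketch (illustrative; mirrors callers such as fs/splice.c):
 * whenever two pipes must be held at once, both tasks must go through
 * pipe_double_lock() so that they agree on the ordering:
 *
 *	pipe_double_lock(ipipe, opipe);
 *	... move buffers between the two pipes ...
 *	pipe_unlock(ipipe);
 *	pipe_unlock(opipe);
 *
 * Locking by ascending pointer value makes the order globally
 * consistent, which is what rules out an AB-BA deadlock.
 */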

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
        DEFINE_WAIT(wait);

        /*
         * Pipes are system-local resources, so sleeping on them
         * is considered a noninteractive wait:
         */
        prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
        pipe_unlock(pipe);
        schedule();
        finish_wait(&pipe->wait, &wait);
        pipe_lock(pipe);
}
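
/*
 * Callers use pipe_wait() in the classic condition-loop form; a minimal
 * sketch (assuming the caller already holds the pipe lock):
 *
 *	while (!condition(pipe)) {
 *		if (signal_pending(current))
 *			break;
 *		pipe_wait(pipe);	-- drops and re-takes the lock
 *	}
 *
 * Because the lock is dropped while sleeping, the condition must be
 * re-checked after every wakeup.
 */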

static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
                        int atomic)
{
        unsigned long copy;

        while (len > 0) {
                while (!iov->iov_len)
                        iov++;
                copy = min_t(unsigned long, len, iov->iov_len);

                if (atomic) {
                        if (__copy_from_user_inatomic(to, iov->iov_base, copy))
                                return -EFAULT;
                } else {
                        if (copy_from_user(to, iov->iov_base, copy))
                                return -EFAULT;
                }
                to += copy;
                len -= copy;
                iov->iov_base += copy;
                iov->iov_len -= copy;
        }
        return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
                      int atomic)
{
        unsigned long copy;

        while (len > 0) {
                while (!iov->iov_len)
                        iov++;
                copy = min_t(unsigned long, len, iov->iov_len);

                if (atomic) {
                        if (__copy_to_user_inatomic(iov->iov_base, from, copy))
                                return -EFAULT;
                } else {
                        if (copy_to_user(iov->iov_base, from, copy))
                                return -EFAULT;
                }
                from += copy;
                len -= copy;
                iov->iov_base += copy;
                iov->iov_len -= copy;
        }
        return 0;
}
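
/*
 * Both copy helpers above take an 'atomic' flag so that callers can
 * first attempt the copy under kmap_atomic() (where faulting is not
 * allowed) and fall back to the sleeping path on failure.  Sketch of
 * the caller-side pattern, as used by pipe_read()/pipe_write() below:
 *
 *	atomic = 1;
 * redo:
 *	addr = ops->map(pipe, buf, atomic);
 *	error = pipe_iov_copy_from_user(addr, iov, chars, atomic);
 *	ops->unmap(pipe, buf, addr);
 *	if (error && atomic) {
 *		atomic = 0;	-- retry with kmap(), which may sleep
 *		goto redo;
 *	}
 */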

/*
 * Attempt to pre-fault in the user memory, so we can use atomic copies.
 * Returns the number of bytes not faulted in.
 */
static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
{
        while (!iov->iov_len)
                iov++;

        while (len > 0) {
                unsigned long this_len;

                this_len = min_t(unsigned long, len, iov->iov_len);
                if (fault_in_pages_writeable(iov->iov_base, this_len))
                        break;

                len -= this_len;
                iov++;
        }

        return len;
}

/*
 * Pre-fault in the user memory, so we can use atomic copies.
 */
static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
{
        while (!iov->iov_len)
                iov++;

        while (len > 0) {
                unsigned long this_len;

                this_len = min_t(unsigned long, len, iov->iov_len);
                fault_in_pages_readable(iov->iov_base, this_len);
                len -= this_len;
                iov++;
        }
}
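
/*
 * Why the pre-faulting matters (illustrative): pipe_read() below does
 *
 *	atomic = !iov_fault_in_pages_write(iov, chars);
 *
 * i.e. the atomic fast path is only attempted when every destination
 * page could be faulted in up front, because no fault may be taken
 * while a kmap_atomic() mapping is live.
 */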

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                  struct pipe_buffer *buf)
{
        struct page *page = buf->page;

        /*
         * If nobody else uses this page, and we don't already have a
         * temporary page, let's keep track of it as a one-deep
         * allocation cache. (Otherwise just release our reference to it)
         */
        if (page_count(page) == 1 && !pipe->tmp_page)
                pipe->tmp_page = page;
        else
                page_cache_release(page);
}

/**
 * generic_pipe_buf_map - virtually map a pipe buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer that should be mapped
 * @atomic:     whether to use an atomic map
 *
 * Description:
 *      This function returns a kernel virtual address mapping for the
 *      pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
 *      and the caller has to be careful not to fault before calling
 *      the unmap function.
 *
 *      Note that this function occupies KM_USER0 if @atomic != 0.
 */
void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf, int atomic)
{
        if (atomic) {
                buf->flags |= PIPE_BUF_FLAG_ATOMIC;
                return kmap_atomic(buf->page, KM_USER0);
        }

        return kmap(buf->page);
}

/**
 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer that should be unmapped
 * @map_data:   the data that the mapping function returned
 *
 * Description:
 *      This function undoes the mapping that ->map() provided.
 */
void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, void *map_data)
{
        if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
                buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
                kunmap_atomic(map_data, KM_USER0);
        } else
                kunmap(buf->page);
}
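
/*
 * Sketch of the map/unmap pairing (this mirrors the fast path in
 * pipe_read() below):
 *
 *	void *addr = ops->map(pipe, buf, atomic);
 *	error = pipe_iov_copy_to_user(iov, addr + buf->offset,
 *				      chars, atomic);
 *	ops->unmap(pipe, buf, addr);
 *
 * With @atomic set the mapping occupies KM_USER0, so no faults or
 * sleeps are allowed between map and unmap.
 */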

/**
 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer to attempt to steal
 *
 * Description:
 *      This function attempts to steal the &struct page attached to
 *      @buf. If successful, this function returns 0 and returns with
 *      the page locked. The caller may then reuse the page for whatever
 *      he wishes; the typical use is insertion into a different file
 *      page cache.
 */
int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf)
{
        struct page *page = buf->page;

        /*
         * A reference of one is golden, that means that the owner of this
         * page is the only one holding a reference to it. lock the page
         * and return OK.
         */
        if (page_count(page) == 1) {
                lock_page(page);
                return 0;
        }

        return 1;
}
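
/*
 * Caller-side sketch for ->steal() (illustrative; 'buf' is assumed to
 * have been confirmed first):
 *
 *	if (!buf->ops->steal(pipe, buf)) {
 *		-- page is locked and exclusively owned: it may be
 *		-- reused, e.g. inserted into a file's page cache
 *	} else {
 *		-- page is still shared: fall back to copying the
 *		-- data out of buf->page
 *	}
 */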

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer to get a reference to
 *
 * Description:
 *      This function grabs an extra reference to @buf. It's used in
 *      the tee() system call, when we duplicate the buffers in one
 *      pipe into another.
 */
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
        page_cache_get(buf->page);
}

/**
 * generic_pipe_buf_confirm - verify contents of the pipe buffer
 * @info:       the pipe that the buffer belongs to
 * @buf:        the buffer to confirm
 *
 * Description:
 *      This function does nothing, because the generic pipe code uses
 *      pages that are always good when inserted into the pipe.
 */
int generic_pipe_buf_confirm(struct pipe_inode_info *info,
                             struct pipe_buffer *buf)
{
        return 0;
}

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:       the pipe that the buffer belongs to
 * @buf:        the buffer to put a reference to
 *
 * Description:
 *      This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
                              struct pipe_buffer *buf)
{
        page_cache_release(buf->page);
}

static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};

static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
           unsigned long nr_segs, loff_t pos)
{
        struct file *filp = iocb->ki_filp;
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct pipe_inode_info *pipe;
        int do_wakeup;
        ssize_t ret;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;

        total_len = iov_length(iov, nr_segs);
        /* Null read succeeds. */
        if (unlikely(total_len == 0))
                return 0;

        do_wakeup = 0;
        ret = 0;
        mutex_lock(&inode->i_mutex);
        pipe = inode->i_pipe;
        for (;;) {
                int bufs = pipe->nrbufs;
                if (bufs) {
                        int curbuf = pipe->curbuf;
                        struct pipe_buffer *buf = pipe->bufs + curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
                        void *addr;
                        size_t chars = buf->len;
                        int error, atomic;

                        if (chars > total_len)
                                chars = total_len;

                        error = ops->confirm(pipe, buf);
                        if (error) {
                                if (!ret)
                                        ret = error;
                                break;
                        }

                        atomic = !iov_fault_in_pages_write(iov, chars);
redo:
                        addr = ops->map(pipe, buf, atomic);
                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
                        ops->unmap(pipe, buf, addr);
                        if (unlikely(error)) {
                                /*
                                 * Just retry with the slow path if we failed.
                                 */
                                if (atomic) {
                                        atomic = 0;
                                        goto redo;
                                }
                                if (!ret)
                                        ret = error;
                                break;
                        }
                        ret += chars;
                        buf->offset += chars;
                        buf->len -= chars;
                        if (!buf->len) {
                                buf->ops = NULL;
                                ops->release(pipe, buf);
                                curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
                                pipe->curbuf = curbuf;
                                pipe->nrbufs = --bufs;
                                do_wakeup = 1;
                        }
                        total_len -= chars;
                        if (!total_len)
                                break;  /* common path: read succeeded */
                }
                if (bufs)       /* More to do? */
                        continue;
                if (!pipe->writers)
                        break;
                if (!pipe->waiting_writers) {
                        /* syscall merging: Usually we must not sleep
                         * if O_NONBLOCK is set, or if we got some data.
                         * But if a writer sleeps in kernel space, then
                         * we can wait for that data without violating POSIX.
                         */
                        if (ret)
                                break;
                        if (filp->f_flags & O_NONBLOCK) {
                                ret = -EAGAIN;
                                break;
                        }
                }
                if (signal_pending(current)) {
                        if (!ret)
                                ret = -ERESTARTSYS;
                        break;
                }
                if (do_wakeup) {
                        wake_up_interruptible_sync(&pipe->wait);
                        kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
                }
                pipe_wait(pipe);
        }
        mutex_unlock(&inode->i_mutex);

        /* Signal writers asynchronously that there is more room. */
        if (do_wakeup) {
                wake_up_interruptible_sync(&pipe->wait);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }
        if (ret > 0)
                file_accessed(filp);
        return ret;
}

static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
            unsigned long nr_segs, loff_t ppos)
{
        struct file *filp = iocb->ki_filp;
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct pipe_inode_info *pipe;
        ssize_t ret;
        int do_wakeup;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;
        ssize_t chars;

        total_len = iov_length(iov, nr_segs);
        /* Null write succeeds. */
        if (unlikely(total_len == 0))
                return 0;

        do_wakeup = 0;
        ret = 0;
        mutex_lock(&inode->i_mutex);
        pipe = inode->i_pipe;

        if (!pipe->readers) {
                send_sig(SIGPIPE, current, 0);
                ret = -EPIPE;
                goto out;
        }

        /* We try to merge small writes */
        chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
        if (pipe->nrbufs && chars != 0) {
                int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
                                                        (PIPE_BUFFERS-1);
                struct pipe_buffer *buf = pipe->bufs + lastbuf;
                const struct pipe_buf_operations *ops = buf->ops;
                int offset = buf->offset + buf->len;

                if (ops->can_merge && offset + chars <= PAGE_SIZE) {
                        int error, atomic = 1;
                        void *addr;

                        error = ops->confirm(pipe, buf);
                        if (error)
                                goto out;

                        iov_fault_in_pages_read(iov, chars);
redo1:
                        addr = ops->map(pipe, buf, atomic);
                        error = pipe_iov_copy_from_user(offset + addr, iov,
                                                        chars, atomic);
                        ops->unmap(pipe, buf, addr);
                        ret = error;
                        do_wakeup = 1;
                        if (error) {
                                if (atomic) {
                                        atomic = 0;
                                        goto redo1;
                                }
                                goto out;
                        }
                        buf->len += chars;
                        total_len -= chars;
                        ret = chars;
                        if (!total_len)
                                goto out;
                }
        }

        for (;;) {
                int bufs;

                if (!pipe->readers) {
                        send_sig(SIGPIPE, current, 0);
                        if (!ret)
                                ret = -EPIPE;
                        break;
                }
                bufs = pipe->nrbufs;
                if (bufs < PIPE_BUFFERS) {
                        int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
                        struct pipe_buffer *buf = pipe->bufs + newbuf;
                        struct page *page = pipe->tmp_page;
                        char *src;
                        int error, atomic = 1;

                        if (!page) {
                                page = alloc_page(GFP_HIGHUSER);
                                if (unlikely(!page)) {
                                        ret = ret ? : -ENOMEM;
                                        break;
                                }
                                pipe->tmp_page = page;
                        }
                        /* Always wake up, even if the copy fails. Otherwise
                         * we lock up (O_NONBLOCK-)readers that sleep due to
                         * syscall merging.
                         * FIXME! Is this really true?
                         */
                        do_wakeup = 1;
                        chars = PAGE_SIZE;
                        if (chars > total_len)
                                chars = total_len;

                        iov_fault_in_pages_read(iov, chars);
redo2:
                        if (atomic)
                                src = kmap_atomic(page, KM_USER0);
                        else
                                src = kmap(page);

                        error = pipe_iov_copy_from_user(src, iov, chars,
                                                        atomic);
                        if (atomic)
                                kunmap_atomic(src, KM_USER0);
                        else
                                kunmap(page);

                        if (unlikely(error)) {
                                if (atomic) {
                                        atomic = 0;
                                        goto redo2;
                                }
                                if (!ret)
                                        ret = error;
                                break;
                        }
                        ret += chars;

                        /* Insert it into the buffer array */
                        buf->page = page;
                        buf->ops = &anon_pipe_buf_ops;
                        buf->offset = 0;
                        buf->len = chars;
                        pipe->nrbufs = ++bufs;
                        pipe->tmp_page = NULL;

                        total_len -= chars;
                        if (!total_len)
                                break;
                }
                if (bufs < PIPE_BUFFERS)
                        continue;
                if (filp->f_flags & O_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
                }
                if (signal_pending(current)) {
                        if (!ret)
                                ret = -ERESTARTSYS;
                        break;
                }
                if (do_wakeup) {
                        wake_up_interruptible_sync(&pipe->wait);
                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                        do_wakeup = 0;
                }
                pipe->waiting_writers++;
                pipe_wait(pipe);
                pipe->waiting_writers--;
        }
out:
        mutex_unlock(&inode->i_mutex);
        if (do_wakeup) {
                wake_up_interruptible_sync(&pipe->wait);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }
        if (ret > 0)
                file_update_time(filp);
        return ret;
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
        return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
           loff_t *ppos)
{
        return -EBADF;
}

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct pipe_inode_info *pipe;
        int count, buf, nrbufs;

        switch (cmd) {
                case FIONREAD:
                        mutex_lock(&inode->i_mutex);
                        pipe = inode->i_pipe;
                        count = 0;
                        buf = pipe->curbuf;
                        nrbufs = pipe->nrbufs;
                        while (--nrbufs >= 0) {
                                count += pipe->bufs[buf].len;
                                buf = (buf+1) & (PIPE_BUFFERS-1);
                        }
                        mutex_unlock(&inode->i_mutex);

                        return put_user(count, (int __user *)arg);
                default:
                        return -EINVAL;
        }
}
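
/*
 * Userspace sketch (illustrative, not kernel code): FIONREAD reports
 * the number of bytes currently buffered in the pipe, i.e. the sum of
 * buf->len over all nrbufs buffers computed above:
 *
 *	int avail;
 *
 *	if (ioctl(pipefd[0], FIONREAD, &avail) == 0)
 *		printf("%d bytes ready\n", avail);
 */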

/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
        unsigned int mask;
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct pipe_inode_info *pipe = inode->i_pipe;
        int nrbufs;

        poll_wait(filp, &pipe->wait, wait);

        /* Reading only -- no need for acquiring the semaphore.  */
        nrbufs = pipe->nrbufs;
        mask = 0;
        if (filp->f_mode & FMODE_READ) {
                mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
                if (!pipe->writers && filp->f_version != pipe->w_counter)
                        mask |= POLLHUP;
        }

        if (filp->f_mode & FMODE_WRITE) {
                mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
                /*
                 * Most Unices do not set POLLERR for FIFOs but on Linux they
                 * behave exactly like pipes for poll().
                 */
                if (!pipe->readers)
                        mask |= POLLERR;
        }

        return mask;
}
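
/*
 * Userspace sketch (illustrative): the mask computed above is exactly
 * what poll() reports for a pipe descriptor:
 *
 *	struct pollfd pfd = { .fd = pipefd[0], .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLHUP))
 *		-- all writers closed: EOF once buffered data is drained
 */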

static int
pipe_release(struct inode *inode, int decr, int decw)
{
        struct pipe_inode_info *pipe;

        mutex_lock(&inode->i_mutex);
        pipe = inode->i_pipe;
        pipe->readers -= decr;
        pipe->writers -= decw;

        if (!pipe->readers && !pipe->writers) {
                free_pipe_info(inode);
        } else {
                wake_up_interruptible_sync(&pipe->wait);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }
        mutex_unlock(&inode->i_mutex);

        return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        int retval;

        mutex_lock(&inode->i_mutex);
        retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
        mutex_unlock(&inode->i_mutex);

        return retval;
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        int retval;

        mutex_lock(&inode->i_mutex);
        retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
        mutex_unlock(&inode->i_mutex);

        return retval;
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct pipe_inode_info *pipe = inode->i_pipe;
        int retval;

        mutex_lock(&inode->i_mutex);
        retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
        if (retval >= 0) {
                retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
                if (retval < 0) /* this can happen only if on == T */
                        fasync_helper(-1, filp, 0, &pipe->fasync_readers);
        }
        mutex_unlock(&inode->i_mutex);
        return retval;
}


static int
pipe_read_release(struct inode *inode, struct file *filp)
{
        return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
        return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
        int decr, decw;

        decr = (filp->f_mode & FMODE_READ) != 0;
        decw = (filp->f_mode & FMODE_WRITE) != 0;
        return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
        int ret = -ENOENT;

        mutex_lock(&inode->i_mutex);

        if (inode->i_pipe) {
                ret = 0;
                inode->i_pipe->readers++;
        }

        mutex_unlock(&inode->i_mutex);

        return ret;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
        int ret = -ENOENT;

        mutex_lock(&inode->i_mutex);

        if (inode->i_pipe) {
                ret = 0;
                inode->i_pipe->writers++;
        }

        mutex_unlock(&inode->i_mutex);

        return ret;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
        int ret = -ENOENT;

        mutex_lock(&inode->i_mutex);

        if (inode->i_pipe) {
                ret = 0;
                if (filp->f_mode & FMODE_READ)
                        inode->i_pipe->readers++;
                if (filp->f_mode & FMODE_WRITE)
                        inode->i_pipe->writers++;
        }

        mutex_unlock(&inode->i_mutex);

        return ret;
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 *
 * Pipes reuse fifos' file_operations structs.
 */
const struct file_operations read_pipefifo_fops = {
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = pipe_read,
        .write          = bad_pipe_w,
        .poll           = pipe_poll,
        .unlocked_ioctl = pipe_ioctl,
        .open           = pipe_read_open,
        .release        = pipe_read_release,
        .fasync         = pipe_read_fasync,
};

const struct file_operations write_pipefifo_fops = {
        .llseek         = no_llseek,
        .read           = bad_pipe_r,
        .write          = do_sync_write,
        .aio_write      = pipe_write,
        .poll           = pipe_poll,
        .unlocked_ioctl = pipe_ioctl,
        .open           = pipe_write_open,
        .release        = pipe_write_release,
        .fasync         = pipe_write_fasync,
};

const struct file_operations rdwr_pipefifo_fops = {
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = pipe_read,
        .write          = do_sync_write,
        .aio_write      = pipe_write,
        .poll           = pipe_poll,
        .unlocked_ioctl = pipe_ioctl,
        .open           = pipe_rdwr_open,
        .release        = pipe_rdwr_release,
        .fasync         = pipe_rdwr_fasync,
};

struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
        struct pipe_inode_info *pipe;

        pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
        if (pipe) {
                init_waitqueue_head(&pipe->wait);
                pipe->r_counter = pipe->w_counter = 1;
                pipe->inode = inode;
        }

        return pipe;
}

void __free_pipe_info(struct pipe_inode_info *pipe)
{
        int i;

        for (i = 0; i < PIPE_BUFFERS; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
                if (buf->ops)
                        buf->ops->release(pipe, buf);
        }
        if (pipe->tmp_page)
                __free_page(pipe->tmp_page);
        kfree(pipe);
}

void free_pipe_info(struct inode *inode)
{
        __free_pipe_info(inode->i_pipe);
        inode->i_pipe = NULL;
}

static struct vfsmount *pipe_mnt __read_mostly;
static int pipefs_delete_dentry(struct dentry *dentry)
{
        /*
         * At creation time, we pretended this dentry was hashed
         * (by clearing DCACHE_UNHASHED bit in d_flags)
         * At delete time, we restore the truth : not hashed.
         * (so that dput() can proceed correctly)
         */
        dentry->d_flags |= DCACHE_UNHASHED;
        return 0;
}

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
        return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
                                dentry->d_inode->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
        .d_delete       = pipefs_delete_dentry,
        .d_dname        = pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
        struct inode *inode = new_inode(pipe_mnt->mnt_sb);
        struct pipe_inode_info *pipe;

        if (!inode)
                goto fail_inode;

        pipe = alloc_pipe_info(inode);
        if (!pipe)
                goto fail_iput;
        inode->i_pipe = pipe;

        pipe->readers = pipe->writers = 1;
        inode->i_fop = &rdwr_pipefifo_fops;

        /*
         * Mark the inode dirty from the very beginning,
         * that way it will never be moved to the dirty
         * list because "mark_inode_dirty()" will think
         * that it already _is_ on the dirty list.
         */
        inode->i_state = I_DIRTY;
        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

        return inode;

fail_iput:
        iput(inode);

fail_inode:
        return NULL;
}

struct file *create_write_pipe(int flags)
{
        int err;
        struct inode *inode;
        struct file *f;
        struct dentry *dentry;
        struct qstr name = { .name = "" };

        err = -ENFILE;
        inode = get_pipe_inode();
        if (!inode)
                goto err;

        err = -ENOMEM;
        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
        if (!dentry)
                goto err_inode;

        dentry->d_op = &pipefs_dentry_operations;
        /*
         * We dont want to publish this dentry into global dentry hash table.
         * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
         * This permits a working /proc/$pid/fd/XXX on pipes
         */
        dentry->d_flags &= ~DCACHE_UNHASHED;
        d_instantiate(dentry, inode);

        err = -ENFILE;
        f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
        if (!f)
                goto err_dentry;
        f->f_mapping = inode->i_mapping;

        f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
        f->f_version = 0;

        return f;

 err_dentry:
        free_pipe_info(inode);
        dput(dentry);
        return ERR_PTR(err);

 err_inode:
        free_pipe_info(inode);
        iput(inode);
 err:
        return ERR_PTR(err);
}

void free_write_pipe(struct file *f)
{
        free_pipe_info(f->f_path.dentry->d_inode);
        path_put(&f->f_path);
        put_filp(f);
}

struct file *create_read_pipe(struct file *wrf, int flags)
{
        struct file *f = get_empty_filp();
        if (!f)
                return ERR_PTR(-ENFILE);

        /* Grab pipe from the writer */
        f->f_path = wrf->f_path;
        path_get(&wrf->f_path);
        f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;

        f->f_pos = 0;
        f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
        f->f_op = &read_pipefifo_fops;
        f->f_mode = FMODE_READ;
        f->f_version = 0;

        return f;
}

int do_pipe_flags(int *fd, int flags)
{
        struct file *fw, *fr;
        int error;
        int fdw, fdr;

        if (flags & ~(O_CLOEXEC | O_NONBLOCK))
                return -EINVAL;

        fw = create_write_pipe(flags);
        if (IS_ERR(fw))
                return PTR_ERR(fw);
        fr = create_read_pipe(fw, flags);
        error = PTR_ERR(fr);
        if (IS_ERR(fr))
                goto err_write_pipe;

        error = get_unused_fd_flags(flags);
        if (error < 0)
                goto err_read_pipe;
        fdr = error;

        error = get_unused_fd_flags(flags);
        if (error < 0)
                goto err_fdr;
        fdw = error;

        audit_fd_pair(fdr, fdw);
        fd_install(fdr, fr);
        fd_install(fdw, fw);
        fd[0] = fdr;
        fd[1] = fdw;

        return 0;

 err_fdr:
        put_unused_fd(fdr);
 err_read_pipe:
        path_put(&fr->f_path);
        put_filp(fr);
 err_write_pipe:
        free_write_pipe(fw);
        return error;
}

/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
        int fd[2];
        int error;

        error = do_pipe_flags(fd, flags);
        if (!error) {
                if (copy_to_user(fildes, fd, sizeof(fd))) {
                        sys_close(fd[0]);
                        sys_close(fd[1]);
                        error = -EFAULT;
                }
        }
        return error;
}

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
        return sys_pipe2(fildes, 0);
}
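
/*
 * Userspace usage sketch (illustrative, not kernel code):
 *
 *	int pipefd[2];
 *
 *	if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) < 0)
 *		perror("pipe2");
 *
 * pipefd[0] is the read end and pipefd[1] the write end, matching the
 * fd_install() order in do_pipe_flags() above.
 */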

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
static int pipefs_get_sb(struct file_system_type *fs_type,
                         int flags, const char *dev_name, void *data,
                         struct vfsmount *mnt)
{
        return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
}

static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
        .get_sb         = pipefs_get_sb,
        .kill_sb        = kill_anon_super,
};

static int __init init_pipe_fs(void)
{
        int err = register_filesystem(&pipe_fs_type);

        if (!err) {
                pipe_mnt = kern_mount(&pipe_fs_type);
                if (IS_ERR(pipe_mnt)) {
                        err = PTR_ERR(pipe_mnt);
                        unregister_filesystem(&pipe_fs_type);
                }
        }
        return err;
}

static void __exit exit_pipe_fs(void)
{
        unregister_filesystem(&pipe_fs_type);
        mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);