/* linux/fs/btrfs/send.c */
/*
 * Copyright (C) 2012 Alexander Block.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
  18
  19#include <linux/bsearch.h>
  20#include <linux/fs.h>
  21#include <linux/file.h>
  22#include <linux/sort.h>
  23#include <linux/mount.h>
  24#include <linux/xattr.h>
  25#include <linux/posix_acl_xattr.h>
  26#include <linux/radix-tree.h>
  27#include <linux/vmalloc.h>
  28#include <linux/string.h>
  29
  30#include "send.h"
  31#include "backref.h"
  32#include "hash.h"
  33#include "locking.h"
  34#include "disk-io.h"
  35#include "btrfs_inode.h"
  36#include "transaction.h"
  37
  38static int g_verbose = 0;
  39
  40#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
  41
/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
        union {
                struct {
                        char *start;    /* first byte of the path string */
                        char *end;      /* terminating NUL of the path string */

                        char *buf;      /* backing storage: inline_buf or heap */
                        unsigned short buf_len:15;      /* capacity of buf in bytes */
                        unsigned short reversed:1;      /* build right-to-left? */
                        char inline_buf[];      /* small-path storage inside the struct */
                };
                /*
                 * Average path length does not exceed 200 bytes, we'll have
                 * better packing in the slab and higher chance to satisfy
                 * an allocation later during send.
                 */
                char pad[256];
        };
};
/* Bytes of inline_buf available before a heap buffer must be allocated. */
#define FS_PATH_INLINE_SIZE \
        (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
  70
  71
/* reused for each extent */
struct clone_root {
        struct btrfs_root *root;        /* candidate root to clone from */
        u64 ino;                        /* inode in that root */
        u64 offset;                     /* file offset within that inode */

        u64 found_refs;                 /* number of matching refs found */
};

/*
 * Name cache sizing thresholds; presumably the cache is shrunk once it
 * exceeds CLEAN_SIZE — confirm against the name_cache users further down.
 */
#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
  83
/* All state for one send operation lives here. */
struct send_ctx {
        struct file *send_filp;         /* destination file of the send stream */
        loff_t send_off;                /* current write offset in send_filp */
        char *send_buf;                 /* buffer the current command is built in */
        u32 send_size;                  /* bytes used in send_buf so far */
        u32 send_max_size;              /* capacity of send_buf */
        u64 total_send_size;            /* stats: total bytes written */
        u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];        /* stats: bytes per command */
        u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */

        struct btrfs_root *send_root;   /* snapshot being sent */
        struct btrfs_root *parent_root; /* base snapshot for incremental send */
        struct clone_root *clone_roots; /* candidate roots for clone sources */
        int clone_roots_cnt;

        /* current state of the compare_tree call */
        struct btrfs_path *left_path;
        struct btrfs_path *right_path;
        struct btrfs_key *cmp_key;

        /*
         * infos of the currently processed inode. In case of deleted inodes,
         * these are the values from the deleted inode.
         */
        u64 cur_ino;
        u64 cur_inode_gen;
        int cur_inode_new;
        int cur_inode_new_gen;
        int cur_inode_deleted;
        u64 cur_inode_size;
        u64 cur_inode_mode;
        u64 cur_inode_rdev;
        u64 cur_inode_last_extent;

        u64 send_progress;

        struct list_head new_refs;      /* refs added for the current inode */
        struct list_head deleted_refs;  /* refs removed for the current inode */

        struct radix_tree_root name_cache;      /* cache of resolved names, by ino */
        struct list_head name_cache_list;       /* list of all name_cache_entry */
        int name_cache_size;

        struct file_ra_state ra;

        char *read_buf;

        /*
         * We process inodes by their increasing order, so if before an
         * incremental send we reverse the parent/child relationship of
         * directories such that a directory with a lower inode number was
         * the parent of a directory with a higher inode number, and the one
         * becoming the new parent got renamed too, we can't rename/move the
         * directory with lower inode number when we finish processing it - we
         * must process the directory with higher inode number first, then
         * rename/move it and then rename/move the directory with lower inode
         * number. Example follows.
         *
         * Tree state when the first send was performed:
         *
         * .
         * |-- a                   (ino 257)
         *     |-- b               (ino 258)
         *         |
         *         |
         *         |-- c           (ino 259)
         *         |   |-- d       (ino 260)
         *         |
         *         |-- c2          (ino 261)
         *
         * Tree state when the second (incremental) send is performed:
         *
         * .
         * |-- a                   (ino 257)
         *     |-- b               (ino 258)
         *         |-- c2          (ino 261)
         *             |-- d2      (ino 260)
         *                 |-- cc  (ino 259)
         *
         * The sequence of steps that lead to the second state was:
         *
         * mv /a/b/c/d /a/b/c2/d2
         * mv /a/b/c /a/b/c2/d2/cc
         *
         * "c" has lower inode number, but we can't move it (2nd mv operation)
         * before we move "d", which has higher inode number.
         *
         * So we just memorize which move/rename operations must be performed
         * later when their respective parent is processed and moved/renamed.
         */

        /* Indexed by parent directory inode number. */
        struct rb_root pending_dir_moves;

        /*
         * Reverse index, indexed by the inode number of a directory that
         * is waiting for the move/rename of its immediate parent before its
         * own move/rename can be performed.
         */
        struct rb_root waiting_dir_moves;

        /*
         * A directory that is going to be rm'ed might have a child directory
         * which is in the pending directory moves index above. In this case,
         * the directory can only be removed after the move/rename of its child
         * is performed. Example:
         *
         * Parent snapshot:
         *
         * .                        (ino 256)
         * |-- a/                   (ino 257)
         *     |-- b/               (ino 258)
         *         |-- c/           (ino 259)
         *         |   |-- x/       (ino 260)
         *         |
         *         |-- y/           (ino 261)
         *
         * Send snapshot:
         *
         * .                        (ino 256)
         * |-- a/                   (ino 257)
         *     |-- b/               (ino 258)
         *         |-- YY/          (ino 261)
         *              |-- x/      (ino 260)
         *
         * Sequence of steps that lead to the send snapshot:
         * rm -f /a/b/c/foo.txt
         * mv /a/b/y /a/b/YY
         * mv /a/b/c/x /a/b/YY
         * rmdir /a/b/c
         *
         * When the child is processed, its move/rename is delayed until its
         * parent is processed (as explained above), but all other operations
         * like update utimes, chown, chgrp, etc, are performed and the paths
         * that it uses for those operations must use the orphanized name of
         * its parent (the directory we're going to rm later), so we need to
         * memorize that name.
         *
         * Indexed by the inode number of the directory to be deleted.
         */
        struct rb_root orphan_dirs;
};
 226
/*
 * A move/rename of inode @ino that must be delayed until its parent
 * directory @parent_ino has been processed.  Lives in the
 * sctx->pending_dir_moves rb-tree (indexed by parent_ino); @list chains
 * pending moves that share the same parent.
 */
struct pending_dir_move {
        struct rb_node node;
        struct list_head list;
        u64 parent_ino;
        u64 ino;
        u64 gen;
        bool is_orphan;
        struct list_head update_refs;
};
 236
/*
 * Entry of sctx->waiting_dir_moves: directory @ino cannot be moved yet
 * because its parent's move/rename has not been performed.
 */
struct waiting_dir_move {
        struct rb_node node;
        u64 ino;
        /*
         * There might be some directory that could not be removed because it
         * was waiting for this directory inode to be moved first. Therefore
         * after this directory is moved, we can try to rmdir the ino rmdir_ino.
         */
        u64 rmdir_ino;
};
 247
/*
 * Entry of sctx->orphan_dirs: a directory whose removal must wait for a
 * pending child move/rename (see the comment above sctx->orphan_dirs).
 */
struct orphan_dir_info {
        struct rb_node node;
        u64 ino;
        u64 gen;
};
 253
/* One cached name lookup result; stored in sctx->name_cache. */
struct name_cache_entry {
        struct list_head list;  /* linkage in sctx->name_cache_list */
        /*
         * radix_tree has only 32bit entries but we need to handle 64bit inums.
         * We use the lower 32bit of the 64bit inum to store it in the tree. If
         * more then one inum would fall into the same entry, we use radix_list
         * to store the additional entries. radix_list is also used to store
         * entries where two entries have the same inum but different
         * generations.
         */
        struct list_head radix_list;
        u64 ino;                /* inode this name belongs to */
        u64 gen;                /* generation of that inode */
        u64 parent_ino;         /* parent directory inode */
        u64 parent_gen;         /* generation of the parent directory */
        int ret;                /* cached result of the original lookup */
        int need_later_update;
        int name_len;           /* length of name[], not NUL terminated */
        char name[];            /* the cached name itself */
};
 274
 275static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 276
 277static struct waiting_dir_move *
 278get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
 279
 280static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
 281
 282static int need_send_hole(struct send_ctx *sctx)
 283{
 284        return (sctx->parent_root && !sctx->cur_inode_new &&
 285                !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
 286                S_ISREG(sctx->cur_inode_mode));
 287}
 288
 289static void fs_path_reset(struct fs_path *p)
 290{
 291        if (p->reversed) {
 292                p->start = p->buf + p->buf_len - 1;
 293                p->end = p->start;
 294                *p->start = 0;
 295        } else {
 296                p->start = p->buf;
 297                p->end = p->start;
 298                *p->start = 0;
 299        }
 300}
 301
 302static struct fs_path *fs_path_alloc(void)
 303{
 304        struct fs_path *p;
 305
 306        p = kmalloc(sizeof(*p), GFP_NOFS);
 307        if (!p)
 308                return NULL;
 309        p->reversed = 0;
 310        p->buf = p->inline_buf;
 311        p->buf_len = FS_PATH_INLINE_SIZE;
 312        fs_path_reset(p);
 313        return p;
 314}
 315
 316static struct fs_path *fs_path_alloc_reversed(void)
 317{
 318        struct fs_path *p;
 319
 320        p = fs_path_alloc();
 321        if (!p)
 322                return NULL;
 323        p->reversed = 1;
 324        fs_path_reset(p);
 325        return p;
 326}
 327
 328static void fs_path_free(struct fs_path *p)
 329{
 330        if (!p)
 331                return;
 332        if (p->buf != p->inline_buf)
 333                kfree(p->buf);
 334        kfree(p);
 335}
 336
 337static int fs_path_len(struct fs_path *p)
 338{
 339        return p->end - p->start;
 340}
 341
/*
 * Make sure @p's buffer can hold a string of @len bytes plus the
 * terminating NUL.  No-op when the buffer is already large enough.
 * Returns 0 on success, -ENOMEM on allocation failure or when @len
 * would exceed PATH_MAX.
 */
static int fs_path_ensure_buf(struct fs_path *p, int len)
{
        char *tmp_buf;
        int path_len;
        int old_buf_len;

        /* Account for the terminating NUL byte. */
        len++;

        if (p->buf_len >= len)
                return 0;

        if (len > PATH_MAX) {
                WARN_ON(1);
                return -ENOMEM;
        }

        path_len = p->end - p->start;
        old_buf_len = p->buf_len;

        /*
         * First time the inline_buf does not suffice
         */
        if (p->buf == p->inline_buf) {
                /* Switch from inline storage to a heap allocation. */
                tmp_buf = kmalloc(len, GFP_NOFS);
                if (tmp_buf)
                        memcpy(tmp_buf, p->buf, old_buf_len);
        } else {
                tmp_buf = krealloc(p->buf, len, GFP_NOFS);
        }
        if (!tmp_buf)
                return -ENOMEM;
        p->buf = tmp_buf;
        /*
         * The real size of the buffer is bigger, this will let the fast path
         * happen most of the time
         */
        p->buf_len = ksize(p->buf);

        if (p->reversed) {
                /*
                 * A reversed path lives at the tail of the buffer: relocate
                 * the existing string (including its NUL) from its position
                 * in the old-sized buffer to the tail of the enlarged one.
                 */
                tmp_buf = p->buf + old_buf_len - path_len - 1;
                p->end = p->buf + p->buf_len - 1;
                p->start = p->end - path_len;
                memmove(p->start, tmp_buf, path_len + 1);
        } else {
                p->start = p->buf;
                p->end = p->start + path_len;
        }
        return 0;
}
 391
/*
 * Make room in @p for a new path element of @name_len bytes and return in
 * *@prepared the position where the caller must copy the name.  A '/'
 * separator is inserted automatically when the path is not empty.  For
 * reversed paths the element is prepended, otherwise it is appended.
 */
static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
                                   char **prepared)
{
        int ret;
        int new_len;

        new_len = p->end - p->start + name_len;
        if (p->start != p->end)
                new_len++;      /* room for the '/' separator */
        ret = fs_path_ensure_buf(p, new_len);
        if (ret < 0)
                goto out;

        if (p->reversed) {
                /* Prepend: step start backwards over '/' and the name. */
                if (p->start != p->end)
                        *--p->start = '/';
                p->start -= name_len;
                *prepared = p->start;
        } else {
                /* Append: advance end over '/' and the name, re-terminate. */
                if (p->start != p->end)
                        *p->end++ = '/';
                *prepared = p->end;
                p->end += name_len;
                *p->end = 0;
        }

out:
        return ret;
}
 421
 422static int fs_path_add(struct fs_path *p, const char *name, int name_len)
 423{
 424        int ret;
 425        char *prepared;
 426
 427        ret = fs_path_prepare_for_add(p, name_len, &prepared);
 428        if (ret < 0)
 429                goto out;
 430        memcpy(prepared, name, name_len);
 431
 432out:
 433        return ret;
 434}
 435
 436static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
 437{
 438        int ret;
 439        char *prepared;
 440
 441        ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
 442        if (ret < 0)
 443                goto out;
 444        memcpy(prepared, p2->start, p2->end - p2->start);
 445
 446out:
 447        return ret;
 448}
 449
 450static int fs_path_add_from_extent_buffer(struct fs_path *p,
 451                                          struct extent_buffer *eb,
 452                                          unsigned long off, int len)
 453{
 454        int ret;
 455        char *prepared;
 456
 457        ret = fs_path_prepare_for_add(p, len, &prepared);
 458        if (ret < 0)
 459                goto out;
 460
 461        read_extent_buffer(eb, prepared, off, len);
 462
 463out:
 464        return ret;
 465}
 466
 467static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 468{
 469        int ret;
 470
 471        p->reversed = from->reversed;
 472        fs_path_reset(p);
 473
 474        ret = fs_path_add_path(p, from);
 475
 476        return ret;
 477}
 478
 479
/*
 * Turn a reversed path (string stored at the tail of the buffer) into a
 * normal one by moving it to the front of the buffer.  No-op when @p is
 * not reversed.
 */
static void fs_path_unreverse(struct fs_path *p)
{
        char *tmp;
        int len;

        if (!p->reversed)
                return;

        tmp = p->start;
        len = p->end - p->start;
        p->start = p->buf;
        p->end = p->start + len;
        /* Source and destination may overlap, hence memmove. */
        memmove(p->start, tmp, len + 1);
        p->reversed = 0;
}
 495
 496static struct btrfs_path *alloc_path_for_send(void)
 497{
 498        struct btrfs_path *path;
 499
 500        path = btrfs_alloc_path();
 501        if (!path)
 502                return NULL;
 503        path->search_commit_root = 1;
 504        path->skip_locking = 1;
 505        path->need_commit_sem = 1;
 506        return path;
 507}
 508
 509static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 510{
 511        int ret;
 512        mm_segment_t old_fs;
 513        u32 pos = 0;
 514
 515        old_fs = get_fs();
 516        set_fs(KERNEL_DS);
 517
 518        while (pos < len) {
 519                ret = vfs_write(filp, (__force const char __user *)buf + pos,
 520                                len - pos, off);
 521                /* TODO handle that correctly */
 522                /*if (ret == -ERESTARTSYS) {
 523                        continue;
 524                }*/
 525                if (ret < 0)
 526                        goto out;
 527                if (ret == 0) {
 528                        ret = -EIO;
 529                        goto out;
 530                }
 531                pos += ret;
 532        }
 533
 534        ret = 0;
 535
 536out:
 537        set_fs(old_fs);
 538        return ret;
 539}
 540
 541static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 542{
 543        struct btrfs_tlv_header *hdr;
 544        int total_len = sizeof(*hdr) + len;
 545        int left = sctx->send_max_size - sctx->send_size;
 546
 547        if (unlikely(left < total_len))
 548                return -EOVERFLOW;
 549
 550        hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
 551        hdr->tlv_type = cpu_to_le16(attr);
 552        hdr->tlv_len = cpu_to_le16(len);
 553        memcpy(hdr + 1, data, len);
 554        sctx->send_size += total_len;
 555
 556        return 0;
 557}
 558
/*
 * Generate a tlv_put_u<bits>() helper that converts @value to little
 * endian before appending it as a TLV.  Only the 64 bit variant is
 * instantiated here.
 */
#define TLV_PUT_DEFINE_INT(bits) \
        static int tlv_put_u##bits(struct send_ctx *sctx,               \
                        u##bits attr, u##bits value)                    \
        {                                                               \
                __le##bits __tmp = cpu_to_le##bits(value);              \
                return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));      \
        }

TLV_PUT_DEFINE_INT(64)
 568
 569static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 570                          const char *str, int len)
 571{
 572        if (len == -1)
 573                len = strlen(str);
 574        return tlv_put(sctx, attr, str, len);
 575}
 576
/* Append a BTRFS_UUID_SIZE byte UUID as a TLV. */
static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
                        const u8 *uuid)
{
        return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}
 582
/*
 * Copy a btrfs_timespec out of extent buffer @eb into a stack copy and
 * append it as a TLV.  @ts is the offset of the timespec within @eb.
 */
static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
                                  struct extent_buffer *eb,
                                  struct btrfs_timespec *ts)
{
        struct btrfs_timespec bts;
        read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
        return tlv_put(sctx, attr, &bts, sizeof(bts));
}
 591
 592
 593#define TLV_PUT(sctx, attrtype, attrlen, data) \
 594        do { \
 595                ret = tlv_put(sctx, attrtype, attrlen, data); \
 596                if (ret < 0) \
 597                        goto tlv_put_failure; \
 598        } while (0)
 599
 600#define TLV_PUT_INT(sctx, attrtype, bits, value) \
 601        do { \
 602                ret = tlv_put_u##bits(sctx, attrtype, value); \
 603                if (ret < 0) \
 604                        goto tlv_put_failure; \
 605        } while (0)
 606
 607#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
 608#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
 609#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
 610#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
 611#define TLV_PUT_STRING(sctx, attrtype, str, len) \
 612        do { \
 613                ret = tlv_put_string(sctx, attrtype, str, len); \
 614                if (ret < 0) \
 615                        goto tlv_put_failure; \
 616        } while (0)
 617#define TLV_PUT_PATH(sctx, attrtype, p) \
 618        do { \
 619                ret = tlv_put_string(sctx, attrtype, p->start, \
 620                        p->end - p->start); \
 621                if (ret < 0) \
 622                        goto tlv_put_failure; \
 623        } while(0)
 624#define TLV_PUT_UUID(sctx, attrtype, uuid) \
 625        do { \
 626                ret = tlv_put_uuid(sctx, attrtype, uuid); \
 627                if (ret < 0) \
 628                        goto tlv_put_failure; \
 629        } while (0)
 630#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 631        do { \
 632                ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
 633                if (ret < 0) \
 634                        goto tlv_put_failure; \
 635        } while (0)
 636
 637static int send_header(struct send_ctx *sctx)
 638{
 639        struct btrfs_stream_header hdr;
 640
 641        strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
 642        hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
 643
 644        return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
 645                                        &sctx->send_off);
 646}
 647
 648/*
 649 * For each command/item we want to send to userspace, we call this function.
 650 */
 651static int begin_cmd(struct send_ctx *sctx, int cmd)
 652{
 653        struct btrfs_cmd_header *hdr;
 654
 655        if (WARN_ON(!sctx->send_buf))
 656                return -EINVAL;
 657
 658        BUG_ON(sctx->send_size);
 659
 660        sctx->send_size += sizeof(*hdr);
 661        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 662        hdr->cmd = cpu_to_le16(cmd);
 663
 664        return 0;
 665}
 666
/*
 * Finish the command started with begin_cmd(): fill in the total payload
 * length, compute the crc32c over the whole command and write it to the
 * send file.  Resets send_size for the next command.
 */
static int send_cmd(struct send_ctx *sctx)
{
        int ret;
        struct btrfs_cmd_header *hdr;
        u32 crc;

        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
        hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
        /* The crc field must be zero while the checksum is computed. */
        hdr->crc = 0;

        crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
        hdr->crc = cpu_to_le32(crc);

        ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
                                        &sctx->send_off);

        /* Update per-command statistics and reset the buffer. */
        sctx->total_send_size += sctx->send_size;
        sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
        sctx->send_size = 0;

        return ret;
}
 689
/*
 * Sends a move instruction to user space
 * (a RENAME command with the old and new paths).
 */
static int send_rename(struct send_ctx *sctx,
                     struct fs_path *from, struct fs_path *to)
{
        int ret;

verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

        ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
        if (ret < 0)
                goto out;

        /* TLV_PUT_PATH jumps to tlv_put_failure on error. */
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
 713
/*
 * Sends a link instruction to user space
 * (create hard link @path pointing at existing @lnk).
 */
static int send_link(struct send_ctx *sctx,
                     struct fs_path *path, struct fs_path *lnk)
{
        int ret;

verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

        ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
        if (ret < 0)
                goto out;

        /* TLV_PUT_PATH jumps to tlv_put_failure on error. */
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
 737
/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
        int ret;

verbose_printk("btrfs: send_unlink %s\n", path->start);

        ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
        if (ret < 0)
                goto out;

        /* TLV_PUT_PATH jumps to tlv_put_failure on error. */
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
 759
/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
        int ret;

verbose_printk("btrfs: send_rmdir %s\n", path->start);

        ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
        if (ret < 0)
                goto out;

        /* TLV_PUT_PATH jumps to tlv_put_failure on error. */
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
 781
 782/*
 783 * Helper function to retrieve some fields from an inode item.
 784 */
 785static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
 786                          u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
 787                          u64 *gid, u64 *rdev)
 788{
 789        int ret;
 790        struct btrfs_inode_item *ii;
 791        struct btrfs_key key;
 792
 793        key.objectid = ino;
 794        key.type = BTRFS_INODE_ITEM_KEY;
 795        key.offset = 0;
 796        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 797        if (ret) {
 798                if (ret > 0)
 799                        ret = -ENOENT;
 800                return ret;
 801        }
 802
 803        ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
 804                        struct btrfs_inode_item);
 805        if (size)
 806                *size = btrfs_inode_size(path->nodes[0], ii);
 807        if (gen)
 808                *gen = btrfs_inode_generation(path->nodes[0], ii);
 809        if (mode)
 810                *mode = btrfs_inode_mode(path->nodes[0], ii);
 811        if (uid)
 812                *uid = btrfs_inode_uid(path->nodes[0], ii);
 813        if (gid)
 814                *gid = btrfs_inode_gid(path->nodes[0], ii);
 815        if (rdev)
 816                *rdev = btrfs_inode_rdev(path->nodes[0], ii);
 817
 818        return ret;
 819}
 820
 821static int get_inode_info(struct btrfs_root *root,
 822                          u64 ino, u64 *size, u64 *gen,
 823                          u64 *mode, u64 *uid, u64 *gid,
 824                          u64 *rdev)
 825{
 826        struct btrfs_path *path;
 827        int ret;
 828
 829        path = alloc_path_for_send();
 830        if (!path)
 831                return -ENOMEM;
 832        ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
 833                               rdev);
 834        btrfs_free_path(path);
 835        return ret;
 836}
 837
 838typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
 839                                   struct fs_path *p,
 840                                   void *ctx);
 841
/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
                             struct btrfs_key *found_key, int resolve,
                             iterate_inode_ref_t iterate, void *ctx)
{
        struct extent_buffer *eb = path->nodes[0];
        struct btrfs_item *item;
        struct btrfs_inode_ref *iref;
        struct btrfs_inode_extref *extref;
        struct btrfs_path *tmp_path;
        struct fs_path *p;
        u32 cur = 0;            /* byte offset of current entry within the item */
        u32 total;              /* total item size in bytes */
        int slot = path->slots[0];
        u32 name_len;
        char *start;
        int ret = 0;
        int num = 0;            /* entry counter passed to the callback */
        int index;
        u64 dir;                /* parent directory inode of the entry */
        unsigned long name_off;
        unsigned long elem_size;
        unsigned long ptr;

        /* Reversed path: names are built right-to-left (parent prepended). */
        p = fs_path_alloc_reversed();
        if (!p)
                return -ENOMEM;

        tmp_path = alloc_path_for_send();
        if (!tmp_path) {
                fs_path_free(p);
                return -ENOMEM;
        }


        if (found_key->type == BTRFS_INODE_REF_KEY) {
                ptr = (unsigned long)btrfs_item_ptr(eb, slot,
                                                    struct btrfs_inode_ref);
                item = btrfs_item_nr(slot);
                total = btrfs_item_size(eb, item);
                elem_size = sizeof(*iref);
        } else {
                ptr = btrfs_item_ptr_offset(eb, slot);
                total = btrfs_item_size_nr(eb, slot);
                elem_size = sizeof(*extref);
        }

        while (cur < total) {
                fs_path_reset(p);

                if (found_key->type == BTRFS_INODE_REF_KEY) {
                        iref = (struct btrfs_inode_ref *)(ptr + cur);
                        name_len = btrfs_inode_ref_name_len(eb, iref);
                        name_off = (unsigned long)(iref + 1);
                        index = btrfs_inode_ref_index(eb, iref);
                        /* Plain refs store the parent dir in the key offset. */
                        dir = found_key->offset;
                } else {
                        extref = (struct btrfs_inode_extref *)(ptr + cur);
                        name_len = btrfs_inode_extref_name_len(eb, extref);
                        name_off = (unsigned long)&extref->name;
                        index = btrfs_inode_extref_index(eb, extref);
                        /* Extrefs carry the parent dir inside the item. */
                        dir = btrfs_inode_extref_parent(eb, extref);
                }

                if (resolve) {
                        /* Resolve the ref into a full path from the root. */
                        start = btrfs_ref_to_path(root, tmp_path, name_len,
                                                  name_off, eb, dir,
                                                  p->buf, p->buf_len);
                        if (IS_ERR(start)) {
                                ret = PTR_ERR(start);
                                goto out;
                        }
                        if (start < p->buf) {
                                /* overflow , try again with larger buffer */
                                ret = fs_path_ensure_buf(p,
                                                p->buf_len + p->buf - start);
                                if (ret < 0)
                                        goto out;
                                start = btrfs_ref_to_path(root, tmp_path,
                                                          name_len, name_off,
                                                          eb, dir,
                                                          p->buf, p->buf_len);
                                if (IS_ERR(start)) {
                                        ret = PTR_ERR(start);
                                        goto out;
                                }
                                BUG_ON(start < p->buf);
                        }
                        p->start = start;
                } else {
                        /* No resolution requested: hand back just the name. */
                        ret = fs_path_add_from_extent_buffer(p, eb, name_off,
                                                             name_len);
                        if (ret < 0)
                                goto out;
                }

                cur += elem_size + name_len;
                ret = iterate(num, dir, index, p, ctx);
                if (ret)
                        goto out;
                num++;
        }

out:
        btrfs_free_path(tmp_path);
        fs_path_free(p);
        return ret;
}
 957
/*
 * Callback invoked by iterate_dir_item() for each entry packed into one
 * btrfs_dir_item. @name/@data point into a scratch buffer owned by the
 * iterator and are only valid for the duration of the call. Return a
 * negative value to abort with an error or 1 to stop the iteration early.
 */
typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);
 962
 963/*
 964 * Helper function to iterate the entries in ONE btrfs_dir_item.
 965 * The iterate callback may return a non zero value to stop iteration. This can
 966 * be a negative value for error codes or 1 to simply stop it.
 967 *
 968 * path must point to the dir item when called.
 969 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;	/* scratch copy of one entry's name + data */
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;		/* bytes of the item consumed so far */
	u32 len;		/* on-disk size of the current entry */
	u32 total;		/* total item size, bounds the walk */
	int slot;
	int num;		/* entry index handed to the callback */
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_NOFS);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		/*
		 * Validate the on-disk lengths before trusting them, so a
		 * corrupted item can't make us overrun the buffer below.
		 */
		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			/* Grow the buffer, falling back to vmalloc(). */
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						     GFP_NOFS | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = vmalloc(buf_len);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				name_len + data_len);

		/* Advance to the next entry packed inside this item. */
		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			/* Callback asked to stop; that is not an error. */
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}
1082
1083static int __copy_first_ref(int num, u64 dir, int index,
1084                            struct fs_path *p, void *ctx)
1085{
1086        int ret;
1087        struct fs_path *pt = ctx;
1088
1089        ret = fs_path_copy(pt, p);
1090        if (ret < 0)
1091                return ret;
1092
1093        /* we want the first only */
1094        return 1;
1095}
1096
1097/*
 * Retrieve the first path of an inode. If an inode has more than one
1099 * ref/hardlink, this is ignored.
1100 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	/* Search from offset 0 to land on the inode's first ref item. */
	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/* ret > 0: no item at or after the key; report 1 to caller */
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		/* First item found does not belong to this inode's refs. */
		ret = -ENOENT;
		goto out;
	}

	/* Resolve only the first ref into a full path (see __copy_first_ref). */
	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}
1143
/*
 * Context handed to __iterate_backrefs() while resolving all backrefs of an
 * extent during clone source detection (see find_extent_clone()).
 */
struct backref_ctx {
	struct send_ctx *sctx;

	/* scratch path for inode lookups, released after each use */
	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};
1164
1165static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1166{
1167        u64 root = (u64)(uintptr_t)key;
1168        struct clone_root *cr = (struct clone_root *)elt;
1169
1170        if (root < cr->root->objectid)
1171                return -1;
1172        if (root > cr->root->objectid)
1173                return 1;
1174        return 0;
1175}
1176
1177static int __clone_root_cmp_sort(const void *e1, const void *e2)
1178{
1179        struct clone_root *cr1 = (struct clone_root *)e1;
1180        struct clone_root *cr2 = (struct clone_root *)e2;
1181
1182        if (cr1->root->objectid < cr2->root->objectid)
1183                return -1;
1184        if (cr1->root->objectid > cr2->root->objectid)
1185                return 1;
1186        return 0;
1187}
1188
/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	/* Note when the extent is found at its own current location. */
	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind its i_size. Don't
	 * accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently send. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	/* Record this candidate, preferring the lowest ino/offset pair. */
	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}
1263
1264/*
1265 * Given an inode, offset and extent item, it finds a good clone for a clone
1266 * instruction. Returns -ENOENT when none could be found. The function makes
1267 * sure that the returned clone is usable at the point where sending is at the
1268 * moment. This means, that no clones are accepted which lie behind the current
1269 * inode+offset.
1270 *
1271 * path must point to the extent item when called.
1272 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	int ret;
	int extent_type;
	u64 logical;		/* disk_byte plus the file extent's data offset */
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;	/* offset inside the extent, for backref filtering */
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		/* No clone source lookup for inline extents. */
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		/* A zero bytenr means no real extent backs this range. */
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	/* Map the disk byte back to its extent item in the extent tree. */
	down_read(&sctx->send_root->fs_info->commit_root_sem);
	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&sctx->send_root->fs_info->commit_root_sem);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		/* File data must never resolve to a tree block extent. */
		ret = -EIO;
		goto out;
	}

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(sctx->send_root->fs_info,
					found_key.objectid, extent_item_pos, 1,
					__iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu",
				ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
		"ino=%llu, "
		"num_bytes=%llu, logical=%llu\n",
		data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		verbose_printk("btrfs:    no clones found\n");

	/* Pick the best candidate recorded by __iterate_backrefs. */
	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		if (compressed != BTRFS_COMPRESS_NONE) {
			/*
			 * Offsets given by iterate_extent_inodes() are relative
			 * to the start of the extent, we need to add logical
			 * offset from the file extent item.
			 * (See why at backref.c:check_extent_in_eb())
			 */
			cur_clone_root->offset += btrfs_file_extent_offset(eb,
									   fi);
		}
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}
1441
1442static int read_symlink(struct btrfs_root *root,
1443                        u64 ino,
1444                        struct fs_path *dest)
1445{
1446        int ret;
1447        struct btrfs_path *path;
1448        struct btrfs_key key;
1449        struct btrfs_file_extent_item *ei;
1450        u8 type;
1451        u8 compression;
1452        unsigned long off;
1453        int len;
1454
1455        path = alloc_path_for_send();
1456        if (!path)
1457                return -ENOMEM;
1458
1459        key.objectid = ino;
1460        key.type = BTRFS_EXTENT_DATA_KEY;
1461        key.offset = 0;
1462        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1463        if (ret < 0)
1464                goto out;
1465        BUG_ON(ret);
1466
1467        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1468                        struct btrfs_file_extent_item);
1469        type = btrfs_file_extent_type(path->nodes[0], ei);
1470        compression = btrfs_file_extent_compression(path->nodes[0], ei);
1471        BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1472        BUG_ON(compression);
1473
1474        off = btrfs_file_extent_inline_start(ei);
1475        len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1476
1477        ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1478
1479out:
1480        btrfs_free_path(path);
1481        return ret;
1482}
1483
1484/*
1485 * Helper function to generate a file name that is unique in the root of
1486 * send_root and parent_root. This is used to generate names for orphan inodes.
1487 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];	/* candidate name, "o<ino>-<gen>-<idx>" */
	int len;
	u64 idx = 0;	/* bumped until the name is free in both roots */

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		/* The name must be unused in the top dir of the send root... */
		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		/* ...and in the top dir of the parent root as well. */
		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}
1551
/*
 * State of an inode as computed by get_cur_inode_state(): whether it exists
 * in the send root (new side) and/or the parent root (old side), and whether
 * the create/delete has already happened relative to sctx->send_progress.
 */
enum inode_state {
	inode_state_no_change,	/* same generation in both roots */
	inode_state_will_create,	/* only in send root, not yet processed */
	inode_state_did_create,	/* only in send root, already processed */
	inode_state_will_delete,	/* only in parent root, not yet processed */
	inode_state_did_delete,	/* only in parent root, already processed */
};
1559
/*
 * Determine the state of inode @ino with generation @gen by looking it up in
 * both the send root and the parent root and comparing the generations found
 * there with @gen and with the current send progress. Returns one of
 * enum inode_state, or -ENOENT if no root holds the inode with this
 * generation, or another negative error from the lookups.
 */
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;	/* lookup result in send_root: 0 or -ENOENT */
	int right_ret;	/* lookup result in parent_root: 0 or -ENOENT */
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		/* Full send: there is no old side the inode could be on. */
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		/* Present in both roots. */
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			/* Old inode deleted and ino re-used on the new side. */
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			/* Our generation only exists on the old side. */
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else  {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		/* Present only in the send root. */
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		/* Present only in the parent root. */
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}
1625
1626static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1627{
1628        int ret;
1629
1630        ret = get_cur_inode_state(sctx, ino, gen);
1631        if (ret < 0)
1632                goto out;
1633
1634        if (ret == inode_state_no_change ||
1635            ret == inode_state_did_create ||
1636            ret == inode_state_will_delete)
1637                ret = 1;
1638        else
1639                ret = 0;
1640
1641out:
1642        return ret;
1643}
1644
1645/*
1646 * Helper function to lookup a dir item in a dir.
1647 */
1648static int lookup_dir_item_inode(struct btrfs_root *root,
1649                                 u64 dir, const char *name, int name_len,
1650                                 u64 *found_inode,
1651                                 u8 *found_type)
1652{
1653        int ret = 0;
1654        struct btrfs_dir_item *di;
1655        struct btrfs_key key;
1656        struct btrfs_path *path;
1657
1658        path = alloc_path_for_send();
1659        if (!path)
1660                return -ENOMEM;
1661
1662        di = btrfs_lookup_dir_item(NULL, root, path,
1663                        dir, name, name_len, 0);
1664        if (!di) {
1665                ret = -ENOENT;
1666                goto out;
1667        }
1668        if (IS_ERR(di)) {
1669                ret = PTR_ERR(di);
1670                goto out;
1671        }
1672        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1673        if (key.type == BTRFS_ROOT_ITEM_KEY) {
1674                ret = -ENOENT;
1675                goto out;
1676        }
1677        *found_inode = key.objectid;
1678        *found_type = btrfs_dir_type(path->nodes[0], di);
1679
1680out:
1681        btrfs_free_path(path);
1682        return ret;
1683}
1684
1685/*
1686 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1687 * generation of the parent dir and the name of the dir entry.
1688 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				path->slots[0]);
	/* found_key is only filled in when ret == 0, hence the ret check first */
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		/* Regular ref: name follows the struct, parent is the key offset. */
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		/* Extended ref: name and parent are stored in the extref itself. */
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	/* Drop the path before doing another tree search below. */
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}
1755
1756static int is_first_ref(struct btrfs_root *root,
1757                        u64 ino, u64 dir,
1758                        const char *name, int name_len)
1759{
1760        int ret;
1761        struct fs_path *tmp_name;
1762        u64 tmp_dir;
1763
1764        tmp_name = fs_path_alloc();
1765        if (!tmp_name)
1766                return -ENOMEM;
1767
1768        ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
1769        if (ret < 0)
1770                goto out;
1771
1772        if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1773                ret = 0;
1774                goto out;
1775        }
1776
1777        ret = !memcmp(tmp_name->start, name, name_len);
1778
1779out:
1780        fs_path_free(tmp_name);
1781        return ret;
1782}
1783
1784/*
1785 * Used by process_recorded_refs to determine if a new ref would overwrite an
1786 * already existing ref. In case it detects an overwrite, it returns the
1787 * inode/gen in who_ino/who_gen.
1788 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1789 * to make sure later references to the overwritten inode are possible.
1790 * Orphanizing is however only required for the first ref of an inode.
1791 * process_recorded_refs does an additional is_first_ref check to see if
1792 * orphanizing is really required.
1793 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	/* Without a parent snapshot nothing can be overwritten. */
	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			/* dir doesn't exist in the parent root: no overwrite */
			ret = 0;
			goto out;
		}
		/* dir was re-created with a different generation: no overwrite */
		if (gen != dir_gen)
			goto out;
	}

	/* Find out which inode owns this name in the parent root, if any. */
	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
			&other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		/* A not yet processed inode still owns the name: overwrite. */
		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}
1857
1858/*
1859 * Checks if the ref was overwritten by an already processed inode. This is
1860 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1861 * thus the orphan name needs be used.
1862 * process_recorded_refs also uses it to avoid unlinking of refs that were
1863 * overwritten.
1864 */
static int did_overwrite_ref(struct send_ctx *sctx,
                            u64 dir, u64 dir_gen,
                            u64 ino, u64 ino_gen,
                            const char *name, int name_len)
{
        int ret = 0;
        u64 gen;
        u64 ow_inode;
        u8 other_type;

        /* Without a parent snapshot no ref can have been overwritten. */
        if (!sctx->parent_root)
                goto out;

        ret = is_inode_existent(sctx, dir, dir_gen);
        if (ret <= 0)
                goto out;

        /* check if the ref was overwritten by another ref */
        ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
                        &ow_inode, &other_type);
        if (ret < 0 && ret != -ENOENT)
                goto out;
        if (ret) {
                /* was never and will never be overwritten */
                ret = 0;
                goto out;
        }

        ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
                        NULL, NULL);
        if (ret < 0)
                goto out;

        /*
         * The dir entry in the send root still belongs to (ino, ino_gen),
         * so nothing overwrote it.
         */
        if (ow_inode == ino && gen == ino_gen) {
                ret = 0;
                goto out;
        }

        /* we know that it is or will be overwritten. check this now */
        if (ow_inode < sctx->send_progress)
                ret = 1;
        else
                ret = 0;

out:
        return ret;
}
1912
1913/*
1914 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1915 * that got overwritten. This is used by process_recorded_refs to determine
1916 * if it has to use the path as returned by get_cur_path or the orphan name.
1917 */
1918static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1919{
1920        int ret = 0;
1921        struct fs_path *name = NULL;
1922        u64 dir;
1923        u64 dir_gen;
1924
1925        if (!sctx->parent_root)
1926                goto out;
1927
1928        name = fs_path_alloc();
1929        if (!name)
1930                return -ENOMEM;
1931
1932        ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1933        if (ret < 0)
1934                goto out;
1935
1936        ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1937                        name->start, fs_path_len(name));
1938
1939out:
1940        fs_path_free(name);
1941        return ret;
1942}
1943
1944/*
1945 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1946 * so we need to do some special handling in case we have clashes. This function
1947 * takes care of this with the help of name_cache_entry::radix_list.
1948 * In case of error, nce is kfreed.
1949 */
1950static int name_cache_insert(struct send_ctx *sctx,
1951                             struct name_cache_entry *nce)
1952{
1953        int ret = 0;
1954        struct list_head *nce_head;
1955
1956        nce_head = radix_tree_lookup(&sctx->name_cache,
1957                        (unsigned long)nce->ino);
1958        if (!nce_head) {
1959                nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1960                if (!nce_head) {
1961                        kfree(nce);
1962                        return -ENOMEM;
1963                }
1964                INIT_LIST_HEAD(nce_head);
1965
1966                ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1967                if (ret < 0) {
1968                        kfree(nce_head);
1969                        kfree(nce);
1970                        return ret;
1971                }
1972        }
1973        list_add_tail(&nce->radix_list, nce_head);
1974        list_add_tail(&nce->list, &sctx->name_cache_list);
1975        sctx->name_cache_size++;
1976
1977        return ret;
1978}
1979
/*
 * Remove a cache entry from both the radix tree chain and the LRU list,
 * releasing the per-index list head once it becomes empty. The entry itself
 * is NOT freed here; callers kfree() it.
 */
static void name_cache_delete(struct send_ctx *sctx,
                              struct name_cache_entry *nce)
{
        struct list_head *nce_head;

        nce_head = radix_tree_lookup(&sctx->name_cache,
                        (unsigned long)nce->ino);
        if (!nce_head) {
                /* Should never happen; complain but keep going. */
                btrfs_err(sctx->send_root->fs_info,
              "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
                        nce->ino, sctx->name_cache_size);
        }

        list_del(&nce->radix_list);
        list_del(&nce->list);
        sctx->name_cache_size--;

        /*
         * We may not get to the final release of nce_head if the lookup fails
         */
        if (nce_head && list_empty(nce_head)) {
                radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
                kfree(nce_head);
        }
}
2005
2006static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
2007                                                    u64 ino, u64 gen)
2008{
2009        struct list_head *nce_head;
2010        struct name_cache_entry *cur;
2011
2012        nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
2013        if (!nce_head)
2014                return NULL;
2015
2016        list_for_each_entry(cur, nce_head, radix_list) {
2017                if (cur->ino == ino && cur->gen == gen)
2018                        return cur;
2019        }
2020        return NULL;
2021}
2022
2023/*
2024 * Removes the entry from the list and adds it back to the end. This marks the
2025 * entry as recently used so that name_cache_clean_unused does not remove it.
2026 */
2027static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
2028{
2029        list_del(&nce->list);
2030        list_add_tail(&nce->list, &sctx->name_cache_list);
2031}
2032
2033/*
2034 * Remove some entries from the beginning of name_cache_list.
2035 */
2036static void name_cache_clean_unused(struct send_ctx *sctx)
2037{
2038        struct name_cache_entry *nce;
2039
2040        if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
2041                return;
2042
2043        while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
2044                nce = list_entry(sctx->name_cache_list.next,
2045                                struct name_cache_entry, list);
2046                name_cache_delete(sctx, nce);
2047                kfree(nce);
2048        }
2049}
2050
2051static void name_cache_free(struct send_ctx *sctx)
2052{
2053        struct name_cache_entry *nce;
2054
2055        while (!list_empty(&sctx->name_cache_list)) {
2056                nce = list_entry(sctx->name_cache_list.next,
2057                                struct name_cache_entry, list);
2058                name_cache_delete(sctx, nce);
2059                kfree(nce);
2060        }
2061}
2062
2063/*
2064 * Used by get_cur_path for each ref up to the root.
2065 * Returns 0 if it succeeded.
2066 * Returns 1 if the inode is not existent or got overwritten. In that case, the
2067 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
2068 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
2069 * Returns <0 in case of error.
2070 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
                                     u64 ino, u64 gen,
                                     u64 *parent_ino,
                                     u64 *parent_gen,
                                     struct fs_path *dest)
{
        int ret;
        int nce_ret;
        struct name_cache_entry *nce = NULL;

        /*
         * First check if we already did a call to this function with the same
         * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
         * return the cached result.
         */
        nce = name_cache_search(sctx, ino, gen);
        if (nce) {
                if (ino < sctx->send_progress && nce->need_later_update) {
                        /*
                         * The entry was cached while the inode was not yet
                         * processed and may be stale now; drop it and redo
                         * the lookup below.
                         */
                        name_cache_delete(sctx, nce);
                        kfree(nce);
                        nce = NULL;
                } else {
                        name_cache_used(sctx, nce);
                        *parent_ino = nce->parent_ino;
                        *parent_gen = nce->parent_gen;
                        ret = fs_path_add(dest, nce->name, nce->name_len);
                        if (ret < 0)
                                goto out;
                        /* nce->ret replays the original 0/1 result. */
                        ret = nce->ret;
                        goto out;
                }
        }

        /*
         * If the inode is not existent yet, add the orphan name and return 1.
         * This should only happen for the parent dir that we determine in
         * __record_new_ref
         */
        ret = is_inode_existent(sctx, ino, gen);
        if (ret < 0)
                goto out;

        if (!ret) {
                ret = gen_unique_name(sctx, ino, gen, dest);
                if (ret < 0)
                        goto out;
                ret = 1;
                goto out_cache;
        }

        /*
         * Depending on whether the inode was already processed or not, use
         * send_root or parent_root for ref lookup.
         */
        if (ino < sctx->send_progress)
                ret = get_first_ref(sctx->send_root, ino,
                                    parent_ino, parent_gen, dest);
        else
                ret = get_first_ref(sctx->parent_root, ino,
                                    parent_ino, parent_gen, dest);
        if (ret < 0)
                goto out;

        /*
         * Check if the ref was overwritten by an inode's ref that was processed
         * earlier. If yes, treat as orphan and return 1.
         */
        ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
                        dest->start, dest->end - dest->start);
        if (ret < 0)
                goto out;
        if (ret) {
                /* Overwritten: replace the real name with the orphan name. */
                fs_path_reset(dest);
                ret = gen_unique_name(sctx, ino, gen, dest);
                if (ret < 0)
                        goto out;
                ret = 1;
        }

out_cache:
        /*
         * Store the result of the lookup in the name cache.
         */
        nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
        if (!nce) {
                ret = -ENOMEM;
                goto out;
        }

        nce->ino = ino;
        nce->gen = gen;
        nce->parent_ino = *parent_ino;
        nce->parent_gen = *parent_gen;
        nce->name_len = fs_path_len(dest);
        nce->ret = ret;
        /* Name is stored in the flexible tail allocated above. */
        strcpy(nce->name, dest->start);

        /* Unprocessed inodes may change their name later; mark as such. */
        if (ino < sctx->send_progress)
                nce->need_later_update = 0;
        else
                nce->need_later_update = 1;

        nce_ret = name_cache_insert(sctx, nce);
        if (nce_ret < 0)
                ret = nce_ret;
        name_cache_clean_unused(sctx);

out:
        return ret;
}
2181
2182/*
2183 * Magic happens here. This function returns the first ref to an inode as it
2184 * would look like while receiving the stream at this point in time.
2185 * We walk the path up to the root. For every inode in between, we check if it
2186 * was already processed/sent. If yes, we continue with the parent as found
2187 * in send_root. If not, we continue with the parent as found in parent_root.
2188 * If we encounter an inode that was deleted at this point in time, we use the
2189 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2190 * that were not created yet and overwritten inodes/refs.
2191 *
 * When do we have orphan inodes:
2193 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
2195 *    inside which were not processed yet (pending for move/delete). If anyone
2196 *    tried to get the path to the dir items, it would get a path inside that
2197 *    orphan directory.
2198 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2199 *    of an unprocessed inode. If in that case the first ref would be
2200 *    overwritten, the overwritten inode gets "orphanized". Later when we
2201 *    process this overwritten inode, it is restored at a new place by moving
2202 *    the orphan inode.
2203 *
2204 * sctx->send_progress tells this function at which point in time receiving
2205 * would be.
2206 */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
                        struct fs_path *dest)
{
        int ret = 0;
        struct fs_path *name = NULL;
        u64 parent_inode = 0;
        u64 parent_gen = 0;
        int stop = 0;

        name = fs_path_alloc();
        if (!name) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * Build the path in reverse (leaf name first) while walking up to
         * the root; it is unreversed on success before returning.
         */
        dest->reversed = 1;
        fs_path_reset(dest);

        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
                fs_path_reset(name);

                if (is_waiting_for_rm(sctx, ino)) {
                        /*
                         * Directory is an orphan pending deletion; use its
                         * unique orphan name and stop walking up.
                         */
                        ret = gen_unique_name(sctx, ino, gen, name);
                        if (ret < 0)
                                goto out;
                        ret = fs_path_add_path(dest, name);
                        break;
                }

                if (is_waiting_for_move(sctx, ino)) {
                        /*
                         * Not moved/renamed on the receiving side yet, so the
                         * current location is the one from the parent snapshot.
                         */
                        ret = get_first_ref(sctx->parent_root, ino,
                                            &parent_inode, &parent_gen, name);
                } else {
                        /* ret == 1 means an orphan name was used: stop here. */
                        ret = __get_cur_name_and_parent(sctx, ino, gen,
                                                        &parent_inode,
                                                        &parent_gen, name);
                        if (ret)
                                stop = 1;
                }

                if (ret < 0)
                        goto out;

                ret = fs_path_add_path(dest, name);
                if (ret < 0)
                        goto out;

                /* Continue with the parent directory. */
                ino = parent_inode;
                gen = parent_gen;
        }

out:
        fs_path_free(name);
        if (!ret)
                fs_path_unreverse(dest);
        return ret;
}
2264
2265/*
2266 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2267 */
static int send_subvol_begin(struct send_ctx *sctx)
{
        int ret;
        struct btrfs_root *send_root = sctx->send_root;
        struct btrfs_root *parent_root = sctx->parent_root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_root_ref *ref;
        struct extent_buffer *leaf;
        char *name = NULL;
        int namelen;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
        if (!name) {
                btrfs_free_path(path);
                return -ENOMEM;
        }

        /* Look up the root backref in the tree root to get the subvol name. */
        key.objectid = send_root->objectid;
        key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = 0;

        ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
                                &key, path, 1, 0);
        if (ret < 0)
                goto out;
        if (ret) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        if (key.type != BTRFS_ROOT_BACKREF_KEY ||
            key.objectid != send_root->objectid) {
                /* Search landed on a different item: no backref found. */
                ret = -ENOENT;
                goto out;
        }
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
        namelen = btrfs_root_ref_name_len(leaf, ref);
        /* The name is stored right after the root_ref item. */
        read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
        btrfs_release_path(path);

        /* With a parent root this is an incremental send (snapshot). */
        if (parent_root) {
                ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
                if (ret < 0)
                        goto out;
        } else {
                ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
                if (ret < 0)
                        goto out;
        }

        TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
        TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
                        sctx->send_root->root_item.uuid);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
                    le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
                                sctx->parent_root->root_item.uuid);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                            le64_to_cpu(sctx->parent_root->root_item.ctransid));
        }

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        btrfs_free_path(path);
        kfree(name);
        return ret;
}
2345
2346static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2347{
2348        int ret = 0;
2349        struct fs_path *p;
2350
2351verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2352
2353        p = fs_path_alloc();
2354        if (!p)
2355                return -ENOMEM;
2356
2357        ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2358        if (ret < 0)
2359                goto out;
2360
2361        ret = get_cur_path(sctx, ino, gen, p);
2362        if (ret < 0)
2363                goto out;
2364        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2365        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2366
2367        ret = send_cmd(sctx);
2368
2369tlv_put_failure:
2370out:
2371        fs_path_free(p);
2372        return ret;
2373}
2374
2375static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2376{
2377        int ret = 0;
2378        struct fs_path *p;
2379
2380verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2381
2382        p = fs_path_alloc();
2383        if (!p)
2384                return -ENOMEM;
2385
2386        ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2387        if (ret < 0)
2388                goto out;
2389
2390        ret = get_cur_path(sctx, ino, gen, p);
2391        if (ret < 0)
2392                goto out;
2393        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2394        TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2395
2396        ret = send_cmd(sctx);
2397
2398tlv_put_failure:
2399out:
2400        fs_path_free(p);
2401        return ret;
2402}
2403
2404static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2405{
2406        int ret = 0;
2407        struct fs_path *p;
2408
2409verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2410
2411        p = fs_path_alloc();
2412        if (!p)
2413                return -ENOMEM;
2414
2415        ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2416        if (ret < 0)
2417                goto out;
2418
2419        ret = get_cur_path(sctx, ino, gen, p);
2420        if (ret < 0)
2421                goto out;
2422        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2423        TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2424        TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2425
2426        ret = send_cmd(sctx);
2427
2428tlv_put_failure:
2429out:
2430        fs_path_free(p);
2431        return ret;
2432}
2433
2434static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2435{
2436        int ret = 0;
2437        struct fs_path *p = NULL;
2438        struct btrfs_inode_item *ii;
2439        struct btrfs_path *path = NULL;
2440        struct extent_buffer *eb;
2441        struct btrfs_key key;
2442        int slot;
2443
2444verbose_printk("btrfs: send_utimes %llu\n", ino);
2445
2446        p = fs_path_alloc();
2447        if (!p)
2448                return -ENOMEM;
2449
2450        path = alloc_path_for_send();
2451        if (!path) {
2452                ret = -ENOMEM;
2453                goto out;
2454        }
2455
2456        key.objectid = ino;
2457        key.type = BTRFS_INODE_ITEM_KEY;
2458        key.offset = 0;
2459        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2460        if (ret < 0)
2461                goto out;
2462
2463        eb = path->nodes[0];
2464        slot = path->slots[0];
2465        ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2466
2467        ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2468        if (ret < 0)
2469                goto out;
2470
2471        ret = get_cur_path(sctx, ino, gen, p);
2472        if (ret < 0)
2473                goto out;
2474        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2475        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
2476        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
2477        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
2478        /* TODO Add otime support when the otime patches get into upstream */
2479
2480        ret = send_cmd(sctx);
2481
2482tlv_put_failure:
2483out:
2484        fs_path_free(p);
2485        btrfs_free_path(path);
2486        return ret;
2487}
2488
2489/*
2490 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2491 * a valid path yet because we did not process the refs yet. So, the inode
2492 * is created as orphan.
2493 */
static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
        int ret = 0;
        struct fs_path *p;
        int cmd;
        u64 gen;
        u64 mode;
        u64 rdev;

verbose_printk("btrfs: send_create_inode %llu\n", ino);

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;

        /* For the currently processed inode, reuse the cached info. */
        if (ino != sctx->cur_ino) {
                ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
                                     NULL, NULL, &rdev);
                if (ret < 0)
                        goto out;
        } else {
                gen = sctx->cur_inode_gen;
                mode = sctx->cur_inode_mode;
                rdev = sctx->cur_inode_rdev;
        }

        /* Pick the creation command matching the inode's file type. */
        if (S_ISREG(mode)) {
                cmd = BTRFS_SEND_C_MKFILE;
        } else if (S_ISDIR(mode)) {
                cmd = BTRFS_SEND_C_MKDIR;
        } else if (S_ISLNK(mode)) {
                cmd = BTRFS_SEND_C_SYMLINK;
        } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
                cmd = BTRFS_SEND_C_MKNOD;
        } else if (S_ISFIFO(mode)) {
                cmd = BTRFS_SEND_C_MKFIFO;
        } else if (S_ISSOCK(mode)) {
                cmd = BTRFS_SEND_C_MKSOCK;
        } else {
                printk(KERN_WARNING "btrfs: unexpected inode type %o",
                                (int)(mode & S_IFMT));
                ret = -ENOTSUPP;
                goto out;
        }

        ret = begin_cmd(sctx, cmd);
        if (ret < 0)
                goto out;

        /* The inode has no valid refs yet, so create it under an orphan name. */
        ret = gen_unique_name(sctx, ino, gen, p);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);

        if (S_ISLNK(mode)) {
                /* Symlinks also carry their target path. */
                fs_path_reset(p);
                ret = read_symlink(sctx->send_root, ino, p);
                if (ret < 0)
                        goto out;
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
        } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
                /* Special files carry device number and full mode. */
                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
                TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
        }

        ret = send_cmd(sctx);
        if (ret < 0)
                goto out;


tlv_put_failure:
out:
        fs_path_free(p);
        return ret;
}
2572
2573/*
2574 * We need some special handling for inodes that get processed before the parent
2575 * directory got created. See process_recorded_refs for details.
2576 * This function does the check if we already created the dir out of order.
2577 */
static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
        int ret = 0;
        struct btrfs_path *path = NULL;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_key di_key;
        struct extent_buffer *eb;
        struct btrfs_dir_item *di;
        int slot;

        path = alloc_path_for_send();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        /* Iterate all dir index items of @dir in the send root. */
        key.objectid = dir;
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                eb = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(eb)) {
                        /* Leaf exhausted; advance to the next one. */
                        ret = btrfs_next_leaf(sctx->send_root, path);
                        if (ret < 0) {
                                goto out;
                        } else if (ret > 0) {
                                /* No more leaves: dir was not created yet. */
                                ret = 0;
                                break;
                        }
                        continue;
                }

                btrfs_item_key_to_cpu(eb, &found_key, slot);
                if (found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        /* Walked past this dir's index items. */
                        ret = 0;
                        goto out;
                }

                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
                btrfs_dir_item_key_to_cpu(eb, di, &di_key);

                /*
                 * A dir item pointing to an already processed inode means the
                 * dir had to be created out of order.
                 */
                if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
                    di_key.objectid < sctx->send_progress) {
                        ret = 1;
                        goto out;
                }

                path->slots[0]++;
        }

out:
        btrfs_free_path(path);
        return ret;
}
2639
2640/*
2641 * Only creates the inode if it is:
2642 * 1. Not a directory
2643 * 2. Or a directory which was not created already due to out of order
2644 *    directories. See did_create_dir and process_recorded_refs for details.
2645 */
2646static int send_create_inode_if_needed(struct send_ctx *sctx)
2647{
2648        int ret;
2649
2650        if (S_ISDIR(sctx->cur_inode_mode)) {
2651                ret = did_create_dir(sctx, sctx->cur_ino);
2652                if (ret < 0)
2653                        goto out;
2654                if (ret) {
2655                        ret = 0;
2656                        goto out;
2657                }
2658        }
2659
2660        ret = send_create_inode(sctx, sctx->cur_ino);
2661        if (ret < 0)
2662                goto out;
2663
2664out:
2665        return ret;
2666}
2667
/*
 * One recorded reference (a single dir entry pointing at an inode).
 * dir_path and name point into full_path's buffer (set up in __record_ref);
 * they are not separately allocated.
 */
struct recorded_ref {
        struct list_head list;          /* link in new_refs/deleted_refs */
        char *dir_path;                 /* parent dir part of full_path */
        char *name;                     /* basename inside full_path */
        struct fs_path *full_path;      /* owned; freed with the ref */
        u64 dir;                        /* parent directory inode number */
        u64 dir_gen;                    /* parent directory generation */
        int dir_path_len;
        int name_len;
};
2678
2679/*
2680 * We need to process new refs before deleted refs, but compare_tree gives us
2681 * everything mixed. So we first record all refs and later process them.
2682 * This function is a helper to record one ref.
2683 */
static int __record_ref(struct list_head *head, u64 dir,
                      u64 dir_gen, struct fs_path *path)
{
        struct recorded_ref *ref;

        ref = kmalloc(sizeof(*ref), GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        ref->dir = dir;
        ref->dir_gen = dir_gen;
        /* Takes ownership of @path; freed in __free_recorded_refs(). */
        ref->full_path = path;

        /* name points at the basename inside full_path's buffer. */
        ref->name = (char *)kbasename(ref->full_path->start);
        ref->name_len = ref->full_path->end - ref->name;
        /* dir_path aliases the leading directory part of full_path. */
        ref->dir_path = ref->full_path->start;
        if (ref->name == ref->full_path->start)
                ref->dir_path_len = 0;
        else
                /* Exclude the '/' separator and the basename itself. */
                ref->dir_path_len = ref->full_path->end -
                                ref->full_path->start - 1 - ref->name_len;

        list_add_tail(&ref->list, head);
        return 0;
}
2709
2710static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2711{
2712        struct recorded_ref *new;
2713
2714        new = kmalloc(sizeof(*ref), GFP_NOFS);
2715        if (!new)
2716                return -ENOMEM;
2717
2718        new->dir = ref->dir;
2719        new->dir_gen = ref->dir_gen;
2720        new->full_path = NULL;
2721        INIT_LIST_HEAD(&new->list);
2722        list_add_tail(&new->list, list);
2723        return 0;
2724}
2725
2726static void __free_recorded_refs(struct list_head *head)
2727{
2728        struct recorded_ref *cur;
2729
2730        while (!list_empty(head)) {
2731                cur = list_entry(head->next, struct recorded_ref, list);
2732                fs_path_free(cur->full_path);
2733                list_del(&cur->list);
2734                kfree(cur);
2735        }
2736}
2737
/* Free both ref lists (new and deleted) of the send context. */
static void free_recorded_refs(struct send_ctx *sctx)
{
        __free_recorded_refs(&sctx->new_refs);
        __free_recorded_refs(&sctx->deleted_refs);
}
2743
2744/*
2745 * Renames/moves a file/dir to its orphan name. Used when the first
2746 * ref of an unprocessed inode gets overwritten and for all non empty
2747 * directories.
2748 */
2749static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2750                          struct fs_path *path)
2751{
2752        int ret;
2753        struct fs_path *orphan;
2754
2755        orphan = fs_path_alloc();
2756        if (!orphan)
2757                return -ENOMEM;
2758
2759        ret = gen_unique_name(sctx, ino, gen, orphan);
2760        if (ret < 0)
2761                goto out;
2762
2763        ret = send_rename(sctx, path, orphan);
2764
2765out:
2766        fs_path_free(orphan);
2767        return ret;
2768}
2769
2770static struct orphan_dir_info *
2771add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2772{
2773        struct rb_node **p = &sctx->orphan_dirs.rb_node;
2774        struct rb_node *parent = NULL;
2775        struct orphan_dir_info *entry, *odi;
2776
2777        odi = kmalloc(sizeof(*odi), GFP_NOFS);
2778        if (!odi)
2779                return ERR_PTR(-ENOMEM);
2780        odi->ino = dir_ino;
2781        odi->gen = 0;
2782
2783        while (*p) {
2784                parent = *p;
2785                entry = rb_entry(parent, struct orphan_dir_info, node);
2786                if (dir_ino < entry->ino) {
2787                        p = &(*p)->rb_left;
2788                } else if (dir_ino > entry->ino) {
2789                        p = &(*p)->rb_right;
2790                } else {
2791                        kfree(odi);
2792                        return entry;
2793                }
2794        }
2795
2796        rb_link_node(&odi->node, parent, p);
2797        rb_insert_color(&odi->node, &sctx->orphan_dirs);
2798        return odi;
2799}
2800
2801static struct orphan_dir_info *
2802get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2803{
2804        struct rb_node *n = sctx->orphan_dirs.rb_node;
2805        struct orphan_dir_info *entry;
2806
2807        while (n) {
2808                entry = rb_entry(n, struct orphan_dir_info, node);
2809                if (dir_ino < entry->ino)
2810                        n = n->rb_left;
2811                else if (dir_ino > entry->ino)
2812                        n = n->rb_right;
2813                else
2814                        return entry;
2815        }
2816        return NULL;
2817}
2818
2819static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2820{
2821        struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2822
2823        return odi != NULL;
2824}
2825
2826static void free_orphan_dir_info(struct send_ctx *sctx,
2827                                 struct orphan_dir_info *odi)
2828{
2829        if (!odi)
2830                return;
2831        rb_erase(&odi->node, &sctx->orphan_dirs);
2832        kfree(odi);
2833}
2834
/*
 * Returns 1 if a directory can be removed at this point in time.
 * We check this by iterating all dir items and checking if the inode behind
 * the dir item was already processed.
 *
 * Returns 0 when the rmdir must be delayed (some child inode has not been
 * processed yet, or a child has a pending rename/move), and < 0 on error.
 * When a child has a pending move, the directory is also registered as an
 * orphan dir so the rmdir is re-attempted after that move is applied.
 */
static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
		     u64 send_progress)
{
	int ret = 0;
	struct btrfs_root *root = sctx->parent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_key loc;
	struct btrfs_dir_item *di;

	/*
	 * Don't try to rmdir the top/root subvolume dir.
	 */
	if (dir == BTRFS_FIRST_FREE_OBJECTID)
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Iterate all DIR_INDEX items of @dir in the parent root. */
	key.objectid = dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		struct waiting_dir_move *dm;

		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;
			continue;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type)
			break;

		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
				struct btrfs_dir_item);
		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);

		/*
		 * Child has a pending dir move: remember that this rmdir
		 * must be retried once that move (dm->rmdir_ino) completes.
		 */
		dm = get_waiting_dir_move(sctx, loc.objectid);
		if (dm) {
			struct orphan_dir_info *odi;

			odi = add_orphan_dir_info(sctx, dir);
			if (IS_ERR(odi)) {
				ret = PTR_ERR(odi);
				goto out;
			}
			odi->gen = dir_gen;
			dm->rmdir_ino = dir;
			ret = 0;
			goto out;
		}

		/* Child not yet processed by the send stream: delay rmdir. */
		if (loc.objectid > send_progress) {
			ret = 0;
			goto out;
		}

		path->slots[0]++;
	}

	/* All children processed, no pending moves: rmdir is safe now. */
	ret = 1;

out:
	btrfs_free_path(path);
	return ret;
}
2918
2919static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2920{
2921        struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
2922
2923        return entry != NULL;
2924}
2925
2926static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2927{
2928        struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2929        struct rb_node *parent = NULL;
2930        struct waiting_dir_move *entry, *dm;
2931
2932        dm = kmalloc(sizeof(*dm), GFP_NOFS);
2933        if (!dm)
2934                return -ENOMEM;
2935        dm->ino = ino;
2936        dm->rmdir_ino = 0;
2937
2938        while (*p) {
2939                parent = *p;
2940                entry = rb_entry(parent, struct waiting_dir_move, node);
2941                if (ino < entry->ino) {
2942                        p = &(*p)->rb_left;
2943                } else if (ino > entry->ino) {
2944                        p = &(*p)->rb_right;
2945                } else {
2946                        kfree(dm);
2947                        return -EEXIST;
2948                }
2949        }
2950
2951        rb_link_node(&dm->node, parent, p);
2952        rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2953        return 0;
2954}
2955
2956static struct waiting_dir_move *
2957get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2958{
2959        struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2960        struct waiting_dir_move *entry;
2961
2962        while (n) {
2963                entry = rb_entry(n, struct waiting_dir_move, node);
2964                if (ino < entry->ino)
2965                        n = n->rb_left;
2966                else if (ino > entry->ino)
2967                        n = n->rb_right;
2968                else
2969                        return entry;
2970        }
2971        return NULL;
2972}
2973
2974static void free_waiting_dir_move(struct send_ctx *sctx,
2975                                  struct waiting_dir_move *dm)
2976{
2977        if (!dm)
2978                return;
2979        rb_erase(&dm->node, &sctx->waiting_dir_moves);
2980        kfree(dm);
2981}
2982
/*
 * Record that inode @ino (generation @ino_gen) cannot be renamed/moved yet
 * and must be retried after @parent_ino has been processed. The refs from
 * @new_refs and @deleted_refs are duplicated (dir/dir_gen only, see
 * dup_ref()) so parent directory utimes can be updated when the move is
 * finally applied. The inode is also added to the waiting_dir_moves tree.
 *
 * Pending moves are kept in an rb-tree keyed by @parent_ino; moves sharing
 * the same parent are chained on the existing tree entry's list instead of
 * being linked into the tree.
 *
 * Returns 0 on success and < 0 on error, in which case nothing is left
 * allocated or recorded.
 */
static int add_pending_dir_move(struct send_ctx *sctx,
				u64 ino,
				u64 ino_gen,
				u64 parent_ino,
				struct list_head *new_refs,
				struct list_head *deleted_refs,
				const bool is_orphan)
{
	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct pending_dir_move *entry = NULL, *pm;
	struct recorded_ref *cur;
	int exists = 0;
	int ret;

	pm = kmalloc(sizeof(*pm), GFP_NOFS);
	if (!pm)
		return -ENOMEM;
	pm->parent_ino = parent_ino;
	pm->ino = ino;
	pm->gen = ino_gen;
	pm->is_orphan = is_orphan;
	INIT_LIST_HEAD(&pm->list);
	INIT_LIST_HEAD(&pm->update_refs);
	/* So free_pending_move() can tell whether pm was rb-linked. */
	RB_CLEAR_NODE(&pm->node);

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct pending_dir_move, node);
		if (parent_ino < entry->parent_ino) {
			p = &(*p)->rb_left;
		} else if (parent_ino > entry->parent_ino) {
			p = &(*p)->rb_right;
		} else {
			/* Same parent already pending: chain on its list. */
			exists = 1;
			break;
		}
	}

	/* Duplicate both ref lists for the later utimes-update pass. */
	list_for_each_entry(cur, deleted_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}
	list_for_each_entry(cur, new_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}

	ret = add_waiting_dir_move(sctx, pm->ino);
	if (ret)
		goto out;

	if (exists) {
		list_add_tail(&pm->list, &entry->list);
	} else {
		rb_link_node(&pm->node, parent, p);
		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
	}
	ret = 0;
out:
	if (ret) {
		__free_recorded_refs(&pm->update_refs);
		kfree(pm);
	}
	return ret;
}
3051
3052static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3053                                                      u64 parent_ino)
3054{
3055        struct rb_node *n = sctx->pending_dir_moves.rb_node;
3056        struct pending_dir_move *entry;
3057
3058        while (n) {
3059                entry = rb_entry(n, struct pending_dir_move, node);
3060                if (parent_ino < entry->parent_ino)
3061                        n = n->rb_left;
3062                else if (parent_ino > entry->parent_ino)
3063                        n = n->rb_right;
3064                else
3065                        return entry;
3066        }
3067        return NULL;
3068}
3069
3070static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3071                     u64 ino, u64 gen, u64 *ancestor_ino)
3072{
3073        int ret = 0;
3074        u64 parent_inode = 0;
3075        u64 parent_gen = 0;
3076        u64 start_ino = ino;
3077
3078        *ancestor_ino = 0;
3079        while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3080                fs_path_reset(name);
3081
3082                if (is_waiting_for_rm(sctx, ino))
3083                        break;
3084                if (is_waiting_for_move(sctx, ino)) {
3085                        if (*ancestor_ino == 0)
3086                                *ancestor_ino = ino;
3087                        ret = get_first_ref(sctx->parent_root, ino,
3088                                            &parent_inode, &parent_gen, name);
3089                } else {
3090                        ret = __get_cur_name_and_parent(sctx, ino, gen,
3091                                                        &parent_inode,
3092                                                        &parent_gen, name);
3093                        if (ret > 0) {
3094                                ret = 0;
3095                                break;
3096                        }
3097                }
3098                if (ret < 0)
3099                        break;
3100                if (parent_inode == start_ino) {
3101                        ret = 1;
3102                        if (*ancestor_ino == 0)
3103                                *ancestor_ino = ino;
3104                        break;
3105                }
3106                ino = parent_inode;
3107                gen = parent_gen;
3108        }
3109        return ret;
3110}
3111
/*
 * Apply a previously delayed rename/move of directory pm->ino: build the
 * source path (the orphan name, or the path under its parent in the parent
 * root), verify that doing the rename now would not create a path loop
 * (re-delaying the move against the loop ancestor if it would), issue the
 * rename, perform any rmdir that was waiting on this move, and finally
 * send utimes updates for the inode and the parent directories recorded
 * in pm->update_refs.
 */
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
	struct fs_path *from_path = NULL;
	struct fs_path *to_path = NULL;
	struct fs_path *name = NULL;
	u64 orig_progress = sctx->send_progress;
	struct recorded_ref *cur;
	u64 parent_ino, parent_gen;
	struct waiting_dir_move *dm = NULL;
	u64 rmdir_ino = 0;
	int ret;
	u64 ancestor = 0;

	name = fs_path_alloc();
	from_path = fs_path_alloc();
	if (!name || !from_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Drop the waiting entry, but keep its deferred-rmdir target. */
	dm = get_waiting_dir_move(sctx, pm->ino);
	ASSERT(dm);
	rmdir_ino = dm->rmdir_ino;
	free_waiting_dir_move(sctx, dm);

	if (pm->is_orphan) {
		ret = gen_unique_name(sctx, pm->ino,
				      pm->gen, from_path);
	} else {
		ret = get_first_ref(sctx->parent_root, pm->ino,
				    &parent_ino, &parent_gen, name);
		if (ret < 0)
			goto out;
		ret = get_cur_path(sctx, parent_ino, parent_gen,
				   from_path);
		if (ret < 0)
			goto out;
		ret = fs_path_add_path(from_path, name);
	}
	if (ret < 0)
		goto out;

	/* Resolve paths as if the current inode were already processed. */
	sctx->send_progress = sctx->cur_ino + 1;
	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
	if (ret) {
		/*
		 * Renaming now would create a loop: re-delay this move until
		 * the blocking ancestor is processed, restoring the deferred
		 * rmdir target on the fresh waiting entry.
		 */
		LIST_HEAD(deleted_refs);
		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
					   &pm->update_refs, &deleted_refs,
					   pm->is_orphan);
		if (ret < 0)
			goto out;
		if (rmdir_ino) {
			dm = get_waiting_dir_move(sctx, pm->ino);
			ASSERT(dm);
			dm->rmdir_ino = rmdir_ino;
		}
		goto out;
	}
	/* Reuse @name's buffer as the destination path. */
	fs_path_reset(name);
	to_path = name;
	name = NULL;
	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
	if (ret < 0)
		goto out;

	ret = send_rename(sctx, from_path, to_path);
	if (ret < 0)
		goto out;

	/* If a directory's rmdir was blocked on this move, retry it now. */
	if (rmdir_ino) {
		struct orphan_dir_info *odi;

		odi = get_orphan_dir_info(sctx, rmdir_ino);
		if (!odi) {
			/* already deleted */
			goto finish;
		}
		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
		if (ret < 0)
			goto out;
		if (!ret)
			goto finish;

		name = fs_path_alloc();
		if (!name) {
			ret = -ENOMEM;
			goto out;
		}
		ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
		if (ret < 0)
			goto out;
		ret = send_rmdir(sctx, name);
		if (ret < 0)
			goto out;
		free_orphan_dir_info(sctx, odi);
	}

finish:
	ret = send_utimes(sctx, pm->ino, pm->gen);
	if (ret < 0)
		goto out;

	/*
	 * After rename/move, need to update the utimes of both new parent(s)
	 * and old parent(s).
	 */
	list_for_each_entry(cur, &pm->update_refs, list) {
		/* Skip the dir we just removed above. */
		if (cur->dir == rmdir_ino)
			continue;
		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;
	}

out:
	fs_path_free(name);
	fs_path_free(from_path);
	fs_path_free(to_path);
	sctx->send_progress = orig_progress;

	return ret;
}
3235
3236static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
3237{
3238        if (!list_empty(&m->list))
3239                list_del(&m->list);
3240        if (!RB_EMPTY_NODE(&m->node))
3241                rb_erase(&m->node, &sctx->pending_dir_moves);
3242        __free_recorded_refs(&m->update_refs);
3243        kfree(m);
3244}
3245
/*
 * Append @moves and all sibling moves chained on moves->list to the tail
 * of @stack, with @moves itself placed first. The temporary list is needed
 * because moves->list doubles as the head of the sibling chain and as the
 * link used to queue @moves on @stack: the siblings are parked aside,
 * @moves is queued, then the siblings are spliced in after it.
 */
static void tail_append_pending_moves(struct pending_dir_move *moves,
				      struct list_head *stack)
{
	if (list_empty(&moves->list)) {
		list_add_tail(&moves->list, stack);
	} else {
		LIST_HEAD(list);
		list_splice_init(&moves->list, &list);
		list_add_tail(&moves->list, stack);
		list_splice_tail(&list, stack);
	}
}
3258
3259static int apply_children_dir_moves(struct send_ctx *sctx)
3260{
3261        struct pending_dir_move *pm;
3262        struct list_head stack;
3263        u64 parent_ino = sctx->cur_ino;
3264        int ret = 0;
3265
3266        pm = get_pending_dir_moves(sctx, parent_ino);
3267        if (!pm)
3268                return 0;
3269
3270        INIT_LIST_HEAD(&stack);
3271        tail_append_pending_moves(pm, &stack);
3272
3273        while (!list_empty(&stack)) {
3274                pm = list_first_entry(&stack, struct pending_dir_move, list);
3275                parent_ino = pm->ino;
3276                ret = apply_dir_move(sctx, pm);
3277                free_pending_move(sctx, pm);
3278                if (ret)
3279                        goto out;
3280                pm = get_pending_dir_moves(sctx, parent_ino);
3281                if (pm)
3282                        tail_append_pending_moves(pm, &stack);
3283        }
3284        return 0;
3285
3286out:
3287        while (!list_empty(&stack)) {
3288                pm = list_first_entry(&stack, struct pending_dir_move, list);
3289                free_pending_move(sctx, pm);
3290        }
3291        return ret;
3292}
3293
/*
 * We might need to delay a directory rename even when no ancestor directory
 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
 * renamed. This happens when we rename a directory to the old name (the name
 * in the parent root) of some other unrelated directory that got its rename
 * delayed due to some ancestor with higher number that got renamed.
 *
 * Example:
 *
 * Parent snapshot:
 * .                                       (ino 256)
 * |---- a/                                (ino 257)
 * |     |---- file                        (ino 260)
 * |
 * |---- b/                                (ino 258)
 * |---- c/                                (ino 259)
 *
 * Send snapshot:
 * .                                       (ino 256)
 * |---- a/                                (ino 258)
 * |---- x/                                (ino 259)
 *       |---- y/                          (ino 257)
 *             |----- file                 (ino 260)
 *
 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
 * must issue is:
 *
 * 1 - rename 259 from 'c' to 'x'
 * 2 - rename 257 from 'a' to 'x/y'
 * 3 - rename 258 from 'b' to 'a'
 *
 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
 * be done right away and < 0 on error.
 */
static int wait_for_dest_dir_move(struct send_ctx *sctx,
				  struct recorded_ref *parent_ref,
				  const bool is_orphan)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key di_key;
	struct btrfs_dir_item *di;
	u64 left_gen;
	u64 right_gen;
	int ret = 0;

	/* No pending moves at all: nothing can collide with our target name. */
	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Look up our target name in the parent root's destination dir. */
	key.objectid = parent_ref->dir;
	key.type = BTRFS_DIR_ITEM_KEY;
	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);

	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		ret = 0;
		goto out;
	}

	di = btrfs_match_dir_item_name(sctx->parent_root, path,
				       parent_ref->name, parent_ref->name_len);
	if (!di) {
		ret = 0;
		goto out;
	}
	/*
	 * di_key.objectid has the number of the inode that has a dentry in the
	 * parent directory with the same name that sctx->cur_ino is being
	 * renamed to. We need to check if that inode is in the send root as
	 * well and if it is currently marked as an inode with a pending rename,
	 * if it is, we need to delay the rename of sctx->cur_ino as well, so
	 * that it happens after that other inode is renamed.
	 */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
	if (di_key.type != BTRFS_INODE_ITEM_KEY) {
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
			     &left_gen, NULL, NULL, NULL, NULL);
	if (ret < 0)
		goto out;
	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
			     &right_gen, NULL, NULL, NULL, NULL);
	if (ret < 0) {
		/* Inode gone from the send root: nothing to wait for. */
		if (ret == -ENOENT)
			ret = 0;
		goto out;
	}

	/* Different inode, no need to delay the rename of sctx->cur_ino */
	if (right_gen != left_gen) {
		ret = 0;
		goto out;
	}

	if (is_waiting_for_move(sctx, di_key.objectid)) {
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   di_key.objectid,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   is_orphan);
		if (!ret)
			ret = 1;
	}
out:
	btrfs_free_path(path);
	return ret;
}
3414
/*
 * Check whether the rename/move of the current inode must wait for an
 * ancestor directory: either because that ancestor itself has a pending
 * move, or because the ancestor has a higher inode number than ours
 * (ino > sctx->cur_ino) and its own name/location changed between the
 * parent and send snapshots, so its rename has to come first in the
 * stream.
 *
 * Returns 1 if the rename was delayed (a pending dir move was recorded
 * against the blocking ancestor), 0 if it can be done right away, and
 * < 0 on error.
 */
static int wait_for_parent_move(struct send_ctx *sctx,
				struct recorded_ref *parent_ref)
{
	int ret = 0;
	u64 ino = parent_ref->dir;
	u64 parent_ino_before, parent_ino_after;
	struct fs_path *path_before = NULL;
	struct fs_path *path_after = NULL;
	int len1, len2;

	path_after = fs_path_alloc();
	path_before = fs_path_alloc();
	if (!path_after || !path_before) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Our current directory inode may not yet be renamed/moved because some
	 * ancestor (immediate or not) has to be renamed/moved first. So find if
	 * such ancestor exists and make sure our own rename/move happens after
	 * that ancestor is processed.
	 */
	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
		if (is_waiting_for_move(sctx, ino)) {
			ret = 1;
			break;
		}

		fs_path_reset(path_before);
		fs_path_reset(path_after);

		/* Compare the ancestor's first ref in both snapshots. */
		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
				    NULL, path_after);
		if (ret < 0)
			goto out;
		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
				    NULL, path_before);
		if (ret < 0 && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			/* New in the send snapshot: no ordering constraint. */
			ret = 0;
			break;
		}

		len1 = fs_path_len(path_before);
		len2 = fs_path_len(path_after);
		/*
		 * A not-yet-processed ancestor whose name or parent changed
		 * between snapshots forces our rename to wait for it.
		 */
		if (ino > sctx->cur_ino &&
		    (parent_ino_before != parent_ino_after || len1 != len2 ||
		     memcmp(path_before->start, path_after->start, len1))) {
			ret = 1;
			break;
		}
		ino = parent_ino_after;
	}

out:
	fs_path_free(path_before);
	fs_path_free(path_after);

	if (ret == 1) {
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   ino,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   false);
		if (!ret)
			ret = 1;
	}

	return ret;
}
3489
3490/*
3491 * This does all the move/link/unlink/rmdir magic.
3492 */
3493static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3494{
3495        int ret = 0;
3496        struct recorded_ref *cur;
3497        struct recorded_ref *cur2;
3498        struct list_head check_dirs;
3499        struct fs_path *valid_path = NULL;
3500        u64 ow_inode = 0;
3501        u64 ow_gen;
3502        int did_overwrite = 0;
3503        int is_orphan = 0;
3504        u64 last_dir_ino_rm = 0;
3505        bool can_rename = true;
3506
3507verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3508
3509        /*
3510         * This should never happen as the root dir always has the same ref
3511         * which is always '..'
3512         */
3513        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3514        INIT_LIST_HEAD(&check_dirs);
3515
3516        valid_path = fs_path_alloc();
3517        if (!valid_path) {
3518                ret = -ENOMEM;
3519                goto out;
3520        }
3521
3522        /*
3523         * First, check if the first ref of the current inode was overwritten
3524         * before. If yes, we know that the current inode was already orphanized
3525         * and thus use the orphan name. If not, we can use get_cur_path to
3526         * get the path of the first ref as it would like while receiving at
3527         * this point in time.
3528         * New inodes are always orphan at the beginning, so force to use the
3529         * orphan name in this case.
3530         * The first ref is stored in valid_path and will be updated if it
3531         * gets moved around.
3532         */
3533        if (!sctx->cur_inode_new) {
3534                ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3535                                sctx->cur_inode_gen);
3536                if (ret < 0)
3537                        goto out;
3538                if (ret)
3539                        did_overwrite = 1;
3540        }
3541        if (sctx->cur_inode_new || did_overwrite) {
3542                ret = gen_unique_name(sctx, sctx->cur_ino,
3543                                sctx->cur_inode_gen, valid_path);
3544                if (ret < 0)
3545                        goto out;
3546                is_orphan = 1;
3547        } else {
3548                ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3549                                valid_path);
3550                if (ret < 0)
3551                        goto out;
3552        }
3553
3554        list_for_each_entry(cur, &sctx->new_refs, list) {
3555                /*
3556                 * We may have refs where the parent directory does not exist
3557                 * yet. This happens if the parent directories inum is higher
3558                 * the the current inum. To handle this case, we create the
3559                 * parent directory out of order. But we need to check if this
3560                 * did already happen before due to other refs in the same dir.
3561                 */
3562                ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3563                if (ret < 0)
3564                        goto out;
3565                if (ret == inode_state_will_create) {
3566                        ret = 0;
3567                        /*
3568                         * First check if any of the current inodes refs did
3569                         * already create the dir.
3570                         */
3571                        list_for_each_entry(cur2, &sctx->new_refs, list) {
3572                                if (cur == cur2)
3573                                        break;
3574                                if (cur2->dir == cur->dir) {
3575                                        ret = 1;
3576                                        break;
3577                                }
3578                        }
3579
3580                        /*
3581                         * If that did not happen, check if a previous inode
3582                         * did already create the dir.
3583                         */
3584                        if (!ret)
3585                                ret = did_create_dir(sctx, cur->dir);
3586                        if (ret < 0)
3587                                goto out;
3588                        if (!ret) {
3589                                ret = send_create_inode(sctx, cur->dir);
3590                                if (ret < 0)
3591                                        goto out;
3592                        }
3593                }
3594
3595                /*
3596                 * Check if this new ref would overwrite the first ref of
3597                 * another unprocessed inode. If yes, orphanize the
3598                 * overwritten inode. If we find an overwritten ref that is
3599                 * not the first ref, simply unlink it.
3600                 */
3601                ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3602                                cur->name, cur->name_len,
3603                                &ow_inode, &ow_gen);
3604                if (ret < 0)
3605                        goto out;
3606                if (ret) {
3607                        ret = is_first_ref(sctx->parent_root,
3608                                           ow_inode, cur->dir, cur->name,
3609                                           cur->name_len);
3610                        if (ret < 0)
3611                                goto out;
3612                        if (ret) {
3613                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
3614                                                cur->full_path);
3615                                if (ret < 0)
3616                                        goto out;
3617                        } else {
3618                                ret = send_unlink(sctx, cur->full_path);
3619                                if (ret < 0)
3620                                        goto out;
3621                        }
3622                }
3623
3624                if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3625                        ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3626                        if (ret < 0)
3627                                goto out;
3628                        if (ret == 1) {
3629                                can_rename = false;
3630                                *pending_move = 1;
3631                        }
3632                }
3633
3634                /*
3635                 * link/move the ref to the new place. If we have an orphan
3636                 * inode, move it and update valid_path. If not, link or move
3637                 * it depending on the inode mode.
3638                 */
3639                if (is_orphan && can_rename) {
3640                        ret = send_rename(sctx, valid_path, cur->full_path);
3641                        if (ret < 0)
3642                                goto out;
3643                        is_orphan = 0;
3644                        ret = fs_path_copy(valid_path, cur->full_path);
3645                        if (ret < 0)
3646                                goto out;
3647                } else if (can_rename) {
3648                        if (S_ISDIR(sctx->cur_inode_mode)) {
3649                                /*
3650                                 * Dirs can't be linked, so move it. For moved
3651                                 * dirs, we always have one new and one deleted
3652                                 * ref. The deleted ref is ignored later.
3653                                 */
3654                                ret = wait_for_parent_move(sctx, cur);
3655                                if (ret < 0)
3656                                        goto out;
3657                                if (ret) {
3658                                        *pending_move = 1;
3659                                } else {
3660                                        ret = send_rename(sctx, valid_path,
3661                                                          cur->full_path);
3662                                        if (!ret)
3663                                                ret = fs_path_copy(valid_path,
3664                                                               cur->full_path);
3665                                }
3666                                if (ret < 0)
3667                                        goto out;
3668                        } else {
3669                                ret = send_link(sctx, cur->full_path,
3670                                                valid_path);
3671                                if (ret < 0)
3672                                        goto out;
3673                        }
3674                }
3675                ret = dup_ref(cur, &check_dirs);
3676                if (ret < 0)
3677                        goto out;
3678        }
3679
3680        if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
3681                /*
3682                 * Check if we can already rmdir the directory. If not,
3683                 * orphanize it. For every dir item inside that gets deleted
3684                 * later, we do this check again and rmdir it then if possible.
3685                 * See the use of check_dirs for more details.
3686                 */
3687                ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3688                                sctx->cur_ino);
3689                if (ret < 0)
3690                        goto out;
3691                if (ret) {
3692                        ret = send_rmdir(sctx, valid_path);
3693                        if (ret < 0)
3694                                goto out;
3695                } else if (!is_orphan) {
3696                        ret = orphanize_inode(sctx, sctx->cur_ino,
3697                                        sctx->cur_inode_gen, valid_path);
3698                        if (ret < 0)
3699                                goto out;
3700                        is_orphan = 1;
3701                }
3702
3703                list_for_each_entry(cur, &sctx->deleted_refs, list) {
3704                        ret = dup_ref(cur, &check_dirs);
3705                        if (ret < 0)
3706                                goto out;
3707                }
3708        } else if (S_ISDIR(sctx->cur_inode_mode) &&
3709                   !list_empty(&sctx->deleted_refs)) {
3710                /*
3711                 * We have a moved dir. Add the old parent to check_dirs
3712                 */
3713                cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
3714                                list);
3715                ret = dup_ref(cur, &check_dirs);
3716                if (ret < 0)
3717                        goto out;
3718        } else if (!S_ISDIR(sctx->cur_inode_mode)) {
3719                /*
3720                 * We have a non dir inode. Go through all deleted refs and
3721                 * unlink them if they were not already overwritten by other
3722                 * inodes.
3723                 */
3724                list_for_each_entry(cur, &sctx->deleted_refs, list) {
3725                        ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3726                                        sctx->cur_ino, sctx->cur_inode_gen,
3727                                        cur->name, cur->name_len);
3728                        if (ret < 0)
3729                                goto out;
3730                        if (!ret) {
3731                                ret = send_unlink(sctx, cur->full_path);
3732                                if (ret < 0)
3733                                        goto out;
3734                        }
3735                        ret = dup_ref(cur, &check_dirs);
3736                        if (ret < 0)
3737                                goto out;
3738                }
3739                /*
3740                 * If the inode is still orphan, unlink the orphan. This may
3741                 * happen when a previous inode did overwrite the first ref
3742                 * of this inode and no new refs were added for the current
3743                 * inode. Unlinking does not mean that the inode is deleted in
3744                 * all cases. There may still be links to this inode in other
3745                 * places.
3746                 */
3747                if (is_orphan) {
3748                        ret = send_unlink(sctx, valid_path);
3749                        if (ret < 0)
3750                                goto out;
3751                }
3752        }
3753
3754        /*
3755         * We did collect all parent dirs where cur_inode was once located. We
3756         * now go through all these dirs and check if they are pending for
3757         * deletion and if it's finally possible to perform the rmdir now.
3758         * We also update the inode stats of the parent dirs here.
3759         */
3760        list_for_each_entry(cur, &check_dirs, list) {
3761                /*
3762                 * In case we had refs into dirs that were not processed yet,
3763                 * we don't need to do the utime and rmdir logic for these dirs.
3764                 * The dir will be processed later.
3765                 */
3766                if (cur->dir > sctx->cur_ino)
3767                        continue;
3768
3769                ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3770                if (ret < 0)
3771                        goto out;
3772
3773                if (ret == inode_state_did_create ||
3774                    ret == inode_state_no_change) {
3775                        /* TODO delayed utimes */
3776                        ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3777                        if (ret < 0)
3778                                goto out;
3779                } else if (ret == inode_state_did_delete &&
3780                           cur->dir != last_dir_ino_rm) {
3781                        ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
3782                                        sctx->cur_ino);
3783                        if (ret < 0)
3784                                goto out;
3785                        if (ret) {
3786                                ret = get_cur_path(sctx, cur->dir,
3787                                                   cur->dir_gen, valid_path);
3788                                if (ret < 0)
3789                                        goto out;
3790                                ret = send_rmdir(sctx, valid_path);
3791                                if (ret < 0)
3792                                        goto out;
3793                                last_dir_ino_rm = cur->dir;
3794                        }
3795                }
3796        }
3797
3798        ret = 0;
3799
3800out:
3801        __free_recorded_refs(&check_dirs);
3802        free_recorded_refs(sctx);
3803        fs_path_free(valid_path);
3804        return ret;
3805}
3806
3807static int record_ref(struct btrfs_root *root, int num, u64 dir, int index,
3808                      struct fs_path *name, void *ctx, struct list_head *refs)
3809{
3810        int ret = 0;
3811        struct send_ctx *sctx = ctx;
3812        struct fs_path *p;
3813        u64 gen;
3814
3815        p = fs_path_alloc();
3816        if (!p)
3817                return -ENOMEM;
3818
3819        ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
3820                        NULL, NULL);
3821        if (ret < 0)
3822                goto out;
3823
3824        ret = get_cur_path(sctx, dir, gen, p);
3825        if (ret < 0)
3826                goto out;
3827        ret = fs_path_add_path(p, name);
3828        if (ret < 0)
3829                goto out;
3830
3831        ret = __record_ref(refs, dir, gen, p);
3832
3833out:
3834        if (ret)
3835                fs_path_free(p);
3836        return ret;
3837}
3838
3839static int __record_new_ref(int num, u64 dir, int index,
3840                            struct fs_path *name,
3841                            void *ctx)
3842{
3843        struct send_ctx *sctx = ctx;
3844        return record_ref(sctx->send_root, num, dir, index, name,
3845                          ctx, &sctx->new_refs);
3846}
3847
3848
3849static int __record_deleted_ref(int num, u64 dir, int index,
3850                                struct fs_path *name,
3851                                void *ctx)
3852{
3853        struct send_ctx *sctx = ctx;
3854        return record_ref(sctx->parent_root, num, dir, index, name,
3855                          ctx, &sctx->deleted_refs);
3856}
3857
3858static int record_new_ref(struct send_ctx *sctx)
3859{
3860        int ret;
3861
3862        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3863                                sctx->cmp_key, 0, __record_new_ref, sctx);
3864        if (ret < 0)
3865                goto out;
3866        ret = 0;
3867
3868out:
3869        return ret;
3870}
3871
3872static int record_deleted_ref(struct send_ctx *sctx)
3873{
3874        int ret;
3875
3876        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3877                                sctx->cmp_key, 0, __record_deleted_ref, sctx);
3878        if (ret < 0)
3879                goto out;
3880        ret = 0;
3881
3882out:
3883        return ret;
3884}
3885
/*
 * Context for __find_iref()/find_iref(): describes the (dir, dir_gen, name)
 * inode ref we are searching for and records where a match was found.
 */
struct find_ref_ctx {
	u64 dir;			/* parent directory inum to match */
	u64 dir_gen;			/* generation the parent dir must have */
	struct btrfs_root *root;	/* root used to look up dir's generation */
	struct fs_path *name;		/* ref name to match */
	int found_idx;			/* index of the matching ref, -1 if none */
};
3893
3894static int __find_iref(int num, u64 dir, int index,
3895                       struct fs_path *name,
3896                       void *ctx_)
3897{
3898        struct find_ref_ctx *ctx = ctx_;
3899        u64 dir_gen;
3900        int ret;
3901
3902        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3903            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3904                /*
3905                 * To avoid doing extra lookups we'll only do this if everything
3906                 * else matches.
3907                 */
3908                ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3909                                     NULL, NULL, NULL);
3910                if (ret)
3911                        return ret;
3912                if (dir_gen != ctx->dir_gen)
3913                        return 0;
3914                ctx->found_idx = num;
3915                return 1;
3916        }
3917        return 0;
3918}
3919
3920static int find_iref(struct btrfs_root *root,
3921                     struct btrfs_path *path,
3922                     struct btrfs_key *key,
3923                     u64 dir, u64 dir_gen, struct fs_path *name)
3924{
3925        int ret;
3926        struct find_ref_ctx ctx;
3927
3928        ctx.dir = dir;
3929        ctx.name = name;
3930        ctx.dir_gen = dir_gen;
3931        ctx.found_idx = -1;
3932        ctx.root = root;
3933
3934        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3935        if (ret < 0)
3936                return ret;
3937
3938        if (ctx.found_idx == -1)
3939                return -ENOENT;
3940
3941        return ctx.found_idx;
3942}
3943
3944static int __record_changed_new_ref(int num, u64 dir, int index,
3945                                    struct fs_path *name,
3946                                    void *ctx)
3947{
3948        u64 dir_gen;
3949        int ret;
3950        struct send_ctx *sctx = ctx;
3951
3952        ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3953                             NULL, NULL, NULL);
3954        if (ret)
3955                return ret;
3956
3957        ret = find_iref(sctx->parent_root, sctx->right_path,
3958                        sctx->cmp_key, dir, dir_gen, name);
3959        if (ret == -ENOENT)
3960                ret = __record_new_ref(num, dir, index, name, sctx);
3961        else if (ret > 0)
3962                ret = 0;
3963
3964        return ret;
3965}
3966
3967static int __record_changed_deleted_ref(int num, u64 dir, int index,
3968                                        struct fs_path *name,
3969                                        void *ctx)
3970{
3971        u64 dir_gen;
3972        int ret;
3973        struct send_ctx *sctx = ctx;
3974
3975        ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3976                             NULL, NULL, NULL);
3977        if (ret)
3978                return ret;
3979
3980        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3981                        dir, dir_gen, name);
3982        if (ret == -ENOENT)
3983                ret = __record_deleted_ref(num, dir, index, name, sctx);
3984        else if (ret > 0)
3985                ret = 0;
3986
3987        return ret;
3988}
3989
3990static int record_changed_ref(struct send_ctx *sctx)
3991{
3992        int ret = 0;
3993
3994        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3995                        sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3996        if (ret < 0)
3997                goto out;
3998        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3999                        sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4000        if (ret < 0)
4001                goto out;
4002        ret = 0;
4003
4004out:
4005        return ret;
4006}
4007
4008/*
4009 * Record and process all refs at once. Needed when an inode changes the
4010 * generation number, which means that it was deleted and recreated.
4011 */
4012static int process_all_refs(struct send_ctx *sctx,
4013                            enum btrfs_compare_tree_result cmd)
4014{
4015        int ret;
4016        struct btrfs_root *root;
4017        struct btrfs_path *path;
4018        struct btrfs_key key;
4019        struct btrfs_key found_key;
4020        struct extent_buffer *eb;
4021        int slot;
4022        iterate_inode_ref_t cb;
4023        int pending_move = 0;
4024
4025        path = alloc_path_for_send();
4026        if (!path)
4027                return -ENOMEM;
4028
4029        if (cmd == BTRFS_COMPARE_TREE_NEW) {
4030                root = sctx->send_root;
4031                cb = __record_new_ref;
4032        } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4033                root = sctx->parent_root;
4034                cb = __record_deleted_ref;
4035        } else {
4036                btrfs_err(sctx->send_root->fs_info,
4037                                "Wrong command %d in process_all_refs", cmd);
4038                ret = -EINVAL;
4039                goto out;
4040        }
4041
4042        key.objectid = sctx->cmp_key->objectid;
4043        key.type = BTRFS_INODE_REF_KEY;
4044        key.offset = 0;
4045        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4046        if (ret < 0)
4047                goto out;
4048
4049        while (1) {
4050                eb = path->nodes[0];
4051                slot = path->slots[0];
4052                if (slot >= btrfs_header_nritems(eb)) {
4053                        ret = btrfs_next_leaf(root, path);
4054                        if (ret < 0)
4055                                goto out;
4056                        else if (ret > 0)
4057                                break;
4058                        continue;
4059                }
4060
4061                btrfs_item_key_to_cpu(eb, &found_key, slot);
4062
4063                if (found_key.objectid != key.objectid ||
4064                    (found_key.type != BTRFS_INODE_REF_KEY &&
4065                     found_key.type != BTRFS_INODE_EXTREF_KEY))
4066                        break;
4067
4068                ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4069                if (ret < 0)
4070                        goto out;
4071
4072                path->slots[0]++;
4073        }
4074        btrfs_release_path(path);
4075
4076        ret = process_recorded_refs(sctx, &pending_move);
4077        /* Only applicable to an incremental send. */
4078        ASSERT(pending_move == 0);
4079
4080out:
4081        btrfs_free_path(path);
4082        return ret;
4083}
4084
/*
 * Emit a SET_XATTR command for @path with the given xattr name and value.
 *
 * The TLV_PUT* helpers bail out to the tlv_put_failure label on error
 * (macros defined earlier in this file), which is why the label must stay
 * even though nothing here jumps to it explicitly.
 *
 * Returns 0 on success or a negative errno.
 */
static int send_set_xattr(struct send_ctx *sctx,
                          struct fs_path *path,
                          const char *name, int name_len,
                          const char *data, int data_len)
{
        int ret = 0;

        ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
        TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
4106
/*
 * Emit a REMOVE_XATTR command for @path with the given xattr name.
 *
 * The TLV_PUT* helpers bail out to the tlv_put_failure label on error
 * (macros defined earlier in this file).
 *
 * Returns 0 on success or a negative errno.
 */
static int send_remove_xattr(struct send_ctx *sctx,
                          struct fs_path *path,
                          const char *name, int name_len)
{
        int ret = 0;

        ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
        if (ret < 0)
                goto out;

        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        return ret;
}
4126
4127static int __process_new_xattr(int num, struct btrfs_key *di_key,
4128                               const char *name, int name_len,
4129                               const char *data, int data_len,
4130                               u8 type, void *ctx)
4131{
4132        int ret;
4133        struct send_ctx *sctx = ctx;
4134        struct fs_path *p;
4135        posix_acl_xattr_header dummy_acl;
4136
4137        p = fs_path_alloc();
4138        if (!p)
4139                return -ENOMEM;
4140
4141        /*
4142         * This hack is needed because empty acl's are stored as zero byte
4143         * data in xattrs. Problem with that is, that receiving these zero byte
4144         * acl's will fail later. To fix this, we send a dummy acl list that
4145         * only contains the version number and no entries.
4146         */
4147        if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4148            !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4149                if (data_len == 0) {
4150                        dummy_acl.a_version =
4151                                        cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4152                        data = (char *)&dummy_acl;
4153                        data_len = sizeof(dummy_acl);
4154                }
4155        }
4156
4157        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4158        if (ret < 0)
4159                goto out;
4160
4161        ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4162
4163out:
4164        fs_path_free(p);
4165        return ret;
4166}
4167
4168static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4169                                   const char *name, int name_len,
4170                                   const char *data, int data_len,
4171                                   u8 type, void *ctx)
4172{
4173        int ret;
4174        struct send_ctx *sctx = ctx;
4175        struct fs_path *p;
4176
4177        p = fs_path_alloc();
4178        if (!p)
4179                return -ENOMEM;
4180
4181        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4182        if (ret < 0)
4183                goto out;
4184
4185        ret = send_remove_xattr(sctx, p, name, name_len);
4186
4187out:
4188        fs_path_free(p);
4189        return ret;
4190}
4191
4192static int process_new_xattr(struct send_ctx *sctx)
4193{
4194        int ret = 0;
4195
4196        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4197                               sctx->cmp_key, __process_new_xattr, sctx);
4198
4199        return ret;
4200}
4201
4202static int process_deleted_xattr(struct send_ctx *sctx)
4203{
4204        int ret;
4205
4206        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4207                               sctx->cmp_key, __process_deleted_xattr, sctx);
4208
4209        return ret;
4210}
4211
/*
 * Context for __find_xattr()/find_xattr(): the xattr name to look for and,
 * on success, a copy of the matching xattr's value.
 */
struct find_xattr_ctx {
	const char *name;	/* xattr name to match (not NUL terminated) */
	int name_len;		/* length of name */
	int found_idx;		/* index of the match, -1 if not found */
	char *found_data;	/* kmemdup'ed copy of the value; caller frees */
	int found_data_len;	/* length of found_data */
};
4219
4220static int __find_xattr(int num, struct btrfs_key *di_key,
4221                        const char *name, int name_len,
4222                        const char *data, int data_len,
4223                        u8 type, void *vctx)
4224{
4225        struct find_xattr_ctx *ctx = vctx;
4226
4227        if (name_len == ctx->name_len &&
4228            strncmp(name, ctx->name, name_len) == 0) {
4229                ctx->found_idx = num;
4230                ctx->found_data_len = data_len;
4231                ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
4232                if (!ctx->found_data)
4233                        return -ENOMEM;
4234                return 1;
4235        }
4236        return 0;
4237}
4238
4239static int find_xattr(struct btrfs_root *root,
4240                      struct btrfs_path *path,
4241                      struct btrfs_key *key,
4242                      const char *name, int name_len,
4243                      char **data, int *data_len)
4244{
4245        int ret;
4246        struct find_xattr_ctx ctx;
4247
4248        ctx.name = name;
4249        ctx.name_len = name_len;
4250        ctx.found_idx = -1;
4251        ctx.found_data = NULL;
4252        ctx.found_data_len = 0;
4253
4254        ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
4255        if (ret < 0)
4256                return ret;
4257
4258        if (ctx.found_idx == -1)
4259                return -ENOENT;
4260        if (data) {
4261                *data = ctx.found_data;
4262                *data_len = ctx.found_data_len;
4263        } else {
4264                kfree(ctx.found_data);
4265        }
4266        return ctx.found_idx;
4267}
4268
4269
4270static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4271                                       const char *name, int name_len,
4272                                       const char *data, int data_len,
4273                                       u8 type, void *ctx)
4274{
4275        int ret;
4276        struct send_ctx *sctx = ctx;
4277        char *found_data = NULL;
4278        int found_data_len  = 0;
4279
4280        ret = find_xattr(sctx->parent_root, sctx->right_path,
4281                         sctx->cmp_key, name, name_len, &found_data,
4282                         &found_data_len);
4283        if (ret == -ENOENT) {
4284                ret = __process_new_xattr(num, di_key, name, name_len, data,
4285                                data_len, type, ctx);
4286        } else if (ret >= 0) {
4287                if (data_len != found_data_len ||
4288                    memcmp(data, found_data, data_len)) {
4289                        ret = __process_new_xattr(num, di_key, name, name_len,
4290                                        data, data_len, type, ctx);
4291                } else {
4292                        ret = 0;
4293                }
4294        }
4295
4296        kfree(found_data);
4297        return ret;
4298}
4299
4300static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4301                                           const char *name, int name_len,
4302                                           const char *data, int data_len,
4303                                           u8 type, void *ctx)
4304{
4305        int ret;
4306        struct send_ctx *sctx = ctx;
4307
4308        ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4309                         name, name_len, NULL, NULL);
4310        if (ret == -ENOENT)
4311                ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4312                                data_len, type, ctx);
4313        else if (ret >= 0)
4314                ret = 0;
4315
4316        return ret;
4317}
4318
4319static int process_changed_xattr(struct send_ctx *sctx)
4320{
4321        int ret = 0;
4322
4323        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4324                        sctx->cmp_key, __process_changed_new_xattr, sctx);
4325        if (ret < 0)
4326                goto out;
4327        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4328                        sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4329
4330out:
4331        return ret;
4332}
4333
4334static int process_all_new_xattrs(struct send_ctx *sctx)
4335{
4336        int ret;
4337        struct btrfs_root *root;
4338        struct btrfs_path *path;
4339        struct btrfs_key key;
4340        struct btrfs_key found_key;
4341        struct extent_buffer *eb;
4342        int slot;
4343
4344        path = alloc_path_for_send();
4345        if (!path)
4346                return -ENOMEM;
4347
4348        root = sctx->send_root;
4349
4350        key.objectid = sctx->cmp_key->objectid;
4351        key.type = BTRFS_XATTR_ITEM_KEY;
4352        key.offset = 0;
4353        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4354        if (ret < 0)
4355                goto out;
4356
4357        while (1) {
4358                eb = path->nodes[0];
4359                slot = path->slots[0];
4360                if (slot >= btrfs_header_nritems(eb)) {
4361                        ret = btrfs_next_leaf(root, path);
4362                        if (ret < 0) {
4363                                goto out;
4364                        } else if (ret > 0) {
4365                                ret = 0;
4366                                break;
4367                        }
4368                        continue;
4369                }
4370
4371                btrfs_item_key_to_cpu(eb, &found_key, slot);
4372                if (found_key.objectid != key.objectid ||
4373                    found_key.type != key.type) {
4374                        ret = 0;
4375                        goto out;
4376                }
4377
4378                ret = iterate_dir_item(root, path, &found_key,
4379                                       __process_new_xattr, sctx);
4380                if (ret < 0)
4381                        goto out;
4382
4383                path->slots[0]++;
4384        }
4385
4386out:
4387        btrfs_free_path(path);
4388        return ret;
4389}
4390
4391static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4392{
4393        struct btrfs_root *root = sctx->send_root;
4394        struct btrfs_fs_info *fs_info = root->fs_info;
4395        struct inode *inode;
4396        struct page *page;
4397        char *addr;
4398        struct btrfs_key key;
4399        pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4400        pgoff_t last_index;
4401        unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4402        ssize_t ret = 0;
4403
4404        key.objectid = sctx->cur_ino;
4405        key.type = BTRFS_INODE_ITEM_KEY;
4406        key.offset = 0;
4407
4408        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4409        if (IS_ERR(inode))
4410                return PTR_ERR(inode);
4411
4412        if (offset + len > i_size_read(inode)) {
4413                if (offset > i_size_read(inode))
4414                        len = 0;
4415                else
4416                        len = offset - i_size_read(inode);
4417        }
4418        if (len == 0)
4419                goto out;
4420
4421        last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4422
4423        /* initial readahead */
4424        memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4425        file_ra_state_init(&sctx->ra, inode->i_mapping);
4426        btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index,
4427                       last_index - index + 1);
4428
4429        while (index <= last_index) {
4430                unsigned cur_len = min_t(unsigned, len,
4431                                         PAGE_CACHE_SIZE - pg_offset);
4432                page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4433                if (!page) {
4434                        ret = -ENOMEM;
4435                        break;
4436                }
4437
4438                if (!PageUptodate(page)) {
4439                        btrfs_readpage(NULL, page);
4440                        lock_page(page);
4441                        if (!PageUptodate(page)) {
4442                                unlock_page(page);
4443                                page_cache_release(page);
4444                                ret = -EIO;
4445                                break;
4446                        }
4447                }
4448
4449                addr = kmap(page);
4450                memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4451                kunmap(page);
4452                unlock_page(page);
4453                page_cache_release(page);
4454                index++;
4455                pg_offset = 0;
4456                len -= cur_len;
4457                ret += cur_len;
4458        }
4459out:
4460        iput(inode);
4461        return ret;
4462}
4463
4464/*
4465 * Read some bytes from the current inode/file and send a write command to
4466 * user space.
4467 */
4468static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4469{
4470        int ret = 0;
4471        struct fs_path *p;
4472        ssize_t num_read = 0;
4473
4474        p = fs_path_alloc();
4475        if (!p)
4476                return -ENOMEM;
4477
4478verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
4479
4480        num_read = fill_read_buf(sctx, offset, len);
4481        if (num_read <= 0) {
4482                if (num_read < 0)
4483                        ret = num_read;
4484                goto out;
4485        }
4486
4487        ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4488        if (ret < 0)
4489                goto out;
4490
4491        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4492        if (ret < 0)
4493                goto out;
4494
4495        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4496        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4497        TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4498
4499        ret = send_cmd(sctx);
4500
4501tlv_put_failure:
4502out:
4503        fs_path_free(p);
4504        if (ret < 0)
4505                return ret;
4506        return num_read;
4507}
4508
4509/*
4510 * Send a clone command to user space.
4511 */
4512static int send_clone(struct send_ctx *sctx,
4513                      u64 offset, u32 len,
4514                      struct clone_root *clone_root)
4515{
4516        int ret = 0;
4517        struct fs_path *p;
4518        u64 gen;
4519
4520verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
4521               "clone_inode=%llu, clone_offset=%llu\n", offset, len,
4522                clone_root->root->objectid, clone_root->ino,
4523                clone_root->offset);
4524
4525        p = fs_path_alloc();
4526        if (!p)
4527                return -ENOMEM;
4528
4529        ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4530        if (ret < 0)
4531                goto out;
4532
4533        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4534        if (ret < 0)
4535                goto out;
4536
4537        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4538        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4539        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4540
4541        if (clone_root->root == sctx->send_root) {
4542                ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4543                                &gen, NULL, NULL, NULL, NULL);
4544                if (ret < 0)
4545                        goto out;
4546                ret = get_cur_path(sctx, clone_root->ino, gen, p);
4547        } else {
4548                ret = get_inode_path(clone_root->root, clone_root->ino, p);
4549        }
4550        if (ret < 0)
4551                goto out;
4552
4553        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4554                        clone_root->root->root_item.uuid);
4555        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4556                    le64_to_cpu(clone_root->root->root_item.ctransid));
4557        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4558        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4559                        clone_root->offset);
4560
4561        ret = send_cmd(sctx);
4562
4563tlv_put_failure:
4564out:
4565        fs_path_free(p);
4566        return ret;
4567}
4568
4569/*
4570 * Send an update extent command to user space.
4571 */
4572static int send_update_extent(struct send_ctx *sctx,
4573                              u64 offset, u32 len)
4574{
4575        int ret = 0;
4576        struct fs_path *p;
4577
4578        p = fs_path_alloc();
4579        if (!p)
4580                return -ENOMEM;
4581
4582        ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
4583        if (ret < 0)
4584                goto out;
4585
4586        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4587        if (ret < 0)
4588                goto out;
4589
4590        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4591        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4592        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
4593
4594        ret = send_cmd(sctx);
4595
4596tlv_put_failure:
4597out:
4598        fs_path_free(p);
4599        return ret;
4600}
4601
/*
 * Fill the file range [sctx->cur_inode_last_extent, end) on the receiving
 * side with zeroes, by sending write commands carrying zeroed data in
 * chunks of at most BTRFS_SEND_READ_SIZE bytes.
 */
static int send_hole(struct send_ctx *sctx, u64 end)
{
        struct fs_path *p = NULL;
        u64 offset = sctx->cur_inode_last_extent;
        u64 len;
        int ret = 0;

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;
        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
        if (ret < 0)
                goto tlv_put_failure;
        /* read_buf serves as the shared source of zeroes for every chunk. */
        memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
        while (offset < end) {
                len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);

                ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
                if (ret < 0)
                        break;
                /* The TLV_PUT* macros jump to tlv_put_failure on error. */
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
                ret = send_cmd(sctx);
                if (ret < 0)
                        break;
                offset += len;
        }
tlv_put_failure:
        fs_path_free(p);
        return ret;
}
4634
4635static int send_write_or_clone(struct send_ctx *sctx,
4636                               struct btrfs_path *path,
4637                               struct btrfs_key *key,
4638                               struct clone_root *clone_root)
4639{
4640        int ret = 0;
4641        struct btrfs_file_extent_item *ei;
4642        u64 offset = key->offset;
4643        u64 pos = 0;
4644        u64 len;
4645        u32 l;
4646        u8 type;
4647        u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
4648
4649        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4650                        struct btrfs_file_extent_item);
4651        type = btrfs_file_extent_type(path->nodes[0], ei);
4652        if (type == BTRFS_FILE_EXTENT_INLINE) {
4653                len = btrfs_file_extent_inline_len(path->nodes[0],
4654                                                   path->slots[0], ei);
4655                /*
4656                 * it is possible the inline item won't cover the whole page,
4657                 * but there may be items after this page.  Make
4658                 * sure to send the whole thing
4659                 */
4660                len = PAGE_CACHE_ALIGN(len);
4661        } else {
4662                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
4663        }
4664
4665        if (offset + len > sctx->cur_inode_size)
4666                len = sctx->cur_inode_size - offset;
4667        if (len == 0) {
4668                ret = 0;
4669                goto out;
4670        }
4671
4672        if (clone_root && IS_ALIGNED(offset + len, bs)) {
4673                ret = send_clone(sctx, offset, len, clone_root);
4674        } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
4675                ret = send_update_extent(sctx, offset, len);
4676        } else {
4677                while (pos < len) {
4678                        l = len - pos;
4679                        if (l > BTRFS_SEND_READ_SIZE)
4680                                l = BTRFS_SEND_READ_SIZE;
4681                        ret = send_write(sctx, pos + offset, l);
4682                        if (ret < 0)
4683                                goto out;
4684                        if (!ret)
4685                                break;
4686                        pos += ret;
4687                }
4688                ret = 0;
4689        }
4690out:
4691        return ret;
4692}
4693
/*
 * Check whether the file range covered by the extent at @ekey/@left_path in
 * the send root is backed by identical extents in the parent root, in which
 * case an incremental send does not need to transfer it.
 *
 * Returns 1 if the whole range is unchanged, 0 if anything differs (or is
 * not a regular extent on either side), and a negative errno on error.
 */
static int is_extent_unchanged(struct send_ctx *sctx,
                               struct btrfs_path *left_path,
                               struct btrfs_key *ekey)
{
        int ret = 0;
        struct btrfs_key key;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        int slot;
        struct btrfs_key found_key;
        struct btrfs_file_extent_item *ei;
        u64 left_disknr;
        u64 right_disknr;
        u64 left_offset;
        u64 right_offset;
        u64 left_offset_fixed;
        u64 left_len;
        u64 right_len;
        u64 left_gen;
        u64 right_gen;
        u8 left_type;
        u8 right_type;

        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;

        /* Only regular extents can be compared; anything else is "changed". */
        eb = left_path->nodes[0];
        slot = left_path->slots[0];
        ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        left_type = btrfs_file_extent_type(eb, ei);

        if (left_type != BTRFS_FILE_EXTENT_REG) {
                ret = 0;
                goto out;
        }
        left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
        left_len = btrfs_file_extent_num_bytes(eb, ei);
        left_offset = btrfs_file_extent_offset(eb, ei);
        left_gen = btrfs_file_extent_generation(eb, ei);

        /*
         * Following comments will refer to these graphics. L is the left
         * extents which we are checking at the moment. 1-8 are the right
         * extents that we iterate.
         *
         *       |-----L-----|
         * |-1-|-2a-|-3-|-4-|-5-|-6-|
         *
         *       |-----L-----|
         * |--1--|-2b-|...(same as above)
         *
         * Alternative situation. Happens on files where extents got split.
         *       |-----L-----|
         * |-----------7-----------|-6-|
         *
         * Alternative situation. Happens on files which got larger.
         *       |-----L-----|
         * |-8-|
         * Nothing follows after 8.
         */

        key.objectid = ekey->objectid;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = ekey->offset;
        ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        if (ret) {
                ret = 0;
                goto out;
        }

        /*
         * Handle special case where the right side has no extents at all.
         */
        eb = path->nodes[0];
        slot = path->slots[0];
        btrfs_item_key_to_cpu(eb, &found_key, slot);
        if (found_key.objectid != key.objectid ||
            found_key.type != key.type) {
                /* If we're a hole then just pretend nothing changed */
                ret = (left_disknr) ? 0 : 1;
                goto out;
        }

        /*
         * We're now on 2a, 2b or 7.
         */
        key = found_key;
        while (key.offset < ekey->offset + left_len) {
                ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
                right_type = btrfs_file_extent_type(eb, ei);
                if (right_type != BTRFS_FILE_EXTENT_REG) {
                        ret = 0;
                        goto out;
                }

                right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
                right_len = btrfs_file_extent_num_bytes(eb, ei);
                right_offset = btrfs_file_extent_offset(eb, ei);
                right_gen = btrfs_file_extent_generation(eb, ei);

                /*
                 * Are we at extent 8? If yes, we know the extent is changed.
                 * This may only happen on the first iteration.
                 */
                if (found_key.offset + right_len <= ekey->offset) {
                        /* If we're a hole just pretend nothing changed */
                        ret = (left_disknr) ? 0 : 1;
                        goto out;
                }

                left_offset_fixed = left_offset;
                if (key.offset < ekey->offset) {
                        /* Fix the right offset for 2a and 7. */
                        right_offset += ekey->offset - key.offset;
                } else {
                        /* Fix the left offset for all behind 2a and 2b */
                        left_offset_fixed += key.offset - ekey->offset;
                }

                /*
                 * Check if we have the same extent.
                 */
                if (left_disknr != right_disknr ||
                    left_offset_fixed != right_offset ||
                    left_gen != right_gen) {
                        ret = 0;
                        goto out;
                }

                /*
                 * Go to the next extent.
                 */
                ret = btrfs_next_item(sctx->parent_root, path);
                if (ret < 0)
                        goto out;
                if (!ret) {
                        eb = path->nodes[0];
                        slot = path->slots[0];
                        btrfs_item_key_to_cpu(eb, &found_key, slot);
                }
                if (ret || found_key.objectid != key.objectid ||
                    found_key.type != key.type) {
                        key.offset += right_len;
                        break;
                }
                if (found_key.offset != key.offset + right_len) {
                        /* Gap between extents on the right side: changed. */
                        ret = 0;
                        goto out;
                }
                key = found_key;
        }

        /*
         * We're now behind the left extent (treat as unchanged) or at the end
         * of the right side (treat as changed).
         */
        if (key.offset >= ekey->offset + left_len)
                ret = 1;
        else
                ret = 0;


out:
        btrfs_free_path(path);
        return ret;
}
4863
4864static int get_last_extent(struct send_ctx *sctx, u64 offset)
4865{
4866        struct btrfs_path *path;
4867        struct btrfs_root *root = sctx->send_root;
4868        struct btrfs_file_extent_item *fi;
4869        struct btrfs_key key;
4870        u64 extent_end;
4871        u8 type;
4872        int ret;
4873
4874        path = alloc_path_for_send();
4875        if (!path)
4876                return -ENOMEM;
4877
4878        sctx->cur_inode_last_extent = 0;
4879
4880        key.objectid = sctx->cur_ino;
4881        key.type = BTRFS_EXTENT_DATA_KEY;
4882        key.offset = offset;
4883        ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
4884        if (ret < 0)
4885                goto out;
4886        ret = 0;
4887        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4888        if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
4889                goto out;
4890
4891        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4892                            struct btrfs_file_extent_item);
4893        type = btrfs_file_extent_type(path->nodes[0], fi);
4894        if (type == BTRFS_FILE_EXTENT_INLINE) {
4895                u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4896                                                        path->slots[0], fi);
4897                extent_end = ALIGN(key.offset + size,
4898                                   sctx->send_root->sectorsize);
4899        } else {
4900                extent_end = key.offset +
4901                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
4902        }
4903        sctx->cur_inode_last_extent = extent_end;
4904out:
4905        btrfs_free_path(path);
4906        return ret;
4907}
4908
/*
 * If there is a gap between the end of the last processed extent
 * (sctx->cur_inode_last_extent) and the start of the extent at @key, send
 * zeroed writes to fill that gap on the receiver (only when
 * need_send_hole() says holes must be sent explicitly). Afterwards
 * advance cur_inode_last_extent to the end of the current extent.
 */
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
                           struct btrfs_key *key)
{
        struct btrfs_file_extent_item *fi;
        u64 extent_end;
        u8 type;
        int ret = 0;

        if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
                return 0;

        /* First extent of this inode: establish the previous extent's end. */
        if (sctx->cur_inode_last_extent == (u64)-1) {
                ret = get_last_extent(sctx, key->offset - 1);
                if (ret)
                        return ret;
        }

        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
                            struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], fi);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
                /* Inline extents end at the next sector boundary. */
                u64 size = btrfs_file_extent_inline_len(path->nodes[0],
                                                        path->slots[0], fi);
                extent_end = ALIGN(key->offset + size,
                                   sctx->send_root->sectorsize);
        } else {
                extent_end = key->offset +
                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
        }

        if (path->slots[0] == 0 &&
            sctx->cur_inode_last_extent < key->offset) {
                /*
                 * We might have skipped entire leafs that contained only
                 * file extent items for our current inode. These leafs have
                 * a generation number smaller (older) than the one in the
                 * current leaf and the leaf our last extent came from, and
                 * are located between these 2 leafs.
                 */
                ret = get_last_extent(sctx, key->offset - 1);
                if (ret)
                        return ret;
        }

        if (sctx->cur_inode_last_extent < key->offset)
                ret = send_hole(sctx, key->offset);
        sctx->cur_inode_last_extent = extent_end;
        return ret;
}
4958
/*
 * Process a single file extent item of the current inode: skip it entirely
 * if it is unchanged relative to the parent root, otherwise transfer it
 * via clone/write/update_extent and send any hole needed before it.
 *
 * Returns 0 on success or a negative errno.
 */
static int process_extent(struct send_ctx *sctx,
                          struct btrfs_path *path,
                          struct btrfs_key *key)
{
        struct clone_root *found_clone = NULL;
        int ret = 0;

        /* Symlink data is sent with the symlink command, not as extents. */
        if (S_ISLNK(sctx->cur_inode_mode))
                return 0;

        if (sctx->parent_root && !sctx->cur_inode_new) {
                /* Incremental send: skip extents that did not change. */
                ret = is_extent_unchanged(sctx, path, key);
                if (ret < 0)
                        goto out;
                if (ret) {
                        ret = 0;
                        goto out_hole;
                }
        } else {
                struct btrfs_file_extent_item *ei;
                u8 type;

                ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                    struct btrfs_file_extent_item);
                type = btrfs_file_extent_type(path->nodes[0], ei);
                if (type == BTRFS_FILE_EXTENT_PREALLOC ||
                    type == BTRFS_FILE_EXTENT_REG) {
                        /*
                         * The send spec does not have a prealloc command yet,
                         * so just leave a hole for prealloc'ed extents until
                         * we have enough commands queued up to justify rev'ing
                         * the send spec.
                         */
                        if (type == BTRFS_FILE_EXTENT_PREALLOC) {
                                ret = 0;
                                goto out;
                        }

                        /* Have a hole, just skip it. */
                        if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
                                ret = 0;
                                goto out;
                        }
                }
        }

        /* -ENOENT just means no clone source; fall back to plain writes. */
        ret = find_extent_clone(sctx, path, key->objectid, key->offset,
                        sctx->cur_inode_size, &found_clone);
        if (ret != -ENOENT && ret < 0)
                goto out;

        ret = send_write_or_clone(sctx, path, key, found_clone);
        if (ret)
                goto out;
out_hole:
        ret = maybe_send_hole(sctx, path, key);
out:
        return ret;
}
5018
5019static int process_all_extents(struct send_ctx *sctx)
5020{
5021        int ret;
5022        struct btrfs_root *root;
5023        struct btrfs_path *path;
5024        struct btrfs_key key;
5025        struct btrfs_key found_key;
5026        struct extent_buffer *eb;
5027        int slot;
5028
5029        root = sctx->send_root;
5030        path = alloc_path_for_send();
5031        if (!path)
5032                return -ENOMEM;
5033
5034        key.objectid = sctx->cmp_key->objectid;
5035        key.type = BTRFS_EXTENT_DATA_KEY;
5036        key.offset = 0;
5037        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5038        if (ret < 0)
5039                goto out;
5040
5041        while (1) {
5042                eb = path->nodes[0];
5043                slot = path->slots[0];
5044
5045                if (slot >= btrfs_header_nritems(eb)) {
5046                        ret = btrfs_next_leaf(root, path);
5047                        if (ret < 0) {
5048                                goto out;
5049                        } else if (ret > 0) {
5050                                ret = 0;
5051                                break;
5052                        }
5053                        continue;
5054                }
5055
5056                btrfs_item_key_to_cpu(eb, &found_key, slot);
5057
5058                if (found_key.objectid != key.objectid ||
5059                    found_key.type != key.type) {
5060                        ret = 0;
5061                        goto out;
5062                }
5063
5064                ret = process_extent(sctx, path, &found_key);
5065                if (ret < 0)
5066                        goto out;
5067
5068                path->slots[0]++;
5069        }
5070
5071out:
5072        btrfs_free_path(path);
5073        return ret;
5074}
5075
5076static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5077                                           int *pending_move,
5078                                           int *refs_processed)
5079{
5080        int ret = 0;
5081
5082        if (sctx->cur_ino == 0)
5083                goto out;
5084        if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5085            sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5086                goto out;
5087        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5088                goto out;
5089
5090        ret = process_recorded_refs(sctx, pending_move);
5091        if (ret < 0)
5092                goto out;
5093
5094        *refs_processed = 1;
5095out:
5096        return ret;
5097}
5098
/*
 * Finish off the current inode once all of its items were processed
 * (@at_end) or the tree comparison moved on to a key of another inode:
 * flush the recorded refs, send trailing holes and a truncate for regular
 * files, emit chown/chmod when owner/mode changed (or the inode is new),
 * apply pending child directory moves and finally send utimes.
 *
 * Returns 0 on success or a negative errno.
 */
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
        int ret = 0;
        u64 left_mode;
        u64 left_uid;
        u64 left_gid;
        u64 right_mode;
        u64 right_uid;
        u64 right_gid;
        int need_chmod = 0;
        int need_chown = 0;
        int pending_move = 0;
        int refs_processed = 0;

        ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
                                              &refs_processed);
        if (ret < 0)
                goto out;

        /*
         * We have processed the refs and thus need to advance send_progress.
         * Now, calls to get_cur_xxx will take the updated refs of the current
         * inode into account.
         *
         * On the other hand, if our current inode is a directory and couldn't
         * be moved/renamed because its parent was renamed/moved too and it has
         * a higher inode number, we can only move/rename our current inode
         * after we moved/renamed its parent. Therefore in this case operate on
         * the old path (pre move/rename) of our current inode, and the
         * move/rename will be performed later.
         */
        if (refs_processed && !pending_move)
                sctx->send_progress = sctx->cur_ino + 1;

        if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
                goto out;
        if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
                goto out;

        /* Current mode/uid/gid in the send root. */
        ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
                        &left_mode, &left_uid, &left_gid, NULL);
        if (ret < 0)
                goto out;

        if (!sctx->parent_root || sctx->cur_inode_new) {
                /* New inodes always need chown; chmod for all but symlinks. */
                need_chown = 1;
                if (!S_ISLNK(sctx->cur_inode_mode))
                        need_chmod = 1;
        } else {
                /* Compare against the parent root to detect changes. */
                ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
                                NULL, NULL, &right_mode, &right_uid,
                                &right_gid, NULL);
                if (ret < 0)
                        goto out;

                if (left_uid != right_uid || left_gid != right_gid)
                        need_chown = 1;
                if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
                        need_chmod = 1;
        }

        if (S_ISREG(sctx->cur_inode_mode)) {
                if (need_send_hole(sctx)) {
                        /* Send a trailing hole up to i_size if needed. */
                        if (sctx->cur_inode_last_extent == (u64)-1 ||
                            sctx->cur_inode_last_extent <
                            sctx->cur_inode_size) {
                                ret = get_last_extent(sctx, (u64)-1);
                                if (ret)
                                        goto out;
                        }
                        if (sctx->cur_inode_last_extent <
                            sctx->cur_inode_size) {
                                ret = send_hole(sctx, sctx->cur_inode_size);
                                if (ret)
                                        goto out;
                        }
                }
                ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
                                sctx->cur_inode_size);
                if (ret < 0)
                        goto out;
        }

        if (need_chown) {
                ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
                                left_uid, left_gid);
                if (ret < 0)
                        goto out;
        }
        if (need_chmod) {
                ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
                                left_mode);
                if (ret < 0)
                        goto out;
        }

        /*
         * If other directory inodes depended on our current directory
         * inode's move/rename, now do their move/rename operations.
         */
        if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
                ret = apply_children_dir_moves(sctx);
                if (ret)
                        goto out;
                /*
                 * Need to send that every time, no matter if it actually
                 * changed between the two trees as we have done changes to
                 * the inode before. If our inode is a directory and it's
                 * waiting to be moved/renamed, we will send its utimes when
                 * it's moved/renamed, therefore we don't need to do it here.
                 */
                sctx->send_progress = sctx->cur_ino + 1;
                ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
                if (ret < 0)
                        goto out;
        }

out:
        return ret;
}
5219
5220static int changed_inode(struct send_ctx *sctx,
5221                         enum btrfs_compare_tree_result result)
5222{
5223        int ret = 0;
5224        struct btrfs_key *key = sctx->cmp_key;
5225        struct btrfs_inode_item *left_ii = NULL;
5226        struct btrfs_inode_item *right_ii = NULL;
5227        u64 left_gen = 0;
5228        u64 right_gen = 0;
5229
5230        sctx->cur_ino = key->objectid;
5231        sctx->cur_inode_new_gen = 0;
5232        sctx->cur_inode_last_extent = (u64)-1;
5233
5234        /*
5235         * Set send_progress to current inode. This will tell all get_cur_xxx
5236         * functions that the current inode's refs are not updated yet. Later,
5237         * when process_recorded_refs is finished, it is set to cur_ino + 1.
5238         */
5239        sctx->send_progress = sctx->cur_ino;
5240
5241        if (result == BTRFS_COMPARE_TREE_NEW ||
5242            result == BTRFS_COMPARE_TREE_CHANGED) {
5243                left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
5244                                sctx->left_path->slots[0],
5245                                struct btrfs_inode_item);
5246                left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
5247                                left_ii);
5248        } else {
5249                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5250                                sctx->right_path->slots[0],
5251                                struct btrfs_inode_item);
5252                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5253                                right_ii);
5254        }
5255        if (result == BTRFS_COMPARE_TREE_CHANGED) {
5256                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5257                                sctx->right_path->slots[0],
5258                                struct btrfs_inode_item);
5259
5260                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5261                                right_ii);
5262
5263                /*
5264                 * The cur_ino = root dir case is special here. We can't treat
5265                 * the inode as deleted+reused because it would generate a
5266                 * stream that tries to delete/mkdir the root dir.
5267                 */
5268                if (left_gen != right_gen &&
5269                    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5270                        sctx->cur_inode_new_gen = 1;
5271        }
5272
5273        if (result == BTRFS_COMPARE_TREE_NEW) {
5274                sctx->cur_inode_gen = left_gen;
5275                sctx->cur_inode_new = 1;
5276                sctx->cur_inode_deleted = 0;
5277                sctx->cur_inode_size = btrfs_inode_size(
5278                                sctx->left_path->nodes[0], left_ii);
5279                sctx->cur_inode_mode = btrfs_inode_mode(
5280                                sctx->left_path->nodes[0], left_ii);
5281                sctx->cur_inode_rdev = btrfs_inode_rdev(
5282                                sctx->left_path->nodes[0], left_ii);
5283                if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5284                        ret = send_create_inode_if_needed(sctx);
5285        } else if (result == BTRFS_COMPARE_TREE_DELETED) {
5286                sctx->cur_inode_gen = right_gen;
5287                sctx->cur_inode_new = 0;
5288                sctx->cur_inode_deleted = 1;
5289                sctx->cur_inode_size = btrfs_inode_size(
5290                                sctx->right_path->nodes[0], right_ii);
5291                sctx->cur_inode_mode = btrfs_inode_mode(
5292                                sctx->right_path->nodes[0], right_ii);
5293        } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
5294                /*
5295                 * We need to do some special handling in case the inode was
5296                 * reported as changed with a changed generation number. This
5297                 * means that the original inode was deleted and new inode
5298                 * reused the same inum. So we have to treat the old inode as
5299                 * deleted and the new one as new.
5300                 */
5301                if (sctx->cur_inode_new_gen) {
5302                        /*
5303                         * First, process the inode as if it was deleted.
5304                         */
5305                        sctx->cur_inode_gen = right_gen;
5306                        sctx->cur_inode_new = 0;
5307                        sctx->cur_inode_deleted = 1;
5308                        sctx->cur_inode_size = btrfs_inode_size(
5309                                        sctx->right_path->nodes[0], right_ii);
5310                        sctx->cur_inode_mode = btrfs_inode_mode(
5311                                        sctx->right_path->nodes[0], right_ii);
5312                        ret = process_all_refs(sctx,
5313                                        BTRFS_COMPARE_TREE_DELETED);
5314                        if (ret < 0)
5315                                goto out;
5316
5317                        /*
5318                         * Now process the inode as if it was new.
5319                         */
5320                        sctx->cur_inode_gen = left_gen;
5321                        sctx->cur_inode_new = 1;
5322                        sctx->cur_inode_deleted = 0;
5323                        sctx->cur_inode_size = btrfs_inode_size(
5324                                        sctx->left_path->nodes[0], left_ii);
5325                        sctx->cur_inode_mode = btrfs_inode_mode(
5326                                        sctx->left_path->nodes[0], left_ii);
5327                        sctx->cur_inode_rdev = btrfs_inode_rdev(
5328                                        sctx->left_path->nodes[0], left_ii);
5329                        ret = send_create_inode_if_needed(sctx);
5330                        if (ret < 0)
5331                                goto out;
5332
5333                        ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
5334                        if (ret < 0)
5335                                goto out;
5336                        /*
5337                         * Advance send_progress now as we did not get into
5338                         * process_recorded_refs_if_needed in the new_gen case.
5339                         */
5340                        sctx->send_progress = sctx->cur_ino + 1;
5341
5342                        /*
5343                         * Now process all extents and xattrs of the inode as if
5344                         * they were all new.
5345                         */
5346                        ret = process_all_extents(sctx);
5347                        if (ret < 0)
5348                                goto out;
5349                        ret = process_all_new_xattrs(sctx);
5350                        if (ret < 0)
5351                                goto out;
5352                } else {
5353                        sctx->cur_inode_gen = left_gen;
5354                        sctx->cur_inode_new = 0;
5355                        sctx->cur_inode_new_gen = 0;
5356                        sctx->cur_inode_deleted = 0;
5357                        sctx->cur_inode_size = btrfs_inode_size(
5358                                        sctx->left_path->nodes[0], left_ii);
5359                        sctx->cur_inode_mode = btrfs_inode_mode(
5360                                        sctx->left_path->nodes[0], left_ii);
5361                }
5362        }
5363
5364out:
5365        return ret;
5366}
5367
5368/*
5369 * We have to process new refs before deleted refs, but compare_trees gives us
5370 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5371 * first and later process them in process_recorded_refs.
5372 * For the cur_inode_new_gen case, we skip recording completely because
5373 * changed_inode did already initiate processing of refs. The reason for this is
5374 * that in this case, compare_tree actually compares the refs of 2 different
5375 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5376 * refs of the right tree as deleted and all refs of the left tree as new.
5377 */
5378static int changed_ref(struct send_ctx *sctx,
5379                       enum btrfs_compare_tree_result result)
5380{
5381        int ret = 0;
5382
5383        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5384
5385        if (!sctx->cur_inode_new_gen &&
5386            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5387                if (result == BTRFS_COMPARE_TREE_NEW)
5388                        ret = record_new_ref(sctx);
5389                else if (result == BTRFS_COMPARE_TREE_DELETED)
5390                        ret = record_deleted_ref(sctx);
5391                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5392                        ret = record_changed_ref(sctx);
5393        }
5394
5395        return ret;
5396}
5397
5398/*
5399 * Process new/deleted/changed xattrs. We skip processing in the
5400 * cur_inode_new_gen case because changed_inode did already initiate processing
5401 * of xattrs. The reason is the same as in changed_ref
5402 */
5403static int changed_xattr(struct send_ctx *sctx,
5404                         enum btrfs_compare_tree_result result)
5405{
5406        int ret = 0;
5407
5408        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5409
5410        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5411                if (result == BTRFS_COMPARE_TREE_NEW)
5412                        ret = process_new_xattr(sctx);
5413                else if (result == BTRFS_COMPARE_TREE_DELETED)
5414                        ret = process_deleted_xattr(sctx);
5415                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5416                        ret = process_changed_xattr(sctx);
5417        }
5418
5419        return ret;
5420}
5421
5422/*
5423 * Process new/deleted/changed extents. We skip processing in the
5424 * cur_inode_new_gen case because changed_inode did already initiate processing
5425 * of extents. The reason is the same as in changed_ref
5426 */
5427static int changed_extent(struct send_ctx *sctx,
5428                          enum btrfs_compare_tree_result result)
5429{
5430        int ret = 0;
5431
5432        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5433
5434        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5435                if (result != BTRFS_COMPARE_TREE_DELETED)
5436                        ret = process_extent(sctx, sctx->left_path,
5437                                        sctx->cmp_key);
5438        }
5439
5440        return ret;
5441}
5442
5443static int dir_changed(struct send_ctx *sctx, u64 dir)
5444{
5445        u64 orig_gen, new_gen;
5446        int ret;
5447
5448        ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5449                             NULL, NULL);
5450        if (ret)
5451                return ret;
5452
5453        ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5454                             NULL, NULL, NULL);
5455        if (ret)
5456                return ret;
5457
5458        return (orig_gen != new_gen) ? 1 : 0;
5459}
5460
5461static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
5462                        struct btrfs_key *key)
5463{
5464        struct btrfs_inode_extref *extref;
5465        struct extent_buffer *leaf;
5466        u64 dirid = 0, last_dirid = 0;
5467        unsigned long ptr;
5468        u32 item_size;
5469        u32 cur_offset = 0;
5470        int ref_name_len;
5471        int ret = 0;
5472
5473        /* Easy case, just check this one dirid */
5474        if (key->type == BTRFS_INODE_REF_KEY) {
5475                dirid = key->offset;
5476
5477                ret = dir_changed(sctx, dirid);
5478                goto out;
5479        }
5480
5481        leaf = path->nodes[0];
5482        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
5483        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
5484        while (cur_offset < item_size) {
5485                extref = (struct btrfs_inode_extref *)(ptr +
5486                                                       cur_offset);
5487                dirid = btrfs_inode_extref_parent(leaf, extref);
5488                ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
5489                cur_offset += ref_name_len + sizeof(*extref);
5490                if (dirid == last_dirid)
5491                        continue;
5492                ret = dir_changed(sctx, dirid);
5493                if (ret)
5494                        break;
5495                last_dirid = dirid;
5496        }
5497out:
5498        return ret;
5499}
5500
5501/*
5502 * Updates compare related fields in sctx and simply forwards to the actual
5503 * changed_xxx functions.
5504 */
5505static int changed_cb(struct btrfs_root *left_root,
5506                      struct btrfs_root *right_root,
5507                      struct btrfs_path *left_path,
5508                      struct btrfs_path *right_path,
5509                      struct btrfs_key *key,
5510                      enum btrfs_compare_tree_result result,
5511                      void *ctx)
5512{
5513        int ret = 0;
5514        struct send_ctx *sctx = ctx;
5515
5516        if (result == BTRFS_COMPARE_TREE_SAME) {
5517                if (key->type == BTRFS_INODE_REF_KEY ||
5518                    key->type == BTRFS_INODE_EXTREF_KEY) {
5519                        ret = compare_refs(sctx, left_path, key);
5520                        if (!ret)
5521                                return 0;
5522                        if (ret < 0)
5523                                return ret;
5524                } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5525                        return maybe_send_hole(sctx, left_path, key);
5526                } else {
5527                        return 0;
5528                }
5529                result = BTRFS_COMPARE_TREE_CHANGED;
5530                ret = 0;
5531        }
5532
5533        sctx->left_path = left_path;
5534        sctx->right_path = right_path;
5535        sctx->cmp_key = key;
5536
5537        ret = finish_inode_if_needed(sctx, 0);
5538        if (ret < 0)
5539                goto out;
5540
5541        /* Ignore non-FS objects */
5542        if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
5543            key->objectid == BTRFS_FREE_SPACE_OBJECTID)
5544                goto out;
5545
5546        if (key->type == BTRFS_INODE_ITEM_KEY)
5547                ret = changed_inode(sctx, result);
5548        else if (key->type == BTRFS_INODE_REF_KEY ||
5549                 key->type == BTRFS_INODE_EXTREF_KEY)
5550                ret = changed_ref(sctx, result);
5551        else if (key->type == BTRFS_XATTR_ITEM_KEY)
5552                ret = changed_xattr(sctx, result);
5553        else if (key->type == BTRFS_EXTENT_DATA_KEY)
5554                ret = changed_extent(sctx, result);
5555
5556out:
5557        return ret;
5558}
5559
5560static int full_send_tree(struct send_ctx *sctx)
5561{
5562        int ret;
5563        struct btrfs_root *send_root = sctx->send_root;
5564        struct btrfs_key key;
5565        struct btrfs_key found_key;
5566        struct btrfs_path *path;
5567        struct extent_buffer *eb;
5568        int slot;
5569
5570        path = alloc_path_for_send();
5571        if (!path)
5572                return -ENOMEM;
5573
5574        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5575        key.type = BTRFS_INODE_ITEM_KEY;
5576        key.offset = 0;
5577
5578        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5579        if (ret < 0)
5580                goto out;
5581        if (ret)
5582                goto out_finish;
5583
5584        while (1) {
5585                eb = path->nodes[0];
5586                slot = path->slots[0];
5587                btrfs_item_key_to_cpu(eb, &found_key, slot);
5588
5589                ret = changed_cb(send_root, NULL, path, NULL,
5590                                &found_key, BTRFS_COMPARE_TREE_NEW, sctx);
5591                if (ret < 0)
5592                        goto out;
5593
5594                key.objectid = found_key.objectid;
5595                key.type = found_key.type;
5596                key.offset = found_key.offset + 1;
5597
5598                ret = btrfs_next_item(send_root, path);
5599                if (ret < 0)
5600                        goto out;
5601                if (ret) {
5602                        ret  = 0;
5603                        break;
5604                }
5605        }
5606
5607out_finish:
5608        ret = finish_inode_if_needed(sctx, 1);
5609
5610out:
5611        btrfs_free_path(path);
5612        return ret;
5613}
5614
5615static int send_subvol(struct send_ctx *sctx)
5616{
5617        int ret;
5618
5619        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
5620                ret = send_header(sctx);
5621                if (ret < 0)
5622                        goto out;
5623        }
5624
5625        ret = send_subvol_begin(sctx);
5626        if (ret < 0)
5627                goto out;
5628
5629        if (sctx->parent_root) {
5630                ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
5631                                changed_cb, sctx);
5632                if (ret < 0)
5633                        goto out;
5634                ret = finish_inode_if_needed(sctx, 1);
5635                if (ret < 0)
5636                        goto out;
5637        } else {
5638                ret = full_send_tree(sctx);
5639                if (ret < 0)
5640                        goto out;
5641        }
5642
5643out:
5644        free_recorded_refs(sctx);
5645        return ret;
5646}
5647
5648/*
5649 * If orphan cleanup did remove any orphans from a root, it means the tree
5650 * was modified and therefore the commit root is not the same as the current
5651 * root anymore. This is a problem, because send uses the commit root and
5652 * therefore can see inode items that don't exist in the current root anymore,
5653 * and for example make calls to btrfs_iget, which will do tree lookups based
5654 * on the current root and not on the commit root. Those lookups will fail,
5655 * returning a -ESTALE error, and making send fail with that error. So make
5656 * sure a send does not see any orphans we have just removed, and that it will
5657 * see the same inodes regardless of whether a transaction commit happened
5658 * before it started (meaning that the commit root will be the same as the
5659 * current root) or not.
5660 */
5661static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
5662{
5663        int i;
5664        struct btrfs_trans_handle *trans = NULL;
5665
5666again:
5667        if (sctx->parent_root &&
5668            sctx->parent_root->node != sctx->parent_root->commit_root)
5669                goto commit_trans;
5670
5671        for (i = 0; i < sctx->clone_roots_cnt; i++)
5672                if (sctx->clone_roots[i].root->node !=
5673                    sctx->clone_roots[i].root->commit_root)
5674                        goto commit_trans;
5675
5676        if (trans)
5677                return btrfs_end_transaction(trans, sctx->send_root);
5678
5679        return 0;
5680
5681commit_trans:
5682        /* Use any root, all fs roots will get their commit roots updated. */
5683        if (!trans) {
5684                trans = btrfs_join_transaction(sctx->send_root);
5685                if (IS_ERR(trans))
5686                        return PTR_ERR(trans);
5687                goto again;
5688        }
5689
5690        return btrfs_commit_transaction(trans, sctx->send_root);
5691}
5692
5693static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5694{
5695        spin_lock(&root->root_item_lock);
5696        root->send_in_progress--;
5697        /*
5698         * Not much left to do, we don't know why it's unbalanced and
5699         * can't blindly reset it to 0.
5700         */
5701        if (root->send_in_progress < 0)
5702                btrfs_err(root->fs_info,
5703                        "send_in_progres unbalanced %d root %llu",
5704                        root->send_in_progress, root->root_key.objectid);
5705        spin_unlock(&root->root_item_lock);
5706}
5707
5708long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5709{
5710        int ret = 0;
5711        struct btrfs_root *send_root;
5712        struct btrfs_root *clone_root;
5713        struct btrfs_fs_info *fs_info;
5714        struct btrfs_ioctl_send_args *arg = NULL;
5715        struct btrfs_key key;
5716        struct send_ctx *sctx = NULL;
5717        u32 i;
5718        u64 *clone_sources_tmp = NULL;
5719        int clone_sources_to_rollback = 0;
5720        int sort_clone_roots = 0;
5721        int index;
5722
5723        if (!capable(CAP_SYS_ADMIN))
5724                return -EPERM;
5725
5726        send_root = BTRFS_I(file_inode(mnt_file))->root;
5727        fs_info = send_root->fs_info;
5728
5729        /*
5730         * The subvolume must remain read-only during send, protect against
5731         * making it RW. This also protects against deletion.
5732         */
5733        spin_lock(&send_root->root_item_lock);
5734        send_root->send_in_progress++;
5735        spin_unlock(&send_root->root_item_lock);
5736
5737        /*
5738         * This is done when we lookup the root, it should already be complete
5739         * by the time we get here.
5740         */
5741        WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
5742
5743        /*
5744         * Userspace tools do the checks and warn the user if it's
5745         * not RO.
5746         */
5747        if (!btrfs_root_readonly(send_root)) {
5748                ret = -EPERM;
5749                goto out;
5750        }
5751
5752        arg = memdup_user(arg_, sizeof(*arg));
5753        if (IS_ERR(arg)) {
5754                ret = PTR_ERR(arg);
5755                arg = NULL;
5756                goto out;
5757        }
5758
5759        if (!access_ok(VERIFY_READ, arg->clone_sources,
5760                        sizeof(*arg->clone_sources) *
5761                        arg->clone_sources_count)) {
5762                ret = -EFAULT;
5763                goto out;
5764        }
5765
5766        if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
5767                ret = -EINVAL;
5768                goto out;
5769        }
5770
5771        sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
5772        if (!sctx) {
5773                ret = -ENOMEM;
5774                goto out;
5775        }
5776
5777        INIT_LIST_HEAD(&sctx->new_refs);
5778        INIT_LIST_HEAD(&sctx->deleted_refs);
5779        INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
5780        INIT_LIST_HEAD(&sctx->name_cache_list);
5781
5782        sctx->flags = arg->flags;
5783
5784        sctx->send_filp = fget(arg->send_fd);
5785        if (!sctx->send_filp) {
5786                ret = -EBADF;
5787                goto out;
5788        }
5789
5790        sctx->send_root = send_root;
5791        /*
5792         * Unlikely but possible, if the subvolume is marked for deletion but
5793         * is slow to remove the directory entry, send can still be started
5794         */
5795        if (btrfs_root_dead(sctx->send_root)) {
5796                ret = -EPERM;
5797                goto out;
5798        }
5799
5800        sctx->clone_roots_cnt = arg->clone_sources_count;
5801
5802        sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
5803        sctx->send_buf = vmalloc(sctx->send_max_size);
5804        if (!sctx->send_buf) {
5805                ret = -ENOMEM;
5806                goto out;
5807        }
5808
5809        sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
5810        if (!sctx->read_buf) {
5811                ret = -ENOMEM;
5812                goto out;
5813        }
5814
5815        sctx->pending_dir_moves = RB_ROOT;
5816        sctx->waiting_dir_moves = RB_ROOT;
5817        sctx->orphan_dirs = RB_ROOT;
5818
5819        sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
5820                        (arg->clone_sources_count + 1));
5821        if (!sctx->clone_roots) {
5822                ret = -ENOMEM;
5823                goto out;
5824        }
5825
5826        if (arg->clone_sources_count) {
5827                clone_sources_tmp = vmalloc(arg->clone_sources_count *
5828                                sizeof(*arg->clone_sources));
5829                if (!clone_sources_tmp) {
5830                        ret = -ENOMEM;
5831                        goto out;
5832                }
5833
5834                ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
5835                                arg->clone_sources_count *
5836                                sizeof(*arg->clone_sources));
5837                if (ret) {
5838                        ret = -EFAULT;
5839                        goto out;
5840                }
5841
5842                for (i = 0; i < arg->clone_sources_count; i++) {
5843                        key.objectid = clone_sources_tmp[i];
5844                        key.type = BTRFS_ROOT_ITEM_KEY;
5845                        key.offset = (u64)-1;
5846
5847                        index = srcu_read_lock(&fs_info->subvol_srcu);
5848
5849                        clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
5850                        if (IS_ERR(clone_root)) {
5851                                srcu_read_unlock(&fs_info->subvol_srcu, index);
5852                                ret = PTR_ERR(clone_root);
5853                                goto out;
5854                        }
5855                        clone_sources_to_rollback = i + 1;
5856                        spin_lock(&clone_root->root_item_lock);
5857                        clone_root->send_in_progress++;
5858                        if (!btrfs_root_readonly(clone_root)) {
5859                                spin_unlock(&clone_root->root_item_lock);
5860                                srcu_read_unlock(&fs_info->subvol_srcu, index);
5861                                ret = -EPERM;
5862                                goto out;
5863                        }
5864                        spin_unlock(&clone_root->root_item_lock);
5865                        srcu_read_unlock(&fs_info->subvol_srcu, index);
5866
5867                        sctx->clone_roots[i].root = clone_root;
5868                }
5869                vfree(clone_sources_tmp);
5870                clone_sources_tmp = NULL;
5871        }
5872
5873        if (arg->parent_root) {
5874                key.objectid = arg->parent_root;
5875                key.type = BTRFS_ROOT_ITEM_KEY;
5876                key.offset = (u64)-1;
5877
5878                index = srcu_read_lock(&fs_info->subvol_srcu);
5879
5880                sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
5881                if (IS_ERR(sctx->parent_root)) {
5882                        srcu_read_unlock(&fs_info->subvol_srcu, index);
5883                        ret = PTR_ERR(sctx->parent_root);
5884                        goto out;
5885                }
5886
5887                spin_lock(&sctx->parent_root->root_item_lock);
5888                sctx->parent_root->send_in_progress++;
5889                if (!btrfs_root_readonly(sctx->parent_root) ||
5890                                btrfs_root_dead(sctx->parent_root)) {
5891                        spin_unlock(&sctx->parent_root->root_item_lock);
5892                        srcu_read_unlock(&fs_info->subvol_srcu, index);
5893                        ret = -EPERM;
5894                        goto out;
5895                }
5896                spin_unlock(&sctx->parent_root->root_item_lock);
5897
5898                srcu_read_unlock(&fs_info->subvol_srcu, index);
5899        }
5900
5901        /*
5902         * Clones from send_root are allowed, but only if the clone source
5903         * is behind the current send position. This is checked while searching
5904         * for possible clone sources.
5905         */
5906        sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
5907
5908        /* We do a bsearch later */
5909        sort(sctx->clone_roots, sctx->clone_roots_cnt,
5910                        sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
5911                        NULL);
5912        sort_clone_roots = 1;
5913
5914        ret = ensure_commit_roots_uptodate(sctx);
5915        if (ret)
5916                goto out;
5917
5918        current->journal_info = BTRFS_SEND_TRANS_STUB;
5919        ret = send_subvol(sctx);
5920        current->journal_info = NULL;
5921        if (ret < 0)
5922                goto out;
5923
5924        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
5925                ret = begin_cmd(sctx, BTRFS_SEND_C_END);
5926                if (ret < 0)
5927                        goto out;
5928                ret = send_cmd(sctx);
5929                if (ret < 0)
5930                        goto out;
5931        }
5932
5933out:
5934        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5935        while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5936                struct rb_node *n;
5937                struct pending_dir_move *pm;
5938
5939                n = rb_first(&sctx->pending_dir_moves);
5940                pm = rb_entry(n, struct pending_dir_move, node);
5941                while (!list_empty(&pm->list)) {
5942                        struct pending_dir_move *pm2;
5943
5944                        pm2 = list_first_entry(&pm->list,
5945                                               struct pending_dir_move, list);
5946                        free_pending_move(sctx, pm2);
5947                }
5948                free_pending_move(sctx, pm);
5949        }
5950
5951        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5952        while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5953                struct rb_node *n;
5954                struct waiting_dir_move *dm;
5955
5956                n = rb_first(&sctx->waiting_dir_moves);
5957                dm = rb_entry(n, struct waiting_dir_move, node);
5958                rb_erase(&dm->node, &sctx->waiting_dir_moves);
5959                kfree(dm);
5960        }
5961
5962        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
5963        while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
5964                struct rb_node *n;
5965                struct orphan_dir_info *odi;
5966
5967                n = rb_first(&sctx->orphan_dirs);
5968                odi = rb_entry(n, struct orphan_dir_info, node);
5969                free_orphan_dir_info(sctx, odi);
5970        }
5971
5972        if (sort_clone_roots) {
5973                for (i = 0; i < sctx->clone_roots_cnt; i++)
5974                        btrfs_root_dec_send_in_progress(
5975                                        sctx->clone_roots[i].root);
5976        } else {
5977                for (i = 0; sctx && i < clone_sources_to_rollback; i++)
5978                        btrfs_root_dec_send_in_progress(
5979                                        sctx->clone_roots[i].root);
5980
5981                btrfs_root_dec_send_in_progress(send_root);
5982        }
5983        if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
5984                btrfs_root_dec_send_in_progress(sctx->parent_root);
5985
5986        kfree(arg);
5987        vfree(clone_sources_tmp);
5988
5989        if (sctx) {
5990                if (sctx->send_filp)
5991                        fput(sctx->send_filp);
5992
5993                vfree(sctx->clone_roots);
5994                vfree(sctx->send_buf);
5995                vfree(sctx->read_buf);
5996
5997                name_cache_free(sctx);
5998
5999                kfree(sctx);
6000        }
6001
6002        return ret;
6003}
6004