linux/fs/btrfs/send.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/bsearch.h>
  20#include <linux/fs.h>
  21#include <linux/file.h>
  22#include <linux/sort.h>
  23#include <linux/mount.h>
  24#include <linux/xattr.h>
  25#include <linux/posix_acl_xattr.h>
  26#include <linux/radix-tree.h>
  27#include <linux/vmalloc.h>
  28#include <linux/string.h>
  29
  30#include "send.h"
  31#include "backref.h"
  32#include "hash.h"
  33#include "locking.h"
  34#include "disk-io.h"
  35#include "btrfs_inode.h"
  36#include "transaction.h"
  37
  38static int g_verbose = 0;
  39
  40#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
  41
  42/*
  43 * A fs_path is a helper to dynamically build path names with unknown size.
  44 * It reallocates the internal buffer on demand.
  45 * It allows fast adding of path elements on the right side (normal path) and
  46 * fast adding to the left side (reversed path). A reversed path can also be
  47 * unreversed if needed.
  48 */
  49struct fs_path {
  50        union {
  51                struct {
  52                        char *start;
  53                        char *end;
  54
  55                        char *buf;
  56                        unsigned short buf_len:15;
  57                        unsigned short reversed:1;
  58                        char inline_buf[];
  59                };
  60                /*
  61                 * Average path length does not exceed 200 bytes, we'll have
  62                 * better packing in the slab and higher chance to satisfy
  63                 * a allocation later during send.
  64                 */
  65                char pad[256];
  66        };
  67};
  68#define FS_PATH_INLINE_SIZE \
  69        (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
  70
  71
  72/* reused for each extent */
  73struct clone_root {
  74        struct btrfs_root *root;
  75        u64 ino;
  76        u64 offset;
  77
  78        u64 found_refs;
  79};
  80
  81#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
  82#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
  83
  84struct send_ctx {
  85        struct file *send_filp;
  86        loff_t send_off;
  87        char *send_buf;
  88        u32 send_size;
  89        u32 send_max_size;
  90        u64 total_send_size;
  91        u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
  92        u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
  93
  94        struct btrfs_root *send_root;
  95        struct btrfs_root *parent_root;
  96        struct clone_root *clone_roots;
  97        int clone_roots_cnt;
  98
  99        /* current state of the compare_tree call */
 100        struct btrfs_path *left_path;
 101        struct btrfs_path *right_path;
 102        struct btrfs_key *cmp_key;
 103
 104        /*
 105         * infos of the currently processed inode. In case of deleted inodes,
 106         * these are the values from the deleted inode.
 107         */
 108        u64 cur_ino;
 109        u64 cur_inode_gen;
 110        int cur_inode_new;
 111        int cur_inode_new_gen;
 112        int cur_inode_deleted;
 113        u64 cur_inode_size;
 114        u64 cur_inode_mode;
 115        u64 cur_inode_rdev;
 116        u64 cur_inode_last_extent;
 117
 118        u64 send_progress;
 119
 120        struct list_head new_refs;
 121        struct list_head deleted_refs;
 122
 123        struct radix_tree_root name_cache;
 124        struct list_head name_cache_list;
 125        int name_cache_size;
 126
 127        struct file_ra_state ra;
 128
 129        char *read_buf;
 130
 131        /*
 132         * We process inodes by their increasing order, so if before an
 133         * incremental send we reverse the parent/child relationship of
 134         * directories such that a directory with a lower inode number was
 135         * the parent of a directory with a higher inode number, and the one
 136         * becoming the new parent got renamed too, we can't rename/move the
 137         * directory with lower inode number when we finish processing it - we
 138         * must process the directory with higher inode number first, then
 139         * rename/move it and then rename/move the directory with lower inode
 140         * number. Example follows.
 141         *
 142         * Tree state when the first send was performed:
 143         *
 144         * .
 145         * |-- a                   (ino 257)
 146         *     |-- b               (ino 258)
 147         *         |
 148         *         |
 149         *         |-- c           (ino 259)
 150         *         |   |-- d       (ino 260)
 151         *         |
 152         *         |-- c2          (ino 261)
 153         *
 154         * Tree state when the second (incremental) send is performed:
 155         *
 156         * .
 157         * |-- a                   (ino 257)
 158         *     |-- b               (ino 258)
 159         *         |-- c2          (ino 261)
 160         *             |-- d2      (ino 260)
 161         *                 |-- cc  (ino 259)
 162         *
 163         * The sequence of steps that lead to the second state was:
 164         *
 165         * mv /a/b/c/d /a/b/c2/d2
 166         * mv /a/b/c /a/b/c2/d2/cc
 167         *
 168         * "c" has lower inode number, but we can't move it (2nd mv operation)
 169         * before we move "d", which has higher inode number.
 170         *
 171         * So we just memorize which move/rename operations must be performed
 172         * later when their respective parent is processed and moved/renamed.
 173         */
 174
 175        /* Indexed by parent directory inode number. */
 176        struct rb_root pending_dir_moves;
 177
 178        /*
 179         * Reverse index, indexed by the inode number of a directory that
 180         * is waiting for the move/rename of its immediate parent before its
 181         * own move/rename can be performed.
 182         */
 183        struct rb_root waiting_dir_moves;
 184
 185        /*
 186         * A directory that is going to be rm'ed might have a child directory
 187         * which is in the pending directory moves index above. In this case,
 188         * the directory can only be removed after the move/rename of its child
 189         * is performed. Example:
 190         *
 191         * Parent snapshot:
 192         *
 193         * .                        (ino 256)
 194         * |-- a/                   (ino 257)
 195         *     |-- b/               (ino 258)
 196         *         |-- c/           (ino 259)
 197         *         |   |-- x/       (ino 260)
 198         *         |
 199         *         |-- y/           (ino 261)
 200         *
 201         * Send snapshot:
 202         *
 203         * .                        (ino 256)
 204         * |-- a/                   (ino 257)
 205         *     |-- b/               (ino 258)
 206         *         |-- YY/          (ino 261)
 207         *              |-- x/      (ino 260)
 208         *
 209         * Sequence of steps that lead to the send snapshot:
 210         * rm -f /a/b/c/foo.txt
 211         * mv /a/b/y /a/b/YY
 212         * mv /a/b/c/x /a/b/YY
 213         * rmdir /a/b/c
 214         *
 215         * When the child is processed, its move/rename is delayed until its
 216         * parent is processed (as explained above), but all other operations
 217         * like update utimes, chown, chgrp, etc, are performed and the paths
 218         * that it uses for those operations must use the orphanized name of
 219         * its parent (the directory we're going to rm later), so we need to
 220         * memorize that name.
 221         *
 222         * Indexed by the inode number of the directory to be deleted.
 223         */
 224        struct rb_root orphan_dirs;
 225};
 226
 227struct pending_dir_move {
 228        struct rb_node node;
 229        struct list_head list;
 230        u64 parent_ino;
 231        u64 ino;
 232        u64 gen;
 233        bool is_orphan;
 234        struct list_head update_refs;
 235};
 236
 237struct waiting_dir_move {
 238        struct rb_node node;
 239        u64 ino;
 240        /*
 241         * There might be some directory that could not be removed because it
 242         * was waiting for this directory inode to be moved first. Therefore
 243         * after this directory is moved, we can try to rmdir the ino rmdir_ino.
 244         */
 245        u64 rmdir_ino;
 246        bool orphanized;
 247};
 248
 249struct orphan_dir_info {
 250        struct rb_node node;
 251        u64 ino;
 252        u64 gen;
 253};
 254
 255struct name_cache_entry {
 256        struct list_head list;
 257        /*
 258         * radix_tree has only 32bit entries but we need to handle 64bit inums.
 259         * We use the lower 32bit of the 64bit inum to store it in the tree. If
 260         * more then one inum would fall into the same entry, we use radix_list
 261         * to store the additional entries. radix_list is also used to store
 262         * entries where two entries have the same inum but different
 263         * generations.
 264         */
 265        struct list_head radix_list;
 266        u64 ino;
 267        u64 gen;
 268        u64 parent_ino;
 269        u64 parent_gen;
 270        int ret;
 271        int need_later_update;
 272        int name_len;
 273        char name[];
 274};
 275
 276static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 277
 278static struct waiting_dir_move *
 279get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
 280
 281static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
 282
 283static int need_send_hole(struct send_ctx *sctx)
 284{
 285        return (sctx->parent_root && !sctx->cur_inode_new &&
 286                !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
 287                S_ISREG(sctx->cur_inode_mode));
 288}
 289
 290static void fs_path_reset(struct fs_path *p)
 291{
 292        if (p->reversed) {
 293                p->start = p->buf + p->buf_len - 1;
 294                p->end = p->start;
 295                *p->start = 0;
 296        } else {
 297                p->start = p->buf;
 298                p->end = p->start;
 299                *p->start = 0;
 300        }
 301}
 302
 303static struct fs_path *fs_path_alloc(void)
 304{
 305        struct fs_path *p;
 306
 307        p = kmalloc(sizeof(*p), GFP_NOFS);
 308        if (!p)
 309                return NULL;
 310        p->reversed = 0;
 311        p->buf = p->inline_buf;
 312        p->buf_len = FS_PATH_INLINE_SIZE;
 313        fs_path_reset(p);
 314        return p;
 315}
 316
 317static struct fs_path *fs_path_alloc_reversed(void)
 318{
 319        struct fs_path *p;
 320
 321        p = fs_path_alloc();
 322        if (!p)
 323                return NULL;
 324        p->reversed = 1;
 325        fs_path_reset(p);
 326        return p;
 327}
 328
 329static void fs_path_free(struct fs_path *p)
 330{
 331        if (!p)
 332                return;
 333        if (p->buf != p->inline_buf)
 334                kfree(p->buf);
 335        kfree(p);
 336}
 337
 338static int fs_path_len(struct fs_path *p)
 339{
 340        return p->end - p->start;
 341}
 342
 343static int fs_path_ensure_buf(struct fs_path *p, int len)
 344{
 345        char *tmp_buf;
 346        int path_len;
 347        int old_buf_len;
 348
 349        len++;
 350
 351        if (p->buf_len >= len)
 352                return 0;
 353
 354        if (len > PATH_MAX) {
 355                WARN_ON(1);
 356                return -ENOMEM;
 357        }
 358
 359        path_len = p->end - p->start;
 360        old_buf_len = p->buf_len;
 361
 362        /*
 363         * First time the inline_buf does not suffice
 364         */
 365        if (p->buf == p->inline_buf) {
 366                tmp_buf = kmalloc(len, GFP_NOFS);
 367                if (tmp_buf)
 368                        memcpy(tmp_buf, p->buf, old_buf_len);
 369        } else {
 370                tmp_buf = krealloc(p->buf, len, GFP_NOFS);
 371        }
 372        if (!tmp_buf)
 373                return -ENOMEM;
 374        p->buf = tmp_buf;
 375        /*
 376         * The real size of the buffer is bigger, this will let the fast path
 377         * happen most of the time
 378         */
 379        p->buf_len = ksize(p->buf);
 380
 381        if (p->reversed) {
 382                tmp_buf = p->buf + old_buf_len - path_len - 1;
 383                p->end = p->buf + p->buf_len - 1;
 384                p->start = p->end - path_len;
 385                memmove(p->start, tmp_buf, path_len + 1);
 386        } else {
 387                p->start = p->buf;
 388                p->end = p->start + path_len;
 389        }
 390        return 0;
 391}
 392
 393static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
 394                                   char **prepared)
 395{
 396        int ret;
 397        int new_len;
 398
 399        new_len = p->end - p->start + name_len;
 400        if (p->start != p->end)
 401                new_len++;
 402        ret = fs_path_ensure_buf(p, new_len);
 403        if (ret < 0)
 404                goto out;
 405
 406        if (p->reversed) {
 407                if (p->start != p->end)
 408                        *--p->start = '/';
 409                p->start -= name_len;
 410                *prepared = p->start;
 411        } else {
 412                if (p->start != p->end)
 413                        *p->end++ = '/';
 414                *prepared = p->end;
 415                p->end += name_len;
 416                *p->end = 0;
 417        }
 418
 419out:
 420        return ret;
 421}
 422
 423static int fs_path_add(struct fs_path *p, const char *name, int name_len)
 424{
 425        int ret;
 426        char *prepared;
 427
 428        ret = fs_path_prepare_for_add(p, name_len, &prepared);
 429        if (ret < 0)
 430                goto out;
 431        memcpy(prepared, name, name_len);
 432
 433out:
 434        return ret;
 435}
 436
 437static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
 438{
 439        int ret;
 440        char *prepared;
 441
 442        ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
 443        if (ret < 0)
 444                goto out;
 445        memcpy(prepared, p2->start, p2->end - p2->start);
 446
 447out:
 448        return ret;
 449}
 450
 451static int fs_path_add_from_extent_buffer(struct fs_path *p,
 452                                          struct extent_buffer *eb,
 453                                          unsigned long off, int len)
 454{
 455        int ret;
 456        char *prepared;
 457
 458        ret = fs_path_prepare_for_add(p, len, &prepared);
 459        if (ret < 0)
 460                goto out;
 461
 462        read_extent_buffer(eb, prepared, off, len);
 463
 464out:
 465        return ret;
 466}
 467
 468static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 469{
 470        int ret;
 471
 472        p->reversed = from->reversed;
 473        fs_path_reset(p);
 474
 475        ret = fs_path_add_path(p, from);
 476
 477        return ret;
 478}
 479
 480
 481static void fs_path_unreverse(struct fs_path *p)
 482{
 483        char *tmp;
 484        int len;
 485
 486        if (!p->reversed)
 487                return;
 488
 489        tmp = p->start;
 490        len = p->end - p->start;
 491        p->start = p->buf;
 492        p->end = p->start + len;
 493        memmove(p->start, tmp, len + 1);
 494        p->reversed = 0;
 495}
 496
 497static struct btrfs_path *alloc_path_for_send(void)
 498{
 499        struct btrfs_path *path;
 500
 501        path = btrfs_alloc_path();
 502        if (!path)
 503                return NULL;
 504        path->search_commit_root = 1;
 505        path->skip_locking = 1;
 506        path->need_commit_sem = 1;
 507        return path;
 508}
 509
 510static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 511{
 512        int ret;
 513        mm_segment_t old_fs;
 514        u32 pos = 0;
 515
 516        old_fs = get_fs();
 517        set_fs(KERNEL_DS);
 518
 519        while (pos < len) {
 520                ret = vfs_write(filp, (__force const char __user *)buf + pos,
 521                                len - pos, off);
 522                /* TODO handle that correctly */
 523                /*if (ret == -ERESTARTSYS) {
 524                        continue;
 525                }*/
 526                if (ret < 0)
 527                        goto out;
 528                if (ret == 0) {
 529                        ret = -EIO;
 530                        goto out;
 531                }
 532                pos += ret;
 533        }
 534
 535        ret = 0;
 536
 537out:
 538        set_fs(old_fs);
 539        return ret;
 540}
 541
 542static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 543{
 544        struct btrfs_tlv_header *hdr;
 545        int total_len = sizeof(*hdr) + len;
 546        int left = sctx->send_max_size - sctx->send_size;
 547
 548        if (unlikely(left < total_len))
 549                return -EOVERFLOW;
 550
 551        hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
 552        hdr->tlv_type = cpu_to_le16(attr);
 553        hdr->tlv_len = cpu_to_le16(len);
 554        memcpy(hdr + 1, data, len);
 555        sctx->send_size += total_len;
 556
 557        return 0;
 558}
 559
 560#define TLV_PUT_DEFINE_INT(bits) \
 561        static int tlv_put_u##bits(struct send_ctx *sctx,               \
 562                        u##bits attr, u##bits value)                    \
 563        {                                                               \
 564                __le##bits __tmp = cpu_to_le##bits(value);              \
 565                return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));      \
 566        }
 567
 568TLV_PUT_DEFINE_INT(64)
 569
 570static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 571                          const char *str, int len)
 572{
 573        if (len == -1)
 574                len = strlen(str);
 575        return tlv_put(sctx, attr, str, len);
 576}
 577
 578static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
 579                        const u8 *uuid)
 580{
 581        return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 582}
 583
 584static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 585                                  struct extent_buffer *eb,
 586                                  struct btrfs_timespec *ts)
 587{
 588        struct btrfs_timespec bts;
 589        read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
 590        return tlv_put(sctx, attr, &bts, sizeof(bts));
 591}
 592
 593
 594#define TLV_PUT(sctx, attrtype, attrlen, data) \
 595        do { \
 596                ret = tlv_put(sctx, attrtype, attrlen, data); \
 597                if (ret < 0) \
 598                        goto tlv_put_failure; \
 599        } while (0)
 600
 601#define TLV_PUT_INT(sctx, attrtype, bits, value) \
 602        do { \
 603                ret = tlv_put_u##bits(sctx, attrtype, value); \
 604                if (ret < 0) \
 605                        goto tlv_put_failure; \
 606        } while (0)
 607
 608#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
 609#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
 610#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
 611#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
 612#define TLV_PUT_STRING(sctx, attrtype, str, len) \
 613        do { \
 614                ret = tlv_put_string(sctx, attrtype, str, len); \
 615                if (ret < 0) \
 616                        goto tlv_put_failure; \
 617        } while (0)
 618#define TLV_PUT_PATH(sctx, attrtype, p) \
 619        do { \
 620                ret = tlv_put_string(sctx, attrtype, p->start, \
 621                        p->end - p->start); \
 622                if (ret < 0) \
 623                        goto tlv_put_failure; \
 624        } while(0)
 625#define TLV_PUT_UUID(sctx, attrtype, uuid) \
 626        do { \
 627                ret = tlv_put_uuid(sctx, attrtype, uuid); \
 628                if (ret < 0) \
 629                        goto tlv_put_failure; \
 630        } while (0)
 631#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 632        do { \
 633                ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
 634                if (ret < 0) \
 635                        goto tlv_put_failure; \
 636        } while (0)
 637
 638static int send_header(struct send_ctx *sctx)
 639{
 640        struct btrfs_stream_header hdr;
 641
 642        strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
 643        hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
 644
 645        return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
 646                                        &sctx->send_off);
 647}
 648
 649/*
 650 * For each command/item we want to send to userspace, we call this function.
 651 */
 652static int begin_cmd(struct send_ctx *sctx, int cmd)
 653{
 654        struct btrfs_cmd_header *hdr;
 655
 656        if (WARN_ON(!sctx->send_buf))
 657                return -EINVAL;
 658
 659        BUG_ON(sctx->send_size);
 660
 661        sctx->send_size += sizeof(*hdr);
 662        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 663        hdr->cmd = cpu_to_le16(cmd);
 664
 665        return 0;
 666}
 667
 668static int send_cmd(struct send_ctx *sctx)
 669{
 670        int ret;
 671        struct btrfs_cmd_header *hdr;
 672        u32 crc;
 673
 674        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 675        hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 676        hdr->crc = 0;
 677
 678        crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 679        hdr->crc = cpu_to_le32(crc);
 680
 681        ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
 682                                        &sctx->send_off);
 683
 684        sctx->total_send_size += sctx->send_size;
 685        sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
 686        sctx->send_size = 0;
 687
 688        return ret;
 689}
 690
 691/*
 692 * Sends a move instruction to user space
 693 */
 694static int send_rename(struct send_ctx *sctx,
 695                     struct fs_path *from, struct fs_path *to)
 696{
 697        int ret;
 698
 699verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
 700
 701        ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
 702        if (ret < 0)
 703                goto out;
 704
 705        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
 706        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
 707
 708        ret = send_cmd(sctx);
 709
 710tlv_put_failure:
 711out:
 712        return ret;
 713}
 714
 715/*
 716 * Sends a link instruction to user space
 717 */
 718static int send_link(struct send_ctx *sctx,
 719                     struct fs_path *path, struct fs_path *lnk)
 720{
 721        int ret;
 722
 723verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
 724
 725        ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
 726        if (ret < 0)
 727                goto out;
 728
 729        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 730        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
 731
 732        ret = send_cmd(sctx);
 733
 734tlv_put_failure:
 735out:
 736        return ret;
 737}
 738
 739/*
 740 * Sends an unlink instruction to user space
 741 */
 742static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
 743{
 744        int ret;
 745
 746verbose_printk("btrfs: send_unlink %s\n", path->start);
 747
 748        ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
 749        if (ret < 0)
 750                goto out;
 751
 752        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 753
 754        ret = send_cmd(sctx);
 755
 756tlv_put_failure:
 757out:
 758        return ret;
 759}
 760
 761/*
 762 * Sends a rmdir instruction to user space
 763 */
 764static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
 765{
 766        int ret;
 767
 768verbose_printk("btrfs: send_rmdir %s\n", path->start);
 769
 770        ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
 771        if (ret < 0)
 772                goto out;
 773
 774        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 775
 776        ret = send_cmd(sctx);
 777
 778tlv_put_failure:
 779out:
 780        return ret;
 781}
 782
 783/*
 784 * Helper function to retrieve some fields from an inode item.
 785 */
 786static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
 787                          u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
 788                          u64 *gid, u64 *rdev)
 789{
 790        int ret;
 791        struct btrfs_inode_item *ii;
 792        struct btrfs_key key;
 793
 794        key.objectid = ino;
 795        key.type = BTRFS_INODE_ITEM_KEY;
 796        key.offset = 0;
 797        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 798        if (ret) {
 799                if (ret > 0)
 800                        ret = -ENOENT;
 801                return ret;
 802        }
 803
 804        ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
 805                        struct btrfs_inode_item);
 806        if (size)
 807                *size = btrfs_inode_size(path->nodes[0], ii);
 808        if (gen)
 809                *gen = btrfs_inode_generation(path->nodes[0], ii);
 810        if (mode)
 811                *mode = btrfs_inode_mode(path->nodes[0], ii);
 812        if (uid)
 813                *uid = btrfs_inode_uid(path->nodes[0], ii);
 814        if (gid)
 815                *gid = btrfs_inode_gid(path->nodes[0], ii);
 816        if (rdev)
 817                *rdev = btrfs_inode_rdev(path->nodes[0], ii);
 818
 819        return ret;
 820}
 821
 822static int get_inode_info(struct btrfs_root *root,
 823                          u64 ino, u64 *size, u64 *gen,
 824                          u64 *mode, u64 *uid, u64 *gid,
 825                          u64 *rdev)
 826{
 827        struct btrfs_path *path;
 828        int ret;
 829
 830        path = alloc_path_for_send();
 831        if (!path)
 832                return -ENOMEM;
 833        ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
 834                               rdev);
 835        btrfs_free_path(path);
 836        return ret;
 837}
 838
 839typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
 840                                   struct fs_path *p,
 841                                   void *ctx);
 842
 843/*
 844 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 845 * btrfs_inode_extref.
 846 * The iterate callback may return a non zero value to stop iteration. This can
 847 * be a negative value for error codes or 1 to simply stop it.
 848 *
 849 * path must point to the INODE_REF or INODE_EXTREF when called.
 850 */
 851static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
 852                             struct btrfs_key *found_key, int resolve,
 853                             iterate_inode_ref_t iterate, void *ctx)
 854{
 855        struct extent_buffer *eb = path->nodes[0];
 856        struct btrfs_item *item;
 857        struct btrfs_inode_ref *iref;
 858        struct btrfs_inode_extref *extref;
 859        struct btrfs_path *tmp_path;
 860        struct fs_path *p;
 861        u32 cur = 0;
 862        u32 total;
 863        int slot = path->slots[0];
 864        u32 name_len;
 865        char *start;
 866        int ret = 0;
 867        int num = 0;
 868        int index;
 869        u64 dir;
 870        unsigned long name_off;
 871        unsigned long elem_size;
 872        unsigned long ptr;
 873
 874        p = fs_path_alloc_reversed();
 875        if (!p)
 876                return -ENOMEM;
 877
 878        tmp_path = alloc_path_for_send();
 879        if (!tmp_path) {
 880                fs_path_free(p);
 881                return -ENOMEM;
 882        }
 883
 884
 885        if (found_key->type == BTRFS_INODE_REF_KEY) {
 886                ptr = (unsigned long)btrfs_item_ptr(eb, slot,
 887                                                    struct btrfs_inode_ref);
 888                item = btrfs_item_nr(slot);
 889                total = btrfs_item_size(eb, item);
 890                elem_size = sizeof(*iref);
 891        } else {
 892                ptr = btrfs_item_ptr_offset(eb, slot);
 893                total = btrfs_item_size_nr(eb, slot);
 894                elem_size = sizeof(*extref);
 895        }
 896
 897        while (cur < total) {
 898                fs_path_reset(p);
 899
 900                if (found_key->type == BTRFS_INODE_REF_KEY) {
 901                        iref = (struct btrfs_inode_ref *)(ptr + cur);
 902                        name_len = btrfs_inode_ref_name_len(eb, iref);
 903                        name_off = (unsigned long)(iref + 1);
 904                        index = btrfs_inode_ref_index(eb, iref);
 905                        dir = found_key->offset;
 906                } else {
 907                        extref = (struct btrfs_inode_extref *)(ptr + cur);
 908                        name_len = btrfs_inode_extref_name_len(eb, extref);
 909                        name_off = (unsigned long)&extref->name;
 910                        index = btrfs_inode_extref_index(eb, extref);
 911                        dir = btrfs_inode_extref_parent(eb, extref);
 912                }
 913
 914                if (resolve) {
 915                        start = btrfs_ref_to_path(root, tmp_path, name_len,
 916                                                  name_off, eb, dir,
 917                                                  p->buf, p->buf_len);
 918                        if (IS_ERR(start)) {
 919                                ret = PTR_ERR(start);
 920                                goto out;
 921                        }
 922                        if (start < p->buf) {
 923                                /* overflow , try again with larger buffer */
 924                                ret = fs_path_ensure_buf(p,
 925                                                p->buf_len + p->buf - start);
 926                                if (ret < 0)
 927                                        goto out;
 928                                start = btrfs_ref_to_path(root, tmp_path,
 929                                                          name_len, name_off,
 930                                                          eb, dir,
 931                                                          p->buf, p->buf_len);
 932                                if (IS_ERR(start)) {
 933                                        ret = PTR_ERR(start);
 934                                        goto out;
 935                                }
 936                                BUG_ON(start < p->buf);
 937                        }
 938                        p->start = start;
 939                } else {
 940                        ret = fs_path_add_from_extent_buffer(p, eb, name_off,
 941                                                             name_len);
 942                        if (ret < 0)
 943                                goto out;
 944                }
 945
 946                cur += elem_size + name_len;
 947                ret = iterate(num, dir, index, p, ctx);
 948                if (ret)
 949                        goto out;
 950                num++;
 951        }
 952
 953out:
 954        btrfs_free_path(tmp_path);
 955        fs_path_free(p);
 956        return ret;
 957}
 958
 959typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
 960                                  const char *name, int name_len,
 961                                  const char *data, int data_len,
 962                                  u8 type, void *ctx);
 963
 964/*
 965 * Helper function to iterate the entries in ONE btrfs_dir_item.
 966 * The iterate callback may return a non zero value to stop iteration. This can
 967 * be a negative value for error codes or 1 to simply stop it.
 968 *
 969 * path must point to the dir item when called.
 970 */
 971static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
 972                            struct btrfs_key *found_key,
 973                            iterate_dir_item_t iterate, void *ctx)
 974{
 975        int ret = 0;
 976        struct extent_buffer *eb;
 977        struct btrfs_item *item;
 978        struct btrfs_dir_item *di;
 979        struct btrfs_key di_key;
 980        char *buf = NULL;
 981        int buf_len;
 982        u32 name_len;
 983        u32 data_len;
 984        u32 cur;
 985        u32 len;
 986        u32 total;
 987        int slot;
 988        int num;
 989        u8 type;
 990
 991        /*
 992         * Start with a small buffer (1 page). If later we end up needing more
 993         * space, which can happen for xattrs on a fs with a leaf size greater
 994         * then the page size, attempt to increase the buffer. Typically xattr
 995         * values are small.
 996         */
 997        buf_len = PATH_MAX;
 998        buf = kmalloc(buf_len, GFP_NOFS);
 999        if (!buf) {
1000                ret = -ENOMEM;
1001                goto out;
1002        }
1003
1004        eb = path->nodes[0];
1005        slot = path->slots[0];
1006        item = btrfs_item_nr(slot);
1007        di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1008        cur = 0;
1009        len = 0;
1010        total = btrfs_item_size(eb, item);
1011
1012        num = 0;
1013        while (cur < total) {
1014                name_len = btrfs_dir_name_len(eb, di);
1015                data_len = btrfs_dir_data_len(eb, di);
1016                type = btrfs_dir_type(eb, di);
1017                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
1018
1019                if (type == BTRFS_FT_XATTR) {
1020                        if (name_len > XATTR_NAME_MAX) {
1021                                ret = -ENAMETOOLONG;
1022                                goto out;
1023                        }
1024                        if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
1025                                ret = -E2BIG;
1026                                goto out;
1027                        }
1028                } else {
1029                        /*
1030                         * Path too long
1031                         */
1032                        if (name_len + data_len > PATH_MAX) {
1033                                ret = -ENAMETOOLONG;
1034                                goto out;
1035                        }
1036                }
1037
1038                if (name_len + data_len > buf_len) {
1039                        buf_len = name_len + data_len;
1040                        if (is_vmalloc_addr(buf)) {
1041                                vfree(buf);
1042                                buf = NULL;
1043                        } else {
1044                                char *tmp = krealloc(buf, buf_len,
1045                                                     GFP_NOFS | __GFP_NOWARN);
1046
1047                                if (!tmp)
1048                                        kfree(buf);
1049                                buf = tmp;
1050                        }
1051                        if (!buf) {
1052                                buf = vmalloc(buf_len);
1053                                if (!buf) {
1054                                        ret = -ENOMEM;
1055                                        goto out;
1056                                }
1057                        }
1058                }
1059
1060                read_extent_buffer(eb, buf, (unsigned long)(di + 1),
1061                                name_len + data_len);
1062
1063                len = sizeof(*di) + name_len + data_len;
1064                di = (struct btrfs_dir_item *)((char *)di + len);
1065                cur += len;
1066
1067                ret = iterate(num, &di_key, buf, name_len, buf + name_len,
1068                                data_len, type, ctx);
1069                if (ret < 0)
1070                        goto out;
1071                if (ret) {
1072                        ret = 0;
1073                        goto out;
1074                }
1075
1076                num++;
1077        }
1078
1079out:
1080        kvfree(buf);
1081        return ret;
1082}
1083
1084static int __copy_first_ref(int num, u64 dir, int index,
1085                            struct fs_path *p, void *ctx)
1086{
1087        int ret;
1088        struct fs_path *pt = ctx;
1089
1090        ret = fs_path_copy(pt, p);
1091        if (ret < 0)
1092                return ret;
1093
1094        /* we want the first only */
1095        return 1;
1096}
1097
1098/*
1099 * Retrieve the first path of an inode. If an inode has more then one
1100 * ref/hardlink, this is ignored.
1101 */
1102static int get_inode_path(struct btrfs_root *root,
1103                          u64 ino, struct fs_path *path)
1104{
1105        int ret;
1106        struct btrfs_key key, found_key;
1107        struct btrfs_path *p;
1108
1109        p = alloc_path_for_send();
1110        if (!p)
1111                return -ENOMEM;
1112
1113        fs_path_reset(path);
1114
1115        key.objectid = ino;
1116        key.type = BTRFS_INODE_REF_KEY;
1117        key.offset = 0;
1118
1119        ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1120        if (ret < 0)
1121                goto out;
1122        if (ret) {
1123                ret = 1;
1124                goto out;
1125        }
1126        btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1127        if (found_key.objectid != ino ||
1128            (found_key.type != BTRFS_INODE_REF_KEY &&
1129             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1130                ret = -ENOENT;
1131                goto out;
1132        }
1133
1134        ret = iterate_inode_ref(root, p, &found_key, 1,
1135                                __copy_first_ref, path);
1136        if (ret < 0)
1137                goto out;
1138        ret = 0;
1139
1140out:
1141        btrfs_free_path(p);
1142        return ret;
1143}
1144
1145struct backref_ctx {
1146        struct send_ctx *sctx;
1147
1148        struct btrfs_path *path;
1149        /* number of total found references */
1150        u64 found;
1151
1152        /*
1153         * used for clones found in send_root. clones found behind cur_objectid
1154         * and cur_offset are not considered as allowed clones.
1155         */
1156        u64 cur_objectid;
1157        u64 cur_offset;
1158
1159        /* may be truncated in case it's the last extent in a file */
1160        u64 extent_len;
1161
1162        /* data offset in the file extent item */
1163        u64 data_offset;
1164
1165        /* Just to check for bugs in backref resolving */
1166        int found_itself;
1167};
1168
1169static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1170{
1171        u64 root = (u64)(uintptr_t)key;
1172        struct clone_root *cr = (struct clone_root *)elt;
1173
1174        if (root < cr->root->objectid)
1175                return -1;
1176        if (root > cr->root->objectid)
1177                return 1;
1178        return 0;
1179}
1180
1181static int __clone_root_cmp_sort(const void *e1, const void *e2)
1182{
1183        struct clone_root *cr1 = (struct clone_root *)e1;
1184        struct clone_root *cr2 = (struct clone_root *)e2;
1185
1186        if (cr1->root->objectid < cr2->root->objectid)
1187                return -1;
1188        if (cr1->root->objectid > cr2->root->objectid)
1189                return 1;
1190        return 0;
1191}
1192
1193/*
1194 * Called for every backref that is found for the current extent.
1195 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1196 */
1197static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1198{
1199        struct backref_ctx *bctx = ctx_;
1200        struct clone_root *found;
1201        int ret;
1202        u64 i_size;
1203
1204        /* First check if the root is in the list of accepted clone sources */
1205        found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1206                        bctx->sctx->clone_roots_cnt,
1207                        sizeof(struct clone_root),
1208                        __clone_root_cmp_bsearch);
1209        if (!found)
1210                return 0;
1211
1212        if (found->root == bctx->sctx->send_root &&
1213            ino == bctx->cur_objectid &&
1214            offset == bctx->cur_offset) {
1215                bctx->found_itself = 1;
1216        }
1217
1218        /*
1219         * There are inodes that have extents that lie behind its i_size. Don't
1220         * accept clones from these extents.
1221         */
1222        ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
1223                               NULL, NULL, NULL);
1224        btrfs_release_path(bctx->path);
1225        if (ret < 0)
1226                return ret;
1227
1228        if (offset + bctx->data_offset + bctx->extent_len > i_size)
1229                return 0;
1230
1231        /*
1232         * Make sure we don't consider clones from send_root that are
1233         * behind the current inode/offset.
1234         */
1235        if (found->root == bctx->sctx->send_root) {
1236                /*
1237                 * TODO for the moment we don't accept clones from the inode
1238                 * that is currently send. We may change this when
1239                 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1240                 * file.
1241                 */
1242                if (ino >= bctx->cur_objectid)
1243                        return 0;
1244#if 0
1245                if (ino > bctx->cur_objectid)
1246                        return 0;
1247                if (offset + bctx->extent_len > bctx->cur_offset)
1248                        return 0;
1249#endif
1250        }
1251
1252        bctx->found++;
1253        found->found_refs++;
1254        if (ino < found->ino) {
1255                found->ino = ino;
1256                found->offset = offset;
1257        } else if (found->ino == ino) {
1258                /*
1259                 * same extent found more then once in the same file.
1260                 */
1261                if (found->offset > offset + bctx->extent_len)
1262                        found->offset = offset;
1263        }
1264
1265        return 0;
1266}
1267
1268/*
1269 * Given an inode, offset and extent item, it finds a good clone for a clone
1270 * instruction. Returns -ENOENT when none could be found. The function makes
1271 * sure that the returned clone is usable at the point where sending is at the
1272 * moment. This means, that no clones are accepted which lie behind the current
1273 * inode+offset.
1274 *
1275 * path must point to the extent item when called.
1276 */
1277static int find_extent_clone(struct send_ctx *sctx,
1278                             struct btrfs_path *path,
1279                             u64 ino, u64 data_offset,
1280                             u64 ino_size,
1281                             struct clone_root **found)
1282{
1283        int ret;
1284        int extent_type;
1285        u64 logical;
1286        u64 disk_byte;
1287        u64 num_bytes;
1288        u64 extent_item_pos;
1289        u64 flags = 0;
1290        struct btrfs_file_extent_item *fi;
1291        struct extent_buffer *eb = path->nodes[0];
1292        struct backref_ctx *backref_ctx = NULL;
1293        struct clone_root *cur_clone_root;
1294        struct btrfs_key found_key;
1295        struct btrfs_path *tmp_path;
1296        int compressed;
1297        u32 i;
1298
1299        tmp_path = alloc_path_for_send();
1300        if (!tmp_path)
1301                return -ENOMEM;
1302
1303        /* We only use this path under the commit sem */
1304        tmp_path->need_commit_sem = 0;
1305
1306        backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1307        if (!backref_ctx) {
1308                ret = -ENOMEM;
1309                goto out;
1310        }
1311
1312        backref_ctx->path = tmp_path;
1313
1314        if (data_offset >= ino_size) {
1315                /*
1316                 * There may be extents that lie behind the file's size.
1317                 * I at least had this in combination with snapshotting while
1318                 * writing large files.
1319                 */
1320                ret = 0;
1321                goto out;
1322        }
1323
1324        fi = btrfs_item_ptr(eb, path->slots[0],
1325                        struct btrfs_file_extent_item);
1326        extent_type = btrfs_file_extent_type(eb, fi);
1327        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1328                ret = -ENOENT;
1329                goto out;
1330        }
1331        compressed = btrfs_file_extent_compression(eb, fi);
1332
1333        num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1334        disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1335        if (disk_byte == 0) {
1336                ret = -ENOENT;
1337                goto out;
1338        }
1339        logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1340
1341        down_read(&sctx->send_root->fs_info->commit_root_sem);
1342        ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1343                                  &found_key, &flags);
1344        up_read(&sctx->send_root->fs_info->commit_root_sem);
1345        btrfs_release_path(tmp_path);
1346
1347        if (ret < 0)
1348                goto out;
1349        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1350                ret = -EIO;
1351                goto out;
1352        }
1353
1354        /*
1355         * Setup the clone roots.
1356         */
1357        for (i = 0; i < sctx->clone_roots_cnt; i++) {
1358                cur_clone_root = sctx->clone_roots + i;
1359                cur_clone_root->ino = (u64)-1;
1360                cur_clone_root->offset = 0;
1361                cur_clone_root->found_refs = 0;
1362        }
1363
1364        backref_ctx->sctx = sctx;
1365        backref_ctx->found = 0;
1366        backref_ctx->cur_objectid = ino;
1367        backref_ctx->cur_offset = data_offset;
1368        backref_ctx->found_itself = 0;
1369        backref_ctx->extent_len = num_bytes;
1370        /*
1371         * For non-compressed extents iterate_extent_inodes() gives us extent
1372         * offsets that already take into account the data offset, but not for
1373         * compressed extents, since the offset is logical and not relative to
1374         * the physical extent locations. We must take this into account to
1375         * avoid sending clone offsets that go beyond the source file's size,
1376         * which would result in the clone ioctl failing with -EINVAL on the
1377         * receiving end.
1378         */
1379        if (compressed == BTRFS_COMPRESS_NONE)
1380                backref_ctx->data_offset = 0;
1381        else
1382                backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
1383
1384        /*
1385         * The last extent of a file may be too large due to page alignment.
1386         * We need to adjust extent_len in this case so that the checks in
1387         * __iterate_backrefs work.
1388         */
1389        if (data_offset + num_bytes >= ino_size)
1390                backref_ctx->extent_len = ino_size - data_offset;
1391
1392        /*
1393         * Now collect all backrefs.
1394         */
1395        if (compressed == BTRFS_COMPRESS_NONE)
1396                extent_item_pos = logical - found_key.objectid;
1397        else
1398                extent_item_pos = 0;
1399        ret = iterate_extent_inodes(sctx->send_root->fs_info,
1400                                        found_key.objectid, extent_item_pos, 1,
1401                                        __iterate_backrefs, backref_ctx);
1402
1403        if (ret < 0)
1404                goto out;
1405
1406        if (!backref_ctx->found_itself) {
1407                /* found a bug in backref code? */
1408                ret = -EIO;
1409                btrfs_err(sctx->send_root->fs_info, "did not find backref in "
1410                                "send_root. inode=%llu, offset=%llu, "
1411                                "disk_byte=%llu found extent=%llu",
1412                                ino, data_offset, disk_byte, found_key.objectid);
1413                goto out;
1414        }
1415
1416verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1417                "ino=%llu, "
1418                "num_bytes=%llu, logical=%llu\n",
1419                data_offset, ino, num_bytes, logical);
1420
1421        if (!backref_ctx->found)
1422                verbose_printk("btrfs:    no clones found\n");
1423
1424        cur_clone_root = NULL;
1425        for (i = 0; i < sctx->clone_roots_cnt; i++) {
1426                if (sctx->clone_roots[i].found_refs) {
1427                        if (!cur_clone_root)
1428                                cur_clone_root = sctx->clone_roots + i;
1429                        else if (sctx->clone_roots[i].root == sctx->send_root)
1430                                /* prefer clones from send_root over others */
1431                                cur_clone_root = sctx->clone_roots + i;
1432                }
1433
1434        }
1435
1436        if (cur_clone_root) {
1437                *found = cur_clone_root;
1438                ret = 0;
1439        } else {
1440                ret = -ENOENT;
1441        }
1442
1443out:
1444        btrfs_free_path(tmp_path);
1445        kfree(backref_ctx);
1446        return ret;
1447}
1448
1449static int read_symlink(struct btrfs_root *root,
1450                        u64 ino,
1451                        struct fs_path *dest)
1452{
1453        int ret;
1454        struct btrfs_path *path;
1455        struct btrfs_key key;
1456        struct btrfs_file_extent_item *ei;
1457        u8 type;
1458        u8 compression;
1459        unsigned long off;
1460        int len;
1461
1462        path = alloc_path_for_send();
1463        if (!path)
1464                return -ENOMEM;
1465
1466        key.objectid = ino;
1467        key.type = BTRFS_EXTENT_DATA_KEY;
1468        key.offset = 0;
1469        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1470        if (ret < 0)
1471                goto out;
1472        BUG_ON(ret);
1473
1474        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1475                        struct btrfs_file_extent_item);
1476        type = btrfs_file_extent_type(path->nodes[0], ei);
1477        compression = btrfs_file_extent_compression(path->nodes[0], ei);
1478        BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1479        BUG_ON(compression);
1480
1481        off = btrfs_file_extent_inline_start(ei);
1482        len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1483
1484        ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1485
1486out:
1487        btrfs_free_path(path);
1488        return ret;
1489}
1490
1491/*
1492 * Helper function to generate a file name that is unique in the root of
1493 * send_root and parent_root. This is used to generate names for orphan inodes.
1494 */
1495static int gen_unique_name(struct send_ctx *sctx,
1496                           u64 ino, u64 gen,
1497                           struct fs_path *dest)
1498{
1499        int ret = 0;
1500        struct btrfs_path *path;
1501        struct btrfs_dir_item *di;
1502        char tmp[64];
1503        int len;
1504        u64 idx = 0;
1505
1506        path = alloc_path_for_send();
1507        if (!path)
1508                return -ENOMEM;
1509
1510        while (1) {
1511                len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1512                                ino, gen, idx);
1513                ASSERT(len < sizeof(tmp));
1514
1515                di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1516                                path, BTRFS_FIRST_FREE_OBJECTID,
1517                                tmp, strlen(tmp), 0);
1518                btrfs_release_path(path);
1519                if (IS_ERR(di)) {
1520                        ret = PTR_ERR(di);
1521                        goto out;
1522                }
1523                if (di) {
1524                        /* not unique, try again */
1525                        idx++;
1526                        continue;
1527                }
1528
1529                if (!sctx->parent_root) {
1530                        /* unique */
1531                        ret = 0;
1532                        break;
1533                }
1534
1535                di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1536                                path, BTRFS_FIRST_FREE_OBJECTID,
1537                                tmp, strlen(tmp), 0);
1538                btrfs_release_path(path);
1539                if (IS_ERR(di)) {
1540                        ret = PTR_ERR(di);
1541                        goto out;
1542                }
1543                if (di) {
1544                        /* not unique, try again */
1545                        idx++;
1546                        continue;
1547                }
1548                /* unique */
1549                break;
1550        }
1551
1552        ret = fs_path_add(dest, tmp, strlen(tmp));
1553
1554out:
1555        btrfs_free_path(path);
1556        return ret;
1557}
1558
1559enum inode_state {
1560        inode_state_no_change,
1561        inode_state_will_create,
1562        inode_state_did_create,
1563        inode_state_will_delete,
1564        inode_state_did_delete,
1565};
1566
1567static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1568{
1569        int ret;
1570        int left_ret;
1571        int right_ret;
1572        u64 left_gen;
1573        u64 right_gen;
1574
1575        ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1576                        NULL, NULL);
1577        if (ret < 0 && ret != -ENOENT)
1578                goto out;
1579        left_ret = ret;
1580
1581        if (!sctx->parent_root) {
1582                right_ret = -ENOENT;
1583        } else {
1584                ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1585                                NULL, NULL, NULL, NULL);
1586                if (ret < 0 && ret != -ENOENT)
1587                        goto out;
1588                right_ret = ret;
1589        }
1590
1591        if (!left_ret && !right_ret) {
1592                if (left_gen == gen && right_gen == gen) {
1593                        ret = inode_state_no_change;
1594                } else if (left_gen == gen) {
1595                        if (ino < sctx->send_progress)
1596                                ret = inode_state_did_create;
1597                        else
1598                                ret = inode_state_will_create;
1599                } else if (right_gen == gen) {
1600                        if (ino < sctx->send_progress)
1601                                ret = inode_state_did_delete;
1602                        else
1603                                ret = inode_state_will_delete;
1604                } else  {
1605                        ret = -ENOENT;
1606                }
1607        } else if (!left_ret) {
1608                if (left_gen == gen) {
1609                        if (ino < sctx->send_progress)
1610                                ret = inode_state_did_create;
1611                        else
1612                                ret = inode_state_will_create;
1613                } else {
1614                        ret = -ENOENT;
1615                }
1616        } else if (!right_ret) {
1617                if (right_gen == gen) {
1618                        if (ino < sctx->send_progress)
1619                                ret = inode_state_did_delete;
1620                        else
1621                                ret = inode_state_will_delete;
1622                } else {
1623                        ret = -ENOENT;
1624                }
1625        } else {
1626                ret = -ENOENT;
1627        }
1628
1629out:
1630        return ret;
1631}
1632
1633static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1634{
1635        int ret;
1636
1637        ret = get_cur_inode_state(sctx, ino, gen);
1638        if (ret < 0)
1639                goto out;
1640
1641        if (ret == inode_state_no_change ||
1642            ret == inode_state_did_create ||
1643            ret == inode_state_will_delete)
1644                ret = 1;
1645        else
1646                ret = 0;
1647
1648out:
1649        return ret;
1650}
1651
1652/*
1653 * Helper function to lookup a dir item in a dir.
1654 */
1655static int lookup_dir_item_inode(struct btrfs_root *root,
1656                                 u64 dir, const char *name, int name_len,
1657                                 u64 *found_inode,
1658                                 u8 *found_type)
1659{
1660        int ret = 0;
1661        struct btrfs_dir_item *di;
1662        struct btrfs_key key;
1663        struct btrfs_path *path;
1664
1665        path = alloc_path_for_send();
1666        if (!path)
1667                return -ENOMEM;
1668
1669        di = btrfs_lookup_dir_item(NULL, root, path,
1670                        dir, name, name_len, 0);
1671        if (!di) {
1672                ret = -ENOENT;
1673                goto out;
1674        }
1675        if (IS_ERR(di)) {
1676                ret = PTR_ERR(di);
1677                goto out;
1678        }
1679        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1680        if (key.type == BTRFS_ROOT_ITEM_KEY) {
1681                ret = -ENOENT;
1682                goto out;
1683        }
1684        *found_inode = key.objectid;
1685        *found_type = btrfs_dir_type(path->nodes[0], di);
1686
1687out:
1688        btrfs_free_path(path);
1689        return ret;
1690}
1691
1692/*
1693 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1694 * generation of the parent dir and the name of the dir entry.
1695 */
1696static int get_first_ref(struct btrfs_root *root, u64 ino,
1697                         u64 *dir, u64 *dir_gen, struct fs_path *name)
1698{
1699        int ret;
1700        struct btrfs_key key;
1701        struct btrfs_key found_key;
1702        struct btrfs_path *path;
1703        int len;
1704        u64 parent_dir;
1705
1706        path = alloc_path_for_send();
1707        if (!path)
1708                return -ENOMEM;
1709
1710        key.objectid = ino;
1711        key.type = BTRFS_INODE_REF_KEY;
1712        key.offset = 0;
1713
1714        ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1715        if (ret < 0)
1716                goto out;
1717        if (!ret)
1718                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1719                                path->slots[0]);
1720        if (ret || found_key.objectid != ino ||
1721            (found_key.type != BTRFS_INODE_REF_KEY &&
1722             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1723                ret = -ENOENT;
1724                goto out;
1725        }
1726
1727        if (found_key.type == BTRFS_INODE_REF_KEY) {
1728                struct btrfs_inode_ref *iref;
1729                iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1730                                      struct btrfs_inode_ref);
1731                len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1732                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1733                                                     (unsigned long)(iref + 1),
1734                                                     len);
1735                parent_dir = found_key.offset;
1736        } else {
1737                struct btrfs_inode_extref *extref;
1738                extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1739                                        struct btrfs_inode_extref);
1740                len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1741                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1742                                        (unsigned long)&extref->name, len);
1743                parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1744        }
1745        if (ret < 0)
1746                goto out;
1747        btrfs_release_path(path);
1748
1749        if (dir_gen) {
1750                ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
1751                                     NULL, NULL, NULL);
1752                if (ret < 0)
1753                        goto out;
1754        }
1755
1756        *dir = parent_dir;
1757
1758out:
1759        btrfs_free_path(path);
1760        return ret;
1761}
1762
1763static int is_first_ref(struct btrfs_root *root,
1764                        u64 ino, u64 dir,
1765                        const char *name, int name_len)
1766{
1767        int ret;
1768        struct fs_path *tmp_name;
1769        u64 tmp_dir;
1770
1771        tmp_name = fs_path_alloc();
1772        if (!tmp_name)
1773                return -ENOMEM;
1774
1775        ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
1776        if (ret < 0)
1777                goto out;
1778
1779        if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1780                ret = 0;
1781                goto out;
1782        }
1783
1784        ret = !memcmp(tmp_name->start, name, name_len);
1785
1786out:
1787        fs_path_free(tmp_name);
1788        return ret;
1789}
1790
1791/*
1792 * Used by process_recorded_refs to determine if a new ref would overwrite an
1793 * already existing ref. In case it detects an overwrite, it returns the
1794 * inode/gen in who_ino/who_gen.
1795 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1796 * to make sure later references to the overwritten inode are possible.
1797 * Orphanizing is however only required for the first ref of an inode.
1798 * process_recorded_refs does an additional is_first_ref check to see if
1799 * orphanizing is really required.
1800 */
1801static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1802                              const char *name, int name_len,
1803                              u64 *who_ino, u64 *who_gen)
1804{
1805        int ret = 0;
1806        u64 gen;
1807        u64 other_inode = 0;
1808        u8 other_type = 0;
1809
1810        if (!sctx->parent_root)
1811                goto out;
1812
1813        ret = is_inode_existent(sctx, dir, dir_gen);
1814        if (ret <= 0)
1815                goto out;
1816
1817        /*
1818         * If we have a parent root we need to verify that the parent dir was
1819         * not delted and then re-created, if it was then we have no overwrite
1820         * and we can just unlink this entry.
1821         */
1822        if (sctx->parent_root) {
1823                ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1824                                     NULL, NULL, NULL);
1825                if (ret < 0 && ret != -ENOENT)
1826                        goto out;
1827                if (ret) {
1828                        ret = 0;
1829                        goto out;
1830                }
1831                if (gen != dir_gen)
1832                        goto out;
1833        }
1834
1835        ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1836                        &other_inode, &other_type);
1837        if (ret < 0 && ret != -ENOENT)
1838                goto out;
1839        if (ret) {
1840                ret = 0;
1841                goto out;
1842        }
1843
1844        /*
1845         * Check if the overwritten ref was already processed. If yes, the ref
1846         * was already unlinked/moved, so we can safely assume that we will not
1847         * overwrite anything at this point in time.
1848         */
1849        if (other_inode > sctx->send_progress) {
1850                ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1851                                who_gen, NULL, NULL, NULL, NULL);
1852                if (ret < 0)
1853                        goto out;
1854
1855                ret = 1;
1856                *who_ino = other_inode;
1857        } else {
1858                ret = 0;
1859        }
1860
1861out:
1862        return ret;
1863}
1864
1865/*
1866 * Checks if the ref was overwritten by an already processed inode. This is
1867 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1868 * thus the orphan name needs be used.
1869 * process_recorded_refs also uses it to avoid unlinking of refs that were
1870 * overwritten.
1871 */
1872static int did_overwrite_ref(struct send_ctx *sctx,
1873                            u64 dir, u64 dir_gen,
1874                            u64 ino, u64 ino_gen,
1875                            const char *name, int name_len)
1876{
1877        int ret = 0;
1878        u64 gen;
1879        u64 ow_inode;
1880        u8 other_type;
1881
1882        if (!sctx->parent_root)
1883                goto out;
1884
1885        ret = is_inode_existent(sctx, dir, dir_gen);
1886        if (ret <= 0)
1887                goto out;
1888
1889        /* check if the ref was overwritten by another ref */
1890        ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1891                        &ow_inode, &other_type);
1892        if (ret < 0 && ret != -ENOENT)
1893                goto out;
1894        if (ret) {
1895                /* was never and will never be overwritten */
1896                ret = 0;
1897                goto out;
1898        }
1899
1900        ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1901                        NULL, NULL);
1902        if (ret < 0)
1903                goto out;
1904
1905        if (ow_inode == ino && gen == ino_gen) {
1906                ret = 0;
1907                goto out;
1908        }
1909
1910        /*
1911         * We know that it is or will be overwritten. Check this now.
1912         * The current inode being processed might have been the one that caused
1913         * inode 'ino' to be orphanized, therefore check if ow_inode matches
1914         * the current inode being processed.
1915         */
1916        if ((ow_inode < sctx->send_progress) ||
1917            (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
1918             gen == sctx->cur_inode_gen))
1919                ret = 1;
1920        else
1921                ret = 0;
1922
1923out:
1924        return ret;
1925}
1926
1927/*
1928 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1929 * that got overwritten. This is used by process_recorded_refs to determine
1930 * if it has to use the path as returned by get_cur_path or the orphan name.
1931 */
1932static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1933{
1934        int ret = 0;
1935        struct fs_path *name = NULL;
1936        u64 dir;
1937        u64 dir_gen;
1938
1939        if (!sctx->parent_root)
1940                goto out;
1941
1942        name = fs_path_alloc();
1943        if (!name)
1944                return -ENOMEM;
1945
1946        ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1947        if (ret < 0)
1948                goto out;
1949
1950        ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1951                        name->start, fs_path_len(name));
1952
1953out:
1954        fs_path_free(name);
1955        return ret;
1956}
1957
1958/*
1959 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1960 * so we need to do some special handling in case we have clashes. This function
1961 * takes care of this with the help of name_cache_entry::radix_list.
1962 * In case of error, nce is kfreed.
1963 */
1964static int name_cache_insert(struct send_ctx *sctx,
1965                             struct name_cache_entry *nce)
1966{
1967        int ret = 0;
1968        struct list_head *nce_head;
1969
1970        nce_head = radix_tree_lookup(&sctx->name_cache,
1971                        (unsigned long)nce->ino);
1972        if (!nce_head) {
1973                nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1974                if (!nce_head) {
1975                        kfree(nce);
1976                        return -ENOMEM;
1977                }
1978                INIT_LIST_HEAD(nce_head);
1979
1980                ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1981                if (ret < 0) {
1982                        kfree(nce_head);
1983                        kfree(nce);
1984                        return ret;
1985                }
1986        }
1987        list_add_tail(&nce->radix_list, nce_head);
1988        list_add_tail(&nce->list, &sctx->name_cache_list);
1989        sctx->name_cache_size++;
1990
1991        return ret;
1992}
1993
1994static void name_cache_delete(struct send_ctx *sctx,
1995                              struct name_cache_entry *nce)
1996{
1997        struct list_head *nce_head;
1998
1999        nce_head = radix_tree_lookup(&sctx->name_cache,
2000                        (unsigned long)nce->ino);
2001        if (!nce_head) {
2002                btrfs_err(sctx->send_root->fs_info,
2003              "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
2004                        nce->ino, sctx->name_cache_size);
2005        }
2006
2007        list_del(&nce->radix_list);
2008        list_del(&nce->list);
2009        sctx->name_cache_size--;
2010
2011        /*
2012         * We may not get to the final release of nce_head if the lookup fails
2013         */
2014        if (nce_head && list_empty(nce_head)) {
2015                radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
2016                kfree(nce_head);
2017        }
2018}
2019
2020static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
2021                                                    u64 ino, u64 gen)
2022{
2023        struct list_head *nce_head;
2024        struct name_cache_entry *cur;
2025
2026        nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
2027        if (!nce_head)
2028                return NULL;
2029
2030        list_for_each_entry(cur, nce_head, radix_list) {
2031                if (cur->ino == ino && cur->gen == gen)
2032                        return cur;
2033        }
2034        return NULL;
2035}
2036
2037/*
2038 * Removes the entry from the list and adds it back to the end. This marks the
2039 * entry as recently used so that name_cache_clean_unused does not remove it.
2040 */
2041static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
2042{
2043        list_del(&nce->list);
2044        list_add_tail(&nce->list, &sctx->name_cache_list);
2045}
2046
2047/*
2048 * Remove some entries from the beginning of name_cache_list.
2049 */
2050static void name_cache_clean_unused(struct send_ctx *sctx)
2051{
2052        struct name_cache_entry *nce;
2053
2054        if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
2055                return;
2056
2057        while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
2058                nce = list_entry(sctx->name_cache_list.next,
2059                                struct name_cache_entry, list);
2060                name_cache_delete(sctx, nce);
2061                kfree(nce);
2062        }
2063}
2064
2065static void name_cache_free(struct send_ctx *sctx)
2066{
2067        struct name_cache_entry *nce;
2068
2069        while (!list_empty(&sctx->name_cache_list)) {
2070                nce = list_entry(sctx->name_cache_list.next,
2071                                struct name_cache_entry, list);
2072                name_cache_delete(sctx, nce);
2073                kfree(nce);
2074        }
2075}
2076
2077/*
2078 * Used by get_cur_path for each ref up to the root.
2079 * Returns 0 if it succeeded.
2080 * Returns 1 if the inode is not existent or got overwritten. In that case, the
2081 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
2082 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
2083 * Returns <0 in case of error.
2084 */
2085static int __get_cur_name_and_parent(struct send_ctx *sctx,
2086                                     u64 ino, u64 gen,
2087                                     u64 *parent_ino,
2088                                     u64 *parent_gen,
2089                                     struct fs_path *dest)
2090{
2091        int ret;
2092        int nce_ret;
2093        struct name_cache_entry *nce = NULL;
2094
2095        /*
2096         * First check if we already did a call to this function with the same
2097         * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
2098         * return the cached result.
2099         */
2100        nce = name_cache_search(sctx, ino, gen);
2101        if (nce) {
2102                if (ino < sctx->send_progress && nce->need_later_update) {
2103                        name_cache_delete(sctx, nce);
2104                        kfree(nce);
2105                        nce = NULL;
2106                } else {
2107                        name_cache_used(sctx, nce);
2108                        *parent_ino = nce->parent_ino;
2109                        *parent_gen = nce->parent_gen;
2110                        ret = fs_path_add(dest, nce->name, nce->name_len);
2111                        if (ret < 0)
2112                                goto out;
2113                        ret = nce->ret;
2114                        goto out;
2115                }
2116        }
2117
2118        /*
2119         * If the inode is not existent yet, add the orphan name and return 1.
2120         * This should only happen for the parent dir that we determine in
2121         * __record_new_ref
2122         */
2123        ret = is_inode_existent(sctx, ino, gen);
2124        if (ret < 0)
2125                goto out;
2126
2127        if (!ret) {
2128                ret = gen_unique_name(sctx, ino, gen, dest);
2129                if (ret < 0)
2130                        goto out;
2131                ret = 1;
2132                goto out_cache;
2133        }
2134
2135        /*
2136         * Depending on whether the inode was already processed or not, use
2137         * send_root or parent_root for ref lookup.
2138         */
2139        if (ino < sctx->send_progress)
2140                ret = get_first_ref(sctx->send_root, ino,
2141                                    parent_ino, parent_gen, dest);
2142        else
2143                ret = get_first_ref(sctx->parent_root, ino,
2144                                    parent_ino, parent_gen, dest);
2145        if (ret < 0)
2146                goto out;
2147
2148        /*
2149         * Check if the ref was overwritten by an inode's ref that was processed
2150         * earlier. If yes, treat as orphan and return 1.
2151         */
2152        ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
2153                        dest->start, dest->end - dest->start);
2154        if (ret < 0)
2155                goto out;
2156        if (ret) {
2157                fs_path_reset(dest);
2158                ret = gen_unique_name(sctx, ino, gen, dest);
2159                if (ret < 0)
2160                        goto out;
2161                ret = 1;
2162        }
2163
2164out_cache:
2165        /*
2166         * Store the result of the lookup in the name cache.
2167         */
2168        nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2169        if (!nce) {
2170                ret = -ENOMEM;
2171                goto out;
2172        }
2173
2174        nce->ino = ino;
2175        nce->gen = gen;
2176        nce->parent_ino = *parent_ino;
2177        nce->parent_gen = *parent_gen;
2178        nce->name_len = fs_path_len(dest);
2179        nce->ret = ret;
2180        strcpy(nce->name, dest->start);
2181
2182        if (ino < sctx->send_progress)
2183                nce->need_later_update = 0;
2184        else
2185                nce->need_later_update = 1;
2186
2187        nce_ret = name_cache_insert(sctx, nce);
2188        if (nce_ret < 0)
2189                ret = nce_ret;
2190        name_cache_clean_unused(sctx);
2191
2192out:
2193        return ret;
2194}
2195
2196/*
2197 * Magic happens here. This function returns the first ref to an inode as it
2198 * would look like while receiving the stream at this point in time.
2199 * We walk the path up to the root. For every inode in between, we check if it
2200 * was already processed/sent. If yes, we continue with the parent as found
2201 * in send_root. If not, we continue with the parent as found in parent_root.
2202 * If we encounter an inode that was deleted at this point in time, we use the
2203 * inodes "orphan" name instead of the real name and stop. Same with new inodes
2204 * that were not created yet and overwritten inodes/refs.
2205 *
2206 * When do we have have orphan inodes:
2207 * 1. When an inode is freshly created and thus no valid refs are available yet
2208 * 2. When a directory lost all it's refs (deleted) but still has dir items
2209 *    inside which were not processed yet (pending for move/delete). If anyone
2210 *    tried to get the path to the dir items, it would get a path inside that
2211 *    orphan directory.
2212 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2213 *    of an unprocessed inode. If in that case the first ref would be
2214 *    overwritten, the overwritten inode gets "orphanized". Later when we
2215 *    process this overwritten inode, it is restored at a new place by moving
2216 *    the orphan inode.
2217 *
2218 * sctx->send_progress tells this function at which point in time receiving
2219 * would be.
2220 */
2221static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2222                        struct fs_path *dest)
2223{
2224        int ret = 0;
2225        struct fs_path *name = NULL;
2226        u64 parent_inode = 0;
2227        u64 parent_gen = 0;
2228        int stop = 0;
2229
2230        name = fs_path_alloc();
2231        if (!name) {
2232                ret = -ENOMEM;
2233                goto out;
2234        }
2235
2236        dest->reversed = 1;
2237        fs_path_reset(dest);
2238
2239        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2240                struct waiting_dir_move *wdm;
2241
2242                fs_path_reset(name);
2243
2244                if (is_waiting_for_rm(sctx, ino)) {
2245                        ret = gen_unique_name(sctx, ino, gen, name);
2246                        if (ret < 0)
2247                                goto out;
2248                        ret = fs_path_add_path(dest, name);
2249                        break;
2250                }
2251
2252                wdm = get_waiting_dir_move(sctx, ino);
2253                if (wdm && wdm->orphanized) {
2254                        ret = gen_unique_name(sctx, ino, gen, name);
2255                        stop = 1;
2256                } else if (wdm) {
2257                        ret = get_first_ref(sctx->parent_root, ino,
2258                                            &parent_inode, &parent_gen, name);
2259                } else {
2260                        ret = __get_cur_name_and_parent(sctx, ino, gen,
2261                                                        &parent_inode,
2262                                                        &parent_gen, name);
2263                        if (ret)
2264                                stop = 1;
2265                }
2266
2267                if (ret < 0)
2268                        goto out;
2269
2270                ret = fs_path_add_path(dest, name);
2271                if (ret < 0)
2272                        goto out;
2273
2274                ino = parent_inode;
2275                gen = parent_gen;
2276        }
2277
2278out:
2279        fs_path_free(name);
2280        if (!ret)
2281                fs_path_unreverse(dest);
2282        return ret;
2283}
2284
2285/*
2286 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2287 */
2288static int send_subvol_begin(struct send_ctx *sctx)
2289{
2290        int ret;
2291        struct btrfs_root *send_root = sctx->send_root;
2292        struct btrfs_root *parent_root = sctx->parent_root;
2293        struct btrfs_path *path;
2294        struct btrfs_key key;
2295        struct btrfs_root_ref *ref;
2296        struct extent_buffer *leaf;
2297        char *name = NULL;
2298        int namelen;
2299
2300        path = btrfs_alloc_path();
2301        if (!path)
2302                return -ENOMEM;
2303
2304        name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2305        if (!name) {
2306                btrfs_free_path(path);
2307                return -ENOMEM;
2308        }
2309
2310        key.objectid = send_root->objectid;
2311        key.type = BTRFS_ROOT_BACKREF_KEY;
2312        key.offset = 0;
2313
2314        ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2315                                &key, path, 1, 0);
2316        if (ret < 0)
2317                goto out;
2318        if (ret) {
2319                ret = -ENOENT;
2320                goto out;
2321        }
2322
2323        leaf = path->nodes[0];
2324        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2325        if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2326            key.objectid != send_root->objectid) {
2327                ret = -ENOENT;
2328                goto out;
2329        }
2330        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2331        namelen = btrfs_root_ref_name_len(leaf, ref);
2332        read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2333        btrfs_release_path(path);
2334
2335        if (parent_root) {
2336                ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2337                if (ret < 0)
2338                        goto out;
2339        } else {
2340                ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2341                if (ret < 0)
2342                        goto out;
2343        }
2344
2345        TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2346
2347        if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
2348                TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2349                            sctx->send_root->root_item.received_uuid);
2350        else
2351                TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2352                            sctx->send_root->root_item.uuid);
2353
2354        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2355                    le64_to_cpu(sctx->send_root->root_item.ctransid));
2356        if (parent_root) {
2357                if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
2358                        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2359                                     parent_root->root_item.received_uuid);
2360                else
2361                        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2362                                     parent_root->root_item.uuid);
2363                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2364                            le64_to_cpu(sctx->parent_root->root_item.ctransid));
2365        }
2366
2367        ret = send_cmd(sctx);
2368
2369tlv_put_failure:
2370out:
2371        btrfs_free_path(path);
2372        kfree(name);
2373        return ret;
2374}
2375
2376static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2377{
2378        int ret = 0;
2379        struct fs_path *p;
2380
2381verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2382
2383        p = fs_path_alloc();
2384        if (!p)
2385                return -ENOMEM;
2386
2387        ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2388        if (ret < 0)
2389                goto out;
2390
2391        ret = get_cur_path(sctx, ino, gen, p);
2392        if (ret < 0)
2393                goto out;
2394        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2395        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2396
2397        ret = send_cmd(sctx);
2398
2399tlv_put_failure:
2400out:
2401        fs_path_free(p);
2402        return ret;
2403}
2404
2405static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2406{
2407        int ret = 0;
2408        struct fs_path *p;
2409
2410verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2411
2412        p = fs_path_alloc();
2413        if (!p)
2414                return -ENOMEM;
2415
2416        ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2417        if (ret < 0)
2418                goto out;
2419
2420        ret = get_cur_path(sctx, ino, gen, p);
2421        if (ret < 0)
2422                goto out;
2423        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2424        TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2425
2426        ret = send_cmd(sctx);
2427
2428tlv_put_failure:
2429out:
2430        fs_path_free(p);
2431        return ret;
2432}
2433
2434static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2435{
2436        int ret = 0;
2437        struct fs_path *p;
2438
2439verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2440
2441        p = fs_path_alloc();
2442        if (!p)
2443                return -ENOMEM;
2444
2445        ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2446        if (ret < 0)
2447                goto out;
2448
2449        ret = get_cur_path(sctx, ino, gen, p);
2450        if (ret < 0)
2451                goto out;
2452        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2453        TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2454        TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2455
2456        ret = send_cmd(sctx);
2457
2458tlv_put_failure:
2459out:
2460        fs_path_free(p);
2461        return ret;
2462}
2463
2464static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2465{
2466        int ret = 0;
2467        struct fs_path *p = NULL;
2468        struct btrfs_inode_item *ii;
2469        struct btrfs_path *path = NULL;
2470        struct extent_buffer *eb;
2471        struct btrfs_key key;
2472        int slot;
2473
2474verbose_printk("btrfs: send_utimes %llu\n", ino);
2475
2476        p = fs_path_alloc();
2477        if (!p)
2478                return -ENOMEM;
2479
2480        path = alloc_path_for_send();
2481        if (!path) {
2482                ret = -ENOMEM;
2483                goto out;
2484        }
2485
2486        key.objectid = ino;
2487        key.type = BTRFS_INODE_ITEM_KEY;
2488        key.offset = 0;
2489        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2490        if (ret < 0)
2491                goto out;
2492
2493        eb = path->nodes[0];
2494        slot = path->slots[0];
2495        ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2496
2497        ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2498        if (ret < 0)
2499                goto out;
2500
2501        ret = get_cur_path(sctx, ino, gen, p);
2502        if (ret < 0)
2503                goto out;
2504        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2505        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
2506        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
2507        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
2508        /* TODO Add otime support when the otime patches get into upstream */
2509
2510        ret = send_cmd(sctx);
2511
2512tlv_put_failure:
2513out:
2514        fs_path_free(p);
2515        btrfs_free_path(path);
2516        return ret;
2517}
2518
2519/*
2520 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2521 * a valid path yet because we did not process the refs yet. So, the inode
2522 * is created as orphan.
2523 */
2524static int send_create_inode(struct send_ctx *sctx, u64 ino)
2525{
2526        int ret = 0;
2527        struct fs_path *p;
2528        int cmd;
2529        u64 gen;
2530        u64 mode;
2531        u64 rdev;
2532
2533verbose_printk("btrfs: send_create_inode %llu\n", ino);
2534
2535        p = fs_path_alloc();
2536        if (!p)
2537                return -ENOMEM;
2538
2539        if (ino != sctx->cur_ino) {
2540                ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
2541                                     NULL, NULL, &rdev);
2542                if (ret < 0)
2543                        goto out;
2544        } else {
2545                gen = sctx->cur_inode_gen;
2546                mode = sctx->cur_inode_mode;
2547                rdev = sctx->cur_inode_rdev;
2548        }
2549
2550        if (S_ISREG(mode)) {
2551                cmd = BTRFS_SEND_C_MKFILE;
2552        } else if (S_ISDIR(mode)) {
2553                cmd = BTRFS_SEND_C_MKDIR;
2554        } else if (S_ISLNK(mode)) {
2555                cmd = BTRFS_SEND_C_SYMLINK;
2556        } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2557                cmd = BTRFS_SEND_C_MKNOD;
2558        } else if (S_ISFIFO(mode)) {
2559                cmd = BTRFS_SEND_C_MKFIFO;
2560        } else if (S_ISSOCK(mode)) {
2561                cmd = BTRFS_SEND_C_MKSOCK;
2562        } else {
2563                btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
2564                                (int)(mode & S_IFMT));
2565                ret = -ENOTSUPP;
2566                goto out;
2567        }
2568
2569        ret = begin_cmd(sctx, cmd);
2570        if (ret < 0)
2571                goto out;
2572
2573        ret = gen_unique_name(sctx, ino, gen, p);
2574        if (ret < 0)
2575                goto out;
2576
2577        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2578        TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2579
2580        if (S_ISLNK(mode)) {
2581                fs_path_reset(p);
2582                ret = read_symlink(sctx->send_root, ino, p);
2583                if (ret < 0)
2584                        goto out;
2585                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2586        } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2587                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2588                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2589                TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2590        }
2591
2592        ret = send_cmd(sctx);
2593        if (ret < 0)
2594                goto out;
2595
2596
2597tlv_put_failure:
2598out:
2599        fs_path_free(p);
2600        return ret;
2601}
2602
2603/*
2604 * We need some special handling for inodes that get processed before the parent
2605 * directory got created. See process_recorded_refs for details.
2606 * This function does the check if we already created the dir out of order.
2607 */
2608static int did_create_dir(struct send_ctx *sctx, u64 dir)
2609{
2610        int ret = 0;
2611        struct btrfs_path *path = NULL;
2612        struct btrfs_key key;
2613        struct btrfs_key found_key;
2614        struct btrfs_key di_key;
2615        struct extent_buffer *eb;
2616        struct btrfs_dir_item *di;
2617        int slot;
2618
2619        path = alloc_path_for_send();
2620        if (!path) {
2621                ret = -ENOMEM;
2622                goto out;
2623        }
2624
2625        key.objectid = dir;
2626        key.type = BTRFS_DIR_INDEX_KEY;
2627        key.offset = 0;
2628        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2629        if (ret < 0)
2630                goto out;
2631
2632        while (1) {
2633                eb = path->nodes[0];
2634                slot = path->slots[0];
2635                if (slot >= btrfs_header_nritems(eb)) {
2636                        ret = btrfs_next_leaf(sctx->send_root, path);
2637                        if (ret < 0) {
2638                                goto out;
2639                        } else if (ret > 0) {
2640                                ret = 0;
2641                                break;
2642                        }
2643                        continue;
2644                }
2645
2646                btrfs_item_key_to_cpu(eb, &found_key, slot);
2647                if (found_key.objectid != key.objectid ||
2648                    found_key.type != key.type) {
2649                        ret = 0;
2650                        goto out;
2651                }
2652
2653                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2654                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2655
2656                if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2657                    di_key.objectid < sctx->send_progress) {
2658                        ret = 1;
2659                        goto out;
2660                }
2661
2662                path->slots[0]++;
2663        }
2664
2665out:
2666        btrfs_free_path(path);
2667        return ret;
2668}
2669
2670/*
2671 * Only creates the inode if it is:
2672 * 1. Not a directory
2673 * 2. Or a directory which was not created already due to out of order
2674 *    directories. See did_create_dir and process_recorded_refs for details.
2675 */
2676static int send_create_inode_if_needed(struct send_ctx *sctx)
2677{
2678        int ret;
2679
2680        if (S_ISDIR(sctx->cur_inode_mode)) {
2681                ret = did_create_dir(sctx, sctx->cur_ino);
2682                if (ret < 0)
2683                        goto out;
2684                if (ret) {
2685                        ret = 0;
2686                        goto out;
2687                }
2688        }
2689
2690        ret = send_create_inode(sctx, sctx->cur_ino);
2691        if (ret < 0)
2692                goto out;
2693
2694out:
2695        return ret;
2696}
2697
2698struct recorded_ref {
2699        struct list_head list;
2700        char *dir_path;
2701        char *name;
2702        struct fs_path *full_path;
2703        u64 dir;
2704        u64 dir_gen;
2705        int dir_path_len;
2706        int name_len;
2707};
2708
2709/*
2710 * We need to process new refs before deleted refs, but compare_tree gives us
2711 * everything mixed. So we first record all refs and later process them.
2712 * This function is a helper to record one ref.
2713 */
2714static int __record_ref(struct list_head *head, u64 dir,
2715                      u64 dir_gen, struct fs_path *path)
2716{
2717        struct recorded_ref *ref;
2718
2719        ref = kmalloc(sizeof(*ref), GFP_NOFS);
2720        if (!ref)
2721                return -ENOMEM;
2722
2723        ref->dir = dir;
2724        ref->dir_gen = dir_gen;
2725        ref->full_path = path;
2726
2727        ref->name = (char *)kbasename(ref->full_path->start);
2728        ref->name_len = ref->full_path->end - ref->name;
2729        ref->dir_path = ref->full_path->start;
2730        if (ref->name == ref->full_path->start)
2731                ref->dir_path_len = 0;
2732        else
2733                ref->dir_path_len = ref->full_path->end -
2734                                ref->full_path->start - 1 - ref->name_len;
2735
2736        list_add_tail(&ref->list, head);
2737        return 0;
2738}
2739
2740static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2741{
2742        struct recorded_ref *new;
2743
2744        new = kmalloc(sizeof(*ref), GFP_NOFS);
2745        if (!new)
2746                return -ENOMEM;
2747
2748        new->dir = ref->dir;
2749        new->dir_gen = ref->dir_gen;
2750        new->full_path = NULL;
2751        INIT_LIST_HEAD(&new->list);
2752        list_add_tail(&new->list, list);
2753        return 0;
2754}
2755
2756static void __free_recorded_refs(struct list_head *head)
2757{
2758        struct recorded_ref *cur;
2759
2760        while (!list_empty(head)) {
2761                cur = list_entry(head->next, struct recorded_ref, list);
2762                fs_path_free(cur->full_path);
2763                list_del(&cur->list);
2764                kfree(cur);
2765        }
2766}
2767
2768static void free_recorded_refs(struct send_ctx *sctx)
2769{
2770        __free_recorded_refs(&sctx->new_refs);
2771        __free_recorded_refs(&sctx->deleted_refs);
2772}
2773
2774/*
2775 * Renames/moves a file/dir to its orphan name. Used when the first
2776 * ref of an unprocessed inode gets overwritten and for all non empty
2777 * directories.
2778 */
2779static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2780                          struct fs_path *path)
2781{
2782        int ret;
2783        struct fs_path *orphan;
2784
2785        orphan = fs_path_alloc();
2786        if (!orphan)
2787                return -ENOMEM;
2788
2789        ret = gen_unique_name(sctx, ino, gen, orphan);
2790        if (ret < 0)
2791                goto out;
2792
2793        ret = send_rename(sctx, path, orphan);
2794
2795out:
2796        fs_path_free(orphan);
2797        return ret;
2798}
2799
2800static struct orphan_dir_info *
2801add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2802{
2803        struct rb_node **p = &sctx->orphan_dirs.rb_node;
2804        struct rb_node *parent = NULL;
2805        struct orphan_dir_info *entry, *odi;
2806
2807        odi = kmalloc(sizeof(*odi), GFP_NOFS);
2808        if (!odi)
2809                return ERR_PTR(-ENOMEM);
2810        odi->ino = dir_ino;
2811        odi->gen = 0;
2812
2813        while (*p) {
2814                parent = *p;
2815                entry = rb_entry(parent, struct orphan_dir_info, node);
2816                if (dir_ino < entry->ino) {
2817                        p = &(*p)->rb_left;
2818                } else if (dir_ino > entry->ino) {
2819                        p = &(*p)->rb_right;
2820                } else {
2821                        kfree(odi);
2822                        return entry;
2823                }
2824        }
2825
2826        rb_link_node(&odi->node, parent, p);
2827        rb_insert_color(&odi->node, &sctx->orphan_dirs);
2828        return odi;
2829}
2830
2831static struct orphan_dir_info *
2832get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2833{
2834        struct rb_node *n = sctx->orphan_dirs.rb_node;
2835        struct orphan_dir_info *entry;
2836
2837        while (n) {
2838                entry = rb_entry(n, struct orphan_dir_info, node);
2839                if (dir_ino < entry->ino)
2840                        n = n->rb_left;
2841                else if (dir_ino > entry->ino)
2842                        n = n->rb_right;
2843                else
2844                        return entry;
2845        }
2846        return NULL;
2847}
2848
2849static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2850{
2851        struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2852
2853        return odi != NULL;
2854}
2855
2856static void free_orphan_dir_info(struct send_ctx *sctx,
2857                                 struct orphan_dir_info *odi)
2858{
2859        if (!odi)
2860                return;
2861        rb_erase(&odi->node, &sctx->orphan_dirs);
2862        kfree(odi);
2863}
2864
2865/*
2866 * Returns 1 if a directory can be removed at this point in time.
2867 * We check this by iterating all dir items and checking if the inode behind
2868 * the dir item was already processed.
2869 */
2870static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2871                     u64 send_progress)
2872{
2873        int ret = 0;
2874        struct btrfs_root *root = sctx->parent_root;
2875        struct btrfs_path *path;
2876        struct btrfs_key key;
2877        struct btrfs_key found_key;
2878        struct btrfs_key loc;
2879        struct btrfs_dir_item *di;
2880
2881        /*
2882         * Don't try to rmdir the top/root subvolume dir.
2883         */
2884        if (dir == BTRFS_FIRST_FREE_OBJECTID)
2885                return 0;
2886
2887        path = alloc_path_for_send();
2888        if (!path)
2889                return -ENOMEM;
2890
2891        key.objectid = dir;
2892        key.type = BTRFS_DIR_INDEX_KEY;
2893        key.offset = 0;
2894        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2895        if (ret < 0)
2896                goto out;
2897
2898        while (1) {
2899                struct waiting_dir_move *dm;
2900
2901                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2902                        ret = btrfs_next_leaf(root, path);
2903                        if (ret < 0)
2904                                goto out;
2905                        else if (ret > 0)
2906                                break;
2907                        continue;
2908                }
2909                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2910                                      path->slots[0]);
2911                if (found_key.objectid != key.objectid ||
2912                    found_key.type != key.type)
2913                        break;
2914
2915                di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2916                                struct btrfs_dir_item);
2917                btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2918
2919                dm = get_waiting_dir_move(sctx, loc.objectid);
2920                if (dm) {
2921                        struct orphan_dir_info *odi;
2922
2923                        odi = add_orphan_dir_info(sctx, dir);
2924                        if (IS_ERR(odi)) {
2925                                ret = PTR_ERR(odi);
2926                                goto out;
2927                        }
2928                        odi->gen = dir_gen;
2929                        dm->rmdir_ino = dir;
2930                        ret = 0;
2931                        goto out;
2932                }
2933
2934                if (loc.objectid > send_progress) {
2935                        ret = 0;
2936                        goto out;
2937                }
2938
2939                path->slots[0]++;
2940        }
2941
2942        ret = 1;
2943
2944out:
2945        btrfs_free_path(path);
2946        return ret;
2947}
2948
2949static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2950{
2951        struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
2952
2953        return entry != NULL;
2954}
2955
2956static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
2957{
2958        struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2959        struct rb_node *parent = NULL;
2960        struct waiting_dir_move *entry, *dm;
2961
2962        dm = kmalloc(sizeof(*dm), GFP_NOFS);
2963        if (!dm)
2964                return -ENOMEM;
2965        dm->ino = ino;
2966        dm->rmdir_ino = 0;
2967        dm->orphanized = orphanized;
2968
2969        while (*p) {
2970                parent = *p;
2971                entry = rb_entry(parent, struct waiting_dir_move, node);
2972                if (ino < entry->ino) {
2973                        p = &(*p)->rb_left;
2974                } else if (ino > entry->ino) {
2975                        p = &(*p)->rb_right;
2976                } else {
2977                        kfree(dm);
2978                        return -EEXIST;
2979                }
2980        }
2981
2982        rb_link_node(&dm->node, parent, p);
2983        rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2984        return 0;
2985}
2986
2987static struct waiting_dir_move *
2988get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2989{
2990        struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2991        struct waiting_dir_move *entry;
2992
2993        while (n) {
2994                entry = rb_entry(n, struct waiting_dir_move, node);
2995                if (ino < entry->ino)
2996                        n = n->rb_left;
2997                else if (ino > entry->ino)
2998                        n = n->rb_right;
2999                else
3000                        return entry;
3001        }
3002        return NULL;
3003}
3004
3005static void free_waiting_dir_move(struct send_ctx *sctx,
3006                                  struct waiting_dir_move *dm)
3007{
3008        if (!dm)
3009                return;
3010        rb_erase(&dm->node, &sctx->waiting_dir_moves);
3011        kfree(dm);
3012}
3013
3014static int add_pending_dir_move(struct send_ctx *sctx,
3015                                u64 ino,
3016                                u64 ino_gen,
3017                                u64 parent_ino,
3018                                struct list_head *new_refs,
3019                                struct list_head *deleted_refs,
3020                                const bool is_orphan)
3021{
3022        struct rb_node **p = &sctx->pending_dir_moves.rb_node;
3023        struct rb_node *parent = NULL;
3024        struct pending_dir_move *entry = NULL, *pm;
3025        struct recorded_ref *cur;
3026        int exists = 0;
3027        int ret;
3028
3029        pm = kmalloc(sizeof(*pm), GFP_NOFS);
3030        if (!pm)
3031                return -ENOMEM;
3032        pm->parent_ino = parent_ino;
3033        pm->ino = ino;
3034        pm->gen = ino_gen;
3035        pm->is_orphan = is_orphan;
3036        INIT_LIST_HEAD(&pm->list);
3037        INIT_LIST_HEAD(&pm->update_refs);
3038        RB_CLEAR_NODE(&pm->node);
3039
3040        while (*p) {
3041                parent = *p;
3042                entry = rb_entry(parent, struct pending_dir_move, node);
3043                if (parent_ino < entry->parent_ino) {
3044                        p = &(*p)->rb_left;
3045                } else if (parent_ino > entry->parent_ino) {
3046                        p = &(*p)->rb_right;
3047                } else {
3048                        exists = 1;
3049                        break;
3050                }
3051        }
3052
3053        list_for_each_entry(cur, deleted_refs, list) {
3054                ret = dup_ref(cur, &pm->update_refs);
3055                if (ret < 0)
3056                        goto out;
3057        }
3058        list_for_each_entry(cur, new_refs, list) {
3059                ret = dup_ref(cur, &pm->update_refs);
3060                if (ret < 0)
3061                        goto out;
3062        }
3063
3064        ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
3065        if (ret)
3066                goto out;
3067
3068        if (exists) {
3069                list_add_tail(&pm->list, &entry->list);
3070        } else {
3071                rb_link_node(&pm->node, parent, p);
3072                rb_insert_color(&pm->node, &sctx->pending_dir_moves);
3073        }
3074        ret = 0;
3075out:
3076        if (ret) {
3077                __free_recorded_refs(&pm->update_refs);
3078                kfree(pm);
3079        }
3080        return ret;
3081}
3082
3083static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3084                                                      u64 parent_ino)
3085{
3086        struct rb_node *n = sctx->pending_dir_moves.rb_node;
3087        struct pending_dir_move *entry;
3088
3089        while (n) {
3090                entry = rb_entry(n, struct pending_dir_move, node);
3091                if (parent_ino < entry->parent_ino)
3092                        n = n->rb_left;
3093                else if (parent_ino > entry->parent_ino)
3094                        n = n->rb_right;
3095                else
3096                        return entry;
3097        }
3098        return NULL;
3099}
3100
3101static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3102{
3103        struct fs_path *from_path = NULL;
3104        struct fs_path *to_path = NULL;
3105        struct fs_path *name = NULL;
3106        u64 orig_progress = sctx->send_progress;
3107        struct recorded_ref *cur;
3108        u64 parent_ino, parent_gen;
3109        struct waiting_dir_move *dm = NULL;
3110        u64 rmdir_ino = 0;
3111        int ret;
3112
3113        name = fs_path_alloc();
3114        from_path = fs_path_alloc();
3115        if (!name || !from_path) {
3116                ret = -ENOMEM;
3117                goto out;
3118        }
3119
3120        dm = get_waiting_dir_move(sctx, pm->ino);
3121        ASSERT(dm);
3122        rmdir_ino = dm->rmdir_ino;
3123        free_waiting_dir_move(sctx, dm);
3124
3125        if (pm->is_orphan) {
3126                ret = gen_unique_name(sctx, pm->ino,
3127                                      pm->gen, from_path);
3128        } else {
3129                ret = get_first_ref(sctx->parent_root, pm->ino,
3130                                    &parent_ino, &parent_gen, name);
3131                if (ret < 0)
3132                        goto out;
3133                ret = get_cur_path(sctx, parent_ino, parent_gen,
3134                                   from_path);
3135                if (ret < 0)
3136                        goto out;
3137                ret = fs_path_add_path(from_path, name);
3138        }
3139        if (ret < 0)
3140                goto out;
3141
3142        sctx->send_progress = sctx->cur_ino + 1;
3143        fs_path_reset(name);
3144        to_path = name;
3145        name = NULL;
3146        ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
3147        if (ret < 0)
3148                goto out;
3149
3150        ret = send_rename(sctx, from_path, to_path);
3151        if (ret < 0)
3152                goto out;
3153
3154        if (rmdir_ino) {
3155                struct orphan_dir_info *odi;
3156
3157                odi = get_orphan_dir_info(sctx, rmdir_ino);
3158                if (!odi) {
3159                        /* already deleted */
3160                        goto finish;
3161                }
3162                ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
3163                if (ret < 0)
3164                        goto out;
3165                if (!ret)
3166                        goto finish;
3167
3168                name = fs_path_alloc();
3169                if (!name) {
3170                        ret = -ENOMEM;
3171                        goto out;
3172                }
3173                ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
3174                if (ret < 0)
3175                        goto out;
3176                ret = send_rmdir(sctx, name);
3177                if (ret < 0)
3178                        goto out;
3179                free_orphan_dir_info(sctx, odi);
3180        }
3181
3182finish:
3183        ret = send_utimes(sctx, pm->ino, pm->gen);
3184        if (ret < 0)
3185                goto out;
3186
3187        /*
3188         * After rename/move, need to update the utimes of both new parent(s)
3189         * and old parent(s).
3190         */
3191        list_for_each_entry(cur, &pm->update_refs, list) {
3192                if (cur->dir == rmdir_ino)
3193                        continue;
3194                ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3195                if (ret < 0)
3196                        goto out;
3197        }
3198
3199out:
3200        fs_path_free(name);
3201        fs_path_free(from_path);
3202        fs_path_free(to_path);
3203        sctx->send_progress = orig_progress;
3204
3205        return ret;
3206}
3207
3208static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
3209{
3210        if (!list_empty(&m->list))
3211                list_del(&m->list);
3212        if (!RB_EMPTY_NODE(&m->node))
3213                rb_erase(&m->node, &sctx->pending_dir_moves);
3214        __free_recorded_refs(&m->update_refs);
3215        kfree(m);
3216}
3217
3218static void tail_append_pending_moves(struct pending_dir_move *moves,
3219                                      struct list_head *stack)
3220{
3221        if (list_empty(&moves->list)) {
3222                list_add_tail(&moves->list, stack);
3223        } else {
3224                LIST_HEAD(list);
3225                list_splice_init(&moves->list, &list);
3226                list_add_tail(&moves->list, stack);
3227                list_splice_tail(&list, stack);
3228        }
3229}
3230
3231static int apply_children_dir_moves(struct send_ctx *sctx)
3232{
3233        struct pending_dir_move *pm;
3234        struct list_head stack;
3235        u64 parent_ino = sctx->cur_ino;
3236        int ret = 0;
3237
3238        pm = get_pending_dir_moves(sctx, parent_ino);
3239        if (!pm)
3240                return 0;
3241
3242        INIT_LIST_HEAD(&stack);
3243        tail_append_pending_moves(pm, &stack);
3244
3245        while (!list_empty(&stack)) {
3246                pm = list_first_entry(&stack, struct pending_dir_move, list);
3247                parent_ino = pm->ino;
3248                ret = apply_dir_move(sctx, pm);
3249                free_pending_move(sctx, pm);
3250                if (ret)
3251                        goto out;
3252                pm = get_pending_dir_moves(sctx, parent_ino);
3253                if (pm)
3254                        tail_append_pending_moves(pm, &stack);
3255        }
3256        return 0;
3257
3258out:
3259        while (!list_empty(&stack)) {
3260                pm = list_first_entry(&stack, struct pending_dir_move, list);
3261                free_pending_move(sctx, pm);
3262        }
3263        return ret;
3264}
3265
3266/*
3267 * We might need to delay a directory rename even when no ancestor directory
3268 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3269 * renamed. This happens when we rename a directory to the old name (the name
3270 * in the parent root) of some other unrelated directory that got its rename
3271 * delayed due to some ancestor with higher number that got renamed.
3272 *
3273 * Example:
3274 *
3275 * Parent snapshot:
3276 * .                                       (ino 256)
3277 * |---- a/                                (ino 257)
3278 * |     |---- file                        (ino 260)
3279 * |
3280 * |---- b/                                (ino 258)
3281 * |---- c/                                (ino 259)
3282 *
3283 * Send snapshot:
3284 * .                                       (ino 256)
3285 * |---- a/                                (ino 258)
3286 * |---- x/                                (ino 259)
3287 *       |---- y/                          (ino 257)
3288 *             |----- file                 (ino 260)
3289 *
3290 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
3291 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3292 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3293 * must issue is:
3294 *
3295 * 1 - rename 259 from 'c' to 'x'
3296 * 2 - rename 257 from 'a' to 'x/y'
3297 * 3 - rename 258 from 'b' to 'a'
3298 *
3299 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3300 * be done right away and < 0 on error.
3301 */
3302static int wait_for_dest_dir_move(struct send_ctx *sctx,
3303                                  struct recorded_ref *parent_ref,
3304                                  const bool is_orphan)
3305{
3306        struct btrfs_path *path;
3307        struct btrfs_key key;
3308        struct btrfs_key di_key;
3309        struct btrfs_dir_item *di;
3310        u64 left_gen;
3311        u64 right_gen;
3312        int ret = 0;
3313
3314        if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3315                return 0;
3316
3317        path = alloc_path_for_send();
3318        if (!path)
3319                return -ENOMEM;
3320
3321        key.objectid = parent_ref->dir;
3322        key.type = BTRFS_DIR_ITEM_KEY;
3323        key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3324
3325        ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3326        if (ret < 0) {
3327                goto out;
3328        } else if (ret > 0) {
3329                ret = 0;
3330                goto out;
3331        }
3332
3333        di = btrfs_match_dir_item_name(sctx->parent_root, path,
3334                                       parent_ref->name, parent_ref->name_len);
3335        if (!di) {
3336                ret = 0;
3337                goto out;
3338        }
3339        /*
3340         * di_key.objectid has the number of the inode that has a dentry in the
3341         * parent directory with the same name that sctx->cur_ino is being
3342         * renamed to. We need to check if that inode is in the send root as
3343         * well and if it is currently marked as an inode with a pending rename,
3344         * if it is, we need to delay the rename of sctx->cur_ino as well, so
3345         * that it happens after that other inode is renamed.
3346         */
3347        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3348        if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3349                ret = 0;
3350                goto out;
3351        }
3352
3353        ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3354                             &left_gen, NULL, NULL, NULL, NULL);
3355        if (ret < 0)
3356                goto out;
3357        ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3358                             &right_gen, NULL, NULL, NULL, NULL);
3359        if (ret < 0) {
3360                if (ret == -ENOENT)
3361                        ret = 0;
3362                goto out;
3363        }
3364
3365        /* Different inode, no need to delay the rename of sctx->cur_ino */
3366        if (right_gen != left_gen) {
3367                ret = 0;
3368                goto out;
3369        }
3370
3371        if (is_waiting_for_move(sctx, di_key.objectid)) {
3372                ret = add_pending_dir_move(sctx,
3373                                           sctx->cur_ino,
3374                                           sctx->cur_inode_gen,
3375                                           di_key.objectid,
3376                                           &sctx->new_refs,
3377                                           &sctx->deleted_refs,
3378                                           is_orphan);
3379                if (!ret)
3380                        ret = 1;
3381        }
3382out:
3383        btrfs_free_path(path);
3384        return ret;
3385}
3386
3387/*
3388 * Check if ino ino1 is an ancestor of inode ino2 in the given root.
3389 * Return 1 if true, 0 if false and < 0 on error.
3390 */
3391static int is_ancestor(struct btrfs_root *root,
3392                       const u64 ino1,
3393                       const u64 ino1_gen,
3394                       const u64 ino2,
3395                       struct fs_path *fs_path)
3396{
3397        u64 ino = ino2;
3398
3399        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3400                int ret;
3401                u64 parent;
3402                u64 parent_gen;
3403
3404                fs_path_reset(fs_path);
3405                ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3406                if (ret < 0) {
3407                        if (ret == -ENOENT && ino == ino2)
3408                                ret = 0;
3409                        return ret;
3410                }
3411                if (parent == ino1)
3412                        return parent_gen == ino1_gen ? 1 : 0;
3413                ino = parent;
3414        }
3415        return 0;
3416}
3417
3418static int wait_for_parent_move(struct send_ctx *sctx,
3419                                struct recorded_ref *parent_ref,
3420                                const bool is_orphan)
3421{
3422        int ret = 0;
3423        u64 ino = parent_ref->dir;
3424        u64 parent_ino_before, parent_ino_after;
3425        struct fs_path *path_before = NULL;
3426        struct fs_path *path_after = NULL;
3427        int len1, len2;
3428
3429        path_after = fs_path_alloc();
3430        path_before = fs_path_alloc();
3431        if (!path_after || !path_before) {
3432                ret = -ENOMEM;
3433                goto out;
3434        }
3435
3436        /*
3437         * Our current directory inode may not yet be renamed/moved because some
3438         * ancestor (immediate or not) has to be renamed/moved first. So find if
3439         * such ancestor exists and make sure our own rename/move happens after
3440         * that ancestor is processed to avoid path build infinite loops (done
3441         * at get_cur_path()).
3442         */
3443        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3444                if (is_waiting_for_move(sctx, ino)) {
3445                        /*
3446                         * If the current inode is an ancestor of ino in the
3447                         * parent root, we need to delay the rename of the
3448                         * current inode, otherwise don't delayed the rename
3449                         * because we can end up with a circular dependency
3450                         * of renames, resulting in some directories never
3451                         * getting the respective rename operations issued in
3452                         * the send stream or getting into infinite path build
3453                         * loops.
3454                         */
3455                        ret = is_ancestor(sctx->parent_root,
3456                                          sctx->cur_ino, sctx->cur_inode_gen,
3457                                          ino, path_before);
3458                        break;
3459                }
3460
3461                fs_path_reset(path_before);
3462                fs_path_reset(path_after);
3463
3464                ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3465                                    NULL, path_after);
3466                if (ret < 0)
3467                        goto out;
3468                ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3469                                    NULL, path_before);
3470                if (ret < 0 && ret != -ENOENT) {
3471                        goto out;
3472                } else if (ret == -ENOENT) {
3473                        ret = 0;
3474                        break;
3475                }
3476
3477                len1 = fs_path_len(path_before);
3478                len2 = fs_path_len(path_after);
3479                if (ino > sctx->cur_ino &&
3480                    (parent_ino_before != parent_ino_after || len1 != len2 ||
3481                     memcmp(path_before->start, path_after->start, len1))) {
3482                        ret = 1;
3483                        break;
3484                }
3485                ino = parent_ino_after;
3486        }
3487
3488out:
3489        fs_path_free(path_before);
3490        fs_path_free(path_after);
3491
3492        if (ret == 1) {
3493                ret = add_pending_dir_move(sctx,
3494                                           sctx->cur_ino,
3495                                           sctx->cur_inode_gen,
3496                                           ino,
3497                                           &sctx->new_refs,
3498                                           &sctx->deleted_refs,
3499                                           is_orphan);
3500                if (!ret)
3501                        ret = 1;
3502        }
3503
3504        return ret;
3505}
3506
3507/*
3508 * This does all the move/link/unlink/rmdir magic.
3509 */
3510static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3511{
3512        int ret = 0;
3513        struct recorded_ref *cur;
3514        struct recorded_ref *cur2;
3515        struct list_head check_dirs;
3516        struct fs_path *valid_path = NULL;
3517        u64 ow_inode = 0;
3518        u64 ow_gen;
3519        int did_overwrite = 0;
3520        int is_orphan = 0;
3521        u64 last_dir_ino_rm = 0;
3522        bool can_rename = true;
3523
3524verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3525
3526        /*
3527         * This should never happen as the root dir always has the same ref
3528         * which is always '..'
3529         */
3530        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3531        INIT_LIST_HEAD(&check_dirs);
3532
3533        valid_path = fs_path_alloc();
3534        if (!valid_path) {
3535                ret = -ENOMEM;
3536                goto out;
3537        }
3538
3539        /*
3540         * First, check if the first ref of the current inode was overwritten
3541         * before. If yes, we know that the current inode was already orphanized
3542         * and thus use the orphan name. If not, we can use get_cur_path to
3543         * get the path of the first ref as it would like while receiving at
3544         * this point in time.
3545         * New inodes are always orphan at the beginning, so force to use the
3546         * orphan name in this case.
3547         * The first ref is stored in valid_path and will be updated if it
3548         * gets moved around.
3549         */
3550        if (!sctx->cur_inode_new) {
3551                ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3552                                sctx->cur_inode_gen);
3553                if (ret < 0)
3554                        goto out;
3555                if (ret)
3556                        did_overwrite = 1;
3557        }
3558        if (sctx->cur_inode_new || did_overwrite) {
3559                ret = gen_unique_name(sctx, sctx->cur_ino,
3560                                sctx->cur_inode_gen, valid_path);
3561                if (ret < 0)
3562                        goto out;
3563                is_orphan = 1;
3564        } else {
3565                ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3566                                valid_path);
3567                if (ret < 0)
3568                        goto out;
3569        }
3570
3571        list_for_each_entry(cur, &sctx->new_refs, list) {
3572                /*
3573                 * We may have refs where the parent directory does not exist
3574                 * yet. This happens if the parent directories inum is higher
3575                 * the the current inum. To handle this case, we create the
3576                 * parent directory out of order. But we need to check if this
3577                 * did already happen before due to other refs in the same dir.
3578                 */
3579                ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3580                if (ret < 0)
3581                        goto out;
3582                if (ret == inode_state_will_create) {
3583                        ret = 0;
3584                        /*
3585                         * First check if any of the current inodes refs did
3586                         * already create the dir.
3587                         */
3588                        list_for_each_entry(cur2, &sctx->new_refs, list) {
3589                                if (cur == cur2)
3590                                        break;
3591                                if (cur2->dir == cur->dir) {
3592                                        ret = 1;
3593                                        break;
3594                                }
3595                        }
3596
3597                        /*
3598                         * If that did not happen, check if a previous inode
3599                         * did already create the dir.
3600                         */
3601                        if (!ret)
3602                                ret = did_create_dir(sctx, cur->dir);
3603                        if (ret < 0)
3604                                goto out;
3605                        if (!ret) {
3606                                ret = send_create_inode(sctx, cur->dir);
3607                                if (ret < 0)
3608                                        goto out;
3609                        }
3610                }
3611
3612                /*
3613                 * Check if this new ref would overwrite the first ref of
3614                 * another unprocessed inode. If yes, orphanize the
3615                 * overwritten inode. If we find an overwritten ref that is
3616                 * not the first ref, simply unlink it.
3617                 */
3618                ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3619                                cur->name, cur->name_len,
3620                                &ow_inode, &ow_gen);
3621                if (ret < 0)
3622                        goto out;
3623                if (ret) {
3624                        ret = is_first_ref(sctx->parent_root,
3625                                           ow_inode, cur->dir, cur->name,
3626                                           cur->name_len);
3627                        if (ret < 0)
3628                                goto out;
3629                        if (ret) {
3630                                struct name_cache_entry *nce;
3631
3632                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
3633                                                cur->full_path);
3634                                if (ret < 0)
3635                                        goto out;
3636                                /*
3637                                 * Make sure we clear our orphanized inode's
3638                                 * name from the name cache. This is because the
3639                                 * inode ow_inode might be an ancestor of some
3640                                 * other inode that will be orphanized as well
3641                                 * later and has an inode number greater than
3642                                 * sctx->send_progress. We need to prevent
3643                                 * future name lookups from using the old name
3644                                 * and get instead the orphan name.
3645                                 */
3646                                nce = name_cache_search(sctx, ow_inode, ow_gen);
3647                                if (nce) {
3648                                        name_cache_delete(sctx, nce);
3649                                        kfree(nce);
3650                                }
3651                        } else {
3652                                ret = send_unlink(sctx, cur->full_path);
3653                                if (ret < 0)
3654                                        goto out;
3655                        }
3656                }
3657
3658                if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3659                        ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3660                        if (ret < 0)
3661                                goto out;
3662                        if (ret == 1) {
3663                                can_rename = false;
3664                                *pending_move = 1;
3665                        }
3666                }
3667
3668                if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
3669                    can_rename) {
3670                        ret = wait_for_parent_move(sctx, cur, is_orphan);
3671                        if (ret < 0)
3672                                goto out;
3673                        if (ret == 1) {
3674                                can_rename = false;
3675                                *pending_move = 1;
3676                        }
3677                }
3678
3679                /*
3680                 * link/move the ref to the new place. If we have an orphan
3681                 * inode, move it and update valid_path. If not, link or move
3682                 * it depending on the inode mode.
3683                 */
3684                if (is_orphan && can_rename) {
3685                        ret = send_rename(sctx, valid_path, cur->full_path);
3686                        if (ret < 0)
3687                                goto out;
3688                        is_orphan = 0;
3689                        ret = fs_path_copy(valid_path, cur->full_path);
3690                        if (ret < 0)
3691                                goto out;
3692                } else if (can_rename) {
3693                        if (S_ISDIR(sctx->cur_inode_mode)) {
3694                                /*
3695                                 * Dirs can't be linked, so move it. For moved
3696                                 * dirs, we always have one new and one deleted
3697                                 * ref. The deleted ref is ignored later.
3698                                 */
3699                                ret = send_rename(sctx, valid_path,
3700                                                  cur->full_path);
3701                                if (!ret)
3702                                        ret = fs_path_copy(valid_path,
3703                                                           cur->full_path);
3704                                if (ret < 0)
3705                                        goto out;
3706                        } else {
3707                                ret = send_link(sctx, cur->full_path,
3708                                                valid_path);
3709                                if (ret < 0)
3710                                        goto out;
3711                        }
3712                }
3713                ret = dup_ref(cur, &check_dirs);
3714                if (ret < 0)
3715                        goto out;
3716        }
3717
3718        if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
3719                /*
3720                 * Check if we can already rmdir the directory. If not,
3721                 * orphanize it. For every dir item inside that gets deleted
3722                 * later, we do this check again and rmdir it then if possible.
3723                 * See the use of check_dirs for more details.
3724                 */
3725                ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3726                                sctx->cur_ino);
3727                if (ret < 0)
3728                        goto out;
3729                if (ret) {
3730                        ret = send_rmdir(sctx, valid_path);
3731                        if (ret < 0)
3732                                goto out;
3733                } else if (!is_orphan) {
3734                        ret = orphanize_inode(sctx, sctx->cur_ino,
3735                                        sctx->cur_inode_gen, valid_path);
3736                        if (ret < 0)
3737                                goto out;
3738                        is_orphan = 1;
3739                }
3740
3741                list_for_each_entry(cur, &sctx->deleted_refs, list) {
3742                        ret = dup_ref(cur, &check_dirs);
3743                        if (ret < 0)
3744                                goto out;
3745                }
3746        } else if (S_ISDIR(sctx->cur_inode_mode) &&
3747                   !list_empty(&sctx->deleted_refs)) {
3748                /*
3749                 * We have a moved dir. Add the old parent to check_dirs
3750                 */
3751                cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
3752                                list);
3753                ret = dup_ref(cur, &check_dirs);
3754                if (ret < 0)
3755                        goto out;
3756        } else if (!S_ISDIR(sctx->cur_inode_mode)) {
3757                /*
3758                 * We have a non dir inode. Go through all deleted refs and
3759                 * unlink them if they were not already overwritten by other
3760                 * inodes.
3761                 */
3762                list_for_each_entry(cur, &sctx->deleted_refs, list) {
3763                        ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3764                                        sctx->cur_ino, sctx->cur_inode_gen,
3765                                        cur->name, cur->name_len);
3766                        if (ret < 0)
3767                                goto out;
3768                        if (!ret) {
3769                                ret = send_unlink(sctx, cur->full_path);
3770                                if (ret < 0)
3771                                        goto out;
3772                        }
3773                        ret = dup_ref(cur, &check_dirs);
3774                        if (ret < 0)
3775                                goto out;
3776                }
3777                /*
3778                 * If the inode is still orphan, unlink the orphan. This may
3779                 * happen when a previous inode did overwrite the first ref
3780                 * of this inode and no new refs were added for the current
3781                 * inode. Unlinking does not mean that the inode is deleted in
3782                 * all cases. There may still be links to this inode in other
3783                 * places.
3784                 */
3785                if (is_orphan) {
3786                        ret = send_unlink(sctx, valid_path);
3787                        if (ret < 0)
3788                                goto out;
3789                }
3790        }
3791
3792        /*
3793         * We did collect all parent dirs where cur_inode was once located. We
3794         * now go through all these dirs and check if they are pending for
3795         * deletion and if it's finally possible to perform the rmdir now.
3796         * We also update the inode stats of the parent dirs here.
3797         */
3798        list_for_each_entry(cur, &check_dirs, list) {
3799                /*
3800                 * In case we had refs into dirs that were not processed yet,
3801                 * we don't need to do the utime and rmdir logic for these dirs.
3802                 * The dir will be processed later.
3803                 */
3804                if (cur->dir > sctx->cur_ino)
3805                        continue;
3806
3807                ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3808                if (ret < 0)
3809                        goto out;
3810
3811                if (ret == inode_state_did_create ||
3812                    ret == inode_state_no_change) {
3813                        /* TODO delayed utimes */
3814                        ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3815                        if (ret < 0)
3816                                goto out;
3817                } else if (ret == inode_state_did_delete &&
3818                           cur->dir != last_dir_ino_rm) {
3819                        ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
3820                                        sctx->cur_ino);
3821                        if (ret < 0)
3822                                goto out;
3823                        if (ret) {
3824                                ret = get_cur_path(sctx, cur->dir,
3825                                                   cur->dir_gen, valid_path);
3826                                if (ret < 0)
3827                                        goto out;
3828                                ret = send_rmdir(sctx, valid_path);
3829                                if (ret < 0)
3830                                        goto out;
3831                                last_dir_ino_rm = cur->dir;
3832                        }
3833                }
3834        }
3835
3836        ret = 0;
3837
3838out:
3839        __free_recorded_refs(&check_dirs);
3840        free_recorded_refs(sctx);
3841        fs_path_free(valid_path);
3842        return ret;
3843}
3844
3845static int record_ref(struct btrfs_root *root, int num, u64 dir, int index,
3846                      struct fs_path *name, void *ctx, struct list_head *refs)
3847{
3848        int ret = 0;
3849        struct send_ctx *sctx = ctx;
3850        struct fs_path *p;
3851        u64 gen;
3852
3853        p = fs_path_alloc();
3854        if (!p)
3855                return -ENOMEM;
3856
3857        ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
3858                        NULL, NULL);
3859        if (ret < 0)
3860                goto out;
3861
3862        ret = get_cur_path(sctx, dir, gen, p);
3863        if (ret < 0)
3864                goto out;
3865        ret = fs_path_add_path(p, name);
3866        if (ret < 0)
3867                goto out;
3868
3869        ret = __record_ref(refs, dir, gen, p);
3870
3871out:
3872        if (ret)
3873                fs_path_free(p);
3874        return ret;
3875}
3876
3877static int __record_new_ref(int num, u64 dir, int index,
3878                            struct fs_path *name,
3879                            void *ctx)
3880{
3881        struct send_ctx *sctx = ctx;
3882        return record_ref(sctx->send_root, num, dir, index, name,
3883                          ctx, &sctx->new_refs);
3884}
3885
3886
3887static int __record_deleted_ref(int num, u64 dir, int index,
3888                                struct fs_path *name,
3889                                void *ctx)
3890{
3891        struct send_ctx *sctx = ctx;
3892        return record_ref(sctx->parent_root, num, dir, index, name,
3893                          ctx, &sctx->deleted_refs);
3894}
3895
3896static int record_new_ref(struct send_ctx *sctx)
3897{
3898        int ret;
3899
3900        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3901                                sctx->cmp_key, 0, __record_new_ref, sctx);
3902        if (ret < 0)
3903                goto out;
3904        ret = 0;
3905
3906out:
3907        return ret;
3908}
3909
3910static int record_deleted_ref(struct send_ctx *sctx)
3911{
3912        int ret;
3913
3914        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3915                                sctx->cmp_key, 0, __record_deleted_ref, sctx);
3916        if (ret < 0)
3917                goto out;
3918        ret = 0;
3919
3920out:
3921        return ret;
3922}
3923
3924struct find_ref_ctx {
3925        u64 dir;
3926        u64 dir_gen;
3927        struct btrfs_root *root;
3928        struct fs_path *name;
3929        int found_idx;
3930};
3931
3932static int __find_iref(int num, u64 dir, int index,
3933                       struct fs_path *name,
3934                       void *ctx_)
3935{
3936        struct find_ref_ctx *ctx = ctx_;
3937        u64 dir_gen;
3938        int ret;
3939
3940        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3941            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3942                /*
3943                 * To avoid doing extra lookups we'll only do this if everything
3944                 * else matches.
3945                 */
3946                ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3947                                     NULL, NULL, NULL);
3948                if (ret)
3949                        return ret;
3950                if (dir_gen != ctx->dir_gen)
3951                        return 0;
3952                ctx->found_idx = num;
3953                return 1;
3954        }
3955        return 0;
3956}
3957
3958static int find_iref(struct btrfs_root *root,
3959                     struct btrfs_path *path,
3960                     struct btrfs_key *key,
3961                     u64 dir, u64 dir_gen, struct fs_path *name)
3962{
3963        int ret;
3964        struct find_ref_ctx ctx;
3965
3966        ctx.dir = dir;
3967        ctx.name = name;
3968        ctx.dir_gen = dir_gen;
3969        ctx.found_idx = -1;
3970        ctx.root = root;
3971
3972        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3973        if (ret < 0)
3974                return ret;
3975
3976        if (ctx.found_idx == -1)
3977                return -ENOENT;
3978
3979        return ctx.found_idx;
3980}
3981
3982static int __record_changed_new_ref(int num, u64 dir, int index,
3983                                    struct fs_path *name,
3984                                    void *ctx)
3985{
3986        u64 dir_gen;
3987        int ret;
3988        struct send_ctx *sctx = ctx;
3989
3990        ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3991                             NULL, NULL, NULL);
3992        if (ret)
3993                return ret;
3994
3995        ret = find_iref(sctx->parent_root, sctx->right_path,
3996                        sctx->cmp_key, dir, dir_gen, name);
3997        if (ret == -ENOENT)
3998                ret = __record_new_ref(num, dir, index, name, sctx);
3999        else if (ret > 0)
4000                ret = 0;
4001
4002        return ret;
4003}
4004
4005static int __record_changed_deleted_ref(int num, u64 dir, int index,
4006                                        struct fs_path *name,
4007                                        void *ctx)
4008{
4009        u64 dir_gen;
4010        int ret;
4011        struct send_ctx *sctx = ctx;
4012
4013        ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
4014                             NULL, NULL, NULL);
4015        if (ret)
4016                return ret;
4017
4018        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
4019                        dir, dir_gen, name);
4020        if (ret == -ENOENT)
4021                ret = __record_deleted_ref(num, dir, index, name, sctx);
4022        else if (ret > 0)
4023                ret = 0;
4024
4025        return ret;
4026}
4027
4028static int record_changed_ref(struct send_ctx *sctx)
4029{
4030        int ret = 0;
4031
4032        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4033                        sctx->cmp_key, 0, __record_changed_new_ref, sctx);
4034        if (ret < 0)
4035                goto out;
4036        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4037                        sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4038        if (ret < 0)
4039                goto out;
4040        ret = 0;
4041
4042out:
4043        return ret;
4044}
4045
4046/*
4047 * Record and process all refs at once. Needed when an inode changes the
4048 * generation number, which means that it was deleted and recreated.
4049 */
4050static int process_all_refs(struct send_ctx *sctx,
4051                            enum btrfs_compare_tree_result cmd)
4052{
4053        int ret;
4054        struct btrfs_root *root;
4055        struct btrfs_path *path;
4056        struct btrfs_key key;
4057        struct btrfs_key found_key;
4058        struct extent_buffer *eb;
4059        int slot;
4060        iterate_inode_ref_t cb;
4061        int pending_move = 0;
4062
4063        path = alloc_path_for_send();
4064        if (!path)
4065                return -ENOMEM;
4066
4067        if (cmd == BTRFS_COMPARE_TREE_NEW) {
4068                root = sctx->send_root;
4069                cb = __record_new_ref;
4070        } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4071                root = sctx->parent_root;
4072                cb = __record_deleted_ref;
4073        } else {
4074                btrfs_err(sctx->send_root->fs_info,
4075                                "Wrong command %d in process_all_refs", cmd);
4076                ret = -EINVAL;
4077                goto out;
4078        }
4079
4080        key.objectid = sctx->cmp_key->objectid;
4081        key.type = BTRFS_INODE_REF_KEY;
4082        key.offset = 0;
4083        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4084        if (ret < 0)
4085                goto out;
4086
4087        while (1) {
4088                eb = path->nodes[0];
4089                slot = path->slots[0];
4090                if (slot >= btrfs_header_nritems(eb)) {
4091                        ret = btrfs_next_leaf(root, path);
4092                        if (ret < 0)
4093                                goto out;
4094                        else if (ret > 0)
4095                                break;
4096                        continue;
4097                }
4098
4099                btrfs_item_key_to_cpu(eb, &found_key, slot);
4100
4101                if (found_key.objectid != key.objectid ||
4102                    (found_key.type != BTRFS_INODE_REF_KEY &&
4103                     found_key.type != BTRFS_INODE_EXTREF_KEY))
4104                        break;
4105
4106                ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4107                if (ret < 0)
4108                        goto out;
4109
4110                path->slots[0]++;
4111        }
4112        btrfs_release_path(path);
4113
4114        ret = process_recorded_refs(sctx, &pending_move);
4115        /* Only applicable to an incremental send. */
4116        ASSERT(pending_move == 0);
4117
4118out:
4119        btrfs_free_path(path);
4120        return ret;
4121}
4122
4123static int send_set_xattr(struct send_ctx *sctx,
4124                          struct fs_path *path,
4125                          const char *name, int name_len,
4126                          const char *data, int data_len)
4127{
4128        int ret = 0;
4129
4130        ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
4131        if (ret < 0)
4132                goto out;
4133
4134        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4135        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4136        TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
4137
4138        ret = send_cmd(sctx);
4139
4140tlv_put_failure:
4141out:
4142        return ret;
4143}
4144
4145static int send_remove_xattr(struct send_ctx *sctx,
4146                          struct fs_path *path,
4147                          const char *name, int name_len)
4148{
4149        int ret = 0;
4150
4151        ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
4152        if (ret < 0)
4153                goto out;
4154
4155        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4156        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4157
4158        ret = send_cmd(sctx);
4159
4160tlv_put_failure:
4161out:
4162        return ret;
4163}
4164
4165static int __process_new_xattr(int num, struct btrfs_key *di_key,
4166                               const char *name, int name_len,
4167                               const char *data, int data_len,
4168                               u8 type, void *ctx)
4169{
4170        int ret;
4171        struct send_ctx *sctx = ctx;
4172        struct fs_path *p;
4173        posix_acl_xattr_header dummy_acl;
4174
4175        p = fs_path_alloc();
4176        if (!p)
4177                return -ENOMEM;
4178
4179        /*
4180         * This hack is needed because empty acl's are stored as zero byte
4181         * data in xattrs. Problem with that is, that receiving these zero byte
4182         * acl's will fail later. To fix this, we send a dummy acl list that
4183         * only contains the version number and no entries.
4184         */
4185        if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4186            !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4187                if (data_len == 0) {
4188                        dummy_acl.a_version =
4189                                        cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4190                        data = (char *)&dummy_acl;
4191                        data_len = sizeof(dummy_acl);
4192                }
4193        }
4194
4195        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4196        if (ret < 0)
4197                goto out;
4198
4199        ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4200
4201out:
4202        fs_path_free(p);
4203        return ret;
4204}
4205
4206static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4207                                   const char *name, int name_len,
4208                                   const char *data, int data_len,
4209                                   u8 type, void *ctx)
4210{
4211        int ret;
4212        struct send_ctx *sctx = ctx;
4213        struct fs_path *p;
4214
4215        p = fs_path_alloc();
4216        if (!p)
4217                return -ENOMEM;
4218
4219        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4220        if (ret < 0)
4221                goto out;
4222
4223        ret = send_remove_xattr(sctx, p, name, name_len);
4224
4225out:
4226        fs_path_free(p);
4227        return ret;
4228}
4229
4230static int process_new_xattr(struct send_ctx *sctx)
4231{
4232        int ret = 0;
4233
4234        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4235                               sctx->cmp_key, __process_new_xattr, sctx);
4236
4237        return ret;
4238}
4239
4240static int process_deleted_xattr(struct send_ctx *sctx)
4241{
4242        int ret;
4243
4244        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4245                               sctx->cmp_key, __process_deleted_xattr, sctx);
4246
4247        return ret;
4248}
4249
4250struct find_xattr_ctx {
4251        const char *name;
4252        int name_len;
4253        int found_idx;
4254        char *found_data;
4255        int found_data_len;
4256};
4257
4258static int __find_xattr(int num, struct btrfs_key *di_key,
4259                        const char *name, int name_len,
4260                        const char *data, int data_len,
4261                        u8 type, void *vctx)
4262{
4263        struct find_xattr_ctx *ctx = vctx;
4264
4265        if (name_len == ctx->name_len &&
4266            strncmp(name, ctx->name, name_len) == 0) {
4267                ctx->found_idx = num;
4268                ctx->found_data_len = data_len;
4269                ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
4270                if (!ctx->found_data)
4271                        return -ENOMEM;
4272                return 1;
4273        }
4274        return 0;
4275}
4276
4277static int find_xattr(struct btrfs_root *root,
4278                      struct btrfs_path *path,
4279                      struct btrfs_key *key,
4280                      const char *name, int name_len,
4281                      char **data, int *data_len)
4282{
4283        int ret;
4284        struct find_xattr_ctx ctx;
4285
4286        ctx.name = name;
4287        ctx.name_len = name_len;
4288        ctx.found_idx = -1;
4289        ctx.found_data = NULL;
4290        ctx.found_data_len = 0;
4291
4292        ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
4293        if (ret < 0)
4294                return ret;
4295
4296        if (ctx.found_idx == -1)
4297                return -ENOENT;
4298        if (data) {
4299                *data = ctx.found_data;
4300                *data_len = ctx.found_data_len;
4301        } else {
4302                kfree(ctx.found_data);
4303        }
4304        return ctx.found_idx;
4305}
4306
4307
4308static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4309                                       const char *name, int name_len,
4310                                       const char *data, int data_len,
4311                                       u8 type, void *ctx)
4312{
4313        int ret;
4314        struct send_ctx *sctx = ctx;
4315        char *found_data = NULL;
4316        int found_data_len  = 0;
4317
4318        ret = find_xattr(sctx->parent_root, sctx->right_path,
4319                         sctx->cmp_key, name, name_len, &found_data,
4320                         &found_data_len);
4321        if (ret == -ENOENT) {
4322                ret = __process_new_xattr(num, di_key, name, name_len, data,
4323                                data_len, type, ctx);
4324        } else if (ret >= 0) {
4325                if (data_len != found_data_len ||
4326                    memcmp(data, found_data, data_len)) {
4327                        ret = __process_new_xattr(num, di_key, name, name_len,
4328                                        data, data_len, type, ctx);
4329                } else {
4330                        ret = 0;
4331                }
4332        }
4333
4334        kfree(found_data);
4335        return ret;
4336}
4337
4338static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4339                                           const char *name, int name_len,
4340                                           const char *data, int data_len,
4341                                           u8 type, void *ctx)
4342{
4343        int ret;
4344        struct send_ctx *sctx = ctx;
4345
4346        ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4347                         name, name_len, NULL, NULL);
4348        if (ret == -ENOENT)
4349                ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4350                                data_len, type, ctx);
4351        else if (ret >= 0)
4352                ret = 0;
4353
4354        return ret;
4355}
4356
4357static int process_changed_xattr(struct send_ctx *sctx)
4358{
4359        int ret = 0;
4360
4361        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4362                        sctx->cmp_key, __process_changed_new_xattr, sctx);
4363        if (ret < 0)
4364                goto out;
4365        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4366                        sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4367
4368out:
4369        return ret;
4370}
4371
4372static int process_all_new_xattrs(struct send_ctx *sctx)
4373{
4374        int ret;
4375        struct btrfs_root *root;
4376        struct btrfs_path *path;
4377        struct btrfs_key key;
4378        struct btrfs_key found_key;
4379        struct extent_buffer *eb;
4380        int slot;
4381
4382        path = alloc_path_for_send();
4383        if (!path)
4384                return -ENOMEM;
4385
4386        root = sctx->send_root;
4387
4388        key.objectid = sctx->cmp_key->objectid;
4389        key.type = BTRFS_XATTR_ITEM_KEY;
4390        key.offset = 0;
4391        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4392        if (ret < 0)
4393                goto out;
4394
4395        while (1) {
4396                eb = path->nodes[0];
4397                slot = path->slots[0];
4398                if (slot >= btrfs_header_nritems(eb)) {
4399                        ret = btrfs_next_leaf(root, path);
4400                        if (ret < 0) {
4401                                goto out;
4402                        } else if (ret > 0) {
4403                                ret = 0;
4404                                break;
4405                        }
4406                        continue;
4407                }
4408
4409                btrfs_item_key_to_cpu(eb, &found_key, slot);
4410                if (found_key.objectid != key.objectid ||
4411                    found_key.type != key.type) {
4412                        ret = 0;
4413                        goto out;
4414                }
4415
4416                ret = iterate_dir_item(root, path, &found_key,
4417                                       __process_new_xattr, sctx);
4418                if (ret < 0)
4419                        goto out;
4420
4421                path->slots[0]++;
4422        }
4423
4424out:
4425        btrfs_free_path(path);
4426        return ret;
4427}
4428
4429static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4430{
4431        struct btrfs_root *root = sctx->send_root;
4432        struct btrfs_fs_info *fs_info = root->fs_info;
4433        struct inode *inode;
4434        struct page *page;
4435        char *addr;
4436        struct btrfs_key key;
4437        pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4438        pgoff_t last_index;
4439        unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4440        ssize_t ret = 0;
4441
4442        key.objectid = sctx->cur_ino;
4443        key.type = BTRFS_INODE_ITEM_KEY;
4444        key.offset = 0;
4445
4446        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4447        if (IS_ERR(inode))
4448                return PTR_ERR(inode);
4449
4450        if (offset + len > i_size_read(inode)) {
4451                if (offset > i_size_read(inode))
4452                        len = 0;
4453                else
4454                        len = offset - i_size_read(inode);
4455        }
4456        if (len == 0)
4457                goto out;
4458
4459        last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4460
4461        /* initial readahead */
4462        memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4463        file_ra_state_init(&sctx->ra, inode->i_mapping);
4464        btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index,
4465                       last_index - index + 1);
4466
4467        while (index <= last_index) {
4468                unsigned cur_len = min_t(unsigned, len,
4469                                         PAGE_CACHE_SIZE - pg_offset);
4470                page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4471                if (!page) {
4472                        ret = -ENOMEM;
4473                        break;
4474                }
4475
4476                if (!PageUptodate(page)) {
4477                        btrfs_readpage(NULL, page);
4478                        lock_page(page);
4479                        if (!PageUptodate(page)) {
4480                                unlock_page(page);
4481                                page_cache_release(page);
4482                                ret = -EIO;
4483                                break;
4484                        }
4485                }
4486
4487                addr = kmap(page);
4488                memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4489                kunmap(page);
4490                unlock_page(page);
4491                page_cache_release(page);
4492                index++;
4493                pg_offset = 0;
4494                len -= cur_len;
4495                ret += cur_len;
4496        }
4497out:
4498        iput(inode);
4499        return ret;
4500}
4501
4502/*
4503 * Read some bytes from the current inode/file and send a write command to
4504 * user space.
4505 */
4506static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4507{
4508        int ret = 0;
4509        struct fs_path *p;
4510        ssize_t num_read = 0;
4511
4512        p = fs_path_alloc();
4513        if (!p)
4514                return -ENOMEM;
4515
4516verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
4517
4518        num_read = fill_read_buf(sctx, offset, len);
4519        if (num_read <= 0) {
4520                if (num_read < 0)
4521                        ret = num_read;
4522                goto out;
4523        }
4524
4525        ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4526        if (ret < 0)
4527                goto out;
4528
4529        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4530        if (ret < 0)
4531                goto out;
4532
4533        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4534        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4535        TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4536
4537        ret = send_cmd(sctx);
4538
4539tlv_put_failure:
4540out:
4541        fs_path_free(p);
4542        if (ret < 0)
4543                return ret;
4544        return num_read;
4545}
4546
4547/*
4548 * Send a clone command to user space.
4549 */
4550static int send_clone(struct send_ctx *sctx,
4551                      u64 offset, u32 len,
4552                      struct clone_root *clone_root)
4553{
4554        int ret = 0;
4555        struct fs_path *p;
4556        u64 gen;
4557
4558verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
4559               "clone_inode=%llu, clone_offset=%llu\n", offset, len,
4560                clone_root->root->objectid, clone_root->ino,
4561                clone_root->offset);
4562
4563        p = fs_path_alloc();
4564        if (!p)
4565                return -ENOMEM;
4566
4567        ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4568        if (ret < 0)
4569                goto out;
4570
4571        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4572        if (ret < 0)
4573                goto out;
4574
4575        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4576        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4577        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4578
4579        if (clone_root->root == sctx->send_root) {
4580                ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4581                                &gen, NULL, NULL, NULL, NULL);
4582                if (ret < 0)
4583                        goto out;
4584                ret = get_cur_path(sctx, clone_root->ino, gen, p);
4585        } else {
4586                ret = get_inode_path(clone_root->root, clone_root->ino, p);
4587        }
4588        if (ret < 0)
4589                goto out;
4590
4591        /*
4592         * If the parent we're using has a received_uuid set then use that as
4593         * our clone source as that is what we will look for when doing a
4594         * receive.
4595         *
4596         * This covers the case that we create a snapshot off of a received
4597         * subvolume and then use that as the parent and try to receive on a
4598         * different host.
4599         */
4600        if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
4601                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4602                             clone_root->root->root_item.received_uuid);
4603        else
4604                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4605                             clone_root->root->root_item.uuid);
4606        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4607                    le64_to_cpu(clone_root->root->root_item.ctransid));
4608        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4609        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4610                        clone_root->offset);
4611
4612        ret = send_cmd(sctx);
4613
4614tlv_put_failure:
4615out:
4616        fs_path_free(p);
4617        return ret;
4618}
4619
4620/*
4621 * Send an update extent command to user space.
4622 */
4623static int send_update_extent(struct send_ctx *sctx,
4624                              u64 offset, u32 len)
4625{
4626        int ret = 0;
4627        struct fs_path *p;
4628
4629        p = fs_path_alloc();
4630        if (!p)
4631                return -ENOMEM;
4632
4633        ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
4634        if (ret < 0)
4635                goto out;
4636
4637        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4638        if (ret < 0)
4639                goto out;
4640
4641        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4642        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4643        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
4644
4645        ret = send_cmd(sctx);
4646
4647tlv_put_failure:
4648out:
4649        fs_path_free(p);
4650        return ret;
4651}
4652
4653static int send_hole(struct send_ctx *sctx, u64 end)
4654{
4655        struct fs_path *p = NULL;
4656        u64 offset = sctx->cur_inode_last_extent;
4657        u64 len;
4658        int ret = 0;
4659
4660        p = fs_path_alloc();
4661        if (!p)
4662                return -ENOMEM;
4663        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4664        if (ret < 0)
4665                goto tlv_put_failure;
4666        memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
4667        while (offset < end) {
4668                len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
4669
4670                ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4671                if (ret < 0)
4672                        break;
4673                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4674                TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4675                TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
4676                ret = send_cmd(sctx);
4677                if (ret < 0)
4678                        break;
4679                offset += len;
4680        }
4681tlv_put_failure:
4682        fs_path_free(p);
4683        return ret;
4684}
4685
4686static int send_extent_data(struct send_ctx *sctx,
4687                            const u64 offset,
4688                            const u64 len)
4689{
4690        u64 sent = 0;
4691
4692        if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
4693                return send_update_extent(sctx, offset, len);
4694
4695        while (sent < len) {
4696                u64 size = len - sent;
4697                int ret;
4698
4699                if (size > BTRFS_SEND_READ_SIZE)
4700                        size = BTRFS_SEND_READ_SIZE;
4701                ret = send_write(sctx, offset + sent, size);
4702                if (ret < 0)
4703                        return ret;
4704                if (!ret)
4705                        break;
4706                sent += ret;
4707        }
4708        return 0;
4709}
4710
4711static int clone_range(struct send_ctx *sctx,
4712                       struct clone_root *clone_root,
4713                       const u64 disk_byte,
4714                       u64 data_offset,
4715                       u64 offset,
4716                       u64 len)
4717{
4718        struct btrfs_path *path;
4719        struct btrfs_key key;
4720        int ret;
4721
4722        path = alloc_path_for_send();
4723        if (!path)
4724                return -ENOMEM;
4725
4726        /*
4727         * We can't send a clone operation for the entire range if we find
4728         * extent items in the respective range in the source file that
4729         * refer to different extents or if we find holes.
4730         * So check for that and do a mix of clone and regular write/copy
4731         * operations if needed.
4732         *
4733         * Example:
4734         *
4735         * mkfs.btrfs -f /dev/sda
4736         * mount /dev/sda /mnt
4737         * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
4738         * cp --reflink=always /mnt/foo /mnt/bar
4739         * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
4740         * btrfs subvolume snapshot -r /mnt /mnt/snap
4741         *
4742         * If when we send the snapshot and we are processing file bar (which
4743         * has a higher inode number than foo) we blindly send a clone operation
4744         * for the [0, 100K[ range from foo to bar, the receiver ends up getting
4745         * a file bar that matches the content of file foo - iow, doesn't match
4746         * the content from bar in the original filesystem.
4747         */
4748        key.objectid = clone_root->ino;
4749        key.type = BTRFS_EXTENT_DATA_KEY;
4750        key.offset = clone_root->offset;
4751        ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
4752        if (ret < 0)
4753                goto out;
4754        if (ret > 0 && path->slots[0] > 0) {
4755                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
4756                if (key.objectid == clone_root->ino &&
4757                    key.type == BTRFS_EXTENT_DATA_KEY)
4758                        path->slots[0]--;
4759        }
4760
4761        while (true) {
4762                struct extent_buffer *leaf = path->nodes[0];
4763                int slot = path->slots[0];
4764                struct btrfs_file_extent_item *ei;
4765                u8 type;
4766                u64 ext_len;
4767                u64 clone_len;
4768
4769                if (slot >= btrfs_header_nritems(leaf)) {
4770                        ret = btrfs_next_leaf(clone_root->root, path);
4771                        if (ret < 0)
4772                                goto out;
4773                        else if (ret > 0)
4774                                break;
4775                        continue;
4776                }
4777
4778                btrfs_item_key_to_cpu(leaf, &key, slot);
4779
4780                /*
4781                 * We might have an implicit trailing hole (NO_HOLES feature
4782                 * enabled). We deal with it after leaving this loop.
4783                 */
4784                if (key.objectid != clone_root->ino ||
4785                    key.type != BTRFS_EXTENT_DATA_KEY)
4786                        break;
4787
4788                ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
4789                type = btrfs_file_extent_type(leaf, ei);
4790                if (type == BTRFS_FILE_EXTENT_INLINE) {
4791                        ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
4792                        ext_len = PAGE_CACHE_ALIGN(ext_len);
4793                } else {
4794                        ext_len = btrfs_file_extent_num_bytes(leaf, ei);
4795                }
4796
4797                if (key.offset + ext_len <= clone_root->offset)
4798                        goto next;
4799
4800                if (key.offset > clone_root->offset) {
4801                        /* Implicit hole, NO_HOLES feature enabled. */
4802                        u64 hole_len = key.offset - clone_root->offset;
4803
4804                        if (hole_len > len)
4805                                hole_len = len;
4806                        ret = send_extent_data(sctx, offset, hole_len);
4807                        if (ret < 0)
4808                                goto out;
4809
4810                        len -= hole_len;
4811                        if (len == 0)
4812                                break;
4813                        offset += hole_len;
4814                        clone_root->offset += hole_len;
4815                        data_offset += hole_len;
4816                }
4817
4818                if (key.offset >= clone_root->offset + len)
4819                        break;
4820
4821                clone_len = min_t(u64, ext_len, len);
4822
4823                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
4824                    btrfs_file_extent_offset(leaf, ei) == data_offset)
4825                        ret = send_clone(sctx, offset, clone_len, clone_root);
4826                else
4827                        ret = send_extent_data(sctx, offset, clone_len);
4828
4829                if (ret < 0)
4830                        goto out;
4831
4832                len -= clone_len;
4833                if (len == 0)
4834                        break;
4835                offset += clone_len;
4836                clone_root->offset += clone_len;
4837                data_offset += clone_len;
4838next:
4839                path->slots[0]++;
4840        }
4841
4842        if (len > 0)
4843                ret = send_extent_data(sctx, offset, len);
4844        else
4845                ret = 0;
4846out:
4847        btrfs_free_path(path);
4848        return ret;
4849}
4850
4851static int send_write_or_clone(struct send_ctx *sctx,
4852                               struct btrfs_path *path,
4853                               struct btrfs_key *key,
4854                               struct clone_root *clone_root)
4855{
4856        int ret = 0;
4857        struct btrfs_file_extent_item *ei;
4858        u64 offset = key->offset;
4859        u64 len;
4860        u8 type;
4861        u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
4862
4863        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4864                        struct btrfs_file_extent_item);
4865        type = btrfs_file_extent_type(path->nodes[0], ei);
4866        if (type == BTRFS_FILE_EXTENT_INLINE) {
4867                len = btrfs_file_extent_inline_len(path->nodes[0],
4868                                                   path->slots[0], ei);
4869                /*
4870                 * it is possible the inline item won't cover the whole page,
4871                 * but there may be items after this page.  Make
4872                 * sure to send the whole thing
4873                 */
4874                len = PAGE_CACHE_ALIGN(len);
4875        } else {
4876                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
4877        }
4878
4879        if (offset + len > sctx->cur_inode_size)
4880                len = sctx->cur_inode_size - offset;
4881        if (len == 0) {
4882                ret = 0;
4883                goto out;
4884        }
4885
4886        if (clone_root && IS_ALIGNED(offset + len, bs)) {
4887                u64 disk_byte;
4888                u64 data_offset;
4889
4890                disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
4891                data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
4892                ret = clone_range(sctx, clone_root, disk_byte, data_offset,
4893                                  offset, len);
4894        } else {
4895                ret = send_extent_data(sctx, offset, len);
4896        }
4897out:
4898        return ret;
4899}
4900
4901static int is_extent_unchanged(struct send_ctx *sctx,
4902                               struct btrfs_path *left_path,
4903                               struct btrfs_key *ekey)
4904{
4905        int ret = 0;
4906        struct btrfs_key key;
4907        struct btrfs_path *path = NULL;
4908        struct extent_buffer *eb;
4909        int slot;
4910        struct btrfs_key found_key;
4911        struct btrfs_file_extent_item *ei;
4912        u64 left_disknr;
4913        u64 right_disknr;
4914        u64 left_offset;
4915        u64 right_offset;
4916        u64 left_offset_fixed;
4917        u64 left_len;
4918        u64 right_len;
4919        u64 left_gen;
4920        u64 right_gen;
4921        u8 left_type;
4922        u8 right_type;
4923
4924        path = alloc_path_for_send();
4925        if (!path)
4926                return -ENOMEM;
4927
4928        eb = left_path->nodes[0];
4929        slot = left_path->slots[0];
4930        ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4931        left_type = btrfs_file_extent_type(eb, ei);
4932
4933        if (left_type != BTRFS_FILE_EXTENT_REG) {
4934                ret = 0;
4935                goto out;
4936        }
4937        left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
4938        left_len = btrfs_file_extent_num_bytes(eb, ei);
4939        left_offset = btrfs_file_extent_offset(eb, ei);
4940        left_gen = btrfs_file_extent_generation(eb, ei);
4941
4942        /*
4943         * Following comments will refer to these graphics. L is the left
4944         * extents which we are checking at the moment. 1-8 are the right
4945         * extents that we iterate.
4946         *
4947         *       |-----L-----|
4948         * |-1-|-2a-|-3-|-4-|-5-|-6-|
4949         *
4950         *       |-----L-----|
4951         * |--1--|-2b-|...(same as above)
4952         *
4953         * Alternative situation. Happens on files where extents got split.
4954         *       |-----L-----|
4955         * |-----------7-----------|-6-|
4956         *
4957         * Alternative situation. Happens on files which got larger.
4958         *       |-----L-----|
4959         * |-8-|
4960         * Nothing follows after 8.
4961         */
4962
4963        key.objectid = ekey->objectid;
4964        key.type = BTRFS_EXTENT_DATA_KEY;
4965        key.offset = ekey->offset;
4966        ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
4967        if (ret < 0)
4968                goto out;
4969        if (ret) {
4970                ret = 0;
4971                goto out;
4972        }
4973
4974        /*
4975         * Handle special case where the right side has no extents at all.
4976         */
4977        eb = path->nodes[0];
4978        slot = path->slots[0];
4979        btrfs_item_key_to_cpu(eb, &found_key, slot);
4980        if (found_key.objectid != key.objectid ||
4981            found_key.type != key.type) {
4982                /* If we're a hole then just pretend nothing changed */
4983                ret = (left_disknr) ? 0 : 1;
4984                goto out;
4985        }
4986
4987        /*
4988         * We're now on 2a, 2b or 7.
4989         */
4990        key = found_key;
4991        while (key.offset < ekey->offset + left_len) {
4992                ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
4993                right_type = btrfs_file_extent_type(eb, ei);
4994                if (right_type != BTRFS_FILE_EXTENT_REG) {
4995                        ret = 0;
4996                        goto out;
4997                }
4998
4999                right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5000                right_len = btrfs_file_extent_num_bytes(eb, ei);
5001                right_offset = btrfs_file_extent_offset(eb, ei);
5002                right_gen = btrfs_file_extent_generation(eb, ei);
5003
5004                /*
5005                 * Are we at extent 8? If yes, we know the extent is changed.
5006                 * This may only happen on the first iteration.
5007                 */
5008                if (found_key.offset + right_len <= ekey->offset) {
5009                        /* If we're a hole just pretend nothing changed */
5010                        ret = (left_disknr) ? 0 : 1;
5011                        goto out;
5012                }
5013
5014                left_offset_fixed = left_offset;
5015                if (key.offset < ekey->offset) {
5016                        /* Fix the right offset for 2a and 7. */
5017                        right_offset += ekey->offset - key.offset;
5018                } else {
5019                        /* Fix the left offset for all behind 2a and 2b */
5020                        left_offset_fixed += key.offset - ekey->offset;
5021                }
5022
5023                /*
5024                 * Check if we have the same extent.
5025                 */
5026                if (left_disknr != right_disknr ||
5027                    left_offset_fixed != right_offset ||
5028                    left_gen != right_gen) {
5029                        ret = 0;
5030                        goto out;
5031                }
5032
5033                /*
5034                 * Go to the next extent.
5035                 */
5036                ret = btrfs_next_item(sctx->parent_root, path);
5037                if (ret < 0)
5038                        goto out;
5039                if (!ret) {
5040                        eb = path->nodes[0];
5041                        slot = path->slots[0];
5042                        btrfs_item_key_to_cpu(eb, &found_key, slot);
5043                }
5044                if (ret || found_key.objectid != key.objectid ||
5045                    found_key.type != key.type) {
5046                        key.offset += right_len;
5047                        break;
5048                }
5049                if (found_key.offset != key.offset + right_len) {
5050                        ret = 0;
5051                        goto out;
5052                }
5053                key = found_key;
5054        }
5055
5056        /*
5057         * We're now behind the left extent (treat as unchanged) or at the end
5058         * of the right side (treat as changed).
5059         */
5060        if (key.offset >= ekey->offset + left_len)
5061                ret = 1;
5062        else
5063                ret = 0;
5064
5065
5066out:
5067        btrfs_free_path(path);
5068        return ret;
5069}
5070
5071static int get_last_extent(struct send_ctx *sctx, u64 offset)
5072{
5073        struct btrfs_path *path;
5074        struct btrfs_root *root = sctx->send_root;
5075        struct btrfs_file_extent_item *fi;
5076        struct btrfs_key key;
5077        u64 extent_end;
5078        u8 type;
5079        int ret;
5080
5081        path = alloc_path_for_send();
5082        if (!path)
5083                return -ENOMEM;
5084
5085        sctx->cur_inode_last_extent = 0;
5086
5087        key.objectid = sctx->cur_ino;
5088        key.type = BTRFS_EXTENT_DATA_KEY;
5089        key.offset = offset;
5090        ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
5091        if (ret < 0)
5092                goto out;
5093        ret = 0;
5094        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5095        if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
5096                goto out;
5097
5098        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5099                            struct btrfs_file_extent_item);
5100        type = btrfs_file_extent_type(path->nodes[0], fi);
5101        if (type == BTRFS_FILE_EXTENT_INLINE) {
5102                u64 size = btrfs_file_extent_inline_len(path->nodes[0],
5103                                                        path->slots[0], fi);
5104                extent_end = ALIGN(key.offset + size,
5105                                   sctx->send_root->sectorsize);
5106        } else {
5107                extent_end = key.offset +
5108                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
5109        }
5110        sctx->cur_inode_last_extent = extent_end;
5111out:
5112        btrfs_free_path(path);
5113        return ret;
5114}
5115
5116static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5117                           struct btrfs_key *key)
5118{
5119        struct btrfs_file_extent_item *fi;
5120        u64 extent_end;
5121        u8 type;
5122        int ret = 0;
5123
5124        if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
5125                return 0;
5126
5127        if (sctx->cur_inode_last_extent == (u64)-1) {
5128                ret = get_last_extent(sctx, key->offset - 1);
5129                if (ret)
5130                        return ret;
5131        }
5132
5133        fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5134                            struct btrfs_file_extent_item);
5135        type = btrfs_file_extent_type(path->nodes[0], fi);
5136        if (type == BTRFS_FILE_EXTENT_INLINE) {
5137                u64 size = btrfs_file_extent_inline_len(path->nodes[0],
5138                                                        path->slots[0], fi);
5139                extent_end = ALIGN(key->offset + size,
5140                                   sctx->send_root->sectorsize);
5141        } else {
5142                extent_end = key->offset +
5143                        btrfs_file_extent_num_bytes(path->nodes[0], fi);
5144        }
5145
5146        if (path->slots[0] == 0 &&
5147            sctx->cur_inode_last_extent < key->offset) {
5148                /*
5149                 * We might have skipped entire leafs that contained only
5150                 * file extent items for our current inode. These leafs have
5151                 * a generation number smaller (older) than the one in the
5152                 * current leaf and the leaf our last extent came from, and
5153                 * are located between these 2 leafs.
5154                 */
5155                ret = get_last_extent(sctx, key->offset - 1);
5156                if (ret)
5157                        return ret;
5158        }
5159
5160        if (sctx->cur_inode_last_extent < key->offset)
5161                ret = send_hole(sctx, key->offset);
5162        sctx->cur_inode_last_extent = extent_end;
5163        return ret;
5164}
5165
5166static int process_extent(struct send_ctx *sctx,
5167                          struct btrfs_path *path,
5168                          struct btrfs_key *key)
5169{
5170        struct clone_root *found_clone = NULL;
5171        int ret = 0;
5172
5173        if (S_ISLNK(sctx->cur_inode_mode))
5174                return 0;
5175
5176        if (sctx->parent_root && !sctx->cur_inode_new) {
5177                ret = is_extent_unchanged(sctx, path, key);
5178                if (ret < 0)
5179                        goto out;
5180                if (ret) {
5181                        ret = 0;
5182                        goto out_hole;
5183                }
5184        } else {
5185                struct btrfs_file_extent_item *ei;
5186                u8 type;
5187
5188                ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
5189                                    struct btrfs_file_extent_item);
5190                type = btrfs_file_extent_type(path->nodes[0], ei);
5191                if (type == BTRFS_FILE_EXTENT_PREALLOC ||
5192                    type == BTRFS_FILE_EXTENT_REG) {
5193                        /*
5194                         * The send spec does not have a prealloc command yet,
5195                         * so just leave a hole for prealloc'ed extents until
5196                         * we have enough commands queued up to justify rev'ing
5197                         * the send spec.
5198                         */
5199                        if (type == BTRFS_FILE_EXTENT_PREALLOC) {
5200                                ret = 0;
5201                                goto out;
5202                        }
5203
5204                        /* Have a hole, just skip it. */
5205                        if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
5206                                ret = 0;
5207                                goto out;
5208                        }
5209                }
5210        }
5211
5212        ret = find_extent_clone(sctx, path, key->objectid, key->offset,
5213                        sctx->cur_inode_size, &found_clone);
5214        if (ret != -ENOENT && ret < 0)
5215                goto out;
5216
5217        ret = send_write_or_clone(sctx, path, key, found_clone);
5218        if (ret)
5219                goto out;
5220out_hole:
5221        ret = maybe_send_hole(sctx, path, key);
5222out:
5223        return ret;
5224}
5225
5226static int process_all_extents(struct send_ctx *sctx)
5227{
5228        int ret;
5229        struct btrfs_root *root;
5230        struct btrfs_path *path;
5231        struct btrfs_key key;
5232        struct btrfs_key found_key;
5233        struct extent_buffer *eb;
5234        int slot;
5235
5236        root = sctx->send_root;
5237        path = alloc_path_for_send();
5238        if (!path)
5239                return -ENOMEM;
5240
5241        key.objectid = sctx->cmp_key->objectid;
5242        key.type = BTRFS_EXTENT_DATA_KEY;
5243        key.offset = 0;
5244        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5245        if (ret < 0)
5246                goto out;
5247
5248        while (1) {
5249                eb = path->nodes[0];
5250                slot = path->slots[0];
5251
5252                if (slot >= btrfs_header_nritems(eb)) {
5253                        ret = btrfs_next_leaf(root, path);
5254                        if (ret < 0) {
5255                                goto out;
5256                        } else if (ret > 0) {
5257                                ret = 0;
5258                                break;
5259                        }
5260                        continue;
5261                }
5262
5263                btrfs_item_key_to_cpu(eb, &found_key, slot);
5264
5265                if (found_key.objectid != key.objectid ||
5266                    found_key.type != key.type) {
5267                        ret = 0;
5268                        goto out;
5269                }
5270
5271                ret = process_extent(sctx, path, &found_key);
5272                if (ret < 0)
5273                        goto out;
5274
5275                path->slots[0]++;
5276        }
5277
5278out:
5279        btrfs_free_path(path);
5280        return ret;
5281}
5282
5283static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5284                                           int *pending_move,
5285                                           int *refs_processed)
5286{
5287        int ret = 0;
5288
5289        if (sctx->cur_ino == 0)
5290                goto out;
5291        if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5292            sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5293                goto out;
5294        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5295                goto out;
5296
5297        ret = process_recorded_refs(sctx, pending_move);
5298        if (ret < 0)
5299                goto out;
5300
5301        *refs_processed = 1;
5302out:
5303        return ret;
5304}
5305
5306static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
5307{
5308        int ret = 0;
5309        u64 left_mode;
5310        u64 left_uid;
5311        u64 left_gid;
5312        u64 right_mode;
5313        u64 right_uid;
5314        u64 right_gid;
5315        int need_chmod = 0;
5316        int need_chown = 0;
5317        int pending_move = 0;
5318        int refs_processed = 0;
5319
5320        ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
5321                                              &refs_processed);
5322        if (ret < 0)
5323                goto out;
5324
5325        /*
5326         * We have processed the refs and thus need to advance send_progress.
5327         * Now, calls to get_cur_xxx will take the updated refs of the current
5328         * inode into account.
5329         *
5330         * On the other hand, if our current inode is a directory and couldn't
5331         * be moved/renamed because its parent was renamed/moved too and it has
5332         * a higher inode number, we can only move/rename our current inode
5333         * after we moved/renamed its parent. Therefore in this case operate on
5334         * the old path (pre move/rename) of our current inode, and the
5335         * move/rename will be performed later.
5336         */
5337        if (refs_processed && !pending_move)
5338                sctx->send_progress = sctx->cur_ino + 1;
5339
5340        if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
5341                goto out;
5342        if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
5343                goto out;
5344
5345        ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
5346                        &left_mode, &left_uid, &left_gid, NULL);
5347        if (ret < 0)
5348                goto out;
5349
5350        if (!sctx->parent_root || sctx->cur_inode_new) {
5351                need_chown = 1;
5352                if (!S_ISLNK(sctx->cur_inode_mode))
5353                        need_chmod = 1;
5354        } else {
5355                ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
5356                                NULL, NULL, &right_mode, &right_uid,
5357                                &right_gid, NULL);
5358                if (ret < 0)
5359                        goto out;
5360
5361                if (left_uid != right_uid || left_gid != right_gid)
5362                        need_chown = 1;
5363                if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
5364                        need_chmod = 1;
5365        }
5366
5367        if (S_ISREG(sctx->cur_inode_mode)) {
5368                if (need_send_hole(sctx)) {
5369                        if (sctx->cur_inode_last_extent == (u64)-1 ||
5370                            sctx->cur_inode_last_extent <
5371                            sctx->cur_inode_size) {
5372                                ret = get_last_extent(sctx, (u64)-1);
5373                                if (ret)
5374                                        goto out;
5375                        }
5376                        if (sctx->cur_inode_last_extent <
5377                            sctx->cur_inode_size) {
5378                                ret = send_hole(sctx, sctx->cur_inode_size);
5379                                if (ret)
5380                                        goto out;
5381                        }
5382                }
5383                ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5384                                sctx->cur_inode_size);
5385                if (ret < 0)
5386                        goto out;
5387        }
5388
5389        if (need_chown) {
5390                ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5391                                left_uid, left_gid);
5392                if (ret < 0)
5393                        goto out;
5394        }
5395        if (need_chmod) {
5396                ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5397                                left_mode);
5398                if (ret < 0)
5399                        goto out;
5400        }
5401
5402        /*
5403         * If other directory inodes depended on our current directory
5404         * inode's move/rename, now do their move/rename operations.
5405         */
5406        if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
5407                ret = apply_children_dir_moves(sctx);
5408                if (ret)
5409                        goto out;
5410                /*
5411                 * Need to send that every time, no matter if it actually
5412                 * changed between the two trees as we have done changes to
5413                 * the inode before. If our inode is a directory and it's
5414                 * waiting to be moved/renamed, we will send its utimes when
5415                 * it's moved/renamed, therefore we don't need to do it here.
5416                 */
5417                sctx->send_progress = sctx->cur_ino + 1;
5418                ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
5419                if (ret < 0)
5420                        goto out;
5421        }
5422
5423out:
5424        return ret;
5425}
5426
5427static int changed_inode(struct send_ctx *sctx,
5428                         enum btrfs_compare_tree_result result)
5429{
5430        int ret = 0;
5431        struct btrfs_key *key = sctx->cmp_key;
5432        struct btrfs_inode_item *left_ii = NULL;
5433        struct btrfs_inode_item *right_ii = NULL;
5434        u64 left_gen = 0;
5435        u64 right_gen = 0;
5436
5437        sctx->cur_ino = key->objectid;
5438        sctx->cur_inode_new_gen = 0;
5439        sctx->cur_inode_last_extent = (u64)-1;
5440
5441        /*
5442         * Set send_progress to current inode. This will tell all get_cur_xxx
5443         * functions that the current inode's refs are not updated yet. Later,
5444         * when process_recorded_refs is finished, it is set to cur_ino + 1.
5445         */
5446        sctx->send_progress = sctx->cur_ino;
5447
5448        if (result == BTRFS_COMPARE_TREE_NEW ||
5449            result == BTRFS_COMPARE_TREE_CHANGED) {
5450                left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
5451                                sctx->left_path->slots[0],
5452                                struct btrfs_inode_item);
5453                left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
5454                                left_ii);
5455        } else {
5456                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5457                                sctx->right_path->slots[0],
5458                                struct btrfs_inode_item);
5459                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5460                                right_ii);
5461        }
5462        if (result == BTRFS_COMPARE_TREE_CHANGED) {
5463                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
5464                                sctx->right_path->slots[0],
5465                                struct btrfs_inode_item);
5466
5467                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
5468                                right_ii);
5469
5470                /*
5471                 * The cur_ino = root dir case is special here. We can't treat
5472                 * the inode as deleted+reused because it would generate a
5473                 * stream that tries to delete/mkdir the root dir.
5474                 */
5475                if (left_gen != right_gen &&
5476                    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5477                        sctx->cur_inode_new_gen = 1;
5478        }
5479
5480        if (result == BTRFS_COMPARE_TREE_NEW) {
5481                sctx->cur_inode_gen = left_gen;
5482                sctx->cur_inode_new = 1;
5483                sctx->cur_inode_deleted = 0;
5484                sctx->cur_inode_size = btrfs_inode_size(
5485                                sctx->left_path->nodes[0], left_ii);
5486                sctx->cur_inode_mode = btrfs_inode_mode(
5487                                sctx->left_path->nodes[0], left_ii);
5488                sctx->cur_inode_rdev = btrfs_inode_rdev(
5489                                sctx->left_path->nodes[0], left_ii);
5490                if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
5491                        ret = send_create_inode_if_needed(sctx);
5492        } else if (result == BTRFS_COMPARE_TREE_DELETED) {
5493                sctx->cur_inode_gen = right_gen;
5494                sctx->cur_inode_new = 0;
5495                sctx->cur_inode_deleted = 1;
5496                sctx->cur_inode_size = btrfs_inode_size(
5497                                sctx->right_path->nodes[0], right_ii);
5498                sctx->cur_inode_mode = btrfs_inode_mode(
5499                                sctx->right_path->nodes[0], right_ii);
5500        } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
5501                /*
5502                 * We need to do some special handling in case the inode was
5503                 * reported as changed with a changed generation number. This
5504                 * means that the original inode was deleted and new inode
5505                 * reused the same inum. So we have to treat the old inode as
5506                 * deleted and the new one as new.
5507                 */
5508                if (sctx->cur_inode_new_gen) {
5509                        /*
5510                         * First, process the inode as if it was deleted.
5511                         */
5512                        sctx->cur_inode_gen = right_gen;
5513                        sctx->cur_inode_new = 0;
5514                        sctx->cur_inode_deleted = 1;
5515                        sctx->cur_inode_size = btrfs_inode_size(
5516                                        sctx->right_path->nodes[0], right_ii);
5517                        sctx->cur_inode_mode = btrfs_inode_mode(
5518                                        sctx->right_path->nodes[0], right_ii);
5519                        ret = process_all_refs(sctx,
5520                                        BTRFS_COMPARE_TREE_DELETED);
5521                        if (ret < 0)
5522                                goto out;
5523
5524                        /*
5525                         * Now process the inode as if it was new.
5526                         */
5527                        sctx->cur_inode_gen = left_gen;
5528                        sctx->cur_inode_new = 1;
5529                        sctx->cur_inode_deleted = 0;
5530                        sctx->cur_inode_size = btrfs_inode_size(
5531                                        sctx->left_path->nodes[0], left_ii);
5532                        sctx->cur_inode_mode = btrfs_inode_mode(
5533                                        sctx->left_path->nodes[0], left_ii);
5534                        sctx->cur_inode_rdev = btrfs_inode_rdev(
5535                                        sctx->left_path->nodes[0], left_ii);
5536                        ret = send_create_inode_if_needed(sctx);
5537                        if (ret < 0)
5538                                goto out;
5539
5540                        ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
5541                        if (ret < 0)
5542                                goto out;
5543                        /*
5544                         * Advance send_progress now as we did not get into
5545                         * process_recorded_refs_if_needed in the new_gen case.
5546                         */
5547                        sctx->send_progress = sctx->cur_ino + 1;
5548
5549                        /*
5550                         * Now process all extents and xattrs of the inode as if
5551                         * they were all new.
5552                         */
5553                        ret = process_all_extents(sctx);
5554                        if (ret < 0)
5555                                goto out;
5556                        ret = process_all_new_xattrs(sctx);
5557                        if (ret < 0)
5558                                goto out;
5559                } else {
5560                        sctx->cur_inode_gen = left_gen;
5561                        sctx->cur_inode_new = 0;
5562                        sctx->cur_inode_new_gen = 0;
5563                        sctx->cur_inode_deleted = 0;
5564                        sctx->cur_inode_size = btrfs_inode_size(
5565                                        sctx->left_path->nodes[0], left_ii);
5566                        sctx->cur_inode_mode = btrfs_inode_mode(
5567                                        sctx->left_path->nodes[0], left_ii);
5568                }
5569        }
5570
5571out:
5572        return ret;
5573}
5574
5575/*
5576 * We have to process new refs before deleted refs, but compare_trees gives us
5577 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5578 * first and later process them in process_recorded_refs.
5579 * For the cur_inode_new_gen case, we skip recording completely because
5580 * changed_inode did already initiate processing of refs. The reason for this is
5581 * that in this case, compare_tree actually compares the refs of 2 different
5582 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5583 * refs of the right tree as deleted and all refs of the left tree as new.
5584 */
5585static int changed_ref(struct send_ctx *sctx,
5586                       enum btrfs_compare_tree_result result)
5587{
5588        int ret = 0;
5589
5590        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5591
5592        if (!sctx->cur_inode_new_gen &&
5593            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5594                if (result == BTRFS_COMPARE_TREE_NEW)
5595                        ret = record_new_ref(sctx);
5596                else if (result == BTRFS_COMPARE_TREE_DELETED)
5597                        ret = record_deleted_ref(sctx);
5598                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5599                        ret = record_changed_ref(sctx);
5600        }
5601
5602        return ret;
5603}
5604
5605/*
5606 * Process new/deleted/changed xattrs. We skip processing in the
5607 * cur_inode_new_gen case because changed_inode did already initiate processing
5608 * of xattrs. The reason is the same as in changed_ref
5609 */
5610static int changed_xattr(struct send_ctx *sctx,
5611                         enum btrfs_compare_tree_result result)
5612{
5613        int ret = 0;
5614
5615        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5616
5617        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5618                if (result == BTRFS_COMPARE_TREE_NEW)
5619                        ret = process_new_xattr(sctx);
5620                else if (result == BTRFS_COMPARE_TREE_DELETED)
5621                        ret = process_deleted_xattr(sctx);
5622                else if (result == BTRFS_COMPARE_TREE_CHANGED)
5623                        ret = process_changed_xattr(sctx);
5624        }
5625
5626        return ret;
5627}
5628
5629/*
5630 * Process new/deleted/changed extents. We skip processing in the
5631 * cur_inode_new_gen case because changed_inode did already initiate processing
5632 * of extents. The reason is the same as in changed_ref
5633 */
5634static int changed_extent(struct send_ctx *sctx,
5635                          enum btrfs_compare_tree_result result)
5636{
5637        int ret = 0;
5638
5639        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5640
5641        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5642                if (result != BTRFS_COMPARE_TREE_DELETED)
5643                        ret = process_extent(sctx, sctx->left_path,
5644                                        sctx->cmp_key);
5645        }
5646
5647        return ret;
5648}
5649
5650static int dir_changed(struct send_ctx *sctx, u64 dir)
5651{
5652        u64 orig_gen, new_gen;
5653        int ret;
5654
5655        ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5656                             NULL, NULL);
5657        if (ret)
5658                return ret;
5659
5660        ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5661                             NULL, NULL, NULL);
5662        if (ret)
5663                return ret;
5664
5665        return (orig_gen != new_gen) ? 1 : 0;
5666}
5667
5668static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
5669                        struct btrfs_key *key)
5670{
5671        struct btrfs_inode_extref *extref;
5672        struct extent_buffer *leaf;
5673        u64 dirid = 0, last_dirid = 0;
5674        unsigned long ptr;
5675        u32 item_size;
5676        u32 cur_offset = 0;
5677        int ref_name_len;
5678        int ret = 0;
5679
5680        /* Easy case, just check this one dirid */
5681        if (key->type == BTRFS_INODE_REF_KEY) {
5682                dirid = key->offset;
5683
5684                ret = dir_changed(sctx, dirid);
5685                goto out;
5686        }
5687
5688        leaf = path->nodes[0];
5689        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
5690        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
5691        while (cur_offset < item_size) {
5692                extref = (struct btrfs_inode_extref *)(ptr +
5693                                                       cur_offset);
5694                dirid = btrfs_inode_extref_parent(leaf, extref);
5695                ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
5696                cur_offset += ref_name_len + sizeof(*extref);
5697                if (dirid == last_dirid)
5698                        continue;
5699                ret = dir_changed(sctx, dirid);
5700                if (ret)
5701                        break;
5702                last_dirid = dirid;
5703        }
5704out:
5705        return ret;
5706}
5707
5708/*
5709 * Updates compare related fields in sctx and simply forwards to the actual
5710 * changed_xxx functions.
5711 */
5712static int changed_cb(struct btrfs_root *left_root,
5713                      struct btrfs_root *right_root,
5714                      struct btrfs_path *left_path,
5715                      struct btrfs_path *right_path,
5716                      struct btrfs_key *key,
5717                      enum btrfs_compare_tree_result result,
5718                      void *ctx)
5719{
5720        int ret = 0;
5721        struct send_ctx *sctx = ctx;
5722
5723        if (result == BTRFS_COMPARE_TREE_SAME) {
5724                if (key->type == BTRFS_INODE_REF_KEY ||
5725                    key->type == BTRFS_INODE_EXTREF_KEY) {
5726                        ret = compare_refs(sctx, left_path, key);
5727                        if (!ret)
5728                                return 0;
5729                        if (ret < 0)
5730                                return ret;
5731                } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5732                        return maybe_send_hole(sctx, left_path, key);
5733                } else {
5734                        return 0;
5735                }
5736                result = BTRFS_COMPARE_TREE_CHANGED;
5737                ret = 0;
5738        }
5739
5740        sctx->left_path = left_path;
5741        sctx->right_path = right_path;
5742        sctx->cmp_key = key;
5743
5744        ret = finish_inode_if_needed(sctx, 0);
5745        if (ret < 0)
5746                goto out;
5747
5748        /* Ignore non-FS objects */
5749        if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
5750            key->objectid == BTRFS_FREE_SPACE_OBJECTID)
5751                goto out;
5752
5753        if (key->type == BTRFS_INODE_ITEM_KEY)
5754                ret = changed_inode(sctx, result);
5755        else if (key->type == BTRFS_INODE_REF_KEY ||
5756                 key->type == BTRFS_INODE_EXTREF_KEY)
5757                ret = changed_ref(sctx, result);
5758        else if (key->type == BTRFS_XATTR_ITEM_KEY)
5759                ret = changed_xattr(sctx, result);
5760        else if (key->type == BTRFS_EXTENT_DATA_KEY)
5761                ret = changed_extent(sctx, result);
5762
5763out:
5764        return ret;
5765}
5766
5767static int full_send_tree(struct send_ctx *sctx)
5768{
5769        int ret;
5770        struct btrfs_root *send_root = sctx->send_root;
5771        struct btrfs_key key;
5772        struct btrfs_key found_key;
5773        struct btrfs_path *path;
5774        struct extent_buffer *eb;
5775        int slot;
5776
5777        path = alloc_path_for_send();
5778        if (!path)
5779                return -ENOMEM;
5780
5781        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5782        key.type = BTRFS_INODE_ITEM_KEY;
5783        key.offset = 0;
5784
5785        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5786        if (ret < 0)
5787                goto out;
5788        if (ret)
5789                goto out_finish;
5790
5791        while (1) {
5792                eb = path->nodes[0];
5793                slot = path->slots[0];
5794                btrfs_item_key_to_cpu(eb, &found_key, slot);
5795
5796                ret = changed_cb(send_root, NULL, path, NULL,
5797                                &found_key, BTRFS_COMPARE_TREE_NEW, sctx);
5798                if (ret < 0)
5799                        goto out;
5800
5801                key.objectid = found_key.objectid;
5802                key.type = found_key.type;
5803                key.offset = found_key.offset + 1;
5804
5805                ret = btrfs_next_item(send_root, path);
5806                if (ret < 0)
5807                        goto out;
5808                if (ret) {
5809                        ret  = 0;
5810                        break;
5811                }
5812        }
5813
5814out_finish:
5815        ret = finish_inode_if_needed(sctx, 1);
5816
5817out:
5818        btrfs_free_path(path);
5819        return ret;
5820}
5821
5822static int send_subvol(struct send_ctx *sctx)
5823{
5824        int ret;
5825
5826        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
5827                ret = send_header(sctx);
5828                if (ret < 0)
5829                        goto out;
5830        }
5831
5832        ret = send_subvol_begin(sctx);
5833        if (ret < 0)
5834                goto out;
5835
5836        if (sctx->parent_root) {
5837                ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
5838                                changed_cb, sctx);
5839                if (ret < 0)
5840                        goto out;
5841                ret = finish_inode_if_needed(sctx, 1);
5842                if (ret < 0)
5843                        goto out;
5844        } else {
5845                ret = full_send_tree(sctx);
5846                if (ret < 0)
5847                        goto out;
5848        }
5849
5850out:
5851        free_recorded_refs(sctx);
5852        return ret;
5853}
5854
5855/*
5856 * If orphan cleanup did remove any orphans from a root, it means the tree
5857 * was modified and therefore the commit root is not the same as the current
5858 * root anymore. This is a problem, because send uses the commit root and
5859 * therefore can see inode items that don't exist in the current root anymore,
5860 * and for example make calls to btrfs_iget, which will do tree lookups based
5861 * on the current root and not on the commit root. Those lookups will fail,
5862 * returning a -ESTALE error, and making send fail with that error. So make
5863 * sure a send does not see any orphans we have just removed, and that it will
5864 * see the same inodes regardless of whether a transaction commit happened
5865 * before it started (meaning that the commit root will be the same as the
5866 * current root) or not.
5867 */
5868static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
5869{
5870        int i;
5871        struct btrfs_trans_handle *trans = NULL;
5872
5873again:
5874        if (sctx->parent_root &&
5875            sctx->parent_root->node != sctx->parent_root->commit_root)
5876                goto commit_trans;
5877
5878        for (i = 0; i < sctx->clone_roots_cnt; i++)
5879                if (sctx->clone_roots[i].root->node !=
5880                    sctx->clone_roots[i].root->commit_root)
5881                        goto commit_trans;
5882
5883        if (trans)
5884                return btrfs_end_transaction(trans, sctx->send_root);
5885
5886        return 0;
5887
5888commit_trans:
5889        /* Use any root, all fs roots will get their commit roots updated. */
5890        if (!trans) {
5891                trans = btrfs_join_transaction(sctx->send_root);
5892                if (IS_ERR(trans))
5893                        return PTR_ERR(trans);
5894                goto again;
5895        }
5896
5897        return btrfs_commit_transaction(trans, sctx->send_root);
5898}
5899
5900static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5901{
5902        spin_lock(&root->root_item_lock);
5903        root->send_in_progress--;
5904        /*
5905         * Not much left to do, we don't know why it's unbalanced and
5906         * can't blindly reset it to 0.
5907         */
5908        if (root->send_in_progress < 0)
5909                btrfs_err(root->fs_info,
5910                        "send_in_progres unbalanced %d root %llu",
5911                        root->send_in_progress, root->root_key.objectid);
5912        spin_unlock(&root->root_item_lock);
5913}
5914
5915long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5916{
5917        int ret = 0;
5918        struct btrfs_root *send_root;
5919        struct btrfs_root *clone_root;
5920        struct btrfs_fs_info *fs_info;
5921        struct btrfs_ioctl_send_args *arg = NULL;
5922        struct btrfs_key key;
5923        struct send_ctx *sctx = NULL;
5924        u32 i;
5925        u64 *clone_sources_tmp = NULL;
5926        int clone_sources_to_rollback = 0;
5927        int sort_clone_roots = 0;
5928        int index;
5929
5930        if (!capable(CAP_SYS_ADMIN))
5931                return -EPERM;
5932
5933        send_root = BTRFS_I(file_inode(mnt_file))->root;
5934        fs_info = send_root->fs_info;
5935
5936        /*
5937         * The subvolume must remain read-only during send, protect against
5938         * making it RW. This also protects against deletion.
5939         */
5940        spin_lock(&send_root->root_item_lock);
5941        send_root->send_in_progress++;
5942        spin_unlock(&send_root->root_item_lock);
5943
5944        /*
5945         * This is done when we lookup the root, it should already be complete
5946         * by the time we get here.
5947         */
5948        WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
5949
5950        /*
5951         * Userspace tools do the checks and warn the user if it's
5952         * not RO.
5953         */
5954        if (!btrfs_root_readonly(send_root)) {
5955                ret = -EPERM;
5956                goto out;
5957        }
5958
5959        arg = memdup_user(arg_, sizeof(*arg));
5960        if (IS_ERR(arg)) {
5961                ret = PTR_ERR(arg);
5962                arg = NULL;
5963                goto out;
5964        }
5965
5966        if (!access_ok(VERIFY_READ, arg->clone_sources,
5967                        sizeof(*arg->clone_sources) *
5968                        arg->clone_sources_count)) {
5969                ret = -EFAULT;
5970                goto out;
5971        }
5972
5973        if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
5974                ret = -EINVAL;
5975                goto out;
5976        }
5977
5978        sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
5979        if (!sctx) {
5980                ret = -ENOMEM;
5981                goto out;
5982        }
5983
5984        INIT_LIST_HEAD(&sctx->new_refs);
5985        INIT_LIST_HEAD(&sctx->deleted_refs);
5986        INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
5987        INIT_LIST_HEAD(&sctx->name_cache_list);
5988
5989        sctx->flags = arg->flags;
5990
5991        sctx->send_filp = fget(arg->send_fd);
5992        if (!sctx->send_filp) {
5993                ret = -EBADF;
5994                goto out;
5995        }
5996
5997        sctx->send_root = send_root;
5998        /*
5999         * Unlikely but possible, if the subvolume is marked for deletion but
6000         * is slow to remove the directory entry, send can still be started
6001         */
6002        if (btrfs_root_dead(sctx->send_root)) {
6003                ret = -EPERM;
6004                goto out;
6005        }
6006
6007        sctx->clone_roots_cnt = arg->clone_sources_count;
6008
6009        sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
6010        sctx->send_buf = vmalloc(sctx->send_max_size);
6011        if (!sctx->send_buf) {
6012                ret = -ENOMEM;
6013                goto out;
6014        }
6015
6016        sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
6017        if (!sctx->read_buf) {
6018                ret = -ENOMEM;
6019                goto out;
6020        }
6021
6022        sctx->pending_dir_moves = RB_ROOT;
6023        sctx->waiting_dir_moves = RB_ROOT;
6024        sctx->orphan_dirs = RB_ROOT;
6025
6026        sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
6027                        (arg->clone_sources_count + 1));
6028        if (!sctx->clone_roots) {
6029                ret = -ENOMEM;
6030                goto out;
6031        }
6032
6033        if (arg->clone_sources_count) {
6034                clone_sources_tmp = vmalloc(arg->clone_sources_count *
6035                                sizeof(*arg->clone_sources));
6036                if (!clone_sources_tmp) {
6037                        ret = -ENOMEM;
6038                        goto out;
6039                }
6040
6041                ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
6042                                arg->clone_sources_count *
6043                                sizeof(*arg->clone_sources));
6044                if (ret) {
6045                        ret = -EFAULT;
6046                        goto out;
6047                }
6048
6049                for (i = 0; i < arg->clone_sources_count; i++) {
6050                        key.objectid = clone_sources_tmp[i];
6051                        key.type = BTRFS_ROOT_ITEM_KEY;
6052                        key.offset = (u64)-1;
6053
6054                        index = srcu_read_lock(&fs_info->subvol_srcu);
6055
6056                        clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
6057                        if (IS_ERR(clone_root)) {
6058                                srcu_read_unlock(&fs_info->subvol_srcu, index);
6059                                ret = PTR_ERR(clone_root);
6060                                goto out;
6061                        }
6062                        spin_lock(&clone_root->root_item_lock);
6063                        if (!btrfs_root_readonly(clone_root) ||
6064                            btrfs_root_dead(clone_root)) {
6065                                spin_unlock(&clone_root->root_item_lock);
6066                                srcu_read_unlock(&fs_info->subvol_srcu, index);
6067                                ret = -EPERM;
6068                                goto out;
6069                        }
6070                        clone_root->send_in_progress++;
6071                        spin_unlock(&clone_root->root_item_lock);
6072                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6073
6074                        sctx->clone_roots[i].root = clone_root;
6075                        clone_sources_to_rollback = i + 1;
6076                }
6077                vfree(clone_sources_tmp);
6078                clone_sources_tmp = NULL;
6079        }
6080
6081        if (arg->parent_root) {
6082                key.objectid = arg->parent_root;
6083                key.type = BTRFS_ROOT_ITEM_KEY;
6084                key.offset = (u64)-1;
6085
6086                index = srcu_read_lock(&fs_info->subvol_srcu);
6087
6088                sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
6089                if (IS_ERR(sctx->parent_root)) {
6090                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6091                        ret = PTR_ERR(sctx->parent_root);
6092                        goto out;
6093                }
6094
6095                spin_lock(&sctx->parent_root->root_item_lock);
6096                sctx->parent_root->send_in_progress++;
6097                if (!btrfs_root_readonly(sctx->parent_root) ||
6098                                btrfs_root_dead(sctx->parent_root)) {
6099                        spin_unlock(&sctx->parent_root->root_item_lock);
6100                        srcu_read_unlock(&fs_info->subvol_srcu, index);
6101                        ret = -EPERM;
6102                        goto out;
6103                }
6104                spin_unlock(&sctx->parent_root->root_item_lock);
6105
6106                srcu_read_unlock(&fs_info->subvol_srcu, index);
6107        }
6108
6109        /*
6110         * Clones from send_root are allowed, but only if the clone source
6111         * is behind the current send position. This is checked while searching
6112         * for possible clone sources.
6113         */
6114        sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
6115
6116        /* We do a bsearch later */
6117        sort(sctx->clone_roots, sctx->clone_roots_cnt,
6118                        sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
6119                        NULL);
6120        sort_clone_roots = 1;
6121
6122        ret = ensure_commit_roots_uptodate(sctx);
6123        if (ret)
6124                goto out;
6125
6126        current->journal_info = BTRFS_SEND_TRANS_STUB;
6127        ret = send_subvol(sctx);
6128        current->journal_info = NULL;
6129        if (ret < 0)
6130                goto out;
6131
6132        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
6133                ret = begin_cmd(sctx, BTRFS_SEND_C_END);
6134                if (ret < 0)
6135                        goto out;
6136                ret = send_cmd(sctx);
6137                if (ret < 0)
6138                        goto out;
6139        }
6140
6141out:
6142        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
6143        while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
6144                struct rb_node *n;
6145                struct pending_dir_move *pm;
6146
6147                n = rb_first(&sctx->pending_dir_moves);
6148                pm = rb_entry(n, struct pending_dir_move, node);
6149                while (!list_empty(&pm->list)) {
6150                        struct pending_dir_move *pm2;
6151
6152                        pm2 = list_first_entry(&pm->list,
6153                                               struct pending_dir_move, list);
6154                        free_pending_move(sctx, pm2);
6155                }
6156                free_pending_move(sctx, pm);
6157        }
6158
6159        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
6160        while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
6161                struct rb_node *n;
6162                struct waiting_dir_move *dm;
6163
6164                n = rb_first(&sctx->waiting_dir_moves);
6165                dm = rb_entry(n, struct waiting_dir_move, node);
6166                rb_erase(&dm->node, &sctx->waiting_dir_moves);
6167                kfree(dm);
6168        }
6169
6170        WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
6171        while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
6172                struct rb_node *n;
6173                struct orphan_dir_info *odi;
6174
6175                n = rb_first(&sctx->orphan_dirs);
6176                odi = rb_entry(n, struct orphan_dir_info, node);
6177                free_orphan_dir_info(sctx, odi);
6178        }
6179
6180        if (sort_clone_roots) {
6181                for (i = 0; i < sctx->clone_roots_cnt; i++)
6182                        btrfs_root_dec_send_in_progress(
6183                                        sctx->clone_roots[i].root);
6184        } else {
6185                for (i = 0; sctx && i < clone_sources_to_rollback; i++)
6186                        btrfs_root_dec_send_in_progress(
6187                                        sctx->clone_roots[i].root);
6188
6189                btrfs_root_dec_send_in_progress(send_root);
6190        }
6191        if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
6192                btrfs_root_dec_send_in_progress(sctx->parent_root);
6193
6194        kfree(arg);
6195        vfree(clone_sources_tmp);
6196
6197        if (sctx) {
6198                if (sctx->send_filp)
6199                        fput(sctx->send_filp);
6200
6201                vfree(sctx->clone_roots);
6202                vfree(sctx->send_buf);
6203                vfree(sctx->read_buf);
6204
6205                name_cache_free(sctx);
6206
6207                kfree(sctx);
6208        }
6209
6210        return ret;
6211}
6212