linux/fs/btrfs/send.c
   1/*
   2 * Copyright (C) 2012 Alexander Block.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   16 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/bsearch.h>
  20#include <linux/fs.h>
  21#include <linux/file.h>
  22#include <linux/sort.h>
  23#include <linux/mount.h>
  24#include <linux/xattr.h>
  25#include <linux/posix_acl_xattr.h>
  26#include <linux/radix-tree.h>
  27#include <linux/crc32c.h>
  28#include <linux/vmalloc.h>
  29
  30#include "send.h"
  31#include "backref.h"
  32#include "locking.h"
  33#include "disk-io.h"
  34#include "btrfs_inode.h"
  35#include "transaction.h"
  36
  37static int g_verbose = 0;
  38
   39#define verbose_printk(...) do { if (g_verbose) printk(__VA_ARGS__); } while (0)
  40
  41/*
  42 * A fs_path is a helper to dynamically build path names with unknown size.
  43 * It reallocates the internal buffer on demand.
  44 * It allows fast adding of path elements on the right side (normal path) and
  45 * fast adding to the left side (reversed path). A reversed path can also be
  46 * unreversed if needed.
  47 */
  48struct fs_path {
  49        union {
  50                struct {
  51                        char *start;
  52                        char *end;
  53                        char *prepared;
  54
  55                        char *buf;
  56                        int buf_len;
  57                        int reversed:1;
  58                        int virtual_mem:1;
  59                        char inline_buf[];
  60                };
  61                char pad[PAGE_SIZE];
  62        };
  63};
  64#define FS_PATH_INLINE_SIZE \
  65        (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
  66
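     /*
      * Illustrative sketch (not part of the original file; the helper name
      * is made up, and it is kept under #if 0 like the other unused code in
      * this file): typical use of the fs_path helpers defined below. The
      * buffer starts out in inline_buf and is only moved to kmalloc'ed or
      * vmalloc'ed memory once a path outgrows FS_PATH_INLINE_SIZE.
      */
     #if 0
     static void fs_path_usage_sketch(void)
     {
             struct fs_path *p;

             p = fs_path_alloc();
             if (!p)
                     return;
             fs_path_add(p, "dir", 3);       /* p->start is now "dir" */
             fs_path_add(p, "file", 4);      /* p->start is now "dir/file" */
             fs_path_free(p);
     }
     #endif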
  67
  68/* reused for each extent */
  69struct clone_root {
  70        struct btrfs_root *root;
  71        u64 ino;
  72        u64 offset;
  73
  74        u64 found_refs;
  75};
  76
  77#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
  78#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
  79
  80struct send_ctx {
  81        struct file *send_filp;
  82        loff_t send_off;
  83        char *send_buf;
  84        u32 send_size;
  85        u32 send_max_size;
  86        u64 total_send_size;
  87        u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
  88        u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
  89
  90        struct vfsmount *mnt;
  91
  92        struct btrfs_root *send_root;
  93        struct btrfs_root *parent_root;
  94        struct clone_root *clone_roots;
  95        int clone_roots_cnt;
  96
  97        /* current state of the compare_tree call */
  98        struct btrfs_path *left_path;
  99        struct btrfs_path *right_path;
 100        struct btrfs_key *cmp_key;
 101
 102        /*
  103         * Info about the currently processed inode. In case of deleted inodes,
 104         * these are the values from the deleted inode.
 105         */
 106        u64 cur_ino;
 107        u64 cur_inode_gen;
 108        int cur_inode_new;
 109        int cur_inode_new_gen;
 110        int cur_inode_deleted;
 111        u64 cur_inode_size;
 112        u64 cur_inode_mode;
 113
 114        u64 send_progress;
 115
 116        struct list_head new_refs;
 117        struct list_head deleted_refs;
 118
 119        struct radix_tree_root name_cache;
 120        struct list_head name_cache_list;
 121        int name_cache_size;
 122
 123        struct file *cur_inode_filp;
 124        char *read_buf;
 125};
 126
 127struct name_cache_entry {
 128        struct list_head list;
 129        /*
 130         * radix_tree has only 32bit entries but we need to handle 64bit inums.
 131         * We use the lower 32bit of the 64bit inum to store it in the tree. If
  132         * more than one inum would fall into the same entry, we use radix_list
 133         * to store the additional entries. radix_list is also used to store
 134         * entries where two entries have the same inum but different
 135         * generations.
 136         */
 137        struct list_head radix_list;
 138        u64 ino;
 139        u64 gen;
 140        u64 parent_ino;
 141        u64 parent_gen;
 142        int ret;
 143        int need_later_update;
 144        int name_len;
 145        char name[];
 146};
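     /*
      * Example of the scheme described above: inums 0x1 and 0x100000001
      * share the same lower 32 bits and hence the same radix tree slot, so
      * the second entry gets chained to the first one via radix_list.
      */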
 147
 148static void fs_path_reset(struct fs_path *p)
 149{
 150        if (p->reversed) {
 151                p->start = p->buf + p->buf_len - 1;
 152                p->end = p->start;
 153                *p->start = 0;
 154        } else {
 155                p->start = p->buf;
 156                p->end = p->start;
 157                *p->start = 0;
 158        }
 159}
 160
 161static struct fs_path *fs_path_alloc(void)
 162{
 163        struct fs_path *p;
 164
 165        p = kmalloc(sizeof(*p), GFP_NOFS);
 166        if (!p)
 167                return NULL;
 168        p->reversed = 0;
 169        p->virtual_mem = 0;
 170        p->buf = p->inline_buf;
 171        p->buf_len = FS_PATH_INLINE_SIZE;
 172        fs_path_reset(p);
 173        return p;
 174}
 175
 176static struct fs_path *fs_path_alloc_reversed(void)
 177{
 178        struct fs_path *p;
 179
 180        p = fs_path_alloc();
 181        if (!p)
 182                return NULL;
 183        p->reversed = 1;
 184        fs_path_reset(p);
 185        return p;
 186}
 187
 188static void fs_path_free(struct fs_path *p)
 189{
 190        if (!p)
 191                return;
 192        if (p->buf != p->inline_buf) {
 193                if (p->virtual_mem)
 194                        vfree(p->buf);
 195                else
 196                        kfree(p->buf);
 197        }
 198        kfree(p);
 199}
 200
 201static int fs_path_len(struct fs_path *p)
 202{
 203        return p->end - p->start;
 204}
 205
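     /*
      * Make sure the buffer can hold at least len characters plus the
      * terminating zero. Growth strategy: the inline buffer is replaced by a
      * kmalloc'ed one (falling back to vmalloc), heap buffers are
      * krealloc'ed (again falling back to vmalloc and remembering that in
      * virtual_mem). For reversed paths the existing path is moved so that
      * it stays right-aligned at the end of the grown buffer.
      */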
 206static int fs_path_ensure_buf(struct fs_path *p, int len)
 207{
 208        char *tmp_buf;
 209        int path_len;
 210        int old_buf_len;
 211
 212        len++;
 213
 214        if (p->buf_len >= len)
 215                return 0;
 216
 217        path_len = p->end - p->start;
 218        old_buf_len = p->buf_len;
 219        len = PAGE_ALIGN(len);
 220
 221        if (p->buf == p->inline_buf) {
 222                tmp_buf = kmalloc(len, GFP_NOFS);
 223                if (!tmp_buf) {
 224                        tmp_buf = vmalloc(len);
 225                        if (!tmp_buf)
 226                                return -ENOMEM;
 227                        p->virtual_mem = 1;
 228                }
 229                memcpy(tmp_buf, p->buf, p->buf_len);
 230                p->buf = tmp_buf;
 231                p->buf_len = len;
 232        } else {
 233                if (p->virtual_mem) {
 234                        tmp_buf = vmalloc(len);
 235                        if (!tmp_buf)
 236                                return -ENOMEM;
 237                        memcpy(tmp_buf, p->buf, p->buf_len);
 238                        vfree(p->buf);
 239                } else {
 240                        tmp_buf = krealloc(p->buf, len, GFP_NOFS);
 241                        if (!tmp_buf) {
 242                                tmp_buf = vmalloc(len);
 243                                if (!tmp_buf)
 244                                        return -ENOMEM;
 245                                memcpy(tmp_buf, p->buf, p->buf_len);
 246                                kfree(p->buf);
 247                                p->virtual_mem = 1;
 248                        }
 249                }
 250                p->buf = tmp_buf;
 251                p->buf_len = len;
 252        }
 253        if (p->reversed) {
 254                tmp_buf = p->buf + old_buf_len - path_len - 1;
 255                p->end = p->buf + p->buf_len - 1;
 256                p->start = p->end - path_len;
 257                memmove(p->start, tmp_buf, path_len + 1);
 258        } else {
 259                p->start = p->buf;
 260                p->end = p->start + path_len;
 261        }
 262        return 0;
 263}
 264
 265static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
 266{
 267        int ret;
 268        int new_len;
 269
 270        new_len = p->end - p->start + name_len;
 271        if (p->start != p->end)
 272                new_len++;
 273        ret = fs_path_ensure_buf(p, new_len);
 274        if (ret < 0)
 275                goto out;
 276
 277        if (p->reversed) {
 278                if (p->start != p->end)
 279                        *--p->start = '/';
 280                p->start -= name_len;
 281                p->prepared = p->start;
 282        } else {
 283                if (p->start != p->end)
 284                        *p->end++ = '/';
 285                p->prepared = p->end;
 286                p->end += name_len;
 287                *p->end = 0;
 288        }
 289
 290out:
 291        return ret;
 292}
 293
 294static int fs_path_add(struct fs_path *p, const char *name, int name_len)
 295{
 296        int ret;
 297
 298        ret = fs_path_prepare_for_add(p, name_len);
 299        if (ret < 0)
 300                goto out;
 301        memcpy(p->prepared, name, name_len);
 302        p->prepared = NULL;
 303
 304out:
 305        return ret;
 306}
 307
 308static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
 309{
 310        int ret;
 311
 312        ret = fs_path_prepare_for_add(p, p2->end - p2->start);
 313        if (ret < 0)
 314                goto out;
 315        memcpy(p->prepared, p2->start, p2->end - p2->start);
 316        p->prepared = NULL;
 317
 318out:
 319        return ret;
 320}
 321
 322static int fs_path_add_from_extent_buffer(struct fs_path *p,
 323                                          struct extent_buffer *eb,
 324                                          unsigned long off, int len)
 325{
 326        int ret;
 327
 328        ret = fs_path_prepare_for_add(p, len);
 329        if (ret < 0)
 330                goto out;
 331
 332        read_extent_buffer(eb, p->prepared, off, len);
 333        p->prepared = NULL;
 334
 335out:
 336        return ret;
 337}
 338
 339#if 0
 340static void fs_path_remove(struct fs_path *p)
 341{
 342        BUG_ON(p->reversed);
 343        while (p->start != p->end && *p->end != '/')
 344                p->end--;
 345        *p->end = 0;
 346}
 347#endif
 348
 349static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 350{
 351        int ret;
 352
 353        p->reversed = from->reversed;
 354        fs_path_reset(p);
 355
 356        ret = fs_path_add_path(p, from);
 357
 358        return ret;
 359}
 360
 361
 362static void fs_path_unreverse(struct fs_path *p)
 363{
 364        char *tmp;
 365        int len;
 366
 367        if (!p->reversed)
 368                return;
 369
 370        tmp = p->start;
 371        len = p->end - p->start;
 372        p->start = p->buf;
 373        p->end = p->start + len;
 374        memmove(p->start, tmp, len + 1);
 375        p->reversed = 0;
 376}
 377
 378static struct btrfs_path *alloc_path_for_send(void)
 379{
 380        struct btrfs_path *path;
 381
 382        path = btrfs_alloc_path();
 383        if (!path)
 384                return NULL;
 385        path->search_commit_root = 1;
 386        path->skip_locking = 1;
 387        return path;
 388}
 389
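     /*
      * Write len bytes from a kernel buffer to the send file, looping until
      * everything is written. The temporary set_fs(KERNEL_DS) is needed
      * because vfs_write() expects a user space pointer. Returns 0 on
      * success or a negative error (-EIO if the file stops making progress).
      */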
 390static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
 391{
 392        int ret;
 393        mm_segment_t old_fs;
 394        u32 pos = 0;
 395
 396        old_fs = get_fs();
 397        set_fs(KERNEL_DS);
 398
 399        while (pos < len) {
 400                ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
 401                /* TODO handle that correctly */
 402                /*if (ret == -ERESTARTSYS) {
 403                        continue;
 404                }*/
 405                if (ret < 0)
 406                        goto out;
 407                if (ret == 0) {
 408                        ret = -EIO;
 409                        goto out;
 410                }
 411                pos += ret;
 412        }
 413
 414        ret = 0;
 415
 416out:
 417        set_fs(old_fs);
 418        return ret;
 419}
 420
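     /*
      * Append one type-length-value attribute to the command that is being
      * built in send_buf. The resulting stream is a btrfs_stream_header
      * followed by commands, each consisting of a btrfs_cmd_header (len,
      * cmd, crc) and a sequence of btrfs_tlv_headers (tlv_type, tlv_len)
      * with their data.
      */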
 421static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 422{
 423        struct btrfs_tlv_header *hdr;
 424        int total_len = sizeof(*hdr) + len;
 425        int left = sctx->send_max_size - sctx->send_size;
 426
 427        if (unlikely(left < total_len))
 428                return -EOVERFLOW;
 429
 430        hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
 431        hdr->tlv_type = cpu_to_le16(attr);
 432        hdr->tlv_len = cpu_to_le16(len);
 433        memcpy(hdr + 1, data, len);
 434        sctx->send_size += total_len;
 435
 436        return 0;
 437}
 438
 439#if 0
 440static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
 441{
 442        return tlv_put(sctx, attr, &value, sizeof(value));
 443}
 444
 445static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
 446{
 447        __le16 tmp = cpu_to_le16(value);
 448        return tlv_put(sctx, attr, &tmp, sizeof(tmp));
 449}
 450
 451static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
 452{
 453        __le32 tmp = cpu_to_le32(value);
 454        return tlv_put(sctx, attr, &tmp, sizeof(tmp));
 455}
 456#endif
 457
 458static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
 459{
 460        __le64 tmp = cpu_to_le64(value);
 461        return tlv_put(sctx, attr, &tmp, sizeof(tmp));
 462}
 463
 464static int tlv_put_string(struct send_ctx *sctx, u16 attr,
 465                          const char *str, int len)
 466{
 467        if (len == -1)
 468                len = strlen(str);
 469        return tlv_put(sctx, attr, str, len);
 470}
 471
 472static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
 473                        const u8 *uuid)
 474{
 475        return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 476}
 477
 478#if 0
 479static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
 480                            struct timespec *ts)
 481{
 482        struct btrfs_timespec bts;
 483        bts.sec = cpu_to_le64(ts->tv_sec);
 484        bts.nsec = cpu_to_le32(ts->tv_nsec);
 485        return tlv_put(sctx, attr, &bts, sizeof(bts));
 486}
 487#endif
 488
 489static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 490                                  struct extent_buffer *eb,
 491                                  struct btrfs_timespec *ts)
 492{
 493        struct btrfs_timespec bts;
 494        read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
 495        return tlv_put(sctx, attr, &bts, sizeof(bts));
 496}
 497
 498
 499#define TLV_PUT(sctx, attrtype, attrlen, data) \
 500        do { \
 501                ret = tlv_put(sctx, attrtype, attrlen, data); \
 502                if (ret < 0) \
 503                        goto tlv_put_failure; \
 504        } while (0)
 505
 506#define TLV_PUT_INT(sctx, attrtype, bits, value) \
 507        do { \
 508                ret = tlv_put_u##bits(sctx, attrtype, value); \
 509                if (ret < 0) \
 510                        goto tlv_put_failure; \
 511        } while (0)
 512
 513#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
 514#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
 515#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
 516#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
 517#define TLV_PUT_STRING(sctx, attrtype, str, len) \
 518        do { \
 519                ret = tlv_put_string(sctx, attrtype, str, len); \
 520                if (ret < 0) \
 521                        goto tlv_put_failure; \
 522        } while (0)
 523#define TLV_PUT_PATH(sctx, attrtype, p) \
 524        do { \
 525                ret = tlv_put_string(sctx, attrtype, p->start, \
 526                        p->end - p->start); \
 527                if (ret < 0) \
 528                        goto tlv_put_failure; \
 529        } while(0)
 530#define TLV_PUT_UUID(sctx, attrtype, uuid) \
 531        do { \
 532                ret = tlv_put_uuid(sctx, attrtype, uuid); \
 533                if (ret < 0) \
 534                        goto tlv_put_failure; \
 535        } while (0)
 536#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
 537        do { \
 538                ret = tlv_put_timespec(sctx, attrtype, ts); \
 539                if (ret < 0) \
 540                        goto tlv_put_failure; \
 541        } while (0)
 542#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 543        do { \
 544                ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
 545                if (ret < 0) \
 546                        goto tlv_put_failure; \
 547        } while (0)
 548
 549static int send_header(struct send_ctx *sctx)
 550{
 551        struct btrfs_stream_header hdr;
 552
 553        strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
 554        hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
 555
 556        return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
 557                                        &sctx->send_off);
 558}
 559
 560/*
 561 * For each command/item we want to send to userspace, we call this function.
 562 */
 563static int begin_cmd(struct send_ctx *sctx, int cmd)
 564{
 565        struct btrfs_cmd_header *hdr;
 566
 567        if (!sctx->send_buf) {
 568                WARN_ON(1);
 569                return -EINVAL;
 570        }
 571
 572        BUG_ON(sctx->send_size);
 573
 574        sctx->send_size += sizeof(*hdr);
 575        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 576        hdr->cmd = cpu_to_le16(cmd);
 577
 578        return 0;
 579}
 580
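     /*
      * Finish the command started by begin_cmd: fill in the payload length,
      * compute the crc32c over the whole command with hdr->crc zeroed,
      * write the buffer out and update the per-command statistics.
      */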
 581static int send_cmd(struct send_ctx *sctx)
 582{
 583        int ret;
 584        struct btrfs_cmd_header *hdr;
 585        u32 crc;
 586
 587        hdr = (struct btrfs_cmd_header *)sctx->send_buf;
 588        hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 589        hdr->crc = 0;
 590
 591        crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 592        hdr->crc = cpu_to_le32(crc);
 593
 594        ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
 595                                        &sctx->send_off);
 596
 597        sctx->total_send_size += sctx->send_size;
 598        sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
 599        sctx->send_size = 0;
 600
 601        return ret;
 602}
 603
 604/*
 605 * Sends a move instruction to user space
 606 */
 607static int send_rename(struct send_ctx *sctx,
 608                     struct fs_path *from, struct fs_path *to)
 609{
 610        int ret;
 611
 612verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
 613
 614        ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
 615        if (ret < 0)
 616                goto out;
 617
 618        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
 619        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
 620
 621        ret = send_cmd(sctx);
 622
 623tlv_put_failure:
 624out:
 625        return ret;
 626}
 627
 628/*
 629 * Sends a link instruction to user space
 630 */
 631static int send_link(struct send_ctx *sctx,
 632                     struct fs_path *path, struct fs_path *lnk)
 633{
 634        int ret;
 635
 636verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
 637
 638        ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
 639        if (ret < 0)
 640                goto out;
 641
 642        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 643        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
 644
 645        ret = send_cmd(sctx);
 646
 647tlv_put_failure:
 648out:
 649        return ret;
 650}
 651
 652/*
 653 * Sends an unlink instruction to user space
 654 */
 655static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
 656{
 657        int ret;
 658
 659verbose_printk("btrfs: send_unlink %s\n", path->start);
 660
 661        ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
 662        if (ret < 0)
 663                goto out;
 664
 665        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 666
 667        ret = send_cmd(sctx);
 668
 669tlv_put_failure:
 670out:
 671        return ret;
 672}
 673
 674/*
  675 * Sends an rmdir instruction to user space
 676 */
 677static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
 678{
 679        int ret;
 680
 681verbose_printk("btrfs: send_rmdir %s\n", path->start);
 682
 683        ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
 684        if (ret < 0)
 685                goto out;
 686
 687        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
 688
 689        ret = send_cmd(sctx);
 690
 691tlv_put_failure:
 692out:
 693        return ret;
 694}
 695
 696/*
 697 * Helper function to retrieve some fields from an inode item.
 698 */
 699static int get_inode_info(struct btrfs_root *root,
 700                          u64 ino, u64 *size, u64 *gen,
 701                          u64 *mode, u64 *uid, u64 *gid,
 702                          u64 *rdev)
 703{
 704        int ret;
 705        struct btrfs_inode_item *ii;
 706        struct btrfs_key key;
 707        struct btrfs_path *path;
 708
 709        path = alloc_path_for_send();
 710        if (!path)
 711                return -ENOMEM;
 712
 713        key.objectid = ino;
 714        key.type = BTRFS_INODE_ITEM_KEY;
 715        key.offset = 0;
 716        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 717        if (ret < 0)
 718                goto out;
 719        if (ret) {
 720                ret = -ENOENT;
 721                goto out;
 722        }
 723
 724        ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
 725                        struct btrfs_inode_item);
 726        if (size)
 727                *size = btrfs_inode_size(path->nodes[0], ii);
 728        if (gen)
 729                *gen = btrfs_inode_generation(path->nodes[0], ii);
 730        if (mode)
 731                *mode = btrfs_inode_mode(path->nodes[0], ii);
 732        if (uid)
 733                *uid = btrfs_inode_uid(path->nodes[0], ii);
 734        if (gid)
 735                *gid = btrfs_inode_gid(path->nodes[0], ii);
 736        if (rdev)
 737                *rdev = btrfs_inode_rdev(path->nodes[0], ii);
 738
 739out:
 740        btrfs_free_path(path);
 741        return ret;
 742}
 743
 744typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
 745                                   struct fs_path *p,
 746                                   void *ctx);
 747
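     /*
      * Background for the iteration below: an INODE_REF item (keyed by
      * inode and parent dir) packs one or more {index, name_len, name}
      * entries back to back, while an INODE_EXTREF entry additionally
      * carries the parent dir inside the item itself. That is why dir is
      * taken from found_key->offset in the first case and read from the
      * extref in the second.
      */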
 748/*
 749 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 750 * btrfs_inode_extref.
  751 * The iterate callback may return a non-zero value to stop iteration. This can
 752 * be a negative value for error codes or 1 to simply stop it.
 753 *
 754 * path must point to the INODE_REF or INODE_EXTREF when called.
 755 */
 756static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
 757                             struct btrfs_key *found_key, int resolve,
 758                             iterate_inode_ref_t iterate, void *ctx)
 759{
 760        struct extent_buffer *eb = path->nodes[0];
 761        struct btrfs_item *item;
 762        struct btrfs_inode_ref *iref;
 763        struct btrfs_inode_extref *extref;
 764        struct btrfs_path *tmp_path;
 765        struct fs_path *p;
 766        u32 cur = 0;
 767        u32 total;
 768        int slot = path->slots[0];
 769        u32 name_len;
 770        char *start;
 771        int ret = 0;
 772        int num = 0;
 773        int index;
 774        u64 dir;
 775        unsigned long name_off;
 776        unsigned long elem_size;
 777        unsigned long ptr;
 778
 779        p = fs_path_alloc_reversed();
 780        if (!p)
 781                return -ENOMEM;
 782
 783        tmp_path = alloc_path_for_send();
 784        if (!tmp_path) {
 785                fs_path_free(p);
 786                return -ENOMEM;
 787        }
 788
 789
 790        if (found_key->type == BTRFS_INODE_REF_KEY) {
 791                ptr = (unsigned long)btrfs_item_ptr(eb, slot,
 792                                                    struct btrfs_inode_ref);
 793                item = btrfs_item_nr(eb, slot);
 794                total = btrfs_item_size(eb, item);
 795                elem_size = sizeof(*iref);
 796        } else {
 797                ptr = btrfs_item_ptr_offset(eb, slot);
 798                total = btrfs_item_size_nr(eb, slot);
 799                elem_size = sizeof(*extref);
 800        }
 801
 802        while (cur < total) {
 803                fs_path_reset(p);
 804
 805                if (found_key->type == BTRFS_INODE_REF_KEY) {
 806                        iref = (struct btrfs_inode_ref *)(ptr + cur);
 807                        name_len = btrfs_inode_ref_name_len(eb, iref);
 808                        name_off = (unsigned long)(iref + 1);
 809                        index = btrfs_inode_ref_index(eb, iref);
 810                        dir = found_key->offset;
 811                } else {
 812                        extref = (struct btrfs_inode_extref *)(ptr + cur);
 813                        name_len = btrfs_inode_extref_name_len(eb, extref);
 814                        name_off = (unsigned long)&extref->name;
 815                        index = btrfs_inode_extref_index(eb, extref);
 816                        dir = btrfs_inode_extref_parent(eb, extref);
 817                }
 818
 819                if (resolve) {
 820                        start = btrfs_ref_to_path(root, tmp_path, name_len,
 821                                                  name_off, eb, dir,
 822                                                  p->buf, p->buf_len);
 823                        if (IS_ERR(start)) {
 824                                ret = PTR_ERR(start);
 825                                goto out;
 826                        }
 827                        if (start < p->buf) {
  828                                /* overflow, try again with a larger buffer */
 829                                ret = fs_path_ensure_buf(p,
 830                                                p->buf_len + p->buf - start);
 831                                if (ret < 0)
 832                                        goto out;
 833                                start = btrfs_ref_to_path(root, tmp_path,
 834                                                          name_len, name_off,
 835                                                          eb, dir,
 836                                                          p->buf, p->buf_len);
 837                                if (IS_ERR(start)) {
 838                                        ret = PTR_ERR(start);
 839                                        goto out;
 840                                }
 841                                BUG_ON(start < p->buf);
 842                        }
 843                        p->start = start;
 844                } else {
 845                        ret = fs_path_add_from_extent_buffer(p, eb, name_off,
 846                                                             name_len);
 847                        if (ret < 0)
 848                                goto out;
 849                }
 850
 851                cur += elem_size + name_len;
 852                ret = iterate(num, dir, index, p, ctx);
 853                if (ret)
 854                        goto out;
 855                num++;
 856        }
 857
 858out:
 859        btrfs_free_path(tmp_path);
 860        fs_path_free(p);
 861        return ret;
 862}
 863
 864typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
 865                                  const char *name, int name_len,
 866                                  const char *data, int data_len,
 867                                  u8 type, void *ctx);
 868
 869/*
 870 * Helper function to iterate the entries in ONE btrfs_dir_item.
  871 * The iterate callback may return a non-zero value to stop iteration. This can
 872 * be a negative value for error codes or 1 to simply stop it.
 873 *
 874 * path must point to the dir item when called.
 875 */
 876static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
 877                            struct btrfs_key *found_key,
 878                            iterate_dir_item_t iterate, void *ctx)
 879{
 880        int ret = 0;
 881        struct extent_buffer *eb;
 882        struct btrfs_item *item;
 883        struct btrfs_dir_item *di;
 884        struct btrfs_key di_key;
 885        char *buf = NULL;
 886        char *buf2 = NULL;
 887        int buf_len;
 888        int buf_virtual = 0;
 889        u32 name_len;
 890        u32 data_len;
 891        u32 cur;
 892        u32 len;
 893        u32 total;
 894        int slot;
 895        int num;
 896        u8 type;
 897
 898        buf_len = PAGE_SIZE;
 899        buf = kmalloc(buf_len, GFP_NOFS);
 900        if (!buf) {
 901                ret = -ENOMEM;
 902                goto out;
 903        }
 904
 905        eb = path->nodes[0];
 906        slot = path->slots[0];
 907        item = btrfs_item_nr(eb, slot);
 908        di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
 909        cur = 0;
 910        len = 0;
 911        total = btrfs_item_size(eb, item);
 912
 913        num = 0;
 914        while (cur < total) {
 915                name_len = btrfs_dir_name_len(eb, di);
 916                data_len = btrfs_dir_data_len(eb, di);
 917                type = btrfs_dir_type(eb, di);
 918                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
 919
 920                if (name_len + data_len > buf_len) {
 921                        buf_len = PAGE_ALIGN(name_len + data_len);
 922                        if (buf_virtual) {
 923                                buf2 = vmalloc(buf_len);
 924                                if (!buf2) {
 925                                        ret = -ENOMEM;
 926                                        goto out;
 927                                }
 928                                vfree(buf);
 929                        } else {
 930                                buf2 = krealloc(buf, buf_len, GFP_NOFS);
 931                                if (!buf2) {
 932                                        buf2 = vmalloc(buf_len);
 933                                        if (!buf2) {
 934                                                ret = -ENOMEM;
 935                                                goto out;
 936                                        }
 937                                        kfree(buf);
 938                                        buf_virtual = 1;
 939                                }
 940                        }
 941
 942                        buf = buf2;
 943                        buf2 = NULL;
 944                }
 945
 946                read_extent_buffer(eb, buf, (unsigned long)(di + 1),
 947                                name_len + data_len);
 948
 949                len = sizeof(*di) + name_len + data_len;
 950                di = (struct btrfs_dir_item *)((char *)di + len);
 951                cur += len;
 952
 953                ret = iterate(num, &di_key, buf, name_len, buf + name_len,
 954                                data_len, type, ctx);
 955                if (ret < 0)
 956                        goto out;
 957                if (ret) {
 958                        ret = 0;
 959                        goto out;
 960                }
 961
 962                num++;
 963        }
 964
 965out:
 966        if (buf_virtual)
 967                vfree(buf);
 968        else
 969                kfree(buf);
 970        return ret;
 971}
 972
 973static int __copy_first_ref(int num, u64 dir, int index,
 974                            struct fs_path *p, void *ctx)
 975{
 976        int ret;
 977        struct fs_path *pt = ctx;
 978
 979        ret = fs_path_copy(pt, p);
 980        if (ret < 0)
 981                return ret;
 982
 983        /* we want the first only */
 984        return 1;
 985}
 986
 987/*
  988 * Retrieve the first path of an inode. If an inode has more than one
  989 * ref/hardlink, only the first one is used.
 990 */
 991static int get_inode_path(struct btrfs_root *root,
 992                          u64 ino, struct fs_path *path)
 993{
 994        int ret;
 995        struct btrfs_key key, found_key;
 996        struct btrfs_path *p;
 997
 998        p = alloc_path_for_send();
 999        if (!p)
1000                return -ENOMEM;
1001
1002        fs_path_reset(path);
1003
1004        key.objectid = ino;
1005        key.type = BTRFS_INODE_REF_KEY;
1006        key.offset = 0;
1007
1008        ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1009        if (ret < 0)
1010                goto out;
1011        if (ret) {
1012                ret = 1;
1013                goto out;
1014        }
1015        btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1016        if (found_key.objectid != ino ||
1017            (found_key.type != BTRFS_INODE_REF_KEY &&
1018             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1019                ret = -ENOENT;
1020                goto out;
1021        }
1022
1023        ret = iterate_inode_ref(root, p, &found_key, 1,
1024                                __copy_first_ref, path);
1025        if (ret < 0)
1026                goto out;
1027        ret = 0;
1028
1029out:
1030        btrfs_free_path(p);
1031        return ret;
1032}
1033
1034struct backref_ctx {
1035        struct send_ctx *sctx;
1036
1037        /* number of total found references */
1038        u64 found;
1039
1040        /*
1041         * used for clones found in send_root. clones found behind cur_objectid
1042         * and cur_offset are not considered as allowed clones.
1043         */
1044        u64 cur_objectid;
1045        u64 cur_offset;
1046
1047        /* may be truncated in case it's the last extent in a file */
1048        u64 extent_len;
1049
1050        /* Just to check for bugs in backref resolving */
1051        int found_itself;
1052};
1053
1054static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1055{
1056        u64 root = (u64)(uintptr_t)key;
1057        struct clone_root *cr = (struct clone_root *)elt;
1058
1059        if (root < cr->root->objectid)
1060                return -1;
1061        if (root > cr->root->objectid)
1062                return 1;
1063        return 0;
1064}
1065
1066static int __clone_root_cmp_sort(const void *e1, const void *e2)
1067{
1068        struct clone_root *cr1 = (struct clone_root *)e1;
1069        struct clone_root *cr2 = (struct clone_root *)e2;
1070
1071        if (cr1->root->objectid < cr2->root->objectid)
1072                return -1;
1073        if (cr1->root->objectid > cr2->root->objectid)
1074                return 1;
1075        return 0;
1076}
1077
1078/*
1079 * Called for every backref that is found for the current extent.
1080 * Results are collected in sctx->clone_roots->ino/offset/found_refs
1081 */
1082static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1083{
1084        struct backref_ctx *bctx = ctx_;
1085        struct clone_root *found;
1086        int ret;
1087        u64 i_size;
1088
1089        /* First check if the root is in the list of accepted clone sources */
1090        found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1091                        bctx->sctx->clone_roots_cnt,
1092                        sizeof(struct clone_root),
1093                        __clone_root_cmp_bsearch);
1094        if (!found)
1095                return 0;
1096
1097        if (found->root == bctx->sctx->send_root &&
1098            ino == bctx->cur_objectid &&
1099            offset == bctx->cur_offset) {
1100                bctx->found_itself = 1;
1101        }
1102
1103        /*
1104         * There are inodes that have extents that lie behind its i_size. Don't
1105         * accept clones from these extents.
1106         */
1107        ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1108                        NULL);
1109        if (ret < 0)
1110                return ret;
1111
1112        if (offset + bctx->extent_len > i_size)
1113                return 0;
1114
1115        /*
1116         * Make sure we don't consider clones from send_root that are
1117         * behind the current inode/offset.
1118         */
1119        if (found->root == bctx->sctx->send_root) {
1120                /*
1121                 * TODO for the moment we don't accept clones from the inode
 1122                 * that is currently being sent. We may change this when
1123                 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1124                 * file.
1125                 */
1126                if (ino >= bctx->cur_objectid)
1127                        return 0;
1128#if 0
1129                if (ino > bctx->cur_objectid)
1130                        return 0;
1131                if (offset + bctx->extent_len > bctx->cur_offset)
1132                        return 0;
1133#endif
1134        }
1135
1136        bctx->found++;
1137        found->found_refs++;
1138        if (ino < found->ino) {
1139                found->ino = ino;
1140                found->offset = offset;
1141        } else if (found->ino == ino) {
1142                /*
 1143                 * same extent found more than once in the same file.
1144                 */
1145                if (found->offset > offset + bctx->extent_len)
1146                        found->offset = offset;
1147        }
1148
1149        return 0;
1150}
1151
1152/*
1153 * Given an inode, offset and extent item, it finds a good clone for a clone
1154 * instruction. Returns -ENOENT when none could be found. The function makes
 1155 * sure that the returned clone is usable at the current point of the send
 1156 * stream. This means that no clones which lie behind the current inode+offset
 1157 * are accepted.
1158 *
1159 * path must point to the extent item when called.
1160 */
1161static int find_extent_clone(struct send_ctx *sctx,
1162                             struct btrfs_path *path,
1163                             u64 ino, u64 data_offset,
1164                             u64 ino_size,
1165                             struct clone_root **found)
1166{
1167        int ret;
1168        int extent_type;
1169        u64 logical;
1170        u64 disk_byte;
1171        u64 num_bytes;
1172        u64 extent_item_pos;
1173        u64 flags = 0;
1174        struct btrfs_file_extent_item *fi;
1175        struct extent_buffer *eb = path->nodes[0];
1176        struct backref_ctx *backref_ctx = NULL;
1177        struct clone_root *cur_clone_root;
1178        struct btrfs_key found_key;
1179        struct btrfs_path *tmp_path;
1180        int compressed;
1181        u32 i;
1182
1183        tmp_path = alloc_path_for_send();
1184        if (!tmp_path)
1185                return -ENOMEM;
1186
1187        backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1188        if (!backref_ctx) {
1189                ret = -ENOMEM;
1190                goto out;
1191        }
1192
1193        if (data_offset >= ino_size) {
1194                /*
1195                 * There may be extents that lie behind the file's size.
1196                 * I at least had this in combination with snapshotting while
1197                 * writing large files.
1198                 */
1199                ret = 0;
1200                goto out;
1201        }
1202
1203        fi = btrfs_item_ptr(eb, path->slots[0],
1204                        struct btrfs_file_extent_item);
1205        extent_type = btrfs_file_extent_type(eb, fi);
1206        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1207                ret = -ENOENT;
1208                goto out;
1209        }
1210        compressed = btrfs_file_extent_compression(eb, fi);
1211
1212        num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1213        disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1214        if (disk_byte == 0) {
1215                ret = -ENOENT;
1216                goto out;
1217        }
1218        logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1219
1220        ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1221                                  &found_key, &flags);
1222        btrfs_release_path(tmp_path);
1223
1224        if (ret < 0)
1225                goto out;
1226        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1227                ret = -EIO;
1228                goto out;
1229        }
1230
1231        /*
1232         * Setup the clone roots.
1233         */
1234        for (i = 0; i < sctx->clone_roots_cnt; i++) {
1235                cur_clone_root = sctx->clone_roots + i;
1236                cur_clone_root->ino = (u64)-1;
1237                cur_clone_root->offset = 0;
1238                cur_clone_root->found_refs = 0;
1239        }
1240
1241        backref_ctx->sctx = sctx;
1242        backref_ctx->found = 0;
1243        backref_ctx->cur_objectid = ino;
1244        backref_ctx->cur_offset = data_offset;
1245        backref_ctx->found_itself = 0;
1246        backref_ctx->extent_len = num_bytes;
1247
1248        /*
1249         * The last extent of a file may be too large due to page alignment.
1250         * We need to adjust extent_len in this case so that the checks in
1251         * __iterate_backrefs work.
1252         */
1253        if (data_offset + num_bytes >= ino_size)
1254                backref_ctx->extent_len = ino_size - data_offset;
1255
1256        /*
1257         * Now collect all backrefs.
1258         */
1259        if (compressed == BTRFS_COMPRESS_NONE)
1260                extent_item_pos = logical - found_key.objectid;
1261        else
1262                extent_item_pos = 0;
1263
1265        ret = iterate_extent_inodes(sctx->send_root->fs_info,
1266                                        found_key.objectid, extent_item_pos, 1,
1267                                        __iterate_backrefs, backref_ctx);
1268
1269        if (ret < 0)
1270                goto out;
1271
1272        if (!backref_ctx->found_itself) {
1273                /* found a bug in backref code? */
1274                ret = -EIO;
1275                printk(KERN_ERR "btrfs: ERROR did not find backref in "
1276                                "send_root. inode=%llu, offset=%llu, "
1277                                "disk_byte=%llu found extent=%llu\n",
1278                                ino, data_offset, disk_byte, found_key.objectid);
1279                goto out;
1280        }
1281
1282verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1283                "ino=%llu, "
1284                "num_bytes=%llu, logical=%llu\n",
1285                data_offset, ino, num_bytes, logical);
1286
1287        if (!backref_ctx->found)
1288                verbose_printk("btrfs:    no clones found\n");
1289
1290        cur_clone_root = NULL;
1291        for (i = 0; i < sctx->clone_roots_cnt; i++) {
1292                if (sctx->clone_roots[i].found_refs) {
1293                        if (!cur_clone_root)
1294                                cur_clone_root = sctx->clone_roots + i;
1295                        else if (sctx->clone_roots[i].root == sctx->send_root)
1296                                /* prefer clones from send_root over others */
1297                                cur_clone_root = sctx->clone_roots + i;
1298                }
1299
1300        }
1301
1302        if (cur_clone_root) {
1303                *found = cur_clone_root;
1304                ret = 0;
1305        } else {
1306                ret = -ENOENT;
1307        }
1308
1309out:
1310        btrfs_free_path(tmp_path);
1311        kfree(backref_ctx);
1312        return ret;
1313}
1314
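     /*
      * Read the target of a symlink into dest. Symlink targets are stored
      * as a single inline, uncompressed file extent at offset 0, which is
      * what the BUG_ONs below assert.
      */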
1315static int read_symlink(struct btrfs_root *root,
1316                        u64 ino,
1317                        struct fs_path *dest)
1318{
1319        int ret;
1320        struct btrfs_path *path;
1321        struct btrfs_key key;
1322        struct btrfs_file_extent_item *ei;
1323        u8 type;
1324        u8 compression;
1325        unsigned long off;
1326        int len;
1327
1328        path = alloc_path_for_send();
1329        if (!path)
1330                return -ENOMEM;
1331
1332        key.objectid = ino;
1333        key.type = BTRFS_EXTENT_DATA_KEY;
1334        key.offset = 0;
1335        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1336        if (ret < 0)
1337                goto out;
1338        BUG_ON(ret);
1339
1340        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1341                        struct btrfs_file_extent_item);
1342        type = btrfs_file_extent_type(path->nodes[0], ei);
1343        compression = btrfs_file_extent_compression(path->nodes[0], ei);
1344        BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1345        BUG_ON(compression);
1346
1347        off = btrfs_file_extent_inline_start(ei);
1348        len = btrfs_file_extent_inline_len(path->nodes[0], ei);
1349
1350        ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1351
1352out:
1353        btrfs_free_path(path);
1354        return ret;
1355}
1356
1357/*
1358 * Helper function to generate a file name that is unique in the root of
1359 * send_root and parent_root. This is used to generate names for orphan inodes.
1360 */
1361static int gen_unique_name(struct send_ctx *sctx,
1362                           u64 ino, u64 gen,
1363                           struct fs_path *dest)
1364{
1365        int ret = 0;
1366        struct btrfs_path *path;
1367        struct btrfs_dir_item *di;
1368        char tmp[64];
1369        int len;
1370        u64 idx = 0;
1371
1372        path = alloc_path_for_send();
1373        if (!path)
1374                return -ENOMEM;
1375
1376        while (1) {
1377                len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
1378                                ino, gen, idx);
1379                if (len >= sizeof(tmp)) {
1380                        /* should really not happen */
1381                        ret = -EOVERFLOW;
1382                        goto out;
1383                }
1384
1385                di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1386                                path, BTRFS_FIRST_FREE_OBJECTID,
1387                                tmp, strlen(tmp), 0);
1388                btrfs_release_path(path);
1389                if (IS_ERR(di)) {
1390                        ret = PTR_ERR(di);
1391                        goto out;
1392                }
1393                if (di) {
1394                        /* not unique, try again */
1395                        idx++;
1396                        continue;
1397                }
1398
1399                if (!sctx->parent_root) {
1400                        /* unique */
1401                        ret = 0;
1402                        break;
1403                }
1404
1405                di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1406                                path, BTRFS_FIRST_FREE_OBJECTID,
1407                                tmp, strlen(tmp), 0);
1408                btrfs_release_path(path);
1409                if (IS_ERR(di)) {
1410                        ret = PTR_ERR(di);
1411                        goto out;
1412                }
1413                if (di) {
1414                        /* not unique, try again */
1415                        idx++;
1416                        continue;
1417                }
1418                /* unique */
1419                break;
1420        }
1421
1422        ret = fs_path_add(dest, tmp, strlen(tmp));
1423
1424out:
1425        btrfs_free_path(path);
1426        return ret;
1427}
1428
1429enum inode_state {
1430        inode_state_no_change,
1431        inode_state_will_create,
1432        inode_state_did_create,
1433        inode_state_will_delete,
1434        inode_state_did_delete,
1435};
1436
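     /*
      * Classify an inode with respect to send_root, parent_root and
      * send_progress:
      *
      *   gen matches in both roots                  -> inode_state_no_change
      *   gen matches only in send_root, processed   -> inode_state_did_create
      *   gen matches only in send_root, pending     -> inode_state_will_create
      *   gen matches only in parent_root, processed -> inode_state_did_delete
      *   gen matches only in parent_root, pending   -> inode_state_will_delete
      *
      * "Processed" means ino < send_progress; everything else is -ENOENT.
      */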
1437static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1438{
1439        int ret;
1440        int left_ret;
1441        int right_ret;
1442        u64 left_gen;
1443        u64 right_gen;
1444
1445        ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1446                        NULL, NULL);
1447        if (ret < 0 && ret != -ENOENT)
1448                goto out;
1449        left_ret = ret;
1450
1451        if (!sctx->parent_root) {
1452                right_ret = -ENOENT;
1453        } else {
1454                ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1455                                NULL, NULL, NULL, NULL);
1456                if (ret < 0 && ret != -ENOENT)
1457                        goto out;
1458                right_ret = ret;
1459        }
1460
1461        if (!left_ret && !right_ret) {
1462                if (left_gen == gen && right_gen == gen) {
1463                        ret = inode_state_no_change;
1464                } else if (left_gen == gen) {
1465                        if (ino < sctx->send_progress)
1466                                ret = inode_state_did_create;
1467                        else
1468                                ret = inode_state_will_create;
1469                } else if (right_gen == gen) {
1470                        if (ino < sctx->send_progress)
1471                                ret = inode_state_did_delete;
1472                        else
1473                                ret = inode_state_will_delete;
1474                } else  {
1475                        ret = -ENOENT;
1476                }
1477        } else if (!left_ret) {
1478                if (left_gen == gen) {
1479                        if (ino < sctx->send_progress)
1480                                ret = inode_state_did_create;
1481                        else
1482                                ret = inode_state_will_create;
1483                } else {
1484                        ret = -ENOENT;
1485                }
1486        } else if (!right_ret) {
1487                if (right_gen == gen) {
1488                        if (ino < sctx->send_progress)
1489                                ret = inode_state_did_delete;
1490                        else
1491                                ret = inode_state_will_delete;
1492                } else {
1493                        ret = -ENOENT;
1494                }
1495        } else {
1496                ret = -ENOENT;
1497        }
1498
1499out:
1500        return ret;
1501}
1502
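     /*
      * An inode counts as existent if it is present at the current point of
      * the send stream: it either has not changed, has already been created,
      * or still exists because its deletion has not been sent yet.
      */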
1503static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1504{
1505        int ret;
1506
1507        ret = get_cur_inode_state(sctx, ino, gen);
1508        if (ret < 0)
1509                goto out;
1510
1511        if (ret == inode_state_no_change ||
1512            ret == inode_state_did_create ||
1513            ret == inode_state_will_delete)
1514                ret = 1;
1515        else
1516                ret = 0;
1517
1518out:
1519        return ret;
1520}
1521
1522/*
1523 * Helper function to lookup a dir item in a dir.
1524 */
1525static int lookup_dir_item_inode(struct btrfs_root *root,
1526                                 u64 dir, const char *name, int name_len,
1527                                 u64 *found_inode,
1528                                 u8 *found_type)
1529{
1530        int ret = 0;
1531        struct btrfs_dir_item *di;
1532        struct btrfs_key key;
1533        struct btrfs_path *path;
1534
1535        path = alloc_path_for_send();
1536        if (!path)
1537                return -ENOMEM;
1538
1539        di = btrfs_lookup_dir_item(NULL, root, path,
1540                        dir, name, name_len, 0);
1541        if (!di) {
1542                ret = -ENOENT;
1543                goto out;
1544        }
1545        if (IS_ERR(di)) {
1546                ret = PTR_ERR(di);
1547                goto out;
1548        }
1549        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1550        *found_inode = key.objectid;
1551        *found_type = btrfs_dir_type(path->nodes[0], di);
1552
1553out:
1554        btrfs_free_path(path);
1555        return ret;
1556}
1557
1558/*
 1559 * Looks up the first btrfs_inode_ref/btrfs_inode_extref of a given ino. It
 1560 * returns the parent dir, its generation and the name of the dir entry.
1561 */
1562static int get_first_ref(struct btrfs_root *root, u64 ino,
1563                         u64 *dir, u64 *dir_gen, struct fs_path *name)
1564{
1565        int ret;
1566        struct btrfs_key key;
1567        struct btrfs_key found_key;
1568        struct btrfs_path *path;
1569        int len;
1570        u64 parent_dir;
1571
1572        path = alloc_path_for_send();
1573        if (!path)
1574                return -ENOMEM;
1575
1576        key.objectid = ino;
1577        key.type = BTRFS_INODE_REF_KEY;
1578        key.offset = 0;
1579
1580        ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1581        if (ret < 0)
1582                goto out;
1583        if (!ret)
1584                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1585                                path->slots[0]);
1586        if (ret || found_key.objectid != ino ||
1587            (found_key.type != BTRFS_INODE_REF_KEY &&
1588             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1589                ret = -ENOENT;
1590                goto out;
1591        }
1592
 1593        if (found_key.type == BTRFS_INODE_REF_KEY) {
1594                struct btrfs_inode_ref *iref;
1595                iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1596                                      struct btrfs_inode_ref);
1597                len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1598                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1599                                                     (unsigned long)(iref + 1),
1600                                                     len);
1601                parent_dir = found_key.offset;
1602        } else {
1603                struct btrfs_inode_extref *extref;
1604                extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1605                                        struct btrfs_inode_extref);
1606                len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1607                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1608                                        (unsigned long)&extref->name, len);
1609                parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1610        }
1611        if (ret < 0)
1612                goto out;
1613        btrfs_release_path(path);
1614
1615        ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1616                        NULL, NULL);
1617        if (ret < 0)
1618                goto out;
1619
1620        *dir = parent_dir;
1621
1622out:
1623        btrfs_free_path(path);
1624        return ret;
1625}
1626
1627static int is_first_ref(struct btrfs_root *root,
1628                        u64 ino, u64 dir,
1629                        const char *name, int name_len)
1630{
1631        int ret;
1632        struct fs_path *tmp_name;
1633        u64 tmp_dir;
1634        u64 tmp_dir_gen;
1635
1636        tmp_name = fs_path_alloc();
1637        if (!tmp_name)
1638                return -ENOMEM;
1639
1640        ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1641        if (ret < 0)
1642                goto out;
1643
1644        if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1645                ret = 0;
1646                goto out;
1647        }
1648
1649        ret = !memcmp(tmp_name->start, name, name_len);
1650
1651out:
1652        fs_path_free(tmp_name);
1653        return ret;
1654}
1655
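     /*
      * Example for the overwrite handling below: parent_root has the name
      * "a" pointing to inode 259, while send_root reuses "a" for inode 257.
      * While processing 257 (with 259 not yet processed), will_overwrite_ref
      * reports inode 259 so that it can be orphanized before "a" is claimed
      * by 257.
      */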
1656/*
1657 * Used by process_recorded_refs to determine if a new ref would overwrite an
1658 * already existing ref. In case it detects an overwrite, it returns the
1659 * inode/gen in who_ino/who_gen.
1660 * When an overwrite is detected, process_recorded_refs does proper orphanizing
1661 * to make sure later references to the overwritten inode are possible.
1662 * Orphanizing is however only required for the first ref of an inode.
1663 * process_recorded_refs does an additional is_first_ref check to see if
1664 * orphanizing is really required.
1665 */
1666static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1667                              const char *name, int name_len,
1668                              u64 *who_ino, u64 *who_gen)
1669{
1670        int ret = 0;
1671        u64 other_inode = 0;
1672        u8 other_type = 0;
1673
1674        if (!sctx->parent_root)
1675                goto out;
1676
1677        ret = is_inode_existent(sctx, dir, dir_gen);
1678        if (ret <= 0)
1679                goto out;
1680
1681        ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1682                        &other_inode, &other_type);
1683        if (ret < 0 && ret != -ENOENT)
1684                goto out;
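        /*
         * A remaining -ENOENT means the name does not exist in the parent
         * snapshot at all, so nothing can be overwritten.
         */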
1685        if (ret) {
1686                ret = 0;
1687                goto out;
1688        }
1689
1690        /*
1691         * Check if the overwritten ref was already processed. If yes, the ref
1692         * was already unlinked/moved, so we can safely assume that we will not
1693         * overwrite anything at this point in time.
1694         */
1695        if (other_inode > sctx->send_progress) {
1696                ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1697                                who_gen, NULL, NULL, NULL, NULL);
1698                if (ret < 0)
1699                        goto out;
1700
1701                ret = 1;
1702                *who_ino = other_inode;
1703        } else {
1704                ret = 0;
1705        }
1706
1707out:
1708        return ret;
1709}
1710
1711/*
1712 * Checks if the ref was overwritten by an already processed inode. This is
1713 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1714 * thus the orphan name needs to be used.
1715 * process_recorded_refs also uses it to avoid unlinking of refs that were
1716 * overwritten.
1717 */
1718static int did_overwrite_ref(struct send_ctx *sctx,
1719                            u64 dir, u64 dir_gen,
1720                            u64 ino, u64 ino_gen,
1721                            const char *name, int name_len)
1722{
1723        int ret = 0;
1724        u64 gen;
1725        u64 ow_inode;
1726        u8 other_type;
1727
1728        if (!sctx->parent_root)
1729                goto out;
1730
1731        ret = is_inode_existent(sctx, dir, dir_gen);
1732        if (ret <= 0)
1733                goto out;
1734
1735        /* check if the ref was overwritten by another ref */
1736        ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1737                        &ow_inode, &other_type);
1738        if (ret < 0 && ret != -ENOENT)
1739                goto out;
1740        if (ret) {
1741                /* was never and will never be overwritten */
1742                ret = 0;
1743                goto out;
1744        }
1745
1746        ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1747                        NULL, NULL);
1748        if (ret < 0)
1749                goto out;
1750
1751        if (ow_inode == ino && gen == ino_gen) {
1752                ret = 0;
1753                goto out;
1754        }
1755
1756        /* we know that it is or will be overwritten. check this now */
1757        if (ow_inode < sctx->send_progress)
1758                ret = 1;
1759        else
1760                ret = 0;
1761
1762out:
1763        return ret;
1764}
1765
1766/*
1767 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1768 * that got overwritten. This is used by process_recorded_refs to determine
1769 * if it has to use the path as returned by get_cur_path or the orphan name.
1770 */
1771static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1772{
1773        int ret = 0;
1774        struct fs_path *name = NULL;
1775        u64 dir;
1776        u64 dir_gen;
1777
1778        if (!sctx->parent_root)
1779                goto out;
1780
1781        name = fs_path_alloc();
1782        if (!name)
1783                return -ENOMEM;
1784
1785        ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1786        if (ret < 0)
1787                goto out;
1788
1789        ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1790                        name->start, fs_path_len(name));
1791
1792out:
1793        fs_path_free(name);
1794        return ret;
1795}
1796
1797/*
1798 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1799 * so we need to do some special handling in case we have clashes. This function
1800 * takes care of this with the help of name_cache_entry::radix_list.
1801 * In case of error, nce is kfreed.
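 *
 * Each radix tree slot holds a list_head chaining all name_cache_entry
 * objects whose (truncated) inode number maps to that slot, linked via
 * radix_list. E.g. on a 32bit kernel, inodes 0x100000001 and 0x1 land in
 * the same slot and are told apart by walking the list.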
1802 */
1803static int name_cache_insert(struct send_ctx *sctx,
1804                             struct name_cache_entry *nce)
1805{
1806        int ret = 0;
1807        struct list_head *nce_head;
1808
1809        nce_head = radix_tree_lookup(&sctx->name_cache,
1810                        (unsigned long)nce->ino);
1811        if (!nce_head) {
1812                nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1813                if (!nce_head) {
1814                        kfree(nce);
1815                        return -ENOMEM;
1816                }
1817                INIT_LIST_HEAD(nce_head);
1818
1819                ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1820                if (ret < 0) {
1821                        kfree(nce_head);
1822                        kfree(nce);
1823                        return ret;
1824                }
1825        }
1826        list_add_tail(&nce->radix_list, nce_head);
1827        list_add_tail(&nce->list, &sctx->name_cache_list);
1828        sctx->name_cache_size++;
1829
1830        return ret;
1831}
1832
1833static void name_cache_delete(struct send_ctx *sctx,
1834                              struct name_cache_entry *nce)
1835{
1836        struct list_head *nce_head;
1837
1838        nce_head = radix_tree_lookup(&sctx->name_cache,
1839                        (unsigned long)nce->ino);
1840        BUG_ON(!nce_head);
1841
1842        list_del(&nce->radix_list);
1843        list_del(&nce->list);
1844        sctx->name_cache_size--;
1845
1846        if (list_empty(nce_head)) {
1847                radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
1848                kfree(nce_head);
1849        }
1850}
1851
1852static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1853                                                    u64 ino, u64 gen)
1854{
1855        struct list_head *nce_head;
1856        struct name_cache_entry *cur;
1857
1858        nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
1859        if (!nce_head)
1860                return NULL;
1861
1862        list_for_each_entry(cur, nce_head, radix_list) {
1863                if (cur->ino == ino && cur->gen == gen)
1864                        return cur;
1865        }
1866        return NULL;
1867}
1868
1869/*
1870 * Removes the entry from the list and adds it back to the end. This marks the
1871 * entry as recently used so that name_cache_clean_unused does not remove it.
1872 */
1873static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1874{
1875        list_del(&nce->list);
1876        list_add_tail(&nce->list, &sctx->name_cache_list);
1877}
1878
1879/*
1880 * Remove some entries from the beginning of name_cache_list.
1881 */
1882static void name_cache_clean_unused(struct send_ctx *sctx)
1883{
1884        struct name_cache_entry *nce;
1885
1886        if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1887                return;
1888
1889        while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1890                nce = list_entry(sctx->name_cache_list.next,
1891                                struct name_cache_entry, list);
1892                name_cache_delete(sctx, nce);
1893                kfree(nce);
1894        }
1895}
1896
1897static void name_cache_free(struct send_ctx *sctx)
1898{
1899        struct name_cache_entry *nce;
1900
1901        while (!list_empty(&sctx->name_cache_list)) {
1902                nce = list_entry(sctx->name_cache_list.next,
1903                                struct name_cache_entry, list);
1904                name_cache_delete(sctx, nce);
1905                kfree(nce);
1906        }
1907}
1908
1909/*
1910 * Used by get_cur_path for each ref up to the root.
1911 * Returns 0 if it succeeded.
1912 * Returns 1 if the inode does not exist or got overwritten. In that case, the
1913 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
1914 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
1915 * Returns <0 in case of error.
1916 */
1917static int __get_cur_name_and_parent(struct send_ctx *sctx,
1918                                     u64 ino, u64 gen,
1919                                     u64 *parent_ino,
1920                                     u64 *parent_gen,
1921                                     struct fs_path *dest)
1922{
1923        int ret;
1924        int nce_ret;
1925        struct btrfs_path *path = NULL;
1926        struct name_cache_entry *nce = NULL;
1927
1928        /*
1929         * First check if we already did a call to this function with the same
1930         * ino/gen. If yes, check if the cache entry is still up-to-date and,
1931         * if it is, return the cached result.
1932         */
1933        nce = name_cache_search(sctx, ino, gen);
1934        if (nce) {
1935                if (ino < sctx->send_progress && nce->need_later_update) {
1936                        name_cache_delete(sctx, nce);
1937                        kfree(nce);
1938                        nce = NULL;
1939                } else {
1940                        name_cache_used(sctx, nce);
1941                        *parent_ino = nce->parent_ino;
1942                        *parent_gen = nce->parent_gen;
1943                        ret = fs_path_add(dest, nce->name, nce->name_len);
1944                        if (ret < 0)
1945                                goto out;
1946                        ret = nce->ret;
1947                        goto out;
1948                }
1949        }
1950
1951        path = alloc_path_for_send();
1952        if (!path)
1953                return -ENOMEM;
1954
1955        /*
1956         * If the inode does not exist yet, add the orphan name and return 1.
1957         * This should only happen for the parent dir that we determine in
1958         * __record_new_ref.
1959         */
1960        ret = is_inode_existent(sctx, ino, gen);
1961        if (ret < 0)
1962                goto out;
1963
1964        if (!ret) {
1965                ret = gen_unique_name(sctx, ino, gen, dest);
1966                if (ret < 0)
1967                        goto out;
1968                ret = 1;
1969                goto out_cache;
1970        }
1971
1972        /*
1973         * Depending on whether the inode was already processed or not, use
1974         * send_root or parent_root for ref lookup.
1975         */
1976        if (ino < sctx->send_progress)
1977                ret = get_first_ref(sctx->send_root, ino,
1978                                    parent_ino, parent_gen, dest);
1979        else
1980                ret = get_first_ref(sctx->parent_root, ino,
1981                                    parent_ino, parent_gen, dest);
1982        if (ret < 0)
1983                goto out;
1984
1985        /*
1986         * Check if the ref was overwritten by an inode's ref that was processed
1987         * earlier. If yes, treat as orphan and return 1.
1988         */
1989        ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
1990                        dest->start, dest->end - dest->start);
1991        if (ret < 0)
1992                goto out;
1993        if (ret) {
1994                fs_path_reset(dest);
1995                ret = gen_unique_name(sctx, ino, gen, dest);
1996                if (ret < 0)
1997                        goto out;
1998                ret = 1;
1999        }
2000
2001out_cache:
2002        /*
2003         * Store the result of the lookup in the name cache.
2004         */
2005        nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2006        if (!nce) {
2007                ret = -ENOMEM;
2008                goto out;
2009        }
2010
2011        nce->ino = ino;
2012        nce->gen = gen;
2013        nce->parent_ino = *parent_ino;
2014        nce->parent_gen = *parent_gen;
2015        nce->name_len = fs_path_len(dest);
2016        nce->ret = ret;
2017        strcpy(nce->name, dest->start);
2018
2019        if (ino < sctx->send_progress)
2020                nce->need_later_update = 0;
2021        else
2022                nce->need_later_update = 1;
2023
2024        nce_ret = name_cache_insert(sctx, nce);
2025        if (nce_ret < 0)
2026                ret = nce_ret;
2027        name_cache_clean_unused(sctx);
2028
2029out:
2030        btrfs_free_path(path);
2031        return ret;
2032}
2033
2034/*
2035 * Magic happens here. This function returns the first ref to an inode as it
2036 * would look like while receiving the stream at this point in time.
2037 * We walk the path up to the root. For every inode in between, we check if it
2038 * was already processed/sent. If yes, we continue with the parent as found
2039 * in send_root. If not, we continue with the parent as found in parent_root.
2040 * If we encounter an inode that was deleted at this point in time, we use the
2041 * inode's "orphan" name instead of the real name and stop. Same with new inodes
2042 * that were not created yet and overwritten inodes/refs.
2043 *
2044 * When do we have orphan inodes:
2045 * 1. When an inode is freshly created and thus no valid refs are available yet
2046 * 2. When a directory lost all its refs (deleted) but still has dir items
2047 *    inside which were not processed yet (pending for move/delete). If anyone
2048 *    tried to get the path to the dir items, it would get a path inside that
2049 *    orphan directory.
2050 * 3. When an inode is moved around or gets new links, it may overwrite the ref
2051 *    of an unprocessed inode. If in that case the first ref would be
2052 *    overwritten, the overwritten inode gets "orphanized". Later when we
2053 *    process this overwritten inode, it is restored at a new place by moving
2054 *    the orphan inode.
2055 *
2056 * sctx->send_progress tells this function at which point in time receiving
2057 * would be.
2058 */
2059static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2060                        struct fs_path *dest)
2061{
2062        int ret = 0;
2063        struct fs_path *name = NULL;
2064        u64 parent_inode = 0;
2065        u64 parent_gen = 0;
2066        int stop = 0;
2067
2068        name = fs_path_alloc();
2069        if (!name) {
2070                ret = -ENOMEM;
2071                goto out;
2072        }
2073
2074        dest->reversed = 1;
2075        fs_path_reset(dest);
2076
2077        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2078                fs_path_reset(name);
2079
2080                ret = __get_cur_name_and_parent(sctx, ino, gen,
2081                                &parent_inode, &parent_gen, name);
2082                if (ret < 0)
2083                        goto out;
2084                if (ret)
2085                        stop = 1;
2086
2087                ret = fs_path_add_path(dest, name);
2088                if (ret < 0)
2089                        goto out;
2090
2091                ino = parent_inode;
2092                gen = parent_gen;
2093        }
2094
2095out:
2096        fs_path_free(name);
2097        if (!ret)
2098                fs_path_unreverse(dest);
2099        return ret;
2100}
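/*
 * Example (hypothetical names): for an inode currently reachable as "a/b/c",
 * the loop above resolves "c", then "b", then "a", adding each component to
 * the reversed fs_path, and fs_path_unreverse finally yields "a/b/c" in dest.
 * If one of the components (say "b") was deleted or overwritten and not yet
 * processed, its orphan name from gen_unique_name is used for that component
 * instead and the walk stops there.
 */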
2101
2102/*
2103 * Called for regular files when sending extents data. Opens a struct file
2104 * to read from the file.
2105 */
2106static int open_cur_inode_file(struct send_ctx *sctx)
2107{
2108        int ret = 0;
2109        struct btrfs_key key;
2110        struct path path;
2111        struct inode *inode;
2112        struct dentry *dentry;
2113        struct file *filp;
2114        int new = 0;
2115
2116        if (sctx->cur_inode_filp)
2117                goto out;
2118
2119        key.objectid = sctx->cur_ino;
2120        key.type = BTRFS_INODE_ITEM_KEY;
2121        key.offset = 0;
2122
2123        inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
2124                        &new);
2125        if (IS_ERR(inode)) {
2126                ret = PTR_ERR(inode);
2127                goto out;
2128        }
2129
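        /* d_obtain_alias() consumes the inode reference, even on failure */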
2130        dentry = d_obtain_alias(inode);
2131        inode = NULL;
2132        if (IS_ERR(dentry)) {
2133                ret = PTR_ERR(dentry);
2134                goto out;
2135        }
2136
2137        path.mnt = sctx->mnt;
2138        path.dentry = dentry;
2139        filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
2140        dput(dentry);
2141        dentry = NULL;
2142        if (IS_ERR(filp)) {
2143                ret = PTR_ERR(filp);
2144                goto out;
2145        }
2146        sctx->cur_inode_filp = filp;
2147
2148out:
2149        /*
2150         * no xxxput required here as every vfs op
2151         * does it by itself on failure
2152         */
2153        return ret;
2154}
2155
2156/*
2157 * Closes the struct file that was created in open_cur_inode_file
2158 */
2159static int close_cur_inode_file(struct send_ctx *sctx)
2160{
2161        int ret = 0;
2162
2163        if (!sctx->cur_inode_filp)
2164                goto out;
2165
2166        ret = filp_close(sctx->cur_inode_filp, NULL);
2167        sctx->cur_inode_filp = NULL;
2168
2169out:
2170        return ret;
2171}
2172
2173/*
2174 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2175 */
2176static int send_subvol_begin(struct send_ctx *sctx)
2177{
2178        int ret;
2179        struct btrfs_root *send_root = sctx->send_root;
2180        struct btrfs_root *parent_root = sctx->parent_root;
2181        struct btrfs_path *path;
2182        struct btrfs_key key;
2183        struct btrfs_root_ref *ref;
2184        struct extent_buffer *leaf;
2185        char *name = NULL;
2186        int namelen;
2187
2188        path = alloc_path_for_send();
2189        if (!path)
2190                return -ENOMEM;
2191
2192        name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2193        if (!name) {
2194                btrfs_free_path(path);
2195                return -ENOMEM;
2196        }
2197
2198        key.objectid = send_root->objectid;
2199        key.type = BTRFS_ROOT_BACKREF_KEY;
2200        key.offset = 0;
2201
2202        ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2203                                &key, path, 1, 0);
2204        if (ret < 0)
2205                goto out;
2206        if (ret) {
2207                ret = -ENOENT;
2208                goto out;
2209        }
2210
2211        leaf = path->nodes[0];
2212        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2213        if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2214            key.objectid != send_root->objectid) {
2215                ret = -ENOENT;
2216                goto out;
2217        }
2218        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2219        namelen = btrfs_root_ref_name_len(leaf, ref);
2220        read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2221        btrfs_release_path(path);
2222
2223        if (parent_root) {
2224                ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2225                if (ret < 0)
2226                        goto out;
2227        } else {
2228                ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2229                if (ret < 0)
2230                        goto out;
2231        }
2232
2233        TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2234        TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2235                        sctx->send_root->root_item.uuid);
2236        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2237                        sctx->send_root->root_item.ctransid);
2238        if (parent_root) {
2239                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2240                                sctx->parent_root->root_item.uuid);
2241                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2242                                sctx->parent_root->root_item.ctransid);
2243        }
2244
2245        ret = send_cmd(sctx);
2246
2247tlv_put_failure:
2248out:
2249        btrfs_free_path(path);
2250        kfree(name);
2251        return ret;
2252}
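/*
 * Summary: an incremental send (parent_root set) begins the stream with a
 * SNAPSHOT command carrying the subvolume path plus uuid/ctransid of both the
 * sent root and its parent, while a full send begins with a SUBVOL command
 * carrying only the path and the sent root's uuid/ctransid. How these are
 * rendered is up to the receiving user space tool.
 */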
2253
2254static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2255{
2256        int ret = 0;
2257        struct fs_path *p;
2258
2259verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2260
2261        p = fs_path_alloc();
2262        if (!p)
2263                return -ENOMEM;
2264
2265        ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2266        if (ret < 0)
2267                goto out;
2268
2269        ret = get_cur_path(sctx, ino, gen, p);
2270        if (ret < 0)
2271                goto out;
2272        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2273        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2274
2275        ret = send_cmd(sctx);
2276
2277tlv_put_failure:
2278out:
2279        fs_path_free(p);
2280        return ret;
2281}
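/*
 * send_truncate above and the send_chmod/send_chown/send_utimes helpers below
 * all share the same shape: begin_cmd starts a new command in sctx->send_buf,
 * the TLV_PUT_* macros append attributes (and jump to tlv_put_failure on
 * error) and send_cmd finalizes the header and writes the command out to the
 * send file.
 */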
2282
2283static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2284{
2285        int ret = 0;
2286        struct fs_path *p;
2287
2288verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2289
2290        p = fs_path_alloc();
2291        if (!p)
2292                return -ENOMEM;
2293
2294        ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2295        if (ret < 0)
2296                goto out;
2297
2298        ret = get_cur_path(sctx, ino, gen, p);
2299        if (ret < 0)
2300                goto out;
2301        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2302        TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2303
2304        ret = send_cmd(sctx);
2305
2306tlv_put_failure:
2307out:
2308        fs_path_free(p);
2309        return ret;
2310}
2311
2312static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2313{
2314        int ret = 0;
2315        struct fs_path *p;
2316
2317verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2318
2319        p = fs_path_alloc();
2320        if (!p)
2321                return -ENOMEM;
2322
2323        ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2324        if (ret < 0)
2325                goto out;
2326
2327        ret = get_cur_path(sctx, ino, gen, p);
2328        if (ret < 0)
2329                goto out;
2330        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2331        TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2332        TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2333
2334        ret = send_cmd(sctx);
2335
2336tlv_put_failure:
2337out:
2338        fs_path_free(p);
2339        return ret;
2340}
2341
2342static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2343{
2344        int ret = 0;
2345        struct fs_path *p = NULL;
2346        struct btrfs_inode_item *ii;
2347        struct btrfs_path *path = NULL;
2348        struct extent_buffer *eb;
2349        struct btrfs_key key;
2350        int slot;
2351
2352verbose_printk("btrfs: send_utimes %llu\n", ino);
2353
2354        p = fs_path_alloc();
2355        if (!p)
2356                return -ENOMEM;
2357
2358        path = alloc_path_for_send();
2359        if (!path) {
2360                ret = -ENOMEM;
2361                goto out;
2362        }
2363
2364        key.objectid = ino;
2365        key.type = BTRFS_INODE_ITEM_KEY;
2366        key.offset = 0;
2367        ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2368        if (ret < 0)
2369                goto out;
2370
2371        eb = path->nodes[0];
2372        slot = path->slots[0];
2373        ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2374
2375        ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2376        if (ret < 0)
2377                goto out;
2378
2379        ret = get_cur_path(sctx, ino, gen, p);
2380        if (ret < 0)
2381                goto out;
2382        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2383        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2384                        btrfs_inode_atime(ii));
2385        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2386                        btrfs_inode_mtime(ii));
2387        TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2388                        btrfs_inode_ctime(ii));
2389        /* TODO Add otime support when the otime patches get into upstream */
2390
2391        ret = send_cmd(sctx);
2392
2393tlv_put_failure:
2394out:
2395        fs_path_free(p);
2396        btrfs_free_path(path);
2397        return ret;
2398}
2399
2400/*
2401 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2402 * a valid path yet because we did not process the refs yet. So, the inode
2403 * is created as an orphan.
2404 */
2405static int send_create_inode(struct send_ctx *sctx, u64 ino)
2406{
2407        int ret = 0;
2408        struct fs_path *p;
2409        int cmd;
2410        u64 gen;
2411        u64 mode;
2412        u64 rdev;
2413
2414verbose_printk("btrfs: send_create_inode %llu\n", ino);
2415
2416        p = fs_path_alloc();
2417        if (!p)
2418                return -ENOMEM;
2419
2420        ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2421                        NULL, &rdev);
2422        if (ret < 0)
2423                goto out;
2424
2425        if (S_ISREG(mode)) {
2426                cmd = BTRFS_SEND_C_MKFILE;
2427        } else if (S_ISDIR(mode)) {
2428                cmd = BTRFS_SEND_C_MKDIR;
2429        } else if (S_ISLNK(mode)) {
2430                cmd = BTRFS_SEND_C_SYMLINK;
2431        } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2432                cmd = BTRFS_SEND_C_MKNOD;
2433        } else if (S_ISFIFO(mode)) {
2434                cmd = BTRFS_SEND_C_MKFIFO;
2435        } else if (S_ISSOCK(mode)) {
2436                cmd = BTRFS_SEND_C_MKSOCK;
2437        } else {
2438                printk(KERN_WARNING "btrfs: unexpected inode type %o",
2439                                (int)(mode & S_IFMT));
2440                ret = -ENOTSUPP;
2441                goto out;
2442        }
2443
2444        ret = begin_cmd(sctx, cmd);
2445        if (ret < 0)
2446                goto out;
2447
2448        ret = gen_unique_name(sctx, ino, gen, p);
2449        if (ret < 0)
2450                goto out;
2451
2452        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2453        TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2454
2455        if (S_ISLNK(mode)) {
2456                fs_path_reset(p);
2457                ret = read_symlink(sctx->send_root, ino, p);
2458                if (ret < 0)
2459                        goto out;
2460                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2461        } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2462                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2463                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2464                TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2465        }
2466
2467        ret = send_cmd(sctx);
2468        if (ret < 0)
2469                goto out;
2470
2471
2472tlv_put_failure:
2473out:
2474        fs_path_free(p);
2475        return ret;
2476}
2477
2478/*
2479 * We need some special handling for inodes that get processed before their parent
2480 * directory is created. See process_recorded_refs for details.
2481 * This function checks if we already created the dir out of order.
2482 */
2483static int did_create_dir(struct send_ctx *sctx, u64 dir)
2484{
2485        int ret = 0;
2486        struct btrfs_path *path = NULL;
2487        struct btrfs_key key;
2488        struct btrfs_key found_key;
2489        struct btrfs_key di_key;
2490        struct extent_buffer *eb;
2491        struct btrfs_dir_item *di;
2492        int slot;
2493
2494        path = alloc_path_for_send();
2495        if (!path) {
2496                ret = -ENOMEM;
2497                goto out;
2498        }
2499
2500        key.objectid = dir;
2501        key.type = BTRFS_DIR_INDEX_KEY;
2502        key.offset = 0;
2503        while (1) {
2504                ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
2505                                1, 0);
2506                if (ret < 0)
2507                        goto out;
2508                if (!ret) {
2509                        eb = path->nodes[0];
2510                        slot = path->slots[0];
2511                        btrfs_item_key_to_cpu(eb, &found_key, slot);
2512                }
2513                if (ret || found_key.objectid != key.objectid ||
2514                    found_key.type != key.type) {
2515                        ret = 0;
2516                        goto out;
2517                }
2518
2519                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2520                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2521
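                /*
                 * A dir item that points to an already processed inode means
                 * this directory has already been created out of order.
                 */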
2522                if (di_key.objectid < sctx->send_progress) {
2523                        ret = 1;
2524                        goto out;
2525                }
2526
2527                key.offset = found_key.offset + 1;
2528                btrfs_release_path(path);
2529        }
2530
2531out:
2532        btrfs_free_path(path);
2533        return ret;
2534}
2535
2536/*
2537 * Only creates the inode if it is:
2538 * 1. Not a directory
2539 * 2. Or a directory which was not created already due to out of order
2540 *    directories. See did_create_dir and process_recorded_refs for details.
2541 */
2542static int send_create_inode_if_needed(struct send_ctx *sctx)
2543{
2544        int ret;
2545
2546        if (S_ISDIR(sctx->cur_inode_mode)) {
2547                ret = did_create_dir(sctx, sctx->cur_ino);
2548                if (ret < 0)
2549                        goto out;
2550                if (ret) {
2551                        ret = 0;
2552                        goto out;
2553                }
2554        }
2555
2556        ret = send_create_inode(sctx, sctx->cur_ino);
2557        if (ret < 0)
2558                goto out;
2559
2560out:
2561        return ret;
2562}
2563
2564struct recorded_ref {
2565        struct list_head list;
2566        char *dir_path;
2567        char *name;
2568        struct fs_path *full_path;
2569        u64 dir;
2570        u64 dir_gen;
2571        int dir_path_len;
2572        int name_len;
2573};
2574
2575/*
2576 * We need to process new refs before deleted refs, but compare_tree gives us
2577 * everything mixed. So we first record all refs and later process them.
2578 * This function is a helper to record one ref.
2579 */
2580static int record_ref(struct list_head *head, u64 dir,
2581                      u64 dir_gen, struct fs_path *path)
2582{
2583        struct recorded_ref *ref;
2584        char *tmp;
2585
2586        ref = kmalloc(sizeof(*ref), GFP_NOFS);
2587        if (!ref)
2588                return -ENOMEM;
2589
2590        ref->dir = dir;
2591        ref->dir_gen = dir_gen;
2592        ref->full_path = path;
2593
2594        tmp = strrchr(ref->full_path->start, '/');
2595        if (!tmp) {
2596                ref->name_len = ref->full_path->end - ref->full_path->start;
2597                ref->name = ref->full_path->start;
2598                ref->dir_path_len = 0;
2599                ref->dir_path = ref->full_path->start;
2600        } else {
2601                tmp++;
2602                ref->name_len = ref->full_path->end - tmp;
2603                ref->name = tmp;
2604                ref->dir_path = ref->full_path->start;
2605                ref->dir_path_len = ref->full_path->end -
2606                                ref->full_path->start - 1 - ref->name_len;
2607        }
2608
2609        list_add_tail(&ref->list, head);
2610        return 0;
2611}
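/*
 * Example (hypothetical path): for a full_path of "a/b/c", name points at "c"
 * (name_len 1) and dir_path covers "a/b" (dir_path_len 3); a top level name
 * without any '/' leaves dir_path_len at 0 with name, dir_path and full_path
 * all pointing at the same string.
 */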
2612
2613static void __free_recorded_refs(struct list_head *head)
2614{
2615        struct recorded_ref *cur;
2616
2617        while (!list_empty(head)) {
2618                cur = list_entry(head->next, struct recorded_ref, list);
2619                fs_path_free(cur->full_path);
2620                list_del(&cur->list);
2621                kfree(cur);
2622        }
2623}
2624
2625static void free_recorded_refs(struct send_ctx *sctx)
2626{
2627        __free_recorded_refs(&sctx->new_refs);
2628        __free_recorded_refs(&sctx->deleted_refs);
2629}
2630
2631/*
2632 * Renames/moves a file/dir to its orphan name. Used when the first
2633 * ref of an unprocessed inode gets overwritten and for all non-empty
2634 * directories.
2635 */
2636static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2637                          struct fs_path *path)
2638{
2639        int ret;
2640        struct fs_path *orphan;
2641
2642        orphan = fs_path_alloc();
2643        if (!orphan)
2644                return -ENOMEM;
2645
2646        ret = gen_unique_name(sctx, ino, gen, orphan);
2647        if (ret < 0)
2648                goto out;
2649
2650        ret = send_rename(sctx, path, orphan);
2651
2652out:
2653        fs_path_free(orphan);
2654        return ret;
2655}
2656
2657/*
2658 * Returns 1 if a directory can be removed at this point in time.
2659 * We check this by iterating all dir items and checking if the inode behind
2660 * the dir item was already processed.
2661 */
2662static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
2663{
2664        int ret = 0;
2665        struct btrfs_root *root = sctx->parent_root;
2666        struct btrfs_path *path;
2667        struct btrfs_key key;
2668        struct btrfs_key found_key;
2669        struct btrfs_key loc;
2670        struct btrfs_dir_item *di;
2671
2672        /*
2673         * Don't try to rmdir the top/root subvolume dir.
2674         */
2675        if (dir == BTRFS_FIRST_FREE_OBJECTID)
2676                return 0;
2677
2678        path = alloc_path_for_send();
2679        if (!path)
2680                return -ENOMEM;
2681
2682        key.objectid = dir;
2683        key.type = BTRFS_DIR_INDEX_KEY;
2684        key.offset = 0;
2685
2686        while (1) {
2687                ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
2688                if (ret < 0)
2689                        goto out;
2690                if (!ret) {
2691                        btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2692                                        path->slots[0]);
2693                }
2694                if (ret || found_key.objectid != key.objectid ||
2695                    found_key.type != key.type) {
2696                        break;
2697                }
2698
2699                di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2700                                struct btrfs_dir_item);
2701                btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2702
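                /*
                 * An entry that points to a not yet processed inode means the
                 * directory still looks non-empty on the receiving side, so
                 * it cannot be removed yet.
                 */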
2703                if (loc.objectid > send_progress) {
2704                        ret = 0;
2705                        goto out;
2706                }
2707
2708                btrfs_release_path(path);
2709                key.offset = found_key.offset + 1;
2710        }
2711
2712        ret = 1;
2713
2714out:
2715        btrfs_free_path(path);
2716        return ret;
2717}
2718
2719/*
2720 * This does all the move/link/unlink/rmdir magic.
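 *
 * Rough order of operations: new refs are handled first (creating missing
 * parent dirs out of order and orphanizing or unlinking refs that would be
 * overwritten), then deleted refs are turned into unlinks, rmdirs or an
 * orphanize of the current directory, and finally all parent dirs collected
 * in check_dirs get their utimes sent and pending rmdirs retried.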
2721 */
2722static int process_recorded_refs(struct send_ctx *sctx)
2723{
2724        int ret = 0;
2725        struct recorded_ref *cur;
2726        struct recorded_ref *cur2;
2727        struct ulist *check_dirs = NULL;
2728        struct ulist_iterator uit;
2729        struct ulist_node *un;
2730        struct fs_path *valid_path = NULL;
2731        u64 ow_inode = 0;
2732        u64 ow_gen;
2733        int did_overwrite = 0;
2734        int is_orphan = 0;
2735
2736verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2737
2738        /*
2739         * This should never happen as the root dir always has the same ref
2740         * which is always '..'
2741         */
2742        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
2743
2744        valid_path = fs_path_alloc();
2745        if (!valid_path) {
2746                ret = -ENOMEM;
2747                goto out;
2748        }
2749
2750        check_dirs = ulist_alloc(GFP_NOFS);
2751        if (!check_dirs) {
2752                ret = -ENOMEM;
2753                goto out;
2754        }
2755
2756        /*
2757         * First, check if the first ref of the current inode was overwritten
2758         * before. If yes, we know that the current inode was already orphanized
2759         * and thus use the orphan name. If not, we can use get_cur_path to
2760         * get the path of the first ref as it would look like while receiving at
2761         * this point in time.
2762         * New inodes are always orphan at the beginning, so force to use the
2763         * orphan name in this case.
2764         * The first ref is stored in valid_path and will be updated if it
2765         * gets moved around.
2766         */
2767        if (!sctx->cur_inode_new) {
2768                ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
2769                                sctx->cur_inode_gen);
2770                if (ret < 0)
2771                        goto out;
2772                if (ret)
2773                        did_overwrite = 1;
2774        }
2775        if (sctx->cur_inode_new || did_overwrite) {
2776                ret = gen_unique_name(sctx, sctx->cur_ino,
2777                                sctx->cur_inode_gen, valid_path);
2778                if (ret < 0)
2779                        goto out;
2780                is_orphan = 1;
2781        } else {
2782                ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
2783                                valid_path);
2784                if (ret < 0)
2785                        goto out;
2786        }
2787
2788        list_for_each_entry(cur, &sctx->new_refs, list) {
2789                /*
2790                 * We may have refs where the parent directory does not exist
2791                 * yet. This happens if the parent directory's inum is higher
2792                 * than the current inum. To handle this case, we create the
2793                 * parent directory out of order. But we need to check if this
2794                 * did already happen before due to other refs in the same dir.
2795                 */
2796                ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2797                if (ret < 0)
2798                        goto out;
2799                if (ret == inode_state_will_create) {
2800                        ret = 0;
2801                        /*
2802                         * First check if any of the current inodes refs did
2803                         * already create the dir.
2804                         */
2805                        list_for_each_entry(cur2, &sctx->new_refs, list) {
2806                                if (cur == cur2)
2807                                        break;
2808                                if (cur2->dir == cur->dir) {
2809                                        ret = 1;
2810                                        break;
2811                                }
2812                        }
2813
2814                        /*
2815                         * If that did not happen, check if a previous inode
2816                         * did already create the dir.
2817                         */
2818                        if (!ret)
2819                                ret = did_create_dir(sctx, cur->dir);
2820                        if (ret < 0)
2821                                goto out;
2822                        if (!ret) {
2823                                ret = send_create_inode(sctx, cur->dir);
2824                                if (ret < 0)
2825                                        goto out;
2826                        }
2827                }
2828
2829                /*
2830                 * Check if this new ref would overwrite the first ref of
2831                 * another unprocessed inode. If yes, orphanize the
2832                 * overwritten inode. If we find an overwritten ref that is
2833                 * not the first ref, simply unlink it.
2834                 */
2835                ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2836                                cur->name, cur->name_len,
2837                                &ow_inode, &ow_gen);
2838                if (ret < 0)
2839                        goto out;
2840                if (ret) {
2841                        ret = is_first_ref(sctx->parent_root,
2842                                           ow_inode, cur->dir, cur->name,
2843                                           cur->name_len);
2844                        if (ret < 0)
2845                                goto out;
2846                        if (ret) {
2847                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
2848                                                cur->full_path);
2849                                if (ret < 0)
2850                                        goto out;
2851                        } else {
2852                                ret = send_unlink(sctx, cur->full_path);
2853                                if (ret < 0)
2854                                        goto out;
2855                        }
2856                }
2857
2858                /*
2859                 * link/move the ref to the new place. If we have an orphan
2860                 * inode, move it and update valid_path. If not, link or move
2861                 * it depending on the inode mode.
2862                 */
2863                if (is_orphan) {
2864                        ret = send_rename(sctx, valid_path, cur->full_path);
2865                        if (ret < 0)
2866                                goto out;
2867                        is_orphan = 0;
2868                        ret = fs_path_copy(valid_path, cur->full_path);
2869                        if (ret < 0)
2870                                goto out;
2871                } else {
2872                        if (S_ISDIR(sctx->cur_inode_mode)) {
2873                                /*
2874                                 * Dirs can't be linked, so move it. For moved
2875                                 * dirs, we always have one new and one deleted
2876                                 * ref. The deleted ref is ignored later.
2877                                 */
2878                                ret = send_rename(sctx, valid_path,
2879                                                cur->full_path);
2880                                if (ret < 0)
2881                                        goto out;
2882                                ret = fs_path_copy(valid_path, cur->full_path);
2883                                if (ret < 0)
2884                                        goto out;
2885                        } else {
2886                                ret = send_link(sctx, cur->full_path,
2887                                                valid_path);
2888                                if (ret < 0)
2889                                        goto out;
2890                        }
2891                }
2892                ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2893                                GFP_NOFS);
2894                if (ret < 0)
2895                        goto out;
2896        }
2897
2898        if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
2899                /*
2900                 * Check if we can already rmdir the directory. If not,
2901                 * orphanize it. For every dir item inside that gets deleted
2902                 * later, we do this check again and rmdir it then if possible.
2903                 * See the use of check_dirs for more details.
2904                 */
2905                ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
2906                if (ret < 0)
2907                        goto out;
2908                if (ret) {
2909                        ret = send_rmdir(sctx, valid_path);
2910                        if (ret < 0)
2911                                goto out;
2912                } else if (!is_orphan) {
2913                        ret = orphanize_inode(sctx, sctx->cur_ino,
2914                                        sctx->cur_inode_gen, valid_path);
2915                        if (ret < 0)
2916                                goto out;
2917                        is_orphan = 1;
2918                }
2919
2920                list_for_each_entry(cur, &sctx->deleted_refs, list) {
2921                        ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2922                                        GFP_NOFS);
2923                        if (ret < 0)
2924                                goto out;
2925                }
2926        } else if (S_ISDIR(sctx->cur_inode_mode) &&
2927                   !list_empty(&sctx->deleted_refs)) {
2928                /*
2929                 * We have a moved dir. Add the old parent to check_dirs
2930                 */
2931                cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2932                                list);
2933                ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2934                                GFP_NOFS);
2935                if (ret < 0)
2936                        goto out;
2937        } else if (!S_ISDIR(sctx->cur_inode_mode)) {
2938                /*
2939                 * We have a non dir inode. Go through all deleted refs and
2940                 * unlink them if they were not already overwritten by other
2941                 * inodes.
2942                 */
2943                list_for_each_entry(cur, &sctx->deleted_refs, list) {
2944                        ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2945                                        sctx->cur_ino, sctx->cur_inode_gen,
2946                                        cur->name, cur->name_len);
2947                        if (ret < 0)
2948                                goto out;
2949                        if (!ret) {
2950                                ret = send_unlink(sctx, cur->full_path);
2951                                if (ret < 0)
2952                                        goto out;
2953                        }
2954                        ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2955                                        GFP_NOFS);
2956                        if (ret < 0)
2957                                goto out;
2958                }
2959
2960                /*
2961                 * If the inode is still orphan, unlink the orphan. This may
2962                 * happen when a previous inode did overwrite the first ref
2963                 * of this inode and no new refs were added for the current
2964                 * inode. Unlinking does not mean that the inode is deleted in
2965                 * all cases. There may still be links to this inode in other
2966                 * places.
2967                 */
2968                if (is_orphan) {
2969                        ret = send_unlink(sctx, valid_path);
2970                        if (ret < 0)
2971                                goto out;
2972                }
2973        }
2974
2975        /*
2976         * We did collect all parent dirs where cur_inode was once located. We
2977         * now go through all these dirs and check if they are pending for
2978         * deletion and if it's finally possible to perform the rmdir now.
2979         * We also update the inode stats of the parent dirs here.
2980         */
2981        ULIST_ITER_INIT(&uit);
2982        while ((un = ulist_next(check_dirs, &uit))) {
2983                /*
2984                 * In case we had refs into dirs that were not processed yet,
2985                 * we don't need to do the utime and rmdir logic for these dirs.
2986                 * The dir will be processed later.
2987                 */
2988                if (un->val > sctx->cur_ino)
2989                        continue;
2990
2991                ret = get_cur_inode_state(sctx, un->val, un->aux);
2992                if (ret < 0)
2993                        goto out;
2994
2995                if (ret == inode_state_did_create ||
2996                    ret == inode_state_no_change) {
2997                        /* TODO delayed utimes */
2998                        ret = send_utimes(sctx, un->val, un->aux);
2999                        if (ret < 0)
3000                                goto out;
3001                } else if (ret == inode_state_did_delete) {
3002                        ret = can_rmdir(sctx, un->val, sctx->cur_ino);
3003                        if (ret < 0)
3004                                goto out;
3005                        if (ret) {
3006                                ret = get_cur_path(sctx, un->val, un->aux,
3007                                                valid_path);
3008                                if (ret < 0)
3009                                        goto out;
3010                                ret = send_rmdir(sctx, valid_path);
3011                                if (ret < 0)
3012                                        goto out;
3013                        }
3014                }
3015        }
3016
3017        ret = 0;
3018
3019out:
3020        free_recorded_refs(sctx);
3021        ulist_free(check_dirs);
3022        fs_path_free(valid_path);
3023        return ret;
3024}
3025
3026static int __record_new_ref(int num, u64 dir, int index,
3027                            struct fs_path *name,
3028                            void *ctx)
3029{
3030        int ret = 0;
3031        struct send_ctx *sctx = ctx;
3032        struct fs_path *p;
3033        u64 gen;
3034
3035        p = fs_path_alloc();
3036        if (!p)
3037                return -ENOMEM;
3038
3039        ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
3040                        NULL, NULL);
3041        if (ret < 0)
3042                goto out;
3043
3044        ret = get_cur_path(sctx, dir, gen, p);
3045        if (ret < 0)
3046                goto out;
3047        ret = fs_path_add_path(p, name);
3048        if (ret < 0)
3049                goto out;
3050
3051        ret = record_ref(&sctx->new_refs, dir, gen, p);
3052
3053out:
3054        if (ret)
3055                fs_path_free(p);
3056        return ret;
3057}
3058
3059static int __record_deleted_ref(int num, u64 dir, int index,
3060                                struct fs_path *name,
3061                                void *ctx)
3062{
3063        int ret = 0;
3064        struct send_ctx *sctx = ctx;
3065        struct fs_path *p;
3066        u64 gen;
3067
3068        p = fs_path_alloc();
3069        if (!p)
3070                return -ENOMEM;
3071
3072        ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3073                        NULL, NULL);
3074        if (ret < 0)
3075                goto out;
3076
3077        ret = get_cur_path(sctx, dir, gen, p);
3078        if (ret < 0)
3079                goto out;
3080        ret = fs_path_add_path(p, name);
3081        if (ret < 0)
3082                goto out;
3083
3084        ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3085
3086out:
3087        if (ret)
3088                fs_path_free(p);
3089        return ret;
3090}
3091
3092static int record_new_ref(struct send_ctx *sctx)
3093{
3094        int ret;
3095
3096        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3097                                sctx->cmp_key, 0, __record_new_ref, sctx);
3098        if (ret < 0)
3099                goto out;
3100        ret = 0;
3101
3102out:
3103        return ret;
3104}
3105
3106static int record_deleted_ref(struct send_ctx *sctx)
3107{
3108        int ret;
3109
3110        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3111                                sctx->cmp_key, 0, __record_deleted_ref, sctx);
3112        if (ret < 0)
3113                goto out;
3114        ret = 0;
3115
3116out:
3117        return ret;
3118}
3119
3120struct find_ref_ctx {
3121        u64 dir;
3122        struct fs_path *name;
3123        int found_idx;
3124};
3125
3126static int __find_iref(int num, u64 dir, int index,
3127                       struct fs_path *name,
3128                       void *ctx_)
3129{
3130        struct find_ref_ctx *ctx = ctx_;
3131
3132        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3133            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3134                ctx->found_idx = num;
3135                return 1;
3136        }
3137        return 0;
3138}
3139
3140static int find_iref(struct btrfs_root *root,
3141                     struct btrfs_path *path,
3142                     struct btrfs_key *key,
3143                     u64 dir, struct fs_path *name)
3144{
3145        int ret;
3146        struct find_ref_ctx ctx;
3147
3148        ctx.dir = dir;
3149        ctx.name = name;
3150        ctx.found_idx = -1;
3151
3152        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3153        if (ret < 0)
3154                return ret;
3155
3156        if (ctx.found_idx == -1)
3157                return -ENOENT;
3158
3159        return ctx.found_idx;
3160}
3161
3162static int __record_changed_new_ref(int num, u64 dir, int index,
3163                                    struct fs_path *name,
3164                                    void *ctx)
3165{
3166        int ret;
3167        struct send_ctx *sctx = ctx;
3168
3169        ret = find_iref(sctx->parent_root, sctx->right_path,
3170                        sctx->cmp_key, dir, name);
3171        if (ret == -ENOENT)
3172                ret = __record_new_ref(num, dir, index, name, sctx);
3173        else if (ret > 0)
3174                ret = 0;
3175
3176        return ret;
3177}
3178
3179static int __record_changed_deleted_ref(int num, u64 dir, int index,
3180                                        struct fs_path *name,
3181                                        void *ctx)
3182{
3183        int ret;
3184        struct send_ctx *sctx = ctx;
3185
3186        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3187                        dir, name);
3188        if (ret == -ENOENT)
3189                ret = __record_deleted_ref(num, dir, index, name, sctx);
3190        else if (ret > 0)
3191                ret = 0;
3192
3193        return ret;
3194}
3195
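/*
 * For a ref item that exists in both trees, only the differences matter:
 * names present on the send side but absent from the parent side are recorded
 * as new refs, names present only on the parent side as deleted refs, and
 * names found on both sides are ignored.
 */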
3196static int record_changed_ref(struct send_ctx *sctx)
3197{
3198        int ret = 0;
3199
3200        ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3201                        sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3202        if (ret < 0)
3203                goto out;
3204        ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3205                        sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3206        if (ret < 0)
3207                goto out;
3208        ret = 0;
3209
3210out:
3211        return ret;
3212}
3213
3214/*
3215 * Record and process all refs at once. Needed when an inode changes the
3216 * generation number, which means that it was deleted and recreated.
3217 */
3218static int process_all_refs(struct send_ctx *sctx,
3219                            enum btrfs_compare_tree_result cmd)
3220{
3221        int ret;
3222        struct btrfs_root *root;
3223        struct btrfs_path *path;
3224        struct btrfs_key key;
3225        struct btrfs_key found_key;
3226        struct extent_buffer *eb;
3227        int slot;
3228        iterate_inode_ref_t cb;
3229
3230        path = alloc_path_for_send();
3231        if (!path)
3232                return -ENOMEM;
3233
3234        if (cmd == BTRFS_COMPARE_TREE_NEW) {
3235                root = sctx->send_root;
3236                cb = __record_new_ref;
3237        } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3238                root = sctx->parent_root;
3239                cb = __record_deleted_ref;
3240        } else {
3241                BUG();
3242        }
3243
3244        key.objectid = sctx->cmp_key->objectid;
3245        key.type = BTRFS_INODE_REF_KEY;
3246        key.offset = 0;
3247        while (1) {
3248                ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3249                if (ret < 0)
3250                        goto out;
3251                if (ret)
3252                        break;
3253
3254                eb = path->nodes[0];
3255                slot = path->slots[0];
3256                btrfs_item_key_to_cpu(eb, &found_key, slot);
3257
3258                if (found_key.objectid != key.objectid ||
3259                    (found_key.type != BTRFS_INODE_REF_KEY &&
3260                     found_key.type != BTRFS_INODE_EXTREF_KEY))
3261                        break;
3262
3263                ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
3264                btrfs_release_path(path);
3265                if (ret < 0)
3266                        goto out;
3267
3268                key.offset = found_key.offset + 1;
3269        }
3270        btrfs_release_path(path);
3271
3272        ret = process_recorded_refs(sctx);
3273
3274out:
3275        btrfs_free_path(path);
3276        return ret;
3277}
3278
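/*
 * Send a set_xattr command for the given path, xattr name and value.
 */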
3279static int send_set_xattr(struct send_ctx *sctx,
3280                          struct fs_path *path,
3281                          const char *name, int name_len,
3282                          const char *data, int data_len)
3283{
3284        int ret = 0;
3285
3286        ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3287        if (ret < 0)
3288                goto out;
3289
3290        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3291        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3292        TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3293
3294        ret = send_cmd(sctx);
3295
3296tlv_put_failure:
3297out:
3298        return ret;
3299}
3300
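/*
 * Send a remove_xattr command for the given path and xattr name.
 */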
3301static int send_remove_xattr(struct send_ctx *sctx,
3302                          struct fs_path *path,
3303                          const char *name, int name_len)
3304{
3305        int ret = 0;
3306
3307        ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3308        if (ret < 0)
3309                goto out;
3310
3311        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3312        TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3313
3314        ret = send_cmd(sctx);
3315
3316tlv_put_failure:
3317out:
3318        return ret;
3319}
3320
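/*
 * Called for each xattr that has to be transferred (new or changed). Builds
 * the current inode's path and sends a set_xattr command for it.
 */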
3321static int __process_new_xattr(int num, struct btrfs_key *di_key,
3322                               const char *name, int name_len,
3323                               const char *data, int data_len,
3324                               u8 type, void *ctx)
3325{
3326        int ret;
3327        struct send_ctx *sctx = ctx;
3328        struct fs_path *p;
3329        posix_acl_xattr_header dummy_acl;
3330
3331        p = fs_path_alloc();
3332        if (!p)
3333                return -ENOMEM;
3334
3335        /*
3336         * This hack is needed because empty acls are stored as zero byte
3337         * data in xattrs. The problem is that receiving these zero byte
3338         * acls will fail later. To fix this, we send a dummy acl list that
3339         * contains only the version number and no entries.
3340         */
3341        if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3342            !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3343                if (data_len == 0) {
3344                        dummy_acl.a_version =
3345                                        cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3346                        data = (char *)&dummy_acl;
3347                        data_len = sizeof(dummy_acl);
3348                }
3349        }
3350
3351        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3352        if (ret < 0)
3353                goto out;
3354
3355        ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3356
3357out:
3358        fs_path_free(p);
3359        return ret;
3360}
3361
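/*
 * Called for each xattr that has to be removed on the receiving side. Builds
 * the current inode's path and sends a remove_xattr command for it.
 */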
3362static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3363                                   const char *name, int name_len,
3364                                   const char *data, int data_len,
3365                                   u8 type, void *ctx)
3366{
3367        int ret;
3368        struct send_ctx *sctx = ctx;
3369        struct fs_path *p;
3370
3371        p = fs_path_alloc();
3372        if (!p)
3373                return -ENOMEM;
3374
3375        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3376        if (ret < 0)
3377                goto out;
3378
3379        ret = send_remove_xattr(sctx, p, name, name_len);
3380
3381out:
3382        fs_path_free(p);
3383        return ret;
3384}
3385
3386static int process_new_xattr(struct send_ctx *sctx)
3387{
3388        int ret = 0;
3389
3390        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3391                               sctx->cmp_key, __process_new_xattr, sctx);
3392
3393        return ret;
3394}
3395
3396static int process_deleted_xattr(struct send_ctx *sctx)
3397{
3398        int ret;
3399
3400        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3401                               sctx->cmp_key, __process_deleted_xattr, sctx);
3402
3403        return ret;
3404}
3405
3406struct find_xattr_ctx {
3407        const char *name;
3408        int name_len;
3409        int found_idx;
3410        char *found_data;
3411        int found_data_len;
3412};
3413
3414static int __find_xattr(int num, struct btrfs_key *di_key,
3415                        const char *name, int name_len,
3416                        const char *data, int data_len,
3417                        u8 type, void *vctx)
3418{
3419        struct find_xattr_ctx *ctx = vctx;
3420
3421        if (name_len == ctx->name_len &&
3422            strncmp(name, ctx->name, name_len) == 0) {
3423                ctx->found_idx = num;
3424                ctx->found_data_len = data_len;
3425                ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
3426                if (!ctx->found_data)
3427                        return -ENOMEM;
3428                return 1;
3429        }
3430        return 0;
3431}
3432
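/*
 * Look up an xattr by name in the dir item at @key. Returns the index of the
 * xattr or -ENOENT if it does not exist. If @data is not NULL, a copy of the
 * xattr value is returned through it and must be freed by the caller.
 */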
3433static int find_xattr(struct btrfs_root *root,
3434                      struct btrfs_path *path,
3435                      struct btrfs_key *key,
3436                      const char *name, int name_len,
3437                      char **data, int *data_len)
3438{
3439        int ret;
3440        struct find_xattr_ctx ctx;
3441
3442        ctx.name = name;
3443        ctx.name_len = name_len;
3444        ctx.found_idx = -1;
3445        ctx.found_data = NULL;
3446        ctx.found_data_len = 0;
3447
3448        ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
3449        if (ret < 0)
3450                return ret;
3451
3452        if (ctx.found_idx == -1)
3453                return -ENOENT;
3454        if (data) {
3455                *data = ctx.found_data;
3456                *data_len = ctx.found_data_len;
3457        } else {
3458                kfree(ctx.found_data);
3459        }
3460        return ctx.found_idx;
3461}
3462
3463
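/*
 * Called for each xattr of the send root. The xattr is sent as new if it
 * does not exist in the parent root or if its value changed.
 */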
3464static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3465                                       const char *name, int name_len,
3466                                       const char *data, int data_len,
3467                                       u8 type, void *ctx)
3468{
3469        int ret;
3470        struct send_ctx *sctx = ctx;
3471        char *found_data = NULL;
3472        int found_data_len  = 0;
3473
3474        ret = find_xattr(sctx->parent_root, sctx->right_path,
3475                         sctx->cmp_key, name, name_len, &found_data,
3476                         &found_data_len);
3477        if (ret == -ENOENT) {
3478                ret = __process_new_xattr(num, di_key, name, name_len, data,
3479                                data_len, type, ctx);
3480        } else if (ret >= 0) {
3481                if (data_len != found_data_len ||
3482                    memcmp(data, found_data, data_len)) {
3483                        ret = __process_new_xattr(num, di_key, name, name_len,
3484                                        data, data_len, type, ctx);
3485                } else {
3486                        ret = 0;
3487                }
3488        }
3489
3490        kfree(found_data);
3491        return ret;
3492}
3493
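/*
 * Called for each xattr of the parent root. The xattr is removed on the
 * receiving side if it does not exist in the send root anymore.
 */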
3494static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
3495                                           const char *name, int name_len,
3496                                           const char *data, int data_len,
3497                                           u8 type, void *ctx)
3498{
3499        int ret;
3500        struct send_ctx *sctx = ctx;
3501
3502        ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
3503                         name, name_len, NULL, NULL);
3504        if (ret == -ENOENT)
3505                ret = __process_deleted_xattr(num, di_key, name, name_len, data,
3506                                data_len, type, ctx);
3507        else if (ret >= 0)
3508                ret = 0;
3509
3510        return ret;
3511}
3512
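/*
 * Process the xattrs of a changed inode: send new and changed xattrs first,
 * then remove the ones that no longer exist.
 */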
3513static int process_changed_xattr(struct send_ctx *sctx)
3514{
3515        int ret = 0;
3516
3517        ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3518                        sctx->cmp_key, __process_changed_new_xattr, sctx);
3519        if (ret < 0)
3520                goto out;
3521        ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3522                        sctx->cmp_key, __process_changed_deleted_xattr, sctx);
3523
3524out:
3525        return ret;
3526}
3527
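/*
 * Send all xattrs of the current inode as new. Used when the whole inode is
 * processed as if it was new.
 */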
3528static int process_all_new_xattrs(struct send_ctx *sctx)
3529{
3530        int ret;
3531        struct btrfs_root *root;
3532        struct btrfs_path *path;
3533        struct btrfs_key key;
3534        struct btrfs_key found_key;
3535        struct extent_buffer *eb;
3536        int slot;
3537
3538        path = alloc_path_for_send();
3539        if (!path)
3540                return -ENOMEM;
3541
3542        root = sctx->send_root;
3543
3544        key.objectid = sctx->cmp_key->objectid;
3545        key.type = BTRFS_XATTR_ITEM_KEY;
3546        key.offset = 0;
3547        while (1) {
3548                ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3549                if (ret < 0)
3550                        goto out;
3551                if (ret) {
3552                        ret = 0;
3553                        goto out;
3554                }
3555
3556                eb = path->nodes[0];
3557                slot = path->slots[0];
3558                btrfs_item_key_to_cpu(eb, &found_key, slot);
3559
3560                if (found_key.objectid != key.objectid ||
3561                    found_key.type != key.type) {
3562                        ret = 0;
3563                        goto out;
3564                }
3565
3566                ret = iterate_dir_item(root, path, &found_key,
3567                                       __process_new_xattr, sctx);
3568                if (ret < 0)
3569                        goto out;
3570
3571                btrfs_release_path(path);
3572                key.offset = found_key.offset + 1;
3573        }
3574
3575out:
3576        btrfs_free_path(path);
3577        return ret;
3578}
3579
3580/*
3581 * Read some bytes from the current inode/file and send a write command to
3582 * user space.
3583 */
3584static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3585{
3586        int ret = 0;
3587        struct fs_path *p;
3588        loff_t pos = offset;
3589        int num_read = 0;
3590        mm_segment_t old_fs;
3591
3592        p = fs_path_alloc();
3593        if (!p)
3594                return -ENOMEM;
3595
3596        /*
3597         * The vfs normally only accepts user space buffers for security
3598         * reasons. We only read from the file and only provide the read_buf
3599         * buffer to the vfs. As this buffer does not come from a user space
3600         * call, it's ok to temporarily allow kernel space buffers.
3601         */
3602        old_fs = get_fs();
3603        set_fs(KERNEL_DS);
3604
3605verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3606
3607        ret = open_cur_inode_file(sctx);
3608        if (ret < 0)
3609                goto out;
3610
3611        ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
3612        if (ret < 0)
3613                goto out;
3614        num_read = ret;
3615        if (!num_read)
3616                goto out;
3617
3618        ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3619        if (ret < 0)
3620                goto out;
3621
3622        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3623        if (ret < 0)
3624                goto out;
3625
3626        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3627        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3628        TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
3629
3630        ret = send_cmd(sctx);
3631
3632tlv_put_failure:
3633out:
3634        fs_path_free(p);
3635        set_fs(old_fs);
3636        if (ret < 0)
3637                return ret;
3638        return num_read;
3639}
3640
3641/*
3642 * Send a clone command to user space.
3643 */
3644static int send_clone(struct send_ctx *sctx,
3645                      u64 offset, u32 len,
3646                      struct clone_root *clone_root)
3647{
3648        int ret = 0;
3649        struct fs_path *p;
3650        u64 gen;
3651
3652verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3653               "clone_inode=%llu, clone_offset=%llu\n", offset, len,
3654                clone_root->root->objectid, clone_root->ino,
3655                clone_root->offset);
3656
3657        p = fs_path_alloc();
3658        if (!p)
3659                return -ENOMEM;
3660
3661        ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
3662        if (ret < 0)
3663                goto out;
3664
3665        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3666        if (ret < 0)
3667                goto out;
3668
3669        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3670        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
3671        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3672
3673        if (clone_root->root == sctx->send_root) {
3674                ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
3675                                &gen, NULL, NULL, NULL, NULL);
3676                if (ret < 0)
3677                        goto out;
3678                ret = get_cur_path(sctx, clone_root->ino, gen, p);
3679        } else {
3680                ret = get_inode_path(clone_root->root, clone_root->ino, p);
3681        }
3682        if (ret < 0)
3683                goto out;
3684
3685        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3686                        clone_root->root->root_item.uuid);
3687        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3688                        clone_root->root->root_item.ctransid);
3689        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3690        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3691                        clone_root->offset);
3692
3693        ret = send_cmd(sctx);
3694
3695tlv_put_failure:
3696out:
3697        fs_path_free(p);
3698        return ret;
3699}
3700
3701/*
3702 * Send an update extent command to user space.
3703 */
3704static int send_update_extent(struct send_ctx *sctx,
3705                              u64 offset, u32 len)
3706{
3707        int ret = 0;
3708        struct fs_path *p;
3709
3710        p = fs_path_alloc();
3711        if (!p)
3712                return -ENOMEM;
3713
3714        ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
3715        if (ret < 0)
3716                goto out;
3717
3718        ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3719        if (ret < 0)
3720                goto out;
3721
3722        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3723        TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3724        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
3725
3726        ret = send_cmd(sctx);
3727
3728tlv_put_failure:
3729out:
3730        fs_path_free(p);
3731        return ret;
3732}
3733
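/*
 * Send the data of one file extent item, either as a clone command if a
 * clone source was found, as an update_extent command if no file data is
 * sent at all, or as a series of write commands otherwise.
 */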
3734static int send_write_or_clone(struct send_ctx *sctx,
3735                               struct btrfs_path *path,
3736                               struct btrfs_key *key,
3737                               struct clone_root *clone_root)
3738{
3739        int ret = 0;
3740        struct btrfs_file_extent_item *ei;
3741        u64 offset = key->offset;
3742        u64 pos = 0;
3743        u64 len;
3744        u32 l;
3745        u8 type;
3746
3747        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3748                        struct btrfs_file_extent_item);
3749        type = btrfs_file_extent_type(path->nodes[0], ei);
3750        if (type == BTRFS_FILE_EXTENT_INLINE) {
3751                len = btrfs_file_extent_inline_len(path->nodes[0], ei);
3752                /*
3753                 * It is possible the inline item won't cover the whole page,
3754                 * but there may be items after this page. Make sure to send
3755                 * the whole thing.
3756                 */
3757                len = PAGE_CACHE_ALIGN(len);
3758        } else {
3759                len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3760        }
3761
3762        if (offset + len > sctx->cur_inode_size)
3763                len = sctx->cur_inode_size - offset;
3764        if (len == 0) {
3765                ret = 0;
3766                goto out;
3767        }
3768
3769        if (clone_root) {
3770                ret = send_clone(sctx, offset, len, clone_root);
3771        } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
3772                ret = send_update_extent(sctx, offset, len);
3773        } else {
3774                while (pos < len) {
3775                        l = len - pos;
3776                        if (l > BTRFS_SEND_READ_SIZE)
3777                                l = BTRFS_SEND_READ_SIZE;
3778                        ret = send_write(sctx, pos + offset, l);
3779                        if (ret < 0)
3780                                goto out;
3781                        if (!ret)
3782                                break;
3783                        pos += ret;
3784                }
3785                ret = 0;
3786        }
3787out:
3788        return ret;
3789}
3790
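/*
 * Check if the file extent item at @ekey of the send root refers to the same
 * disk data as the extents that cover this file range in the parent root.
 * Returns 1 if the extent is unchanged and can be skipped, 0 otherwise.
 */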
3791static int is_extent_unchanged(struct send_ctx *sctx,
3792                               struct btrfs_path *left_path,
3793                               struct btrfs_key *ekey)
3794{
3795        int ret = 0;
3796        struct btrfs_key key;
3797        struct btrfs_path *path = NULL;
3798        struct extent_buffer *eb;
3799        int slot;
3800        struct btrfs_key found_key;
3801        struct btrfs_file_extent_item *ei;
3802        u64 left_disknr;
3803        u64 right_disknr;
3804        u64 left_offset;
3805        u64 right_offset;
3806        u64 left_offset_fixed;
3807        u64 left_len;
3808        u64 right_len;
3809        u64 left_gen;
3810        u64 right_gen;
3811        u8 left_type;
3812        u8 right_type;
3813
3814        path = alloc_path_for_send();
3815        if (!path)
3816                return -ENOMEM;
3817
3818        eb = left_path->nodes[0];
3819        slot = left_path->slots[0];
3820        ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3821        left_type = btrfs_file_extent_type(eb, ei);
3822
3823        if (left_type != BTRFS_FILE_EXTENT_REG) {
3824                ret = 0;
3825                goto out;
3826        }
3827        left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3828        left_len = btrfs_file_extent_num_bytes(eb, ei);
3829        left_offset = btrfs_file_extent_offset(eb, ei);
3830        left_gen = btrfs_file_extent_generation(eb, ei);
3831
3832        /*
3833         * The following comments refer to this diagram. L is the left
3834         * extent which we are checking at the moment. 1-8 are the right
3835         * extents that we iterate over.
3836         *
3837         *       |-----L-----|
3838         * |-1-|-2a-|-3-|-4-|-5-|-6-|
3839         *
3840         *       |-----L-----|
3841         * |--1--|-2b-|...(same as above)
3842         *
3843         * Alternative situation. Happens on files where extents got split.
3844         *       |-----L-----|
3845         * |-----------7-----------|-6-|
3846         *
3847         * Alternative situation. Happens on files which got larger.
3848         *       |-----L-----|
3849         * |-8-|
3850         * Nothing follows after 8.
3851         */
3852
3853        key.objectid = ekey->objectid;
3854        key.type = BTRFS_EXTENT_DATA_KEY;
3855        key.offset = ekey->offset;
3856        ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
3857        if (ret < 0)
3858                goto out;
3859        if (ret) {
3860                ret = 0;
3861                goto out;
3862        }
3863
3864        /*
3865         * Handle special case where the right side has no extents at all.
3866         */
3867        eb = path->nodes[0];
3868        slot = path->slots[0];
3869        btrfs_item_key_to_cpu(eb, &found_key, slot);
3870        if (found_key.objectid != key.objectid ||
3871            found_key.type != key.type) {
3872                ret = 0;
3873                goto out;
3874        }
3875
3876        /*
3877         * We're now on 2a, 2b or 7.
3878         */
3879        key = found_key;
3880        while (key.offset < ekey->offset + left_len) {
3881                ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3882                right_type = btrfs_file_extent_type(eb, ei);
3883                right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3884                right_len = btrfs_file_extent_num_bytes(eb, ei);
3885                right_offset = btrfs_file_extent_offset(eb, ei);
3886                right_gen = btrfs_file_extent_generation(eb, ei);
3887
3888                if (right_type != BTRFS_FILE_EXTENT_REG) {
3889                        ret = 0;
3890                        goto out;
3891                }
3892
3893                /*
3894                 * Are we at extent 8? If yes, we know the extent is changed.
3895                 * This may only happen on the first iteration.
3896                 */
3897                if (found_key.offset + right_len <= ekey->offset) {
3898                        ret = 0;
3899                        goto out;
3900                }
3901
3902                left_offset_fixed = left_offset;
3903                if (key.offset < ekey->offset) {
3904                        /* Fix the right offset for 2a and 7. */
3905                        right_offset += ekey->offset - key.offset;
3906                } else {
3907                        /* Fix the left offset for all extents behind 2a and 2b */
3908                        left_offset_fixed += key.offset - ekey->offset;
3909                }
3910
3911                /*
3912                 * Check if we have the same extent.
3913                 */
3914                if (left_disknr != right_disknr ||
3915                    left_offset_fixed != right_offset ||
3916                    left_gen != right_gen) {
3917                        ret = 0;
3918                        goto out;
3919                }
3920
3921                /*
3922                 * Go to the next extent.
3923                 */
3924                ret = btrfs_next_item(sctx->parent_root, path);
3925                if (ret < 0)
3926                        goto out;
3927                if (!ret) {
3928                        eb = path->nodes[0];
3929                        slot = path->slots[0];
3930                        btrfs_item_key_to_cpu(eb, &found_key, slot);
3931                }
3932                if (ret || found_key.objectid != key.objectid ||
3933                    found_key.type != key.type) {
3934                        key.offset += right_len;
3935                        break;
3936                }
3937                if (found_key.offset != key.offset + right_len) {
3938                        ret = 0;
3939                        goto out;
3940                }
3941                key = found_key;
3942        }
3943
3944        /*
3945         * We're now behind the left extent (treat as unchanged) or at the end
3946         * of the right side (treat as changed).
3947         */
3948        if (key.offset >= ekey->offset + left_len)
3949                ret = 1;
3950        else
3951                ret = 0;
3952
3953
3954out:
3955        btrfs_free_path(path);
3956        return ret;
3957}
3958
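/*
 * Process one file extent item of the current inode: skip it if it is
 * unchanged compared to the parent root, otherwise look for a clone source
 * and send the data.
 */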
3959static int process_extent(struct send_ctx *sctx,
3960                          struct btrfs_path *path,
3961                          struct btrfs_key *key)
3962{
3963        int ret = 0;
3964        struct clone_root *found_clone = NULL;
3965
3966        if (S_ISLNK(sctx->cur_inode_mode))
3967                return 0;
3968
3969        if (sctx->parent_root && !sctx->cur_inode_new) {
3970                ret = is_extent_unchanged(sctx, path, key);
3971                if (ret < 0)
3972                        goto out;
3973                if (ret) {
3974                        ret = 0;
3975                        goto out;
3976                }
3977        }
3978
3979        ret = find_extent_clone(sctx, path, key->objectid, key->offset,
3980                        sctx->cur_inode_size, &found_clone);
3981        if (ret != -ENOENT && ret < 0)
3982                goto out;
3983
3984        ret = send_write_or_clone(sctx, path, key, found_clone);
3985
3986out:
3987        return ret;
3988}
3989
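/*
 * Process all file extent items of the current inode in the send root. Used
 * when the whole inode is processed as if it was new.
 */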
3990static int process_all_extents(struct send_ctx *sctx)
3991{
3992        int ret;
3993        struct btrfs_root *root;
3994        struct btrfs_path *path;
3995        struct btrfs_key key;
3996        struct btrfs_key found_key;
3997        struct extent_buffer *eb;
3998        int slot;
3999
4000        root = sctx->send_root;
4001        path = alloc_path_for_send();
4002        if (!path)
4003                return -ENOMEM;
4004
4005        key.objectid = sctx->cmp_key->objectid;
4006        key.type = BTRFS_EXTENT_DATA_KEY;
4007        key.offset = 0;
4008        while (1) {
4009                ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
4010                if (ret < 0)
4011                        goto out;
4012                if (ret) {
4013                        ret = 0;
4014                        goto out;
4015                }
4016
4017                eb = path->nodes[0];
4018                slot = path->slots[0];
4019                btrfs_item_key_to_cpu(eb, &found_key, slot);
4020
4021                if (found_key.objectid != key.objectid ||
4022                    found_key.type != key.type) {
4023                        ret = 0;
4024                        goto out;
4025                }
4026
4027                ret = process_extent(sctx, path, &found_key);
4028                if (ret < 0)
4029                        goto out;
4030
4031                btrfs_release_path(path);
4032                key.offset = found_key.offset + 1;
4033        }
4034
4035out:
4036        btrfs_free_path(path);
4037        return ret;
4038}
4039
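/*
 * Process the recorded refs of the current inode as soon as the tree
 * comparison has moved past its ref items or the end of the comparison is
 * reached, and advance send_progress afterwards.
 */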
4040static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4041{
4042        int ret = 0;
4043
4044        if (sctx->cur_ino == 0)
4045                goto out;
4046        if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
4047            sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
4048                goto out;
4049        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4050                goto out;
4051
4052        ret = process_recorded_refs(sctx);
4053        if (ret < 0)
4054                goto out;
4055
4056        /*
4057         * We have processed the refs and thus need to advance send_progress.
4058         * Now, calls to get_cur_xxx will take the updated refs of the current
4059         * inode into account.
4060         */
4061        sctx->send_progress = sctx->cur_ino + 1;
4062
4063out:
4064        return ret;
4065}
4066
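/*
 * Finish off the currently processed inode: process its recorded refs and,
 * unless the inode was deleted, send truncate, chown, chmod and utimes
 * commands as needed.
 */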
4067static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4068{
4069        int ret = 0;
4070        u64 left_mode;
4071        u64 left_uid;
4072        u64 left_gid;
4073        u64 right_mode;
4074        u64 right_uid;
4075        u64 right_gid;
4076        int need_chmod = 0;
4077        int need_chown = 0;
4078
4079        ret = process_recorded_refs_if_needed(sctx, at_end);
4080        if (ret < 0)
4081                goto out;
4082
4083        if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4084                goto out;
4085        if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4086                goto out;
4087
4088        ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4089                        &left_mode, &left_uid, &left_gid, NULL);
4090        if (ret < 0)
4091                goto out;
4092
4093        if (!sctx->parent_root || sctx->cur_inode_new) {
4094                need_chown = 1;
4095                if (!S_ISLNK(sctx->cur_inode_mode))
4096                        need_chmod = 1;
4097        } else {
4098                ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4099                                NULL, NULL, &right_mode, &right_uid,
4100                                &right_gid, NULL);
4101                if (ret < 0)
4102                        goto out;
4103
4104                if (left_uid != right_uid || left_gid != right_gid)
4105                        need_chown = 1;
4106                if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
4107                        need_chmod = 1;
4108        }
4109
4110        if (S_ISREG(sctx->cur_inode_mode)) {
4111                ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4112                                sctx->cur_inode_size);
4113                if (ret < 0)
4114                        goto out;
4115        }
4116
4117        if (need_chown) {
4118                ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4119                                left_uid, left_gid);
4120                if (ret < 0)
4121                        goto out;
4122        }
4123        if (need_chmod) {
4124                ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4125                                left_mode);
4126                if (ret < 0)
4127                        goto out;
4128        }
4129
4130        /*
4131         * We need to send the utimes every time, no matter if anything changed
4132         * between the two trees, as we have made changes to the inode before.
4133         */
4134        ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4135        if (ret < 0)
4136                goto out;
4137
4138out:
4139        return ret;
4140}
4141
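/*
 * Called when the tree comparison found a new, deleted or changed inode
 * item. Sets up the state for the current inode in sctx and, for new
 * inodes, sends the create commands. An inode whose generation number
 * changed is handled as a deletion of the old inode plus a creation of a
 * completely new one.
 */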
4142static int changed_inode(struct send_ctx *sctx,
4143                         enum btrfs_compare_tree_result result)
4144{
4145        int ret = 0;
4146        struct btrfs_key *key = sctx->cmp_key;
4147        struct btrfs_inode_item *left_ii = NULL;
4148        struct btrfs_inode_item *right_ii = NULL;
4149        u64 left_gen = 0;
4150        u64 right_gen = 0;
4151
4152        ret = close_cur_inode_file(sctx);
4153        if (ret < 0)
4154                goto out;
4155
4156        sctx->cur_ino = key->objectid;
4157        sctx->cur_inode_new_gen = 0;
4158
4159        /*
4160         * Set send_progress to current inode. This will tell all get_cur_xxx
4161         * functions that the current inode's refs are not updated yet. Later,
4162         * when process_recorded_refs is finished, it is set to cur_ino + 1.
4163         */
4164        sctx->send_progress = sctx->cur_ino;
4165
4166        if (result == BTRFS_COMPARE_TREE_NEW ||
4167            result == BTRFS_COMPARE_TREE_CHANGED) {
4168                left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4169                                sctx->left_path->slots[0],
4170                                struct btrfs_inode_item);
4171                left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4172                                left_ii);
4173        } else {
4174                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4175                                sctx->right_path->slots[0],
4176                                struct btrfs_inode_item);
4177                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4178                                right_ii);
4179        }
4180        if (result == BTRFS_COMPARE_TREE_CHANGED) {
4181                right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4182                                sctx->right_path->slots[0],
4183                                struct btrfs_inode_item);
4184
4185                right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4186                                right_ii);
4187
4188                /*
4189                 * The cur_ino = root dir case is special here. We can't treat
4190                 * the inode as deleted+reused because it would generate a
4191                 * stream that tries to delete/mkdir the root dir.
4192                 */
4193                if (left_gen != right_gen &&
4194                    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4195                        sctx->cur_inode_new_gen = 1;
4196        }
4197
4198        if (result == BTRFS_COMPARE_TREE_NEW) {
4199                sctx->cur_inode_gen = left_gen;
4200                sctx->cur_inode_new = 1;
4201                sctx->cur_inode_deleted = 0;
4202                sctx->cur_inode_size = btrfs_inode_size(
4203                                sctx->left_path->nodes[0], left_ii);
4204                sctx->cur_inode_mode = btrfs_inode_mode(
4205                                sctx->left_path->nodes[0], left_ii);
4206                if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4207                        ret = send_create_inode_if_needed(sctx);
4208        } else if (result == BTRFS_COMPARE_TREE_DELETED) {
4209                sctx->cur_inode_gen = right_gen;
4210                sctx->cur_inode_new = 0;
4211                sctx->cur_inode_deleted = 1;
4212                sctx->cur_inode_size = btrfs_inode_size(
4213                                sctx->right_path->nodes[0], right_ii);
4214                sctx->cur_inode_mode = btrfs_inode_mode(
4215                                sctx->right_path->nodes[0], right_ii);
4216        } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
4217                /*
4218                 * We need to do some special handling in case the inode was
4219                 * reported as changed with a changed generation number. This
4220                 * means that the original inode was deleted and a new inode
4221                 * reused the same inum. So we have to treat the old inode as
4222                 * deleted and the new one as new.
4223                 */
4224                if (sctx->cur_inode_new_gen) {
4225                        /*
4226                         * First, process the inode as if it was deleted.
4227                         */
4228                        sctx->cur_inode_gen = right_gen;
4229                        sctx->cur_inode_new = 0;
4230                        sctx->cur_inode_deleted = 1;
4231                        sctx->cur_inode_size = btrfs_inode_size(
4232                                        sctx->right_path->nodes[0], right_ii);
4233                        sctx->cur_inode_mode = btrfs_inode_mode(
4234                                        sctx->right_path->nodes[0], right_ii);
4235                        ret = process_all_refs(sctx,
4236                                        BTRFS_COMPARE_TREE_DELETED);
4237                        if (ret < 0)
4238                                goto out;
4239
4240                        /*
4241                         * Now process the inode as if it was new.
4242                         */
4243                        sctx->cur_inode_gen = left_gen;
4244                        sctx->cur_inode_new = 1;
4245                        sctx->cur_inode_deleted = 0;
4246                        sctx->cur_inode_size = btrfs_inode_size(
4247                                        sctx->left_path->nodes[0], left_ii);
4248                        sctx->cur_inode_mode = btrfs_inode_mode(
4249                                        sctx->left_path->nodes[0], left_ii);
4250                        ret = send_create_inode_if_needed(sctx);
4251                        if (ret < 0)
4252                                goto out;
4253
4254                        ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
4255                        if (ret < 0)
4256                                goto out;
4257                        /*
4258                         * Advance send_progress now as we did not get into
4259                         * process_recorded_refs_if_needed in the new_gen case.
4260                         */
4261                        sctx->send_progress = sctx->cur_ino + 1;
4262
4263                        /*
4264                         * Now process all extents and xattrs of the inode as if
4265                         * they were all new.
4266                         */
4267                        ret = process_all_extents(sctx);
4268                        if (ret < 0)
4269                                goto out;
4270                        ret = process_all_new_xattrs(sctx);
4271                        if (ret < 0)
4272                                goto out;
4273                } else {
4274                        sctx->cur_inode_gen = left_gen;
4275                        sctx->cur_inode_new = 0;
4276                        sctx->cur_inode_new_gen = 0;
4277                        sctx->cur_inode_deleted = 0;
4278                        sctx->cur_inode_size = btrfs_inode_size(
4279                                        sctx->left_path->nodes[0], left_ii);
4280                        sctx->cur_inode_mode = btrfs_inode_mode(
4281                                        sctx->left_path->nodes[0], left_ii);
4282                }
4283        }
4284
4285out:
4286        return ret;
4287}
4288
4289/*
4290 * We have to process new refs before deleted refs, but compare_trees gives us
4291 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
4292 * first and later process them in process_recorded_refs.
4293 * For the cur_inode_new_gen case, we skip recording completely because
4294 * changed_inode already initiated processing of the refs. The reason for this
4295 * is that in this case, compare_trees actually compares the refs of 2 different
4296 * inodes. To handle this, process_all_refs is used in changed_inode to treat all
4297 * refs of the right tree as deleted and all refs of the left tree as new.
4298 */
4299static int changed_ref(struct send_ctx *sctx,
4300                       enum btrfs_compare_tree_result result)
4301{
4302        int ret = 0;
4303
4304        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4305
4306        if (!sctx->cur_inode_new_gen &&
4307            sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
4308                if (result == BTRFS_COMPARE_TREE_NEW)
4309                        ret = record_new_ref(sctx);
4310                else if (result == BTRFS_COMPARE_TREE_DELETED)
4311                        ret = record_deleted_ref(sctx);
4312                else if (result == BTRFS_COMPARE_TREE_CHANGED)
4313                        ret = record_changed_ref(sctx);
4314        }
4315
4316        return ret;
4317}
4318
4319/*
4320 * Process new/deleted/changed xattrs. We skip processing in the
4321 * cur_inode_new_gen case because changed_inode already initiated processing
4322 * of the xattrs. The reason is the same as in changed_ref.
4323 */
4324static int changed_xattr(struct send_ctx *sctx,
4325                         enum btrfs_compare_tree_result result)
4326{
4327        int ret = 0;
4328
4329        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4330
4331        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4332                if (result == BTRFS_COMPARE_TREE_NEW)
4333                        ret = process_new_xattr(sctx);
4334                else if (result == BTRFS_COMPARE_TREE_DELETED)
4335                        ret = process_deleted_xattr(sctx);
4336                else if (result == BTRFS_COMPARE_TREE_CHANGED)
4337                        ret = process_changed_xattr(sctx);
4338        }
4339
4340        return ret;
4341}
4342
4343/*
4344 * Process new/deleted/changed extents. We skip processing in the
4345 * cur_inode_new_gen case because changed_inode already initiated processing
4346 * of the extents. The reason is the same as in changed_ref.
4347 */
4348static int changed_extent(struct send_ctx *sctx,
4349                          enum btrfs_compare_tree_result result)
4350{
4351        int ret = 0;
4352
4353        BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4354
4355        if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4356                if (result != BTRFS_COMPARE_TREE_DELETED)
4357                        ret = process_extent(sctx, sctx->left_path,
4358                                        sctx->cmp_key);
4359        }
4360
4361        return ret;
4362}
4363
4364/*
4365 * Updates compare-related fields in sctx and simply forwards to the actual
4366 * changed_xxx functions.
4367 */
4368static int changed_cb(struct btrfs_root *left_root,
4369                      struct btrfs_root *right_root,
4370                      struct btrfs_path *left_path,
4371                      struct btrfs_path *right_path,
4372                      struct btrfs_key *key,
4373                      enum btrfs_compare_tree_result result,
4374                      void *ctx)
4375{
4376        int ret = 0;
4377        struct send_ctx *sctx = ctx;
4378
4379        sctx->left_path = left_path;
4380        sctx->right_path = right_path;
4381        sctx->cmp_key = key;
4382
4383        ret = finish_inode_if_needed(sctx, 0);
4384        if (ret < 0)
4385                goto out;
4386
4387        /* Ignore non-FS objects */
4388        if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
4389            key->objectid == BTRFS_FREE_SPACE_OBJECTID)
4390                goto out;
4391
4392        if (key->type == BTRFS_INODE_ITEM_KEY)
4393                ret = changed_inode(sctx, result);
4394        else if (key->type == BTRFS_INODE_REF_KEY ||
4395                 key->type == BTRFS_INODE_EXTREF_KEY)
4396                ret = changed_ref(sctx, result);
4397        else if (key->type == BTRFS_XATTR_ITEM_KEY)
4398                ret = changed_xattr(sctx, result);
4399        else if (key->type == BTRFS_EXTENT_DATA_KEY)
4400                ret = changed_extent(sctx, result);
4401
4402out:
4403        return ret;
4404}
4405
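/*
 * Full send (no parent snapshot): walk all items of the send root and
 * report each of them as new to changed_cb. A transaction is joined while
 * walking the tree to prevent it from being committed in the meantime.
 */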
4406static int full_send_tree(struct send_ctx *sctx)
4407{
4408        int ret;
4409        struct btrfs_trans_handle *trans = NULL;
4410        struct btrfs_root *send_root = sctx->send_root;
4411        struct btrfs_key key;
4412        struct btrfs_key found_key;
4413        struct btrfs_path *path;
4414        struct extent_buffer *eb;
4415        int slot;
4416        u64 start_ctransid;
4417        u64 ctransid;
4418
4419        path = alloc_path_for_send();
4420        if (!path)
4421                return -ENOMEM;
4422
4423        spin_lock(&send_root->root_item_lock);
4424        start_ctransid = btrfs_root_ctransid(&send_root->root_item);
4425        spin_unlock(&send_root->root_item_lock);
4426
4427        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4428        key.type = BTRFS_INODE_ITEM_KEY;
4429        key.offset = 0;
4430
4431join_trans:
4432        /*
4433         * We need to make sure the transaction does not get committed
4434         * while we do anything on commit roots. Join a transaction to prevent
4435         * this.
4436         */
4437        trans = btrfs_join_transaction(send_root);
4438        if (IS_ERR(trans)) {
4439                ret = PTR_ERR(trans);
4440                trans = NULL;
4441                goto out;
4442        }
4443
4444        /*
4445         * Make sure the tree has not changed after re-joining. We detect this
4446         * by comparing start_ctransid and ctransid. They should always match.
4447         */
4448        spin_lock(&send_root->root_item_lock);
4449        ctransid = btrfs_root_ctransid(&send_root->root_item);
4450        spin_unlock(&send_root->root_item_lock);
4451
4452        if (ctransid != start_ctransid) {
4453                WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
4454                                     "send was modified in between. This is "
4455                                     "probably a bug.\n");
4456                ret = -EIO;
4457                goto out;
4458        }
4459
4460        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
4461        if (ret < 0)
4462                goto out;
4463        if (ret)
4464                goto out_finish;
4465
4466        while (1) {
4467                /*
4468                 * When someone wants to commit while we iterate, end the
4469                 * joined transaction and rejoin.
4470                 */
4471                if (btrfs_should_end_transaction(trans, send_root)) {
4472                        ret = btrfs_end_transaction(trans, send_root);
4473                        trans = NULL;
4474                        if (ret < 0)
4475                                goto out;
4476                        btrfs_release_path(path);
4477                        goto join_trans;
4478                }
4479
4480                eb = path->nodes[0];
4481                slot = path->slots[0];
4482                btrfs_item_key_to_cpu(eb, &found_key, slot);
4483
4484                ret = changed_cb(send_root, NULL, path, NULL,
4485                                &found_key, BTRFS_COMPARE_TREE_NEW, sctx);
4486                if (ret < 0)
4487                        goto out;
4488
4489                key.objectid = found_key.objectid;
4490                key.type = found_key.type;
4491                key.offset = found_key.offset + 1;
4492
4493                ret = btrfs_next_item(send_root, path);
4494                if (ret < 0)
4495                        goto out;
4496                if (ret) {
4497                        ret  = 0;
4498                        break;
4499                }
4500        }
4501
4502out_finish:
4503        ret = finish_inode_if_needed(sctx, 1);
4504
4505out:
4506        btrfs_free_path(path);
4507        if (trans) {
4508                if (!ret)
4509                        ret = btrfs_end_transaction(trans, send_root);
4510                else
4511                        btrfs_end_transaction(trans, send_root);
4512        }
4513        return ret;
4514}
4515
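/*
 * Send one subvolume: emit the stream header (unless omitted), send the
 * subvol begin command and then either compare the send root against the
 * parent root or do a full send of the whole tree.
 */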
4516static int send_subvol(struct send_ctx *sctx)
4517{
4518        int ret;
4519
4520        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
4521                ret = send_header(sctx);
4522                if (ret < 0)
4523                        goto out;
4524        }
4525
4526        ret = send_subvol_begin(sctx);
4527        if (ret < 0)
4528                goto out;
4529
4530        if (sctx->parent_root) {
4531                ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
4532                                changed_cb, sctx);
4533                if (ret < 0)
4534                        goto out;
4535                ret = finish_inode_if_needed(sctx, 1);
4536                if (ret < 0)
4537                        goto out;
4538        } else {
4539                ret = full_send_tree(sctx);
4540                if (ret < 0)
4541                        goto out;
4542        }
4543
4544out:
4545        if (!ret)
4546                ret = close_cur_inode_file(sctx);
4547        else
4548                close_cur_inode_file(sctx);
4549
4550        free_recorded_refs(sctx);
4551        return ret;
4552}
4553
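/*
 * Entry point of the send ioctl. Copies and validates the user supplied
 * arguments, sets up the send context, resolves the parent and clone roots
 * and then produces the send stream.
 */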
4554long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4555{
4556        int ret = 0;
4557        struct btrfs_root *send_root;
4558        struct btrfs_root *clone_root;
4559        struct btrfs_fs_info *fs_info;
4560        struct btrfs_ioctl_send_args *arg = NULL;
4561        struct btrfs_key key;
4562        struct send_ctx *sctx = NULL;
4563        u32 i;
4564        u64 *clone_sources_tmp = NULL;
4565
4566        if (!capable(CAP_SYS_ADMIN))
4567                return -EPERM;
4568
4569        send_root = BTRFS_I(file_inode(mnt_file))->root;
4570        fs_info = send_root->fs_info;
4571
4572        /*
4573         * This is done when we look up the root; it should already be complete
4574         * by the time we get here.
4575         */
4576        WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
4577
4578        /*
4579         * If we just created this root we need to make sure that the orphan
4580         * cleanup has been done and committed, since we search the commit root.
4581         * So compare its commit root transid with our otransid and, if they
4582         * match, commit the transaction to make sure everything is updated.
4583         */
4584        down_read(&send_root->fs_info->extent_commit_sem);
4585        if (btrfs_header_generation(send_root->commit_root) ==
4586            btrfs_root_otransid(&send_root->root_item)) {
4587                struct btrfs_trans_handle *trans;
4588
4589                up_read(&send_root->fs_info->extent_commit_sem);
4590
4591                trans = btrfs_attach_transaction_barrier(send_root);
4592                if (IS_ERR(trans)) {
4593                        if (PTR_ERR(trans) != -ENOENT) {
4594                                ret = PTR_ERR(trans);
4595                                goto out;
4596                        }
4597                        /* ENOENT means there's no transaction */
4598                } else {
4599                        ret = btrfs_commit_transaction(trans, send_root);
4600                        if (ret)
4601                                goto out;
4602                }
4603        } else {
4604                up_read(&send_root->fs_info->extent_commit_sem);
4605        }
4606
4607        arg = memdup_user(arg_, sizeof(*arg));
4608        if (IS_ERR(arg)) {
4609                ret = PTR_ERR(arg);
4610                arg = NULL;
4611                goto out;
4612        }
4613
4614        if (!access_ok(VERIFY_READ, arg->clone_sources,
4615                        sizeof(*arg->clone_sources) *
4616                        arg->clone_sources_count)) {
4617                ret = -EFAULT;
4618                goto out;
4619        }
4620
4621        if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
4622                ret = -EINVAL;
4623                goto out;
4624        }
4625
4626        sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4627        if (!sctx) {
4628                ret = -ENOMEM;
4629                goto out;
4630        }
4631
4632        INIT_LIST_HEAD(&sctx->new_refs);
4633        INIT_LIST_HEAD(&sctx->deleted_refs);
4634        INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4635        INIT_LIST_HEAD(&sctx->name_cache_list);
4636
4637        sctx->flags = arg->flags;
4638
4639        sctx->send_filp = fget(arg->send_fd);
4640        if (!sctx->send_filp) {
4641                ret = -EBADF;
4642                goto out;
4643        }
4644
4645        sctx->mnt = mnt_file->f_path.mnt;
4646
4647        sctx->send_root = send_root;
4648        sctx->clone_roots_cnt = arg->clone_sources_count;
4649
4650        sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
4651        sctx->send_buf = vmalloc(sctx->send_max_size);
4652        if (!sctx->send_buf) {
4653                ret = -ENOMEM;
4654                goto out;
4655        }
4656
4657        sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
4658        if (!sctx->read_buf) {
4659                ret = -ENOMEM;
4660                goto out;
4661        }
4662
4663        sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4664                        (arg->clone_sources_count + 1));
4665        if (!sctx->clone_roots) {
4666                ret = -ENOMEM;
4667                goto out;
4668        }
4669
4670        if (arg->clone_sources_count) {
4671                clone_sources_tmp = vmalloc(arg->clone_sources_count *
4672                                sizeof(*arg->clone_sources));
4673                if (!clone_sources_tmp) {
4674                        ret = -ENOMEM;
4675                        goto out;
4676                }
4677
4678                ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
4679                                arg->clone_sources_count *
4680                                sizeof(*arg->clone_sources));
4681                if (ret) {
4682                        ret = -EFAULT;
4683                        goto out;
4684                }
4685
4686                for (i = 0; i < arg->clone_sources_count; i++) {
4687                        key.objectid = clone_sources_tmp[i];
4688                        key.type = BTRFS_ROOT_ITEM_KEY;
4689                        key.offset = (u64)-1;
4690                        clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4691                        if (IS_ERR(clone_root)) {
4692                                ret = PTR_ERR(clone_root);
4693                                goto out;
4694                        }
4695                        sctx->clone_roots[i].root = clone_root;
4696                }
4697                vfree(clone_sources_tmp);
4698                clone_sources_tmp = NULL;
4699        }
4700
4701        if (arg->parent_root) {
4702                key.objectid = arg->parent_root;
4703                key.type = BTRFS_ROOT_ITEM_KEY;
4704                key.offset = (u64)-1;
4705                sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4706                if (IS_ERR(sctx->parent_root)) {
4707                        ret = PTR_ERR(sctx->parent_root);
4708                        goto out;
4709                }
4710        }
4711
4712        /*
4713         * Clones from send_root are allowed, but only if the clone source
4714         * is behind the current send position. This is checked while searching
4715         * for possible clone sources.
4716         */
4717        sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
4718
4719        /* We do a bsearch later */
4720        sort(sctx->clone_roots, sctx->clone_roots_cnt,
4721                        sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4722                        NULL);
4723
4724        ret = send_subvol(sctx);
4725        if (ret < 0)
4726                goto out;
4727
4728        if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
4729                ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4730                if (ret < 0)
4731                        goto out;
4732                ret = send_cmd(sctx);
4733                if (ret < 0)
4734                        goto out;
4735        }
4736
4737out:
4738        kfree(arg);
4739        vfree(clone_sources_tmp);
4740
4741        if (sctx) {
4742                if (sctx->send_filp)
4743                        fput(sctx->send_filp);
4744
4745                vfree(sctx->clone_roots);
4746                vfree(sctx->send_buf);
4747                vfree(sctx->read_buf);
4748
4749                name_cache_free(sctx);
4750
4751                kfree(sctx);
4752        }
4753
4754        return ret;
4755}
4756