qemu/hw/9pfs/9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include <glib/gprintf.h>
  16#include "hw/virtio/virtio.h"
  17#include "qapi/error.h"
  18#include "qemu/error-report.h"
  19#include "qemu/iov.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/sockets.h"
  22#include "virtio-9p.h"
  23#include "fsdev/qemu-fsdev.h"
  24#include "9p-xattr.h"
  25#include "coth.h"
  26#include "trace.h"
  27#include "migration/blocker.h"
  28#include "qemu/xxhash.h"
  29#include <math.h>
  30#include <linux/limits.h>
  31
/*
 * File descriptor accounting used by the fd reclaim logic.
 *
 * open_fd_rc is the number of fids v9fs_reclaim_fd() queues for closing
 * before it stops scanning the fid list.
 * NOTE(review): open_fd_hw and total_open_fd are not read in this part of
 * the file; presumably they are the high-water mark and the current count
 * of open descriptors -- confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
static int open_fd_rc;
  35
/*
 * Open mode bits of the legacy open request, as consumed by
 * omode_to_uflags(): the two low bits select the access mode
 * (Oread/Owrite/Ordwr/Oexec), the remaining values are flag bits that are
 * OR-ed on top of it.
 */
enum {
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
  47
  48static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  49{
  50    ssize_t ret;
  51    va_list ap;
  52
  53    va_start(ap, fmt);
  54    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
  55    va_end(ap);
  56
  57    return ret;
  58}
  59
  60static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  61{
  62    ssize_t ret;
  63    va_list ap;
  64
  65    va_start(ap, fmt);
  66    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
  67    va_end(ap);
  68
  69    return ret;
  70}
  71
  72static int omode_to_uflags(int8_t mode)
  73{
  74    int ret = 0;
  75
  76    switch (mode & 3) {
  77    case Oread:
  78        ret = O_RDONLY;
  79        break;
  80    case Ordwr:
  81        ret = O_RDWR;
  82        break;
  83    case Owrite:
  84        ret = O_WRONLY;
  85        break;
  86    case Oexec:
  87        ret = O_RDONLY;
  88        break;
  89    }
  90
  91    if (mode & Otrunc) {
  92        ret |= O_TRUNC;
  93    }
  94
  95    if (mode & Oappend) {
  96        ret |= O_APPEND;
  97    }
  98
  99    if (mode & Oexcl) {
 100        ret |= O_EXCL;
 101    }
 102
 103    return ret;
 104}
 105
/* One row of the flag translation table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;  /* flag bit as sent by the client (P9_DOTL_*) */
    int open_flag;  /* corresponding host open(2) flag */
} DotlOpenflagMap;
 110
 111static int dotl_to_open_flags(int flags)
 112{
 113    int i;
 114    /*
 115     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
 116     * and P9_DOTL_NOACCESS
 117     */
 118    int oflags = flags & O_ACCMODE;
 119
 120    DotlOpenflagMap dotl_oflag_map[] = {
 121        { P9_DOTL_CREATE, O_CREAT },
 122        { P9_DOTL_EXCL, O_EXCL },
 123        { P9_DOTL_NOCTTY , O_NOCTTY },
 124        { P9_DOTL_TRUNC, O_TRUNC },
 125        { P9_DOTL_APPEND, O_APPEND },
 126        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 127        { P9_DOTL_DSYNC, O_DSYNC },
 128        { P9_DOTL_FASYNC, FASYNC },
 129        { P9_DOTL_DIRECT, O_DIRECT },
 130        { P9_DOTL_LARGEFILE, O_LARGEFILE },
 131        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 132        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 133        { P9_DOTL_NOATIME, O_NOATIME },
 134        { P9_DOTL_SYNC, O_SYNC },
 135    };
 136
 137    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 138        if (flags & dotl_oflag_map[i].dotl_flag) {
 139            oflags |= dotl_oflag_map[i].open_flag;
 140        }
 141    }
 142
 143    return oflags;
 144}
 145
 146void cred_init(FsCred *credp)
 147{
 148    credp->fc_uid = -1;
 149    credp->fc_gid = -1;
 150    credp->fc_mode = -1;
 151    credp->fc_rdev = -1;
 152}
 153
 154static int get_dotl_openflags(V9fsState *s, int oflags)
 155{
 156    int flags;
 157    /*
 158     * Filter the client open flags
 159     */
 160    flags = dotl_to_open_flags(oflags);
 161    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 162    /*
 163     * Ignore direct disk access hint until the server supports it.
 164     */
 165    flags &= ~O_DIRECT;
 166    return flags;
 167}
 168
 169void v9fs_path_init(V9fsPath *path)
 170{
 171    path->data = NULL;
 172    path->size = 0;
 173}
 174
 175void v9fs_path_free(V9fsPath *path)
 176{
 177    g_free(path->data);
 178    path->data = NULL;
 179    path->size = 0;
 180}
 181
 182
 183void GCC_FMT_ATTR(2, 3)
 184v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 185{
 186    va_list ap;
 187
 188    v9fs_path_free(path);
 189
 190    va_start(ap, fmt);
 191    /* Bump the size for including terminating NULL */
 192    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
 193    va_end(ap);
 194}
 195
 196void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
 197{
 198    v9fs_path_free(dst);
 199    dst->size = src->size;
 200    dst->data = g_memdup(src->data, src->size);
 201}
 202
 203int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 204                      const char *name, V9fsPath *path)
 205{
 206    int err;
 207    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 208    if (err < 0) {
 209        err = -errno;
 210    }
 211    return err;
 212}
 213
 214/*
 215 * Return TRUE if s1 is an ancestor of s2.
 216 *
 217 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 218 * As a special case, We treat s1 as ancestor of s2 if they are same!
 219 */
 220static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 221{
 222    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 223        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 224            return 1;
 225        }
 226    }
 227    return 0;
 228}
 229
 230static size_t v9fs_string_size(V9fsString *str)
 231{
 232    return str->size;
 233}
 234
 235/*
 236 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 237static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 238{
 239    int err = 1;
 240    if (f->fid_type == P9_FID_FILE) {
 241        if (f->fs.fd == -1) {
 242            do {
 243                err = v9fs_co_open(pdu, f, f->open_flags);
 244            } while (err == -EINTR && !pdu->cancelled);
 245        }
 246    } else if (f->fid_type == P9_FID_DIR) {
 247        if (f->fs.dir.stream == NULL) {
 248            do {
 249                err = v9fs_co_opendir(pdu, f);
 250            } while (err == -EINTR && !pdu->cancelled);
 251        }
 252    }
 253    return err;
 254}
 255
 256static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
 257{
 258    int err;
 259    V9fsFidState *f;
 260    V9fsState *s = pdu->s;
 261
 262    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
 263        BUG_ON(f->clunked);
 264        if (f->fid == fid) {
 265            /*
 266             * Update the fid ref upfront so that
 267             * we don't get reclaimed when we yield
 268             * in open later.
 269             */
 270            f->ref++;
 271            /*
 272             * check whether we need to reopen the
 273             * file. We might have closed the fd
 274             * while trying to free up some file
 275             * descriptors.
 276             */
 277            err = v9fs_reopen_fid(pdu, f);
 278            if (err < 0) {
 279                f->ref--;
 280                return NULL;
 281            }
 282            /*
 283             * Mark the fid as referenced so that the LRU
 284             * reclaim won't close the file descriptor
 285             */
 286            f->flags |= FID_REFERENCED;
 287            return f;
 288        }
 289    }
 290    return NULL;
 291}
 292
 293static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 294{
 295    V9fsFidState *f;
 296
 297    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
 298        /* If fid is already there return NULL */
 299        BUG_ON(f->clunked);
 300        if (f->fid == fid) {
 301            return NULL;
 302        }
 303    }
 304    f = g_malloc0(sizeof(V9fsFidState));
 305    f->fid = fid;
 306    f->fid_type = P9_FID_NONE;
 307    f->ref = 1;
 308    /*
 309     * Mark the fid as referenced so that the LRU
 310     * reclaim won't close the file descriptor
 311     */
 312    f->flags |= FID_REFERENCED;
 313    QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next);
 314
 315    v9fs_readdir_init(s->proto_version, &f->fs.dir);
 316    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
 317
 318    return f;
 319}
 320
 321static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 322{
 323    int retval = 0;
 324
 325    if (fidp->fs.xattr.xattrwalk_fid) {
 326        /* getxattr/listxattr fid */
 327        goto free_value;
 328    }
 329    /*
 330     * if this is fid for setxattr. clunk should
 331     * result in setxattr localcall
 332     */
 333    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 334        /* clunk after partial write */
 335        retval = -EINVAL;
 336        goto free_out;
 337    }
 338    if (fidp->fs.xattr.len) {
 339        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 340                                   fidp->fs.xattr.value,
 341                                   fidp->fs.xattr.len,
 342                                   fidp->fs.xattr.flags);
 343    } else {
 344        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 345    }
 346free_out:
 347    v9fs_string_free(&fidp->fs.xattr.name);
 348free_value:
 349    g_free(fidp->fs.xattr.value);
 350    return retval;
 351}
 352
 353static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 354{
 355    int retval = 0;
 356
 357    if (fidp->fid_type == P9_FID_FILE) {
 358        /* If we reclaimed the fd no need to close */
 359        if (fidp->fs.fd != -1) {
 360            retval = v9fs_co_close(pdu, &fidp->fs);
 361        }
 362    } else if (fidp->fid_type == P9_FID_DIR) {
 363        if (fidp->fs.dir.stream != NULL) {
 364            retval = v9fs_co_closedir(pdu, &fidp->fs);
 365        }
 366    } else if (fidp->fid_type == P9_FID_XATTR) {
 367        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 368    }
 369    v9fs_path_free(&fidp->path);
 370    g_free(fidp);
 371    return retval;
 372}
 373
 374static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 375{
 376    BUG_ON(!fidp->ref);
 377    fidp->ref--;
 378    /*
 379     * Don't free the fid if it is in reclaim list
 380     */
 381    if (!fidp->ref && fidp->clunked) {
 382        if (fidp->fid == pdu->s->root_fid) {
 383            /*
 384             * if the clunked fid is root fid then we
 385             * have unmounted the fs on the client side.
 386             * delete the migration blocker. Ideally, this
 387             * should be hooked to transport close notification
 388             */
 389            if (pdu->s->migration_blocker) {
 390                migrate_del_blocker(pdu->s->migration_blocker);
 391                error_free(pdu->s->migration_blocker);
 392                pdu->s->migration_blocker = NULL;
 393            }
 394        }
 395        return free_fid(pdu, fidp);
 396    }
 397    return 0;
 398}
 399
 400static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 401{
 402    V9fsFidState *fidp;
 403
 404    QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
 405        if (fidp->fid == fid) {
 406            QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
 407            fidp->clunked = true;
 408            return fidp;
 409        }
 410    }
 411    return NULL;
 412}
 413
/*
 * Close host file descriptors / directory streams of idle fids.
 *
 * Works in two passes: first the fid list is scanned and up to open_fd_rc
 * reclaimable fids are moved onto a private reclaim list (their descriptor
 * is parked in fs_reclaim and the live one is invalidated), then the parked
 * descriptors are closed. Fids stay valid; get_fid() re-opens them on
 * demand through v9fs_reopen_fid().
 */
void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
{
    int reclaim_count = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *f;
    QSLIST_HEAD(, V9fsFidState) reclaim_list =
        QSLIST_HEAD_INITIALIZER(reclaim_list);

    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
        /*
         * Unlinked fids cannot be reclaimed. Check
         * for them and skip them. Also skip fids
         * currently being operated on.
         */
        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
            continue;
        }
        /*
         * If it is a recently referenced fid
         * we leave the fid untouched and clear the
         * reference bit. We come back to it later
         * in the next iteration. (a simple LRU without
         * moving list elements around)
         */
        if (f->flags & FID_REFERENCED) {
            f->flags &= ~FID_REFERENCED;
            continue;
        }
        /*
         * Add fids to reclaim list.
         */
        if (f->fid_type == P9_FID_FILE) {
            if (f->fs.fd != -1) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                /* park the fd; -1 marks the fid as "needs re-open" */
                f->fs_reclaim.fd = f->fs.fd;
                f->fs.fd = -1;
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
            if (f->fs.dir.stream != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                /* park the stream; NULL marks the fid as "needs re-open" */
                f->fs_reclaim.dir.stream = f->fs.dir.stream;
                f->fs.dir.stream = NULL;
                reclaim_count++;
            }
        }
        /* enough candidates collected for this round */
        if (reclaim_count >= open_fd_rc) {
            break;
        }
    }
    /*
     * Now close the fids in the reclaim list. Free them if they
     * are already clunked.
     */
    while (!QSLIST_EMPTY(&reclaim_list)) {
        f = QSLIST_FIRST(&reclaim_list);
        QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
        /* results of the close calls are deliberately not checked here */
        if (f->fid_type == P9_FID_FILE) {
            v9fs_co_close(pdu, &f->fs_reclaim);
        } else if (f->fid_type == P9_FID_DIR) {
            v9fs_co_closedir(pdu, &f->fs_reclaim);
        }
        /*
         * Now drop the fid reference, free it
         * if clunked.
         */
        put_fid(pdu, f);
    }
}
 493
/*
 * Mark every fid whose path equals @path as non-reclaimable and make sure
 * its file/directory is open again.
 *
 * Returns 0 on success, or the negative error from v9fs_reopen_fid() if
 * re-opening one of the matching fids failed (the walk stops there).
 */
static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
{
    int err;
    V9fsState *s = pdu->s;
    V9fsFidState *fidp, *fidp_next;

    fidp = QSIMPLEQ_FIRST(&s->fid_list);
    if (!fidp) {
        /* empty fid list, nothing to mark */
        return 0;
    }

    /*
     * v9fs_reopen_fid() can yield : a reference on the fid must be held
     * to ensure its pointer remains valid and we can safely pass it to
     * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so
     * we must keep a reference on the next fid as well. So the logic here
     * is to get a reference on a fid and only put it back during the next
     * iteration after we could get a reference on the next fid. Start with
     * the first one.
     */
    for (fidp->ref++; fidp; fidp = fidp_next) {
        /* exact path match: same size and same bytes */
        if (fidp->path.size == path->size &&
            !memcmp(fidp->path.data, path->data, path->size)) {
            /* Mark the fid non reclaimable. */
            fidp->flags |= FID_NON_RECLAIMABLE;

            /* reopen the file/dir if already closed */
            err = v9fs_reopen_fid(pdu, fidp);
            if (err < 0) {
                /* drop the reference taken for this iteration */
                put_fid(pdu, fidp);
                return err;
            }
        }

        fidp_next = QSIMPLEQ_NEXT(fidp, next);

        if (fidp_next) {
            /*
             * Ensure the next fid survives a potential clunk request during
             * put_fid() below and v9fs_reopen_fid() in the next iteration.
             */
            fidp_next->ref++;
        }

        /* We're done with this fid */
        put_fid(pdu, fidp);
    }

    return 0;
}
 544
 545static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
 546{
 547    V9fsState *s = pdu->s;
 548    V9fsFidState *fidp;
 549
 550    /* Free all fids */
 551    while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
 552        /* Get fid */
 553        fidp = QSIMPLEQ_FIRST(&s->fid_list);
 554        fidp->ref++;
 555
 556        /* Clunk fid */
 557        QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
 558        fidp->clunked = true;
 559
 560        put_fid(pdu, fidp);
 561    }
 562}
 563
/* Bits stored in the type field of a qid, see stat_to_qid(). */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Mode bits of the 9P stat structure.
 * NOTE(review): their users are outside this part of the file; the names
 * suggest file type and special permission bits -- confirm against the
 * stat conversion helpers.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of all mode bits above that denote a file type. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
 588
 589/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
 590static inline uint8_t mirror8bit(uint8_t byte)
 591{
 592    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
 593}
 594
 595/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
 596static inline uint64_t mirror64bit(uint64_t value)
 597{
 598    return ((uint64_t)mirror8bit(value         & 0xff) << 56) |
 599           ((uint64_t)mirror8bit((value >> 8)  & 0xff) << 48) |
 600           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
 601           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
 602           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
 603           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
 604           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8)  |
 605           ((uint64_t)mirror8bit((value >> 56) & 0xff));
 606}
 607
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index), however at the
 * price of a higher maximum bit count of generated affixes (at highest
 * index). Likewise, increasing this parameter yields a smaller maximum bit
 * count at the price of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter, the value of
 * k should not change as long as the guest is still running! That would
 * cause completely different inode numbers to be generated on the guest.
 */
#define EXP_GOLOMB_K    0
 627
 628/**
 629 * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
 630 *
 631 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
 632 * with growing length and with the mathematical property of being
 633 * "prefix-free". The latter means the generated prefixes can be prepended
 634 * in front of arbitrary numbers and the resulting concatenated numbers are
 635 * guaranteed to be always unique.
 636 *
 637 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 638 * sense that lowest allowed index (@param n) starts with 1, not with zero.
 639 *
 640 * @param n - natural number (or index) of the prefix to be generated
 641 *            (1, 2, 3, ...)
 642 * @param k - parameter k of Exp. Golomb algorithm to be used
 643 *            (see comment on EXP_GOLOMB_K macro for details about k)
 644 */
 645static VariLenAffix expGolombEncode(uint64_t n, int k)
 646{
 647    const uint64_t value = n + (1 << k) - 1;
 648    const int bits = (int) log2(value) + 1;
 649    return (VariLenAffix) {
 650        .type = AffixType_Prefix,
 651        .value = value,
 652        .bits = bits + MAX((bits - 1 - k), 0)
 653    };
 654}
 655
 656/**
 657 * @brief Converts a suffix into a prefix, or a prefix into a suffix.
 658 *
 659 * Simply mirror all bits of the affix value, for the purpose to preserve
 660 * respectively the mathematical "prefix-free" or "suffix-free" property
 661 * after the conversion.
 662 *
 663 * If a passed prefix is suitable to create unique numbers, then the
 664 * returned suffix is suitable to create unique numbers as well (and vice
 665 * versa).
 666 */
 667static VariLenAffix invertAffix(const VariLenAffix *affix)
 668{
 669    return (VariLenAffix) {
 670        .type =
 671            (affix->type == AffixType_Suffix) ?
 672                AffixType_Prefix : AffixType_Suffix,
 673        .value =
 674            mirror64bit(affix->value) >>
 675            ((sizeof(affix->value) * 8) - affix->bits),
 676        .bits = affix->bits
 677    };
 678}
 679
 680/**
 681 * @brief Generates suffix numbers with "suffix-free" property.
 682 *
 683 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 684 *
 685 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 686 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 687 * which are still suitable for generating unique numbers.
 688 *
 689 * @param n - natural number (or index) of the suffix to be generated
 690 *            (1, 2, 3, ...)
 691 */
 692static VariLenAffix affixForIndex(uint64_t index)
 693{
 694    VariLenAffix prefix;
 695    prefix = expGolombEncode(index, EXP_GOLOMB_K);
 696    return invertAffix(&prefix); /* convert prefix to suffix */
 697}
 698
 699/* creative abuse of tb_hash_func7, which is based on xxhash */
 700static uint32_t qpp_hash(QppEntry e)
 701{
 702    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
 703}
 704
 705static uint32_t qpf_hash(QpfEntry e)
 706{
 707    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
 708}
 709
 710static bool qpd_cmp_func(const void *obj, const void *userp)
 711{
 712    const QpdEntry *e1 = obj, *e2 = userp;
 713    return e1->dev == e2->dev;
 714}
 715
 716static bool qpp_cmp_func(const void *obj, const void *userp)
 717{
 718    const QppEntry *e1 = obj, *e2 = userp;
 719    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
 720}
 721
 722static bool qpf_cmp_func(const void *obj, const void *userp)
 723{
 724    const QpfEntry *e1 = obj, *e2 = userp;
 725    return e1->dev == e2->dev && e1->ino == e2->ino;
 726}
 727
 728static void qp_table_remove(void *p, uint32_t h, void *up)
 729{
 730    g_free(p);
 731}
 732
 733static void qp_table_destroy(struct qht *ht)
 734{
 735    if (!ht || !ht->map) {
 736        return;
 737    }
 738    qht_iter(ht, qp_table_remove, NULL);
 739    qht_destroy(ht);
 740}
 741
 742static void qpd_table_init(struct qht *ht)
 743{
 744    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 745}
 746
 747static void qpp_table_init(struct qht *ht)
 748{
 749    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 750}
 751
 752static void qpf_table_init(struct qht *ht)
 753{
 754    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
 755}
 756
 757/*
 758 * Returns how many (high end) bits of inode numbers of the passed fs
 759 * device shall be used (in combination with the device number) to
 760 * generate hash values for qpp_table entries.
 761 *
 762 * This function is required if variable length suffixes are used for inode
 763 * number mapping on guest level. Since a device may end up having multiple
 764 * entries in qpp_table, each entry most probably with a different suffix
 765 * length, we thus need this function in conjunction with qpd_table to
 766 * "agree" about a fix amount of bits (per device) to be always used for
 767 * generating hash values for the purpose of accessing qpp_table in order
 768 * get consistent behaviour when accessing qpp_table.
 769 */
 770static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
 771{
 772    QpdEntry lookup = {
 773        .dev = dev
 774    }, *val;
 775    uint32_t hash = dev;
 776    VariLenAffix affix;
 777
 778    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
 779    if (!val) {
 780        val = g_malloc0(sizeof(QpdEntry));
 781        *val = lookup;
 782        affix = affixForIndex(pdu->s->qp_affix_next);
 783        val->prefix_bits = affix.bits;
 784        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
 785        pdu->s->qp_ndevices++;
 786    }
 787    return val->prefix_bits;
 788}
 789
 790/**
 791 * @brief Slow / full mapping host inode nr -> guest inode nr.
 792 *
 793 * This function performs a slower and much more costly remapping of an
 794 * original file inode number on host to an appropriate different inode
 795 * number on guest. For every (dev, inode) combination on host a new
 796 * sequential number is generated, cached and exposed as inode number on
 797 * guest.
 798 *
 799 * This is just a "last resort" fallback solution if the much faster/cheaper
 800 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 801 * expected ever to be used at all though.
 802 *
 803 * @see qid_path_suffixmap() for details
 804 *
 805 */
 806static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
 807                            uint64_t *path)
 808{
 809    QpfEntry lookup = {
 810        .dev = stbuf->st_dev,
 811        .ino = stbuf->st_ino
 812    }, *val;
 813    uint32_t hash = qpf_hash(lookup);
 814    VariLenAffix affix;
 815
 816    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
 817
 818    if (!val) {
 819        if (pdu->s->qp_fullpath_next == 0) {
 820            /* no more files can be mapped :'( */
 821            error_report_once(
 822                "9p: No more prefixes available for remapping inodes from "
 823                "host to guest."
 824            );
 825            return -ENFILE;
 826        }
 827
 828        val = g_malloc0(sizeof(QppEntry));
 829        *val = lookup;
 830
 831        /* new unique inode and device combo */
 832        affix = affixForIndex(
 833            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
 834        );
 835        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
 836        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
 837        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
 838    }
 839
 840    *path = val->path;
 841    return 0;
 842}
 843
 844/**
 845 * @brief Quick mapping host inode nr -> guest inode nr.
 846 *
 847 * This function performs quick remapping of an original file inode number
 848 * on host to an appropriate different inode number on guest. This remapping
 849 * of inodes is required to avoid inode nr collisions on guest which would
 850 * happen if the 9p export contains more than 1 exported file system (or
 851 * more than 1 file system data set), because unlike on host level where the
 852 * files would have different device nrs, all files exported by 9p would
 853 * share the same device nr on guest (the device nr of the virtual 9p device
 854 * that is).
 855 *
 856 * Inode remapping is performed by chopping off high end bits of the original
 857 * inode number from host, shifting the result upwards and then assigning a
 858 * generated suffix number for the low end bits, where the same suffix number
 859 * will be shared by all inodes with the same device id AND the same high end
 860 * bits that have been chopped off. That approach utilizes the fact that inode
 861 * numbers very likely share the same high end bits (i.e. due to their common
 862 * sequential generation by file systems) and hence we only have to generate
 863 * and track a very limited amount of suffixes in practice due to that.
 864 *
 865 * We generate variable size suffixes for that purpose. The 1st generated
 866 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 867 * the original inode number. The subsequent suffixes being generated will
 868 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 869 * generated will have 3 bits and hence we have to chop off 3 bits from their
 870 * original inodes, and so on. That approach of using variable length suffixes
 871 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 872 * limited amount of devices are shared by the same export (e.g. typically
 873 * less than 2 dozen devices per 9p export), so in practice we need to chop
 874 * off less bits than with fixed size prefixes and yet are flexible to add
 875 * new devices at runtime below host's export directory at any time without
 876 * having to reboot guest nor requiring to reconfigure guest for that. And due
 877 * to the very limited amount of original high end bits that we chop off that
 878 * way, the total amount of suffixes we need to generate is less than by using
 879 * fixed size prefixes and hence it also improves performance of the inode
 880 * remapping algorithm, and finally has the nice side effect that the inode
 881 * numbers on guest will be much smaller & human friendly. ;-)
 882 */
 883static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
 884                              uint64_t *path)
 885{
 886    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
 887    QppEntry lookup = {
 888        .dev = stbuf->st_dev,
 889        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
 890    }, *val;
 891    uint32_t hash = qpp_hash(lookup);
 892
 893    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
 894
 895    if (!val) {
 896        if (pdu->s->qp_affix_next == 0) {
 897            /* we ran out of affixes */
 898            warn_report_once(
 899                "9p: Potential degraded performance of inode remapping"
 900            );
 901            return -ENFILE;
 902        }
 903
 904        val = g_malloc0(sizeof(QppEntry));
 905        *val = lookup;
 906
 907        /* new unique inode affix and device combo */
 908        val->qp_affix_index = pdu->s->qp_affix_next++;
 909        val->qp_affix = affixForIndex(val->qp_affix_index);
 910        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
 911    }
 912    /* assuming generated affix to be suffix type, not prefix */
 913    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
 914    return 0;
 915}
 916
 917static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
 918{
 919    int err;
 920    size_t size;
 921
 922    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
 923        /* map inode+device to qid path (fast path) */
 924        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
 925        if (err == -ENFILE) {
 926            /* fast path didn't work, fall back to full map */
 927            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
 928        }
 929        if (err) {
 930            return err;
 931        }
 932    } else {
 933        if (pdu->s->dev_id != stbuf->st_dev) {
 934            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
 935                error_report_once(
 936                    "9p: Multiple devices detected in same VirtFS export. "
 937                    "Access of guest to additional devices is (partly) "
 938                    "denied due to virtfs option 'multidevs=forbid' being "
 939                    "effective."
 940                );
 941                return -ENODEV;
 942            } else {
 943                warn_report_once(
 944                    "9p: Multiple devices detected in same VirtFS export, "
 945                    "which might lead to file ID collisions and severe "
 946                    "misbehaviours on guest! You should either use a "
 947                    "separate export for each device shared from host or "
 948                    "use virtfs option 'multidevs=remap'!"
 949                );
 950            }
 951        }
 952        memset(&qidp->path, 0, sizeof(qidp->path));
 953        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 954        memcpy(&qidp->path, &stbuf->st_ino, size);
 955    }
 956
 957    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 958    qidp->type = 0;
 959    if (S_ISDIR(stbuf->st_mode)) {
 960        qidp->type |= P9_QID_TYPE_DIR;
 961    }
 962    if (S_ISLNK(stbuf->st_mode)) {
 963        qidp->type |= P9_QID_TYPE_SYMLINK;
 964    }
 965
 966    return 0;
 967}
 968
 969static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
 970                                   V9fsQID *qidp)
 971{
 972    struct stat stbuf;
 973    int err;
 974
 975    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 976    if (err < 0) {
 977        return err;
 978    }
 979    err = stat_to_qid(pdu, &stbuf, qidp);
 980    if (err < 0) {
 981        return err;
 982    }
 983    return 0;
 984}
 985
 986V9fsPDU *pdu_alloc(V9fsState *s)
 987{
 988    V9fsPDU *pdu = NULL;
 989
 990    if (!QLIST_EMPTY(&s->free_list)) {
 991        pdu = QLIST_FIRST(&s->free_list);
 992        QLIST_REMOVE(pdu, next);
 993        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
 994    }
 995    return pdu;
 996}
 997
 998void pdu_free(V9fsPDU *pdu)
 999{
1000    V9fsState *s = pdu->s;
1001
1002    g_assert(!pdu->cancelled);
1003    QLIST_REMOVE(pdu, next);
1004    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1005}
1006
/*
 * Finish a request: build the reply header (or a complete error reply when
 * len is negative), hand the PDU to the transport and release it.
 *
 * len: total size of the already marshalled reply on success, or a negative
 *      errno value on failure.
 */
static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
{
    int8_t id = pdu->id + 1; /* Response */
    V9fsState *s = pdu->s;
    int ret;

    /*
     * The 9p spec requires that successfully cancelled pdus receive no reply.
     * Sending a reply would confuse clients because they would
     * assume that any EINTR is the actual result of the operation,
     * rather than a consequence of the cancellation. However, if
     * the operation completed (successfully or with an error other
     * than caused by cancellation), we do send out that reply, both
     * for efficiency and to avoid confusing the rest of the state machine
     * that assumes passing a non-error here will mean a successful
     * transmission of the reply.
     */
    bool discard = pdu->cancelled && len == -EINTR;
    if (discard) {
        trace_v9fs_rcancel(pdu->tag, pdu->id);
        pdu->size = 0;
        goto out_notify;
    }

    if (len < 0) {
        int err = -len;
        /* the error payload starts right behind the 7 byte header */
        len = 7;

        if (s->proto_version != V9FS_PROTO_2000L) {
            /* non-9P2000.L replies carry the error text before the number */
            V9fsString str;

            str.data = strerror(err);
            str.size = strlen(str.data);

            ret = pdu_marshal(pdu, len, "s", &str);
            if (ret < 0) {
                goto out_notify;
            }
            len += ret;
            id = P9_RERROR;
        }

        /* numeric errno, sent for every protocol version */
        ret = pdu_marshal(pdu, len, "d", err);
        if (ret < 0) {
            goto out_notify;
        }
        len += ret;

        if (s->proto_version == V9FS_PROTO_2000L) {
            id = P9_RLERROR;
        }
        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
    }

    /* fill out the header */
    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
        goto out_notify;
    }

    /* keep these in sync */
    pdu->size = len;
    pdu->id = id;

out_notify:
    pdu->s->transport->push_and_notify(pdu);

    /* Now wakeup anybody waiting in flush for this request */
    if (!qemu_co_queue_next(&pdu->complete)) {
        /* nobody was waiting on this PDU, so it is released right here */
        pdu_free(pdu);
    }
}
1078
1079static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1080{
1081    mode_t ret;
1082
1083    ret = mode & 0777;
1084    if (mode & P9_STAT_MODE_DIR) {
1085        ret |= S_IFDIR;
1086    }
1087
1088    if (mode & P9_STAT_MODE_SYMLINK) {
1089        ret |= S_IFLNK;
1090    }
1091    if (mode & P9_STAT_MODE_SOCKET) {
1092        ret |= S_IFSOCK;
1093    }
1094    if (mode & P9_STAT_MODE_NAMED_PIPE) {
1095        ret |= S_IFIFO;
1096    }
1097    if (mode & P9_STAT_MODE_DEVICE) {
1098        if (extension->size && extension->data[0] == 'c') {
1099            ret |= S_IFCHR;
1100        } else {
1101            ret |= S_IFBLK;
1102        }
1103    }
1104
1105    if (!(ret & ~0777)) {
1106        ret |= S_IFREG;
1107    }
1108
1109    if (mode & P9_STAT_MODE_SETUID) {
1110        ret |= S_ISUID;
1111    }
1112    if (mode & P9_STAT_MODE_SETGID) {
1113        ret |= S_ISGID;
1114    }
1115    if (mode & P9_STAT_MODE_SETVTX) {
1116        ret |= S_ISVTX;
1117    }
1118
1119    return ret;
1120}
1121
1122static int donttouch_stat(V9fsStat *stat)
1123{
1124    if (stat->type == -1 &&
1125        stat->dev == -1 &&
1126        stat->qid.type == 0xff &&
1127        stat->qid.version == (uint32_t) -1 &&
1128        stat->qid.path == (uint64_t) -1 &&
1129        stat->mode == -1 &&
1130        stat->atime == -1 &&
1131        stat->mtime == -1 &&
1132        stat->length == -1 &&
1133        !stat->name.size &&
1134        !stat->uid.size &&
1135        !stat->gid.size &&
1136        !stat->muid.size &&
1137        stat->n_uid == -1 &&
1138        stat->n_gid == -1 &&
1139        stat->n_muid == -1) {
1140        return 1;
1141    }
1142
1143    return 0;
1144}
1145
1146static void v9fs_stat_init(V9fsStat *stat)
1147{
1148    v9fs_string_init(&stat->name);
1149    v9fs_string_init(&stat->uid);
1150    v9fs_string_init(&stat->gid);
1151    v9fs_string_init(&stat->muid);
1152    v9fs_string_init(&stat->extension);
1153}
1154
1155static void v9fs_stat_free(V9fsStat *stat)
1156{
1157    v9fs_string_free(&stat->name);
1158    v9fs_string_free(&stat->uid);
1159    v9fs_string_free(&stat->gid);
1160    v9fs_string_free(&stat->muid);
1161    v9fs_string_free(&stat->extension);
1162}
1163
1164static uint32_t stat_to_v9mode(const struct stat *stbuf)
1165{
1166    uint32_t mode;
1167
1168    mode = stbuf->st_mode & 0777;
1169    if (S_ISDIR(stbuf->st_mode)) {
1170        mode |= P9_STAT_MODE_DIR;
1171    }
1172
1173    if (S_ISLNK(stbuf->st_mode)) {
1174        mode |= P9_STAT_MODE_SYMLINK;
1175    }
1176
1177    if (S_ISSOCK(stbuf->st_mode)) {
1178        mode |= P9_STAT_MODE_SOCKET;
1179    }
1180
1181    if (S_ISFIFO(stbuf->st_mode)) {
1182        mode |= P9_STAT_MODE_NAMED_PIPE;
1183    }
1184
1185    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1186        mode |= P9_STAT_MODE_DEVICE;
1187    }
1188
1189    if (stbuf->st_mode & S_ISUID) {
1190        mode |= P9_STAT_MODE_SETUID;
1191    }
1192
1193    if (stbuf->st_mode & S_ISGID) {
1194        mode |= P9_STAT_MODE_SETGID;
1195    }
1196
1197    if (stbuf->st_mode & S_ISVTX) {
1198        mode |= P9_STAT_MODE_SETVTX;
1199    }
1200
1201    return mode;
1202}
1203
1204static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1205                                       const char *basename,
1206                                       const struct stat *stbuf,
1207                                       V9fsStat *v9stat)
1208{
1209    int err;
1210
1211    memset(v9stat, 0, sizeof(*v9stat));
1212
1213    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1214    if (err < 0) {
1215        return err;
1216    }
1217    v9stat->mode = stat_to_v9mode(stbuf);
1218    v9stat->atime = stbuf->st_atime;
1219    v9stat->mtime = stbuf->st_mtime;
1220    v9stat->length = stbuf->st_size;
1221
1222    v9fs_string_free(&v9stat->uid);
1223    v9fs_string_free(&v9stat->gid);
1224    v9fs_string_free(&v9stat->muid);
1225
1226    v9stat->n_uid = stbuf->st_uid;
1227    v9stat->n_gid = stbuf->st_gid;
1228    v9stat->n_muid = 0;
1229
1230    v9fs_string_free(&v9stat->extension);
1231
1232    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1233        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1234        if (err < 0) {
1235            return err;
1236        }
1237    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1238        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1239                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1240                major(stbuf->st_rdev), minor(stbuf->st_rdev));
1241    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1242        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1243                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1244    }
1245
1246    v9fs_string_sprintf(&v9stat->name, "%s", basename);
1247
1248    v9stat->size = 61 +
1249        v9fs_string_size(&v9stat->name) +
1250        v9fs_string_size(&v9stat->uid) +
1251        v9fs_string_size(&v9stat->gid) +
1252        v9fs_string_size(&v9stat->muid) +
1253        v9fs_string_size(&v9stat->extension);
1254    return 0;
1255}
1256
/*
 * Bit flags naming individual stat fields. They are used for the
 * request mask of a getattr request and for the st_result_mask member
 * of the V9fsStatDotl reply.
 */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* fields beyond the basic set */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
1275
1276
1277static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1278                                V9fsStatDotl *v9lstat)
1279{
1280    memset(v9lstat, 0, sizeof(*v9lstat));
1281
1282    v9lstat->st_mode = stbuf->st_mode;
1283    v9lstat->st_nlink = stbuf->st_nlink;
1284    v9lstat->st_uid = stbuf->st_uid;
1285    v9lstat->st_gid = stbuf->st_gid;
1286    v9lstat->st_rdev = stbuf->st_rdev;
1287    v9lstat->st_size = stbuf->st_size;
1288    v9lstat->st_blksize = stbuf->st_blksize;
1289    v9lstat->st_blocks = stbuf->st_blocks;
1290    v9lstat->st_atime_sec = stbuf->st_atime;
1291    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1292    v9lstat->st_mtime_sec = stbuf->st_mtime;
1293    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1294    v9lstat->st_ctime_sec = stbuf->st_ctime;
1295    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1296    /* Currently we only support BASIC fields in stat */
1297    v9lstat->st_result_mask = P9_STATS_BASIC;
1298
1299    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1300}
1301
/*
 * Debug helper: print the base address and length of each of the first
 * cnt entries of the scatter/gather list sg to stdout, on a single line.
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, hence %zu rather than the signed %zd */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1315
1316/* Will call this only for path name based fid */
1317static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1318{
1319    V9fsPath str;
1320    v9fs_path_init(&str);
1321    v9fs_path_copy(&str, dst);
1322    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1323    v9fs_path_free(&str);
1324}
1325
1326static inline bool is_ro_export(FsContext *ctx)
1327{
1328    return ctx->export_flags & V9FS_RDONLY;
1329}
1330
/*
 * Tversion handler: read the msize and protocol version string proposed
 * by the client, reset the session and reply with the msize and the
 * accepted version string ("unknown" if the version is not supported).
 */
static void coroutine_fn v9fs_version(void *opaque)
{
    ssize_t err;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsString version;
    size_t offset = 7;

    v9fs_string_init(&version);
    /* the client's msize is stored straight into the server state */
    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
    if (err < 0) {
        goto out;
    }
    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);

    virtfs_reset(pdu);

    if (!strcmp(version.data, "9P2000.u")) {
        s->proto_version = V9FS_PROTO_2000U;
    } else if (!strcmp(version.data, "9P2000.L")) {
        s->proto_version = V9FS_PROTO_2000L;
    } else {
        v9fs_string_sprintf(&version, "unknown");
        /* skip min. msize check, reporting invalid version has priority */
        goto marshal;
    }

    if (s->msize < P9_MIN_MSIZE) {
        err = -EMSGSIZE;
        error_report(
            "9pfs: Client requested msize < minimum msize ("
            stringify(P9_MIN_MSIZE) ") supported by this server."
        );
        goto out;
    }

    /* 8192 is the default msize of Linux clients */
    if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
        warn_report_once(
            "9p: degraded performance: a reasonable high msize should be "
            "chosen on client/guest side (chosen msize is <= 8192). See "
            "https://wiki.qemu.org/Documentation/9psetup#msize for details."
        );
    }

marshal:
    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
    if (err < 0) {
        goto out;
    }
    /* total reply length = header offset + marshalled payload */
    err += offset;
    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
out:
    pdu_complete(pdu, err);
    v9fs_string_free(&version);
}
1387
/*
 * Tattach handler: allocate the fid requested by the client, bind it to
 * the export root ("/") and reply with the root's qid. The first
 * successful attach also installs a migration blocker and records the
 * root fid.
 */
static void coroutine_fn v9fs_attach(void *opaque)
{
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    int32_t fid, afid, n_uname;
    V9fsString uname, aname;
    V9fsFidState *fidp;
    size_t offset = 7;
    V9fsQID qid;
    ssize_t err;

    v9fs_string_init(&uname);
    v9fs_string_init(&aname);
    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
                        &afid, &uname, &aname, &n_uname);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);

    fidp = alloc_fid(s, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* the numeric user id sent by the client becomes the fid's uid */
    fidp->uid = n_uname;
    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
    if (err < 0) {
        /* the specific error is replaced by -EINVAL for the client */
        err = -EINVAL;
        clunk_fid(s, fid);
        goto out;
    }
    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        err = -EINVAL;
        clunk_fid(s, fid);
        goto out;
    }

    /*
     * disable migration if we haven't done already.
     * attach could get called multiple times for the same export.
     */
    if (!s->migration_blocker) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
        err = migrate_add_blocker(s->migration_blocker, NULL);
        if (err < 0) {
            /* blocker could not be installed: drop it and fail the attach */
            error_free(s->migration_blocker);
            s->migration_blocker = NULL;
            clunk_fid(s, fid);
            goto out;
        }
        s->root_fid = fid;
    }

    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        clunk_fid(s, fid);
        goto out;
    }
    err += offset;

    /* remember the root qid; v9fs_walk compares against it */
    memcpy(&s->root_qid, &qid, sizeof(qid));
    trace_v9fs_attach_return(pdu->tag, pdu->id,
                             qid.type, qid.version, qid.path);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&uname);
    v9fs_string_free(&aname);
}
1462
1463static void coroutine_fn v9fs_stat(void *opaque)
1464{
1465    int32_t fid;
1466    V9fsStat v9stat;
1467    ssize_t err = 0;
1468    size_t offset = 7;
1469    struct stat stbuf;
1470    V9fsFidState *fidp;
1471    V9fsPDU *pdu = opaque;
1472    char *basename;
1473
1474    err = pdu_unmarshal(pdu, offset, "d", &fid);
1475    if (err < 0) {
1476        goto out_nofid;
1477    }
1478    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1479
1480    fidp = get_fid(pdu, fid);
1481    if (fidp == NULL) {
1482        err = -ENOENT;
1483        goto out_nofid;
1484    }
1485    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1486    if (err < 0) {
1487        goto out;
1488    }
1489    basename = g_path_get_basename(fidp->path.data);
1490    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1491    g_free(basename);
1492    if (err < 0) {
1493        goto out;
1494    }
1495    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1496    if (err < 0) {
1497        v9fs_stat_free(&v9stat);
1498        goto out;
1499    }
1500    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1501                           v9stat.atime, v9stat.mtime, v9stat.length);
1502    err += offset;
1503    v9fs_stat_free(&v9stat);
1504out:
1505    put_fid(pdu, fidp);
1506out_nofid:
1507    pdu_complete(pdu, err);
1508}
1509
/*
 * Tgetattr handler: lstat the path behind the given fid and reply with
 * the result in V9fsStatDotl form. Besides the basic fields, only st_gen
 * is looked up on request.
 */
static void coroutine_fn v9fs_getattr(void *opaque)
{
    int32_t fid;
    size_t offset = 7;
    ssize_t retval = 0;
    struct stat stbuf;
    V9fsFidState *fidp;
    uint64_t request_mask;
    V9fsStatDotl v9stat_dotl;
    V9fsPDU *pdu = opaque;

    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
    if (retval < 0) {
        goto out_nofid;
    }
    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        retval = -ENOENT;
        goto out_nofid;
    }
    /*
     * Currently we only support BASIC fields in stat, so there is no
     * need to look at request_mask.
     */
    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    if (retval < 0) {
        goto out;
    }
    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }

    /*  fill st_gen if requested and supported by underlying fs */
    if (request_mask & P9_STATS_GEN) {
        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
        switch (retval) {
        case 0:
            /* we have valid st_gen: update result mask */
            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
            break;
        case -EINTR:
            /* request cancelled, e.g. by Tflush */
            goto out;
        default:
            /* failed to get st_gen: not fatal, ignore */
            break;
        }
    }
    /* retval is overwritten here, so an ignored st_gen error is not sent */
    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }
    retval += offset;
    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
                              v9stat_dotl.st_gid);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, retval);
}
1574
/*
 * Attribute flags: bits of the "valid" member of a setattr request,
 * telling which attributes the client wants to have changed.
 */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* 127 == 0x7f: the seven bits MODE..CTIME, without the two *_SET bits */
#define P9_ATTR_MASK    127
1587
/*
 * Tsetattr handler: apply the attribute changes selected by the "valid"
 * bit mask of the request to the path behind the given fid. The steps run
 * in the order chmod, utimensat, chown, truncate; the first failing step
 * aborts the request with its error.
 */
static void coroutine_fn v9fs_setattr(void *opaque)
{
    int err = 0;
    int32_t fid;
    V9fsFidState *fidp;
    size_t offset = 7;
    V9fsIattr v9iattr;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
    if (err < 0) {
        goto out_nofid;
    }

    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (v9iattr.valid & P9_ATTR_MODE) {
        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
        /*
         * times[0] is the access time, times[1] the modification time.
         * tv_sec is only filled in together with an explicit time; with
         * UTIME_NOW / UTIME_OMIT in tv_nsec, utimensat ignores tv_sec.
         */
        struct timespec times[2];
        if (v9iattr.valid & P9_ATTR_ATIME) {
            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
                times[0].tv_sec = v9iattr.atime_sec;
                times[0].tv_nsec = v9iattr.atime_nsec;
            } else {
                times[0].tv_nsec = UTIME_NOW;
            }
        } else {
            times[0].tv_nsec = UTIME_OMIT;
        }
        if (v9iattr.valid & P9_ATTR_MTIME) {
            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
                times[1].tv_sec = v9iattr.mtime_sec;
                times[1].tv_nsec = v9iattr.mtime_nsec;
            } else {
                times[1].tv_nsec = UTIME_NOW;
            }
        } else {
            times[1].tv_nsec = UTIME_OMIT;
        }
        err = v9fs_co_utimensat(pdu, &fidp->path, times);
        if (err < 0) {
            goto out;
        }
    }
    /*
     * If the only valid entry in iattr is ctime we can call
     * chown(-1,-1) to update the ctime of the file
     */
    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
        ((v9iattr.valid & P9_ATTR_CTIME)
         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
        /* an id that was not requested is passed as -1 */
        if (!(v9iattr.valid & P9_ATTR_UID)) {
            v9iattr.uid = -1;
        }
        if (!(v9iattr.valid & P9_ATTR_GID)) {
            v9iattr.gid = -1;
        }
        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
                            v9iattr.gid);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_SIZE)) {
        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
        if (err < 0) {
            goto out;
        }
    }
    /* the reply has no payload: its length is just the header offset */
    err = offset;
    trace_v9fs_setattr_return(pdu->tag, pdu->id);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}
1676
1677static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1678{
1679    int i;
1680    ssize_t err;
1681    size_t offset = 7;
1682
1683    err = pdu_marshal(pdu, offset, "w", nwnames);
1684    if (err < 0) {
1685        return err;
1686    }
1687    offset += err;
1688    for (i = 0; i < nwnames; i++) {
1689        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1690        if (err < 0) {
1691            return err;
1692        }
1693        offset += err;
1694    }
1695    return offset;
1696}
1697
/* A name is illegal if it is empty or contains a '/' character. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1702
1703static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1704{
1705    return
1706        qid1->type != qid2->type ||
1707        qid1->version != qid2->version ||
1708        qid1->path != qid2->path;
1709}
1710
/*
 * Twalk handler: starting at the path behind fid, resolve up to
 * P9_MAXWELEM path elements one by one and bind the resulting path either
 * to fid itself (fid == newfid) or to a newly allocated newfid. The reply
 * carries one qid per walked element.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx;
    V9fsQID *qids = NULL;
    int i, err = 0;
    V9fsPath dpath, path;
    uint16_t nwnames;
    struct stat stbuf;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsString *wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        pdu_complete(pdu, err);
        return ;
    }
    offset += err;

    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);

    if (nwnames && nwnames <= P9_MAXWELEM) {
        /* zero-filled, so the cleanup code may free every entry */
        wnames = g_new0(V9fsString, nwnames);
        qids   = g_new0(V9fsQID, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid;
            }
            offset += err;
        }
    } else if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);

    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        goto out;
    }

    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);
    for (name_idx = 0; name_idx < nwnames; name_idx++) {
        /*
         * A ".." element is not resolved while the current qid equals the
         * root qid; the current qid is then recorded again unchanged.
         */
        if (not_same_qid(&pdu->s->root_qid, &qid) ||
            strcmp("..", wnames[name_idx].data)) {
            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
                                       &path);
            if (err < 0) {
                goto out;
            }

            err = v9fs_co_lstat(pdu, &path, &stbuf);
            if (err < 0) {
                goto out;
            }
            err = stat_to_qid(pdu, &stbuf, &qid);
            if (err < 0) {
                goto out;
            }
            /* the element just resolved is the directory of the next one */
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (fid == newfid) {
        /* re-pointing fid itself is only allowed while it is not open */
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
    /* on success err becomes the total length of the reply */
    err = v9fs_walk_marshal(pdu, nwnames, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    /* same condition as the one guarding the allocations above */
    if (nwnames && nwnames <= P9_MAXWELEM) {
        for (name_idx = 0; name_idx < nwnames; name_idx++) {
            v9fs_string_free(&wnames[name_idx]);
        }
        g_free(wnames);
        g_free(qids);
    }
}
1832
1833static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1834{
1835    struct statfs stbuf;
1836    int32_t iounit = 0;
1837    V9fsState *s = pdu->s;
1838
1839    /*
1840     * iounit should be multiples of f_bsize (host filesystem block size
1841     * and as well as less than (client msize - P9_IOHDRSZ))
1842     */
1843    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1844        if (stbuf.f_bsize) {
1845            iounit = stbuf.f_bsize;
1846            iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1847        }
1848    }
1849    if (!iounit) {
1850        iounit = s->msize - P9_IOHDRSZ;
1851    }
1852    return iounit;
1853}
1854
1855static void coroutine_fn v9fs_open(void *opaque)
1856{
1857    int flags;
1858    int32_t fid;
1859    int32_t mode;
1860    V9fsQID qid;
1861    int iounit = 0;
1862    ssize_t err = 0;
1863    size_t offset = 7;
1864    struct stat stbuf;
1865    V9fsFidState *fidp;
1866    V9fsPDU *pdu = opaque;
1867    V9fsState *s = pdu->s;
1868
1869    if (s->proto_version == V9FS_PROTO_2000L) {
1870        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1871    } else {
1872        uint8_t modebyte;
1873        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1874        mode = modebyte;
1875    }
1876    if (err < 0) {
1877        goto out_nofid;
1878    }
1879    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1880
1881    fidp = get_fid(pdu, fid);
1882    if (fidp == NULL) {
1883        err = -ENOENT;
1884        goto out_nofid;
1885    }
1886    if (fidp->fid_type != P9_FID_NONE) {
1887        err = -EINVAL;
1888        goto out;
1889    }
1890
1891    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1892    if (err < 0) {
1893        goto out;
1894    }
1895    err = stat_to_qid(pdu, &stbuf, &qid);
1896    if (err < 0) {
1897        goto out;
1898    }
1899    if (S_ISDIR(stbuf.st_mode)) {
1900        err = v9fs_co_opendir(pdu, fidp);
1901        if (err < 0) {
1902            goto out;
1903        }
1904        fidp->fid_type = P9_FID_DIR;
1905        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1906        if (err < 0) {
1907            goto out;
1908        }
1909        err += offset;
1910    } else {
1911        if (s->proto_version == V9FS_PROTO_2000L) {
1912            flags = get_dotl_openflags(s, mode);
1913        } else {
1914            flags = omode_to_uflags(mode);
1915        }
1916        if (is_ro_export(&s->ctx)) {
1917            if (mode & O_WRONLY || mode & O_RDWR ||
1918                mode & O_APPEND || mode & O_TRUNC) {
1919                err = -EROFS;
1920                goto out;
1921            }
1922        }
1923        err = v9fs_co_open(pdu, fidp, flags);
1924        if (err < 0) {
1925            goto out;
1926        }
1927        fidp->fid_type = P9_FID_FILE;
1928        fidp->open_flags = flags;
1929        if (flags & O_EXCL) {
1930            /*
1931             * We let the host file system do O_EXCL check
1932             * We should not reclaim such fd
1933             */
1934            fidp->flags |= FID_NON_RECLAIMABLE;
1935        }
1936        iounit = get_iounit(pdu, &fidp->path);
1937        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1938        if (err < 0) {
1939            goto out;
1940        }
1941        err += offset;
1942    }
1943    trace_v9fs_open_return(pdu->tag, pdu->id,
1944                           qid.type, qid.version, qid.path, iounit);
1945out:
1946    put_fid(pdu, fidp);
1947out_nofid:
1948    pdu_complete(pdu, err);
1949}
1950
/*
 * Tlcreate handler: create and open a file with the given name, using the
 * fid dfid sent by the client. On success that fid is marked as an open
 * file (P9_FID_FILE) and the reply carries the new file's qid and the
 * iounit.
 */
static void coroutine_fn v9fs_lcreate(void *opaque)
{
    int32_t dfid, flags, mode;
    gid_t gid;
    ssize_t err = 0;
    ssize_t offset = 7;
    V9fsString name;
    V9fsFidState *fidp;
    struct stat stbuf;
    V9fsQID qid;
    int32_t iounit;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
                        &name, &flags, &mode, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);

    /* empty names and names containing '/' are rejected */
    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, dfid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /* the fid must not be open yet */
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }

    /* translate the client's flag word into host open flags */
    flags = get_dotl_openflags(pdu->s, flags);
    err = v9fs_co_open2(pdu, fidp, &name, gid,
                        flags | O_CREAT, mode, &stbuf);
    if (err < 0) {
        goto out;
    }
    fidp->fid_type = P9_FID_FILE;
    /* stored without O_CREAT, which was only added for the call above */
    fidp->open_flags = flags;
    if (flags & O_EXCL) {
        /*
         * We let the host file system do O_EXCL check
         * We should not reclaim such fd
         */
        fidp->flags |= FID_NON_RECLAIMABLE;
    }
    iounit =  get_iounit(pdu, &fidp->path);
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
                              qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}
2025
2026static void coroutine_fn v9fs_fsync(void *opaque)
2027{
2028    int err;
2029    int32_t fid;
2030    int datasync;
2031    size_t offset = 7;
2032    V9fsFidState *fidp;
2033    V9fsPDU *pdu = opaque;
2034
2035    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2036    if (err < 0) {
2037        goto out_nofid;
2038    }
2039    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2040
2041    fidp = get_fid(pdu, fid);
2042    if (fidp == NULL) {
2043        err = -ENOENT;
2044        goto out_nofid;
2045    }
2046    err = v9fs_co_fsync(pdu, fidp, datasync);
2047    if (!err) {
2048        err = offset;
2049    }
2050    put_fid(pdu, fidp);
2051out_nofid:
2052    pdu_complete(pdu, err);
2053}
2054
2055static void coroutine_fn v9fs_clunk(void *opaque)
2056{
2057    int err;
2058    int32_t fid;
2059    size_t offset = 7;
2060    V9fsFidState *fidp;
2061    V9fsPDU *pdu = opaque;
2062    V9fsState *s = pdu->s;
2063
2064    err = pdu_unmarshal(pdu, offset, "d", &fid);
2065    if (err < 0) {
2066        goto out_nofid;
2067    }
2068    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2069
2070    fidp = clunk_fid(s, fid);
2071    if (fidp == NULL) {
2072        err = -ENOENT;
2073        goto out_nofid;
2074    }
2075    /*
2076     * Bump the ref so that put_fid will
2077     * free the fid.
2078     */
2079    fidp->ref++;
2080    err = put_fid(pdu, fidp);
2081    if (!err) {
2082        err = offset;
2083    }
2084out_nofid:
2085    pdu_complete(pdu, err);
2086}
2087
2088/*
2089 * Create a QEMUIOVector for a sub-region of PDU iovecs
2090 *
2091 * @qiov:       uninitialized QEMUIOVector
2092 * @skip:       number of bytes to skip from beginning of PDU
2093 * @size:       number of bytes to include
2094 * @is_write:   true - write, false - read
2095 *
2096 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2097 * with qemu_iovec_destroy().
2098 */
2099static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2100                                    size_t skip, size_t size,
2101                                    bool is_write)
2102{
2103    QEMUIOVector elem;
2104    struct iovec *iov;
2105    unsigned int niov;
2106
2107    if (is_write) {
2108        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2109    } else {
2110        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2111    }
2112
2113    qemu_iovec_init_external(&elem, iov, niov);
2114    qemu_iovec_init(qiov, niov);
2115    qemu_iovec_concat(qiov, &elem, skip, size);
2116}
2117
2118static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2119                           uint64_t off, uint32_t max_count)
2120{
2121    ssize_t err;
2122    size_t offset = 7;
2123    uint64_t read_count;
2124    QEMUIOVector qiov_full;
2125
2126    if (fidp->fs.xattr.len < off) {
2127        read_count = 0;
2128    } else {
2129        read_count = fidp->fs.xattr.len - off;
2130    }
2131    if (read_count > max_count) {
2132        read_count = max_count;
2133    }
2134    err = pdu_marshal(pdu, offset, "d", read_count);
2135    if (err < 0) {
2136        return err;
2137    }
2138    offset += err;
2139
2140    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2141    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2142                    ((char *)fidp->fs.xattr.value) + off,
2143                    read_count);
2144    qemu_iovec_destroy(&qiov_full);
2145    if (err < 0) {
2146        return err;
2147    }
2148    offset += err;
2149    return offset;
2150}
2151
2152static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2153                                                  V9fsFidState *fidp,
2154                                                  uint32_t max_count)
2155{
2156    V9fsPath path;
2157    V9fsStat v9stat;
2158    int len, err = 0;
2159    int32_t count = 0;
2160    struct stat stbuf;
2161    off_t saved_dir_pos;
2162    struct dirent *dent;
2163
2164    /* save the directory position */
2165    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2166    if (saved_dir_pos < 0) {
2167        return saved_dir_pos;
2168    }
2169
2170    while (1) {
2171        v9fs_path_init(&path);
2172
2173        v9fs_readdir_lock(&fidp->fs.dir);
2174
2175        err = v9fs_co_readdir(pdu, fidp, &dent);
2176        if (err || !dent) {
2177            break;
2178        }
2179        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2180        if (err < 0) {
2181            break;
2182        }
2183        err = v9fs_co_lstat(pdu, &path, &stbuf);
2184        if (err < 0) {
2185            break;
2186        }
2187        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2188        if (err < 0) {
2189            break;
2190        }
2191        if ((count + v9stat.size + 2) > max_count) {
2192            v9fs_readdir_unlock(&fidp->fs.dir);
2193
2194            /* Ran out of buffer. Set dir back to old position and return */
2195            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2196            v9fs_stat_free(&v9stat);
2197            v9fs_path_free(&path);
2198            return count;
2199        }
2200
2201        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2202        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2203
2204        v9fs_readdir_unlock(&fidp->fs.dir);
2205
2206        if (len < 0) {
2207            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2208            v9fs_stat_free(&v9stat);
2209            v9fs_path_free(&path);
2210            return len;
2211        }
2212        count += len;
2213        v9fs_stat_free(&v9stat);
2214        v9fs_path_free(&path);
2215        saved_dir_pos = dent->d_off;
2216    }
2217
2218    v9fs_readdir_unlock(&fidp->fs.dir);
2219
2220    v9fs_path_free(&path);
2221    if (err < 0) {
2222        return err;
2223    }
2224    return count;
2225}
2226
2227static void coroutine_fn v9fs_read(void *opaque)
2228{
2229    int32_t fid;
2230    uint64_t off;
2231    ssize_t err = 0;
2232    int32_t count = 0;
2233    size_t offset = 7;
2234    uint32_t max_count;
2235    V9fsFidState *fidp;
2236    V9fsPDU *pdu = opaque;
2237    V9fsState *s = pdu->s;
2238
2239    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2240    if (err < 0) {
2241        goto out_nofid;
2242    }
2243    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2244
2245    fidp = get_fid(pdu, fid);
2246    if (fidp == NULL) {
2247        err = -EINVAL;
2248        goto out_nofid;
2249    }
2250    if (fidp->fid_type == P9_FID_DIR) {
2251        if (s->proto_version != V9FS_PROTO_2000U) {
2252            warn_report_once(
2253                "9p: bad client: T_read request on directory only expected "
2254                "with 9P2000.u protocol version"
2255            );
2256            err = -EOPNOTSUPP;
2257            goto out;
2258        }
2259        if (off == 0) {
2260            v9fs_co_rewinddir(pdu, fidp);
2261        }
2262        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2263        if (count < 0) {
2264            err = count;
2265            goto out;
2266        }
2267        err = pdu_marshal(pdu, offset, "d", count);
2268        if (err < 0) {
2269            goto out;
2270        }
2271        err += offset + count;
2272    } else if (fidp->fid_type == P9_FID_FILE) {
2273        QEMUIOVector qiov_full;
2274        QEMUIOVector qiov;
2275        int32_t len;
2276
2277        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2278        qemu_iovec_init(&qiov, qiov_full.niov);
2279        do {
2280            qemu_iovec_reset(&qiov);
2281            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2282            if (0) {
2283                print_sg(qiov.iov, qiov.niov);
2284            }
2285            /* Loop in case of EINTR */
2286            do {
2287                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2288                if (len >= 0) {
2289                    off   += len;
2290                    count += len;
2291                }
2292            } while (len == -EINTR && !pdu->cancelled);
2293            if (len < 0) {
2294                /* IO error return the error */
2295                err = len;
2296                goto out_free_iovec;
2297            }
2298        } while (count < max_count && len > 0);
2299        err = pdu_marshal(pdu, offset, "d", count);
2300        if (err < 0) {
2301            goto out_free_iovec;
2302        }
2303        err += offset + count;
2304out_free_iovec:
2305        qemu_iovec_destroy(&qiov);
2306        qemu_iovec_destroy(&qiov_full);
2307    } else if (fidp->fid_type == P9_FID_XATTR) {
2308        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2309    } else {
2310        err = -EINVAL;
2311    }
2312    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2313out:
2314    put_fid(pdu, fidp);
2315out_nofid:
2316    pdu_complete(pdu, err);
2317}
2318
2319/**
2320 * Returns size required in Rreaddir response for the passed dirent @p name.
2321 *
2322 * @param name - directory entry's name (i.e. file name, directory name)
2323 * @returns required size in bytes
2324 */
2325size_t v9fs_readdir_response_size(V9fsString *name)
2326{
2327    /*
2328     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2329     * size of type (1) + size of name.size (2) + strlen(name.data)
2330     */
2331    return 24 + v9fs_string_size(name);
2332}
2333
2334static void v9fs_free_dirents(struct V9fsDirEnt *e)
2335{
2336    struct V9fsDirEnt *next = NULL;
2337
2338    for (; e; e = next) {
2339        next = e->next;
2340        g_free(e->dent);
2341        g_free(e->st);
2342        g_free(e);
2343    }
2344}
2345
2346static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2347                                        off_t offset, int32_t max_count)
2348{
2349    size_t size;
2350    V9fsQID qid;
2351    V9fsString name;
2352    int len, err = 0;
2353    int32_t count = 0;
2354    struct dirent *dent;
2355    struct stat *st;
2356    struct V9fsDirEnt *entries = NULL;
2357
2358    /*
2359     * inode remapping requires the device id, which in turn might be
2360     * different for different directory entries, so if inode remapping is
2361     * enabled we have to make a full stat for each directory entry
2362     */
2363    const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
2364
2365    /*
2366     * Fetch all required directory entries altogether on a background IO
2367     * thread from fs driver. We don't want to do that for each entry
2368     * individually, because hopping between threads (this main IO thread
2369     * and background IO driver thread) would sum up to huge latencies.
2370     */
2371    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
2372                                 dostat);
2373    if (count < 0) {
2374        err = count;
2375        count = 0;
2376        goto out;
2377    }
2378    count = 0;
2379
2380    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
2381        dent = e->dent;
2382
2383        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2384            st = e->st;
2385            /* e->st should never be NULL, but just to be sure */
2386            if (!st) {
2387                err = -1;
2388                break;
2389            }
2390
2391            /* remap inode */
2392            err = stat_to_qid(pdu, st, &qid);
2393            if (err < 0) {
2394                break;
2395            }
2396        } else {
2397            /*
2398             * Fill up just the path field of qid because the client uses
2399             * only that. To fill the entire qid structure we will have
2400             * to stat each dirent found, which is expensive. For the
2401             * latter reason we don't call stat_to_qid() here. Only drawback
2402             * is that no multi-device export detection of stat_to_qid()
2403             * would be done and provided as error to the user here. But
2404             * user would get that error anyway when accessing those
2405             * files/dirs through other ways.
2406             */
2407            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2408            memcpy(&qid.path, &dent->d_ino, size);
2409            /* Fill the other fields with dummy values */
2410            qid.type = 0;
2411            qid.version = 0;
2412        }
2413
2414        v9fs_string_init(&name);
2415        v9fs_string_sprintf(&name, "%s", dent->d_name);
2416
2417        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2418        len = pdu_marshal(pdu, 11 + count, "Qqbs",
2419                          &qid, dent->d_off,
2420                          dent->d_type, &name);
2421
2422        v9fs_string_free(&name);
2423
2424        if (len < 0) {
2425            err = len;
2426            break;
2427        }
2428
2429        count += len;
2430    }
2431
2432out:
2433    v9fs_free_dirents(entries);
2434    if (err < 0) {
2435        return err;
2436    }
2437    return count;
2438}
2439
2440static void coroutine_fn v9fs_readdir(void *opaque)
2441{
2442    int32_t fid;
2443    V9fsFidState *fidp;
2444    ssize_t retval = 0;
2445    size_t offset = 7;
2446    uint64_t initial_offset;
2447    int32_t count;
2448    uint32_t max_count;
2449    V9fsPDU *pdu = opaque;
2450    V9fsState *s = pdu->s;
2451
2452    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2453                           &initial_offset, &max_count);
2454    if (retval < 0) {
2455        goto out_nofid;
2456    }
2457    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2458
2459    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2460    if (max_count > s->msize - 11) {
2461        max_count = s->msize - 11;
2462        warn_report_once(
2463            "9p: bad client: T_readdir with count > msize - 11"
2464        );
2465    }
2466
2467    fidp = get_fid(pdu, fid);
2468    if (fidp == NULL) {
2469        retval = -EINVAL;
2470        goto out_nofid;
2471    }
2472    if (!fidp->fs.dir.stream) {
2473        retval = -EINVAL;
2474        goto out;
2475    }
2476    if (s->proto_version != V9FS_PROTO_2000L) {
2477        warn_report_once(
2478            "9p: bad client: T_readdir request only expected with 9P2000.L "
2479            "protocol version"
2480        );
2481        retval = -EOPNOTSUPP;
2482        goto out;
2483    }
2484    count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
2485    if (count < 0) {
2486        retval = count;
2487        goto out;
2488    }
2489    retval = pdu_marshal(pdu, offset, "d", count);
2490    if (retval < 0) {
2491        goto out;
2492    }
2493    retval += count + offset;
2494    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2495out:
2496    put_fid(pdu, fidp);
2497out_nofid:
2498    pdu_complete(pdu, retval);
2499}
2500
2501static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2502                            uint64_t off, uint32_t count,
2503                            struct iovec *sg, int cnt)
2504{
2505    int i, to_copy;
2506    ssize_t err = 0;
2507    uint64_t write_count;
2508    size_t offset = 7;
2509
2510
2511    if (fidp->fs.xattr.len < off) {
2512        return -ENOSPC;
2513    }
2514    write_count = fidp->fs.xattr.len - off;
2515    if (write_count > count) {
2516        write_count = count;
2517    }
2518    err = pdu_marshal(pdu, offset, "d", write_count);
2519    if (err < 0) {
2520        return err;
2521    }
2522    err += offset;
2523    fidp->fs.xattr.copied_len += write_count;
2524    /*
2525     * Now copy the content from sg list
2526     */
2527    for (i = 0; i < cnt; i++) {
2528        if (write_count > sg[i].iov_len) {
2529            to_copy = sg[i].iov_len;
2530        } else {
2531            to_copy = write_count;
2532        }
2533        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2534        /* updating vs->off since we are not using below */
2535        off += to_copy;
2536        write_count -= to_copy;
2537    }
2538
2539    return err;
2540}
2541
2542static void coroutine_fn v9fs_write(void *opaque)
2543{
2544    ssize_t err;
2545    int32_t fid;
2546    uint64_t off;
2547    uint32_t count;
2548    int32_t len = 0;
2549    int32_t total = 0;
2550    size_t offset = 7;
2551    V9fsFidState *fidp;
2552    V9fsPDU *pdu = opaque;
2553    V9fsState *s = pdu->s;
2554    QEMUIOVector qiov_full;
2555    QEMUIOVector qiov;
2556
2557    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2558    if (err < 0) {
2559        pdu_complete(pdu, err);
2560        return;
2561    }
2562    offset += err;
2563    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2564    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2565
2566    fidp = get_fid(pdu, fid);
2567    if (fidp == NULL) {
2568        err = -EINVAL;
2569        goto out_nofid;
2570    }
2571    if (fidp->fid_type == P9_FID_FILE) {
2572        if (fidp->fs.fd == -1) {
2573            err = -EINVAL;
2574            goto out;
2575        }
2576    } else if (fidp->fid_type == P9_FID_XATTR) {
2577        /*
2578         * setxattr operation
2579         */
2580        err = v9fs_xattr_write(s, pdu, fidp, off, count,
2581                               qiov_full.iov, qiov_full.niov);
2582        goto out;
2583    } else {
2584        err = -EINVAL;
2585        goto out;
2586    }
2587    qemu_iovec_init(&qiov, qiov_full.niov);
2588    do {
2589        qemu_iovec_reset(&qiov);
2590        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2591        if (0) {
2592            print_sg(qiov.iov, qiov.niov);
2593        }
2594        /* Loop in case of EINTR */
2595        do {
2596            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2597            if (len >= 0) {
2598                off   += len;
2599                total += len;
2600            }
2601        } while (len == -EINTR && !pdu->cancelled);
2602        if (len < 0) {
2603            /* IO error return the error */
2604            err = len;
2605            goto out_qiov;
2606        }
2607    } while (total < count && len > 0);
2608
2609    offset = 7;
2610    err = pdu_marshal(pdu, offset, "d", total);
2611    if (err < 0) {
2612        goto out_qiov;
2613    }
2614    err += offset;
2615    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2616out_qiov:
2617    qemu_iovec_destroy(&qiov);
2618out:
2619    put_fid(pdu, fidp);
2620out_nofid:
2621    qemu_iovec_destroy(&qiov_full);
2622    pdu_complete(pdu, err);
2623}
2624
2625static void coroutine_fn v9fs_create(void *opaque)
2626{
2627    int32_t fid;
2628    int err = 0;
2629    size_t offset = 7;
2630    V9fsFidState *fidp;
2631    V9fsQID qid;
2632    int32_t perm;
2633    int8_t mode;
2634    V9fsPath path;
2635    struct stat stbuf;
2636    V9fsString name;
2637    V9fsString extension;
2638    int iounit;
2639    V9fsPDU *pdu = opaque;
2640    V9fsState *s = pdu->s;
2641
2642    v9fs_path_init(&path);
2643    v9fs_string_init(&name);
2644    v9fs_string_init(&extension);
2645    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2646                        &perm, &mode, &extension);
2647    if (err < 0) {
2648        goto out_nofid;
2649    }
2650    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2651
2652    if (name_is_illegal(name.data)) {
2653        err = -ENOENT;
2654        goto out_nofid;
2655    }
2656
2657    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2658        err = -EEXIST;
2659        goto out_nofid;
2660    }
2661
2662    fidp = get_fid(pdu, fid);
2663    if (fidp == NULL) {
2664        err = -EINVAL;
2665        goto out_nofid;
2666    }
2667    if (fidp->fid_type != P9_FID_NONE) {
2668        err = -EINVAL;
2669        goto out;
2670    }
2671    if (perm & P9_STAT_MODE_DIR) {
2672        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2673                            fidp->uid, -1, &stbuf);
2674        if (err < 0) {
2675            goto out;
2676        }
2677        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2678        if (err < 0) {
2679            goto out;
2680        }
2681        v9fs_path_write_lock(s);
2682        v9fs_path_copy(&fidp->path, &path);
2683        v9fs_path_unlock(s);
2684        err = v9fs_co_opendir(pdu, fidp);
2685        if (err < 0) {
2686            goto out;
2687        }
2688        fidp->fid_type = P9_FID_DIR;
2689    } else if (perm & P9_STAT_MODE_SYMLINK) {
2690        err = v9fs_co_symlink(pdu, fidp, &name,
2691                              extension.data, -1 , &stbuf);
2692        if (err < 0) {
2693            goto out;
2694        }
2695        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2696        if (err < 0) {
2697            goto out;
2698        }
2699        v9fs_path_write_lock(s);
2700        v9fs_path_copy(&fidp->path, &path);
2701        v9fs_path_unlock(s);
2702    } else if (perm & P9_STAT_MODE_LINK) {
2703        int32_t ofid = atoi(extension.data);
2704        V9fsFidState *ofidp = get_fid(pdu, ofid);
2705        if (ofidp == NULL) {
2706            err = -EINVAL;
2707            goto out;
2708        }
2709        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2710        put_fid(pdu, ofidp);
2711        if (err < 0) {
2712            goto out;
2713        }
2714        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2715        if (err < 0) {
2716            fidp->fid_type = P9_FID_NONE;
2717            goto out;
2718        }
2719        v9fs_path_write_lock(s);
2720        v9fs_path_copy(&fidp->path, &path);
2721        v9fs_path_unlock(s);
2722        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2723        if (err < 0) {
2724            fidp->fid_type = P9_FID_NONE;
2725            goto out;
2726        }
2727    } else if (perm & P9_STAT_MODE_DEVICE) {
2728        char ctype;
2729        uint32_t major, minor;
2730        mode_t nmode = 0;
2731
2732        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2733            err = -errno;
2734            goto out;
2735        }
2736
2737        switch (ctype) {
2738        case 'c':
2739            nmode = S_IFCHR;
2740            break;
2741        case 'b':
2742            nmode = S_IFBLK;
2743            break;
2744        default:
2745            err = -EIO;
2746            goto out;
2747        }
2748
2749        nmode |= perm & 0777;
2750        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2751                            makedev(major, minor), nmode, &stbuf);
2752        if (err < 0) {
2753            goto out;
2754        }
2755        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2756        if (err < 0) {
2757            goto out;
2758        }
2759        v9fs_path_write_lock(s);
2760        v9fs_path_copy(&fidp->path, &path);
2761        v9fs_path_unlock(s);
2762    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2763        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2764                            0, S_IFIFO | (perm & 0777), &stbuf);
2765        if (err < 0) {
2766            goto out;
2767        }
2768        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2769        if (err < 0) {
2770            goto out;
2771        }
2772        v9fs_path_write_lock(s);
2773        v9fs_path_copy(&fidp->path, &path);
2774        v9fs_path_unlock(s);
2775    } else if (perm & P9_STAT_MODE_SOCKET) {
2776        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2777                            0, S_IFSOCK | (perm & 0777), &stbuf);
2778        if (err < 0) {
2779            goto out;
2780        }
2781        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2782        if (err < 0) {
2783            goto out;
2784        }
2785        v9fs_path_write_lock(s);
2786        v9fs_path_copy(&fidp->path, &path);
2787        v9fs_path_unlock(s);
2788    } else {
2789        err = v9fs_co_open2(pdu, fidp, &name, -1,
2790                            omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
2791        if (err < 0) {
2792            goto out;
2793        }
2794        fidp->fid_type = P9_FID_FILE;
2795        fidp->open_flags = omode_to_uflags(mode);
2796        if (fidp->open_flags & O_EXCL) {
2797            /*
2798             * We let the host file system do O_EXCL check
2799             * We should not reclaim such fd
2800             */
2801            fidp->flags |= FID_NON_RECLAIMABLE;
2802        }
2803    }
2804    iounit = get_iounit(pdu, &fidp->path);
2805    err = stat_to_qid(pdu, &stbuf, &qid);
2806    if (err < 0) {
2807        goto out;
2808    }
2809    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2810    if (err < 0) {
2811        goto out;
2812    }
2813    err += offset;
2814    trace_v9fs_create_return(pdu->tag, pdu->id,
2815                             qid.type, qid.version, qid.path, iounit);
2816out:
2817    put_fid(pdu, fidp);
2818out_nofid:
2819   pdu_complete(pdu, err);
2820   v9fs_string_free(&name);
2821   v9fs_string_free(&extension);
2822   v9fs_path_free(&path);
2823}
2824
2825static void coroutine_fn v9fs_symlink(void *opaque)
2826{
2827    V9fsPDU *pdu = opaque;
2828    V9fsString name;
2829    V9fsString symname;
2830    V9fsFidState *dfidp;
2831    V9fsQID qid;
2832    struct stat stbuf;
2833    int32_t dfid;
2834    int err = 0;
2835    gid_t gid;
2836    size_t offset = 7;
2837
2838    v9fs_string_init(&name);
2839    v9fs_string_init(&symname);
2840    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2841    if (err < 0) {
2842        goto out_nofid;
2843    }
2844    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2845
2846    if (name_is_illegal(name.data)) {
2847        err = -ENOENT;
2848        goto out_nofid;
2849    }
2850
2851    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2852        err = -EEXIST;
2853        goto out_nofid;
2854    }
2855
2856    dfidp = get_fid(pdu, dfid);
2857    if (dfidp == NULL) {
2858        err = -EINVAL;
2859        goto out_nofid;
2860    }
2861    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2862    if (err < 0) {
2863        goto out;
2864    }
2865    err = stat_to_qid(pdu, &stbuf, &qid);
2866    if (err < 0) {
2867        goto out;
2868    }
2869    err =  pdu_marshal(pdu, offset, "Q", &qid);
2870    if (err < 0) {
2871        goto out;
2872    }
2873    err += offset;
2874    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2875                              qid.type, qid.version, qid.path);
2876out:
2877    put_fid(pdu, dfidp);
2878out_nofid:
2879    pdu_complete(pdu, err);
2880    v9fs_string_free(&name);
2881    v9fs_string_free(&symname);
2882}
2883
2884static void coroutine_fn v9fs_flush(void *opaque)
2885{
2886    ssize_t err;
2887    int16_t tag;
2888    size_t offset = 7;
2889    V9fsPDU *cancel_pdu = NULL;
2890    V9fsPDU *pdu = opaque;
2891    V9fsState *s = pdu->s;
2892
2893    err = pdu_unmarshal(pdu, offset, "w", &tag);
2894    if (err < 0) {
2895        pdu_complete(pdu, err);
2896        return;
2897    }
2898    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2899
2900    if (pdu->tag == tag) {
2901        warn_report("the guest sent a self-referencing 9P flush request");
2902    } else {
2903        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2904            if (cancel_pdu->tag == tag) {
2905                break;
2906            }
2907        }
2908    }
2909    if (cancel_pdu) {
2910        cancel_pdu->cancelled = 1;
2911        /*
2912         * Wait for pdu to complete.
2913         */
2914        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2915        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2916            cancel_pdu->cancelled = 0;
2917            pdu_free(cancel_pdu);
2918        }
2919    }
2920    pdu_complete(pdu, 7);
2921}
2922
2923static void coroutine_fn v9fs_link(void *opaque)
2924{
2925    V9fsPDU *pdu = opaque;
2926    int32_t dfid, oldfid;
2927    V9fsFidState *dfidp, *oldfidp;
2928    V9fsString name;
2929    size_t offset = 7;
2930    int err = 0;
2931
2932    v9fs_string_init(&name);
2933    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2934    if (err < 0) {
2935        goto out_nofid;
2936    }
2937    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2938
2939    if (name_is_illegal(name.data)) {
2940        err = -ENOENT;
2941        goto out_nofid;
2942    }
2943
2944    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2945        err = -EEXIST;
2946        goto out_nofid;
2947    }
2948
2949    dfidp = get_fid(pdu, dfid);
2950    if (dfidp == NULL) {
2951        err = -ENOENT;
2952        goto out_nofid;
2953    }
2954
2955    oldfidp = get_fid(pdu, oldfid);
2956    if (oldfidp == NULL) {
2957        err = -ENOENT;
2958        goto out;
2959    }
2960    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2961    if (!err) {
2962        err = offset;
2963    }
2964    put_fid(pdu, oldfidp);
2965out:
2966    put_fid(pdu, dfidp);
2967out_nofid:
2968    v9fs_string_free(&name);
2969    pdu_complete(pdu, err);
2970}
2971
2972/* Only works with path name based fid */
2973static void coroutine_fn v9fs_remove(void *opaque)
2974{
2975    int32_t fid;
2976    int err = 0;
2977    size_t offset = 7;
2978    V9fsFidState *fidp;
2979    V9fsPDU *pdu = opaque;
2980
2981    err = pdu_unmarshal(pdu, offset, "d", &fid);
2982    if (err < 0) {
2983        goto out_nofid;
2984    }
2985    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2986
2987    fidp = get_fid(pdu, fid);
2988    if (fidp == NULL) {
2989        err = -EINVAL;
2990        goto out_nofid;
2991    }
2992    /* if fs driver is not path based, return EOPNOTSUPP */
2993    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2994        err = -EOPNOTSUPP;
2995        goto out_err;
2996    }
2997    /*
2998     * IF the file is unlinked, we cannot reopen
2999     * the file later. So don't reclaim fd
3000     */
3001    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
3002    if (err < 0) {
3003        goto out_err;
3004    }
3005    err = v9fs_co_remove(pdu, &fidp->path);
3006    if (!err) {
3007        err = offset;
3008    }
3009out_err:
3010    /* For TREMOVE we need to clunk the fid even on failed remove */
3011    clunk_fid(pdu->s, fidp->fid);
3012    put_fid(pdu, fidp);
3013out_nofid:
3014    pdu_complete(pdu, err);
3015}
3016
3017static void coroutine_fn v9fs_unlinkat(void *opaque)
3018{
3019    int err = 0;
3020    V9fsString name;
3021    int32_t dfid, flags, rflags = 0;
3022    size_t offset = 7;
3023    V9fsPath path;
3024    V9fsFidState *dfidp;
3025    V9fsPDU *pdu = opaque;
3026
3027    v9fs_string_init(&name);
3028    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3029    if (err < 0) {
3030        goto out_nofid;
3031    }
3032
3033    if (name_is_illegal(name.data)) {
3034        err = -ENOENT;
3035        goto out_nofid;
3036    }
3037
3038    if (!strcmp(".", name.data)) {
3039        err = -EINVAL;
3040        goto out_nofid;
3041    }
3042
3043    if (!strcmp("..", name.data)) {
3044        err = -ENOTEMPTY;
3045        goto out_nofid;
3046    }
3047
3048    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3049        err = -EINVAL;
3050        goto out_nofid;
3051    }
3052
3053    if (flags & P9_DOTL_AT_REMOVEDIR) {
3054        rflags |= AT_REMOVEDIR;
3055    }
3056
3057    dfidp = get_fid(pdu, dfid);
3058    if (dfidp == NULL) {
3059        err = -EINVAL;
3060        goto out_nofid;
3061    }
3062    /*
3063     * IF the file is unlinked, we cannot reopen
3064     * the file later. So don't reclaim fd
3065     */
3066    v9fs_path_init(&path);
3067    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3068    if (err < 0) {
3069        goto out_err;
3070    }
3071    err = v9fs_mark_fids_unreclaim(pdu, &path);
3072    if (err < 0) {
3073        goto out_err;
3074    }
3075    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3076    if (!err) {
3077        err = offset;
3078    }
3079out_err:
3080    put_fid(pdu, dfidp);
3081    v9fs_path_free(&path);
3082out_nofid:
3083    pdu_complete(pdu, err);
3084    v9fs_string_free(&name);
3085}
3086
3087
3088/* Only works with path name based fid */
3089static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3090                                             int32_t newdirfid,
3091                                             V9fsString *name)
3092{
3093    int err = 0;
3094    V9fsPath new_path;
3095    V9fsFidState *tfidp;
3096    V9fsState *s = pdu->s;
3097    V9fsFidState *dirfidp = NULL;
3098
3099    v9fs_path_init(&new_path);
3100    if (newdirfid != -1) {
3101        dirfidp = get_fid(pdu, newdirfid);
3102        if (dirfidp == NULL) {
3103            return -ENOENT;
3104        }
3105        if (fidp->fid_type != P9_FID_NONE) {
3106            err = -EINVAL;
3107            goto out;
3108        }
3109        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3110        if (err < 0) {
3111            goto out;
3112        }
3113    } else {
3114        char *dir_name = g_path_get_dirname(fidp->path.data);
3115        V9fsPath dir_path;
3116
3117        v9fs_path_init(&dir_path);
3118        v9fs_path_sprintf(&dir_path, "%s", dir_name);
3119        g_free(dir_name);
3120
3121        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3122        v9fs_path_free(&dir_path);
3123        if (err < 0) {
3124            goto out;
3125        }
3126    }
3127    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3128    if (err < 0) {
3129        goto out;
3130    }
3131    /*
3132     * Fixup fid's pointing to the old name to
3133     * start pointing to the new name
3134     */
3135    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3136        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3137            /* replace the name */
3138            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3139        }
3140    }
3141out:
3142    if (dirfidp) {
3143        put_fid(pdu, dirfidp);
3144    }
3145    v9fs_path_free(&new_path);
3146    return err;
3147}
3148
3149/* Only works with path name based fid */
3150static void coroutine_fn v9fs_rename(void *opaque)
3151{
3152    int32_t fid;
3153    ssize_t err = 0;
3154    size_t offset = 7;
3155    V9fsString name;
3156    int32_t newdirfid;
3157    V9fsFidState *fidp;
3158    V9fsPDU *pdu = opaque;
3159    V9fsState *s = pdu->s;
3160
3161    v9fs_string_init(&name);
3162    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3163    if (err < 0) {
3164        goto out_nofid;
3165    }
3166
3167    if (name_is_illegal(name.data)) {
3168        err = -ENOENT;
3169        goto out_nofid;
3170    }
3171
3172    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3173        err = -EISDIR;
3174        goto out_nofid;
3175    }
3176
3177    fidp = get_fid(pdu, fid);
3178    if (fidp == NULL) {
3179        err = -ENOENT;
3180        goto out_nofid;
3181    }
3182    if (fidp->fid_type != P9_FID_NONE) {
3183        err = -EINVAL;
3184        goto out;
3185    }
3186    /* if fs driver is not path based, return EOPNOTSUPP */
3187    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3188        err = -EOPNOTSUPP;
3189        goto out;
3190    }
3191    v9fs_path_write_lock(s);
3192    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3193    v9fs_path_unlock(s);
3194    if (!err) {
3195        err = offset;
3196    }
3197out:
3198    put_fid(pdu, fidp);
3199out_nofid:
3200    pdu_complete(pdu, err);
3201    v9fs_string_free(&name);
3202}
3203
3204static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3205                                           V9fsString *old_name,
3206                                           V9fsPath *newdir,
3207                                           V9fsString *new_name)
3208{
3209    V9fsFidState *tfidp;
3210    V9fsPath oldpath, newpath;
3211    V9fsState *s = pdu->s;
3212    int err;
3213
3214    v9fs_path_init(&oldpath);
3215    v9fs_path_init(&newpath);
3216    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3217    if (err < 0) {
3218        goto out;
3219    }
3220    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3221    if (err < 0) {
3222        goto out;
3223    }
3224
3225    /*
3226     * Fixup fid's pointing to the old name to
3227     * start pointing to the new name
3228     */
3229    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3230        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3231            /* replace the name */
3232            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3233        }
3234    }
3235out:
3236    v9fs_path_free(&oldpath);
3237    v9fs_path_free(&newpath);
3238    return err;
3239}
3240
3241static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3242                                               V9fsString *old_name,
3243                                               int32_t newdirfid,
3244                                               V9fsString *new_name)
3245{
3246    int err = 0;
3247    V9fsState *s = pdu->s;
3248    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3249
3250    olddirfidp = get_fid(pdu, olddirfid);
3251    if (olddirfidp == NULL) {
3252        err = -ENOENT;
3253        goto out;
3254    }
3255    if (newdirfid != -1) {
3256        newdirfidp = get_fid(pdu, newdirfid);
3257        if (newdirfidp == NULL) {
3258            err = -ENOENT;
3259            goto out;
3260        }
3261    } else {
3262        newdirfidp = get_fid(pdu, olddirfid);
3263    }
3264
3265    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3266                           &newdirfidp->path, new_name);
3267    if (err < 0) {
3268        goto out;
3269    }
3270    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3271        /* Only for path based fid  we need to do the below fixup */
3272        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3273                                 &newdirfidp->path, new_name);
3274    }
3275out:
3276    if (olddirfidp) {
3277        put_fid(pdu, olddirfidp);
3278    }
3279    if (newdirfidp) {
3280        put_fid(pdu, newdirfidp);
3281    }
3282    return err;
3283}
3284
3285static void coroutine_fn v9fs_renameat(void *opaque)
3286{
3287    ssize_t err = 0;
3288    size_t offset = 7;
3289    V9fsPDU *pdu = opaque;
3290    V9fsState *s = pdu->s;
3291    int32_t olddirfid, newdirfid;
3292    V9fsString old_name, new_name;
3293
3294    v9fs_string_init(&old_name);
3295    v9fs_string_init(&new_name);
3296    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3297                        &old_name, &newdirfid, &new_name);
3298    if (err < 0) {
3299        goto out_err;
3300    }
3301
3302    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3303        err = -ENOENT;
3304        goto out_err;
3305    }
3306
3307    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3308        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3309        err = -EISDIR;
3310        goto out_err;
3311    }
3312
3313    v9fs_path_write_lock(s);
3314    err = v9fs_complete_renameat(pdu, olddirfid,
3315                                 &old_name, newdirfid, &new_name);
3316    v9fs_path_unlock(s);
3317    if (!err) {
3318        err = offset;
3319    }
3320
3321out_err:
3322    pdu_complete(pdu, err);
3323    v9fs_string_free(&old_name);
3324    v9fs_string_free(&new_name);
3325}
3326
3327static void coroutine_fn v9fs_wstat(void *opaque)
3328{
3329    int32_t fid;
3330    int err = 0;
3331    int16_t unused;
3332    V9fsStat v9stat;
3333    size_t offset = 7;
3334    struct stat stbuf;
3335    V9fsFidState *fidp;
3336    V9fsPDU *pdu = opaque;
3337    V9fsState *s = pdu->s;
3338
3339    v9fs_stat_init(&v9stat);
3340    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3341    if (err < 0) {
3342        goto out_nofid;
3343    }
3344    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3345                     v9stat.mode, v9stat.atime, v9stat.mtime);
3346
3347    fidp = get_fid(pdu, fid);
3348    if (fidp == NULL) {
3349        err = -EINVAL;
3350        goto out_nofid;
3351    }
3352    /* do we need to sync the file? */
3353    if (donttouch_stat(&v9stat)) {
3354        err = v9fs_co_fsync(pdu, fidp, 0);
3355        goto out;
3356    }
3357    if (v9stat.mode != -1) {
3358        uint32_t v9_mode;
3359        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3360        if (err < 0) {
3361            goto out;
3362        }
3363        v9_mode = stat_to_v9mode(&stbuf);
3364        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3365            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3366            /* Attempting to change the type */
3367            err = -EIO;
3368            goto out;
3369        }
3370        err = v9fs_co_chmod(pdu, &fidp->path,
3371                            v9mode_to_mode(v9stat.mode,
3372                                           &v9stat.extension));
3373        if (err < 0) {
3374            goto out;
3375        }
3376    }
3377    if (v9stat.mtime != -1 || v9stat.atime != -1) {
3378        struct timespec times[2];
3379        if (v9stat.atime != -1) {
3380            times[0].tv_sec = v9stat.atime;
3381            times[0].tv_nsec = 0;
3382        } else {
3383            times[0].tv_nsec = UTIME_OMIT;
3384        }
3385        if (v9stat.mtime != -1) {
3386            times[1].tv_sec = v9stat.mtime;
3387            times[1].tv_nsec = 0;
3388        } else {
3389            times[1].tv_nsec = UTIME_OMIT;
3390        }
3391        err = v9fs_co_utimensat(pdu, &fidp->path, times);
3392        if (err < 0) {
3393            goto out;
3394        }
3395    }
3396    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3397        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3398        if (err < 0) {
3399            goto out;
3400        }
3401    }
3402    if (v9stat.name.size != 0) {
3403        v9fs_path_write_lock(s);
3404        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3405        v9fs_path_unlock(s);
3406        if (err < 0) {
3407            goto out;
3408        }
3409    }
3410    if (v9stat.length != -1) {
3411        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3412        if (err < 0) {
3413            goto out;
3414        }
3415    }
3416    err = offset;
3417out:
3418    put_fid(pdu, fidp);
3419out_nofid:
3420    v9fs_stat_free(&v9stat);
3421    pdu_complete(pdu, err);
3422}
3423
3424static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3425{
3426    uint32_t f_type;
3427    uint32_t f_bsize;
3428    uint64_t f_blocks;
3429    uint64_t f_bfree;
3430    uint64_t f_bavail;
3431    uint64_t f_files;
3432    uint64_t f_ffree;
3433    uint64_t fsid_val;
3434    uint32_t f_namelen;
3435    size_t offset = 7;
3436    int32_t bsize_factor;
3437
3438    /*
3439     * compute bsize factor based on host file system block size
3440     * and client msize
3441     */
3442    bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
3443    if (!bsize_factor) {
3444        bsize_factor = 1;
3445    }
3446    f_type  = stbuf->f_type;
3447    f_bsize = stbuf->f_bsize;
3448    f_bsize *= bsize_factor;
3449    /*
3450     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3451     * adjust(divide) the number of blocks, free blocks and available
3452     * blocks by bsize factor
3453     */
3454    f_blocks = stbuf->f_blocks / bsize_factor;
3455    f_bfree  = stbuf->f_bfree / bsize_factor;
3456    f_bavail = stbuf->f_bavail / bsize_factor;
3457    f_files  = stbuf->f_files;
3458    f_ffree  = stbuf->f_ffree;
3459    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3460               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3461    f_namelen = stbuf->f_namelen;
3462
3463    return pdu_marshal(pdu, offset, "ddqqqqqqd",
3464                       f_type, f_bsize, f_blocks, f_bfree,
3465                       f_bavail, f_files, f_ffree,
3466                       fsid_val, f_namelen);
3467}
3468
3469static void coroutine_fn v9fs_statfs(void *opaque)
3470{
3471    int32_t fid;
3472    ssize_t retval = 0;
3473    size_t offset = 7;
3474    V9fsFidState *fidp;
3475    struct statfs stbuf;
3476    V9fsPDU *pdu = opaque;
3477    V9fsState *s = pdu->s;
3478
3479    retval = pdu_unmarshal(pdu, offset, "d", &fid);
3480    if (retval < 0) {
3481        goto out_nofid;
3482    }
3483    fidp = get_fid(pdu, fid);
3484    if (fidp == NULL) {
3485        retval = -ENOENT;
3486        goto out_nofid;
3487    }
3488    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3489    if (retval < 0) {
3490        goto out;
3491    }
3492    retval = v9fs_fill_statfs(s, pdu, &stbuf);
3493    if (retval < 0) {
3494        goto out;
3495    }
3496    retval += offset;
3497out:
3498    put_fid(pdu, fidp);
3499out_nofid:
3500    pdu_complete(pdu, retval);
3501}
3502
3503static void coroutine_fn v9fs_mknod(void *opaque)
3504{
3505
3506    int mode;
3507    gid_t gid;
3508    int32_t fid;
3509    V9fsQID qid;
3510    int err = 0;
3511    int major, minor;
3512    size_t offset = 7;
3513    V9fsString name;
3514    struct stat stbuf;
3515    V9fsFidState *fidp;
3516    V9fsPDU *pdu = opaque;
3517
3518    v9fs_string_init(&name);
3519    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3520                        &major, &minor, &gid);
3521    if (err < 0) {
3522        goto out_nofid;
3523    }
3524    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3525
3526    if (name_is_illegal(name.data)) {
3527        err = -ENOENT;
3528        goto out_nofid;
3529    }
3530
3531    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3532        err = -EEXIST;
3533        goto out_nofid;
3534    }
3535
3536    fidp = get_fid(pdu, fid);
3537    if (fidp == NULL) {
3538        err = -ENOENT;
3539        goto out_nofid;
3540    }
3541    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3542                        makedev(major, minor), mode, &stbuf);
3543    if (err < 0) {
3544        goto out;
3545    }
3546    err = stat_to_qid(pdu, &stbuf, &qid);
3547    if (err < 0) {
3548        goto out;
3549    }
3550    err = pdu_marshal(pdu, offset, "Q", &qid);
3551    if (err < 0) {
3552        goto out;
3553    }
3554    err += offset;
3555    trace_v9fs_mknod_return(pdu->tag, pdu->id,
3556                            qid.type, qid.version, qid.path);
3557out:
3558    put_fid(pdu, fidp);
3559out_nofid:
3560    pdu_complete(pdu, err);
3561    v9fs_string_free(&name);
3562}
3563
3564/*
3565 * Implement posix byte range locking code
3566 * Server side handling of locking code is very simple, because 9p server in
3567 * QEMU can handle only one client. And most of the lock handling
3568 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3569 * do any thing in * qemu 9p server side lock code path.
3570 * So when a TLOCK request comes, always return success
3571 */
3572static void coroutine_fn v9fs_lock(void *opaque)
3573{
3574    V9fsFlock flock;
3575    size_t offset = 7;
3576    struct stat stbuf;
3577    V9fsFidState *fidp;
3578    int32_t fid, err = 0;
3579    V9fsPDU *pdu = opaque;
3580
3581    v9fs_string_init(&flock.client_id);
3582    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3583                        &flock.flags, &flock.start, &flock.length,
3584                        &flock.proc_id, &flock.client_id);
3585    if (err < 0) {
3586        goto out_nofid;
3587    }
3588    trace_v9fs_lock(pdu->tag, pdu->id, fid,
3589                    flock.type, flock.start, flock.length);
3590
3591
3592    /* We support only block flag now (that too ignored currently) */
3593    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3594        err = -EINVAL;
3595        goto out_nofid;
3596    }
3597    fidp = get_fid(pdu, fid);
3598    if (fidp == NULL) {
3599        err = -ENOENT;
3600        goto out_nofid;
3601    }
3602    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3603    if (err < 0) {
3604        goto out;
3605    }
3606    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3607    if (err < 0) {
3608        goto out;
3609    }
3610    err += offset;
3611    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3612out:
3613    put_fid(pdu, fidp);
3614out_nofid:
3615    pdu_complete(pdu, err);
3616    v9fs_string_free(&flock.client_id);
3617}
3618
3619/*
3620 * When a TGETLOCK request comes, always return success because all lock
3621 * handling is done by client's VFS layer.
3622 */
3623static void coroutine_fn v9fs_getlock(void *opaque)
3624{
3625    size_t offset = 7;
3626    struct stat stbuf;
3627    V9fsFidState *fidp;
3628    V9fsGetlock glock;
3629    int32_t fid, err = 0;
3630    V9fsPDU *pdu = opaque;
3631
3632    v9fs_string_init(&glock.client_id);
3633    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3634                        &glock.start, &glock.length, &glock.proc_id,
3635                        &glock.client_id);
3636    if (err < 0) {
3637        goto out_nofid;
3638    }
3639    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3640                       glock.type, glock.start, glock.length);
3641
3642    fidp = get_fid(pdu, fid);
3643    if (fidp == NULL) {
3644        err = -ENOENT;
3645        goto out_nofid;
3646    }
3647    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3648    if (err < 0) {
3649        goto out;
3650    }
3651    glock.type = P9_LOCK_TYPE_UNLCK;
3652    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3653                          glock.start, glock.length, glock.proc_id,
3654                          &glock.client_id);
3655    if (err < 0) {
3656        goto out;
3657    }
3658    err += offset;
3659    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3660                              glock.length, glock.proc_id);
3661out:
3662    put_fid(pdu, fidp);
3663out_nofid:
3664    pdu_complete(pdu, err);
3665    v9fs_string_free(&glock.client_id);
3666}
3667
3668static void coroutine_fn v9fs_mkdir(void *opaque)
3669{
3670    V9fsPDU *pdu = opaque;
3671    size_t offset = 7;
3672    int32_t fid;
3673    struct stat stbuf;
3674    V9fsQID qid;
3675    V9fsString name;
3676    V9fsFidState *fidp;
3677    gid_t gid;
3678    int mode;
3679    int err = 0;
3680
3681    v9fs_string_init(&name);
3682    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3683    if (err < 0) {
3684        goto out_nofid;
3685    }
3686    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3687
3688    if (name_is_illegal(name.data)) {
3689        err = -ENOENT;
3690        goto out_nofid;
3691    }
3692
3693    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3694        err = -EEXIST;
3695        goto out_nofid;
3696    }
3697
3698    fidp = get_fid(pdu, fid);
3699    if (fidp == NULL) {
3700        err = -ENOENT;
3701        goto out_nofid;
3702    }
3703    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3704    if (err < 0) {
3705        goto out;
3706    }
3707    err = stat_to_qid(pdu, &stbuf, &qid);
3708    if (err < 0) {
3709        goto out;
3710    }
3711    err = pdu_marshal(pdu, offset, "Q", &qid);
3712    if (err < 0) {
3713        goto out;
3714    }
3715    err += offset;
3716    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3717                            qid.type, qid.version, qid.path, err);
3718out:
3719    put_fid(pdu, fidp);
3720out_nofid:
3721    pdu_complete(pdu, err);
3722    v9fs_string_free(&name);
3723}
3724
3725static void coroutine_fn v9fs_xattrwalk(void *opaque)
3726{
3727    int64_t size;
3728    V9fsString name;
3729    ssize_t err = 0;
3730    size_t offset = 7;
3731    int32_t fid, newfid;
3732    V9fsFidState *file_fidp;
3733    V9fsFidState *xattr_fidp = NULL;
3734    V9fsPDU *pdu = opaque;
3735    V9fsState *s = pdu->s;
3736
3737    v9fs_string_init(&name);
3738    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3739    if (err < 0) {
3740        goto out_nofid;
3741    }
3742    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3743
3744    file_fidp = get_fid(pdu, fid);
3745    if (file_fidp == NULL) {
3746        err = -ENOENT;
3747        goto out_nofid;
3748    }
3749    xattr_fidp = alloc_fid(s, newfid);
3750    if (xattr_fidp == NULL) {
3751        err = -EINVAL;
3752        goto out;
3753    }
3754    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3755    if (!v9fs_string_size(&name)) {
3756        /*
3757         * listxattr request. Get the size first
3758         */
3759        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3760        if (size < 0) {
3761            err = size;
3762            clunk_fid(s, xattr_fidp->fid);
3763            goto out;
3764        }
3765        /*
3766         * Read the xattr value
3767         */
3768        xattr_fidp->fs.xattr.len = size;
3769        xattr_fidp->fid_type = P9_FID_XATTR;
3770        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3771        xattr_fidp->fs.xattr.value = g_malloc0(size);
3772        if (size) {
3773            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3774                                     xattr_fidp->fs.xattr.value,
3775                                     xattr_fidp->fs.xattr.len);
3776            if (err < 0) {
3777                clunk_fid(s, xattr_fidp->fid);
3778                goto out;
3779            }
3780        }
3781        err = pdu_marshal(pdu, offset, "q", size);
3782        if (err < 0) {
3783            goto out;
3784        }
3785        err += offset;
3786    } else {
3787        /*
3788         * specific xattr fid. We check for xattr
3789         * presence also collect the xattr size
3790         */
3791        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3792                                 &name, NULL, 0);
3793        if (size < 0) {
3794            err = size;
3795            clunk_fid(s, xattr_fidp->fid);
3796            goto out;
3797        }
3798        /*
3799         * Read the xattr value
3800         */
3801        xattr_fidp->fs.xattr.len = size;
3802        xattr_fidp->fid_type = P9_FID_XATTR;
3803        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3804        xattr_fidp->fs.xattr.value = g_malloc0(size);
3805        if (size) {
3806            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3807                                    &name, xattr_fidp->fs.xattr.value,
3808                                    xattr_fidp->fs.xattr.len);
3809            if (err < 0) {
3810                clunk_fid(s, xattr_fidp->fid);
3811                goto out;
3812            }
3813        }
3814        err = pdu_marshal(pdu, offset, "q", size);
3815        if (err < 0) {
3816            goto out;
3817        }
3818        err += offset;
3819    }
3820    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3821out:
3822    put_fid(pdu, file_fidp);
3823    if (xattr_fidp) {
3824        put_fid(pdu, xattr_fidp);
3825    }
3826out_nofid:
3827    pdu_complete(pdu, err);
3828    v9fs_string_free(&name);
3829}
3830
3831static void coroutine_fn v9fs_xattrcreate(void *opaque)
3832{
3833    int flags, rflags = 0;
3834    int32_t fid;
3835    uint64_t size;
3836    ssize_t err = 0;
3837    V9fsString name;
3838    size_t offset = 7;
3839    V9fsFidState *file_fidp;
3840    V9fsFidState *xattr_fidp;
3841    V9fsPDU *pdu = opaque;
3842
3843    v9fs_string_init(&name);
3844    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3845    if (err < 0) {
3846        goto out_nofid;
3847    }
3848    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3849
3850    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3851        err = -EINVAL;
3852        goto out_nofid;
3853    }
3854
3855    if (flags & P9_XATTR_CREATE) {
3856        rflags |= XATTR_CREATE;
3857    }
3858
3859    if (flags & P9_XATTR_REPLACE) {
3860        rflags |= XATTR_REPLACE;
3861    }
3862
3863    if (size > XATTR_SIZE_MAX) {
3864        err = -E2BIG;
3865        goto out_nofid;
3866    }
3867
3868    file_fidp = get_fid(pdu, fid);
3869    if (file_fidp == NULL) {
3870        err = -EINVAL;
3871        goto out_nofid;
3872    }
3873    if (file_fidp->fid_type != P9_FID_NONE) {
3874        err = -EINVAL;
3875        goto out_put_fid;
3876    }
3877
3878    /* Make the file fid point to xattr */
3879    xattr_fidp = file_fidp;
3880    xattr_fidp->fid_type = P9_FID_XATTR;
3881    xattr_fidp->fs.xattr.copied_len = 0;
3882    xattr_fidp->fs.xattr.xattrwalk_fid = false;
3883    xattr_fidp->fs.xattr.len = size;
3884    xattr_fidp->fs.xattr.flags = rflags;
3885    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3886    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3887    xattr_fidp->fs.xattr.value = g_malloc0(size);
3888    err = offset;
3889out_put_fid:
3890    put_fid(pdu, file_fidp);
3891out_nofid:
3892    pdu_complete(pdu, err);
3893    v9fs_string_free(&name);
3894}
3895
3896static void coroutine_fn v9fs_readlink(void *opaque)
3897{
3898    V9fsPDU *pdu = opaque;
3899    size_t offset = 7;
3900    V9fsString target;
3901    int32_t fid;
3902    int err = 0;
3903    V9fsFidState *fidp;
3904
3905    err = pdu_unmarshal(pdu, offset, "d", &fid);
3906    if (err < 0) {
3907        goto out_nofid;
3908    }
3909    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3910    fidp = get_fid(pdu, fid);
3911    if (fidp == NULL) {
3912        err = -ENOENT;
3913        goto out_nofid;
3914    }
3915
3916    v9fs_string_init(&target);
3917    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3918    if (err < 0) {
3919        goto out;
3920    }
3921    err = pdu_marshal(pdu, offset, "s", &target);
3922    if (err < 0) {
3923        v9fs_string_free(&target);
3924        goto out;
3925    }
3926    err += offset;
3927    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3928    v9fs_string_free(&target);
3929out:
3930    put_fid(pdu, fidp);
3931out_nofid:
3932    pdu_complete(pdu, err);
3933}
3934
3935static CoroutineEntry *pdu_co_handlers[] = {
3936    [P9_TREADDIR] = v9fs_readdir,
3937    [P9_TSTATFS] = v9fs_statfs,
3938    [P9_TGETATTR] = v9fs_getattr,
3939    [P9_TSETATTR] = v9fs_setattr,
3940    [P9_TXATTRWALK] = v9fs_xattrwalk,
3941    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3942    [P9_TMKNOD] = v9fs_mknod,
3943    [P9_TRENAME] = v9fs_rename,
3944    [P9_TLOCK] = v9fs_lock,
3945    [P9_TGETLOCK] = v9fs_getlock,
3946    [P9_TRENAMEAT] = v9fs_renameat,
3947    [P9_TREADLINK] = v9fs_readlink,
3948    [P9_TUNLINKAT] = v9fs_unlinkat,
3949    [P9_TMKDIR] = v9fs_mkdir,
3950    [P9_TVERSION] = v9fs_version,
3951    [P9_TLOPEN] = v9fs_open,
3952    [P9_TATTACH] = v9fs_attach,
3953    [P9_TSTAT] = v9fs_stat,
3954    [P9_TWALK] = v9fs_walk,
3955    [P9_TCLUNK] = v9fs_clunk,
3956    [P9_TFSYNC] = v9fs_fsync,
3957    [P9_TOPEN] = v9fs_open,
3958    [P9_TREAD] = v9fs_read,
3959#if 0
3960    [P9_TAUTH] = v9fs_auth,
3961#endif
3962    [P9_TFLUSH] = v9fs_flush,
3963    [P9_TLINK] = v9fs_link,
3964    [P9_TSYMLINK] = v9fs_symlink,
3965    [P9_TCREATE] = v9fs_create,
3966    [P9_TLCREATE] = v9fs_lcreate,
3967    [P9_TWRITE] = v9fs_write,
3968    [P9_TWSTAT] = v9fs_wstat,
3969    [P9_TREMOVE] = v9fs_remove,
3970};
3971
3972static void coroutine_fn v9fs_op_not_supp(void *opaque)
3973{
3974    V9fsPDU *pdu = opaque;
3975    pdu_complete(pdu, -EOPNOTSUPP);
3976}
3977
3978static void coroutine_fn v9fs_fs_ro(void *opaque)
3979{
3980    V9fsPDU *pdu = opaque;
3981    pdu_complete(pdu, -EROFS);
3982}
3983
3984static inline bool is_read_only_op(V9fsPDU *pdu)
3985{
3986    switch (pdu->id) {
3987    case P9_TREADDIR:
3988    case P9_TSTATFS:
3989    case P9_TGETATTR:
3990    case P9_TXATTRWALK:
3991    case P9_TLOCK:
3992    case P9_TGETLOCK:
3993    case P9_TREADLINK:
3994    case P9_TVERSION:
3995    case P9_TLOPEN:
3996    case P9_TATTACH:
3997    case P9_TSTAT:
3998    case P9_TWALK:
3999    case P9_TCLUNK:
4000    case P9_TFSYNC:
4001    case P9_TOPEN:
4002    case P9_TREAD:
4003    case P9_TAUTH:
4004    case P9_TFLUSH:
4005        return 1;
4006    default:
4007        return 0;
4008    }
4009}
4010
4011void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
4012{
4013    Coroutine *co;
4014    CoroutineEntry *handler;
4015    V9fsState *s = pdu->s;
4016
4017    pdu->size = le32_to_cpu(hdr->size_le);
4018    pdu->id = hdr->id;
4019    pdu->tag = le16_to_cpu(hdr->tag_le);
4020
4021    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4022        (pdu_co_handlers[pdu->id] == NULL)) {
4023        handler = v9fs_op_not_supp;
4024    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4025        handler = v9fs_fs_ro;
4026    } else {
4027        handler = pdu_co_handlers[pdu->id];
4028    }
4029
4030    qemu_co_queue_init(&pdu->complete);
4031    co = qemu_coroutine_create(handler, pdu);
4032    qemu_coroutine_enter(co);
4033}
4034
4035/* Returns 0 on success, 1 on failure. */
4036int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4037                               Error **errp)
4038{
4039    ERRP_GUARD();
4040    int i, len;
4041    struct stat stat;
4042    FsDriverEntry *fse;
4043    V9fsPath path;
4044    int rc = 1;
4045
4046    assert(!s->transport);
4047    s->transport = t;
4048
4049    /* initialize pdu allocator */
4050    QLIST_INIT(&s->free_list);
4051    QLIST_INIT(&s->active_list);
4052    for (i = 0; i < MAX_REQ; i++) {
4053        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4054        s->pdus[i].s = s;
4055        s->pdus[i].idx = i;
4056    }
4057
4058    v9fs_path_init(&path);
4059
4060    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4061
4062    if (!fse) {
4063        /* We don't have a fsdev identified by fsdev_id */
4064        error_setg(errp, "9pfs device couldn't find fsdev with the "
4065                   "id = %s",
4066                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4067        goto out;
4068    }
4069
4070    if (!s->fsconf.tag) {
4071        /* we haven't specified a mount_tag */
4072        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4073                   s->fsconf.fsdev_id);
4074        goto out;
4075    }
4076
4077    s->ctx.export_flags = fse->export_flags;
4078    s->ctx.fs_root = g_strdup(fse->path);
4079    s->ctx.exops.get_st_gen = NULL;
4080    len = strlen(s->fsconf.tag);
4081    if (len > MAX_TAG_LEN - 1) {
4082        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4083                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4084        goto out;
4085    }
4086
4087    s->tag = g_strdup(s->fsconf.tag);
4088    s->ctx.uid = -1;
4089
4090    s->ops = fse->ops;
4091
4092    s->ctx.fmode = fse->fmode;
4093    s->ctx.dmode = fse->dmode;
4094
4095    QSIMPLEQ_INIT(&s->fid_list);
4096    qemu_co_rwlock_init(&s->rename_lock);
4097
4098    if (s->ops->init(&s->ctx, errp) < 0) {
4099        error_prepend(errp, "cannot initialize fsdev '%s': ",
4100                      s->fsconf.fsdev_id);
4101        goto out;
4102    }
4103
4104    /*
4105     * Check details of export path, We need to use fs driver
4106     * call back to do that. Since we are in the init path, we don't
4107     * use co-routines here.
4108     */
4109    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4110        error_setg(errp,
4111                   "error in converting name to path %s", strerror(errno));
4112        goto out;
4113    }
4114    if (s->ops->lstat(&s->ctx, &path, &stat)) {
4115        error_setg(errp, "share path %s does not exist", fse->path);
4116        goto out;
4117    } else if (!S_ISDIR(stat.st_mode)) {
4118        error_setg(errp, "share path %s is not a directory", fse->path);
4119        goto out;
4120    }
4121
4122    s->dev_id = stat.st_dev;
4123
4124    /* init inode remapping : */
4125    /* hash table for variable length inode suffixes */
4126    qpd_table_init(&s->qpd_table);
4127    /* hash table for slow/full inode remapping (most users won't need it) */
4128    qpf_table_init(&s->qpf_table);
4129    /* hash table for quick inode remapping */
4130    qpp_table_init(&s->qpp_table);
4131    s->qp_ndevices = 0;
4132    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4133    s->qp_fullpath_next = 1;
4134
4135    s->ctx.fst = &fse->fst;
4136    fsdev_throttle_init(s->ctx.fst);
4137
4138    rc = 0;
4139out:
4140    if (rc) {
4141        v9fs_device_unrealize_common(s);
4142    }
4143    v9fs_path_free(&path);
4144    return rc;
4145}
4146
4147void v9fs_device_unrealize_common(V9fsState *s)
4148{
4149    if (s->ops && s->ops->cleanup) {
4150        s->ops->cleanup(&s->ctx);
4151    }
4152    if (s->ctx.fst) {
4153        fsdev_throttle_cleanup(s->ctx.fst);
4154    }
4155    g_free(s->tag);
4156    qp_table_destroy(&s->qpd_table);
4157    qp_table_destroy(&s->qpp_table);
4158    qp_table_destroy(&s->qpf_table);
4159    g_free(s->ctx.fs_root);
4160}
4161
/* Argument block passed by v9fs_reset() to the virtfs_co_reset() coroutine. */
typedef struct VirtfsCoResetData {
    V9fsPDU pdu;    /* PDU handed to virtfs_reset(); v9fs_reset() sets only .s */
    bool done;      /* set to true once virtfs_reset() has returned */
} VirtfsCoResetData;
4166
4167static void coroutine_fn virtfs_co_reset(void *opaque)
4168{
4169    VirtfsCoResetData *data = opaque;
4170
4171    virtfs_reset(&data->pdu);
4172    data->done = true;
4173}
4174
4175void v9fs_reset(V9fsState *s)
4176{
4177    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4178    Coroutine *co;
4179
4180    while (!QLIST_EMPTY(&s->active_list)) {
4181        aio_poll(qemu_get_aio_context(), true);
4182    }
4183
4184    co = qemu_coroutine_create(virtfs_co_reset, &data);
4185    qemu_coroutine_enter(co);
4186
4187    while (!data.done) {
4188        aio_poll(qemu_get_aio_context(), true);
4189    }
4190}
4191
4192static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4193{
4194    struct rlimit rlim;
4195    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4196        error_report("Failed to get the resource limit");
4197        exit(1);
4198    }
4199    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
4200    open_fd_rc = rlim.rlim_cur / 2;
4201}
4202