qemu/hw/9pfs/9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include <glib/gprintf.h>
  16#include "hw/virtio/virtio.h"
  17#include "qapi/error.h"
  18#include "qemu/error-report.h"
  19#include "qemu/iov.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/sockets.h"
  22#include "virtio-9p.h"
  23#include "fsdev/qemu-fsdev.h"
  24#include "9p-xattr.h"
  25#include "coth.h"
  26#include "trace.h"
  27#include "migration/blocker.h"
  28#include "sysemu/qtest.h"
  29#include "qemu/xxhash.h"
  30#include <math.h>
  31
/*
 * File-descriptor bookkeeping.
 *
 * open_fd_hw and total_open_fd have external linkage and are not referenced
 * in this part of the file; presumably a high-water mark and a running count
 * of open descriptors -- TODO confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/* v9fs_reclaim_fd() stops queueing fids once it has collected this many. */
static int open_fd_rc;
  35
/*
 * Open mode byte as decoded by omode_to_uflags(): the two low bits select
 * the access mode (Oread/Owrite/Ordwr/Oexec), the remaining values are
 * independent flag bits. Orexec and Orclose are not consulted by
 * omode_to_uflags().
 */
enum {
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
  47
  48static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  49{
  50    ssize_t ret;
  51    va_list ap;
  52
  53    va_start(ap, fmt);
  54    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
  55    va_end(ap);
  56
  57    return ret;
  58}
  59
  60static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  61{
  62    ssize_t ret;
  63    va_list ap;
  64
  65    va_start(ap, fmt);
  66    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
  67    va_end(ap);
  68
  69    return ret;
  70}
  71
  72static int omode_to_uflags(int8_t mode)
  73{
  74    int ret = 0;
  75
  76    switch (mode & 3) {
  77    case Oread:
  78        ret = O_RDONLY;
  79        break;
  80    case Ordwr:
  81        ret = O_RDWR;
  82        break;
  83    case Owrite:
  84        ret = O_WRONLY;
  85        break;
  86    case Oexec:
  87        ret = O_RDONLY;
  88        break;
  89    }
  90
  91    if (mode & Otrunc) {
  92        ret |= O_TRUNC;
  93    }
  94
  95    if (mode & Oappend) {
  96        ret |= O_APPEND;
  97    }
  98
  99    if (mode & Oexcl) {
 100        ret |= O_EXCL;
 101    }
 102
 103    return ret;
 104}
 105
/* One row of the translation table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;  /* P9_DOTL_* bit tested in the incoming flags */
    int open_flag;  /* host open(2) flag OR-ed in when that bit is set */
} DotlOpenflagMap;
 110
 111static int dotl_to_open_flags(int flags)
 112{
 113    int i;
 114    /*
 115     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
 116     * and P9_DOTL_NOACCESS
 117     */
 118    int oflags = flags & O_ACCMODE;
 119
 120    DotlOpenflagMap dotl_oflag_map[] = {
 121        { P9_DOTL_CREATE, O_CREAT },
 122        { P9_DOTL_EXCL, O_EXCL },
 123        { P9_DOTL_NOCTTY , O_NOCTTY },
 124        { P9_DOTL_TRUNC, O_TRUNC },
 125        { P9_DOTL_APPEND, O_APPEND },
 126        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 127        { P9_DOTL_DSYNC, O_DSYNC },
 128        { P9_DOTL_FASYNC, FASYNC },
 129        { P9_DOTL_DIRECT, O_DIRECT },
 130        { P9_DOTL_LARGEFILE, O_LARGEFILE },
 131        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 132        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 133        { P9_DOTL_NOATIME, O_NOATIME },
 134        { P9_DOTL_SYNC, O_SYNC },
 135    };
 136
 137    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 138        if (flags & dotl_oflag_map[i].dotl_flag) {
 139            oflags |= dotl_oflag_map[i].open_flag;
 140        }
 141    }
 142
 143    return oflags;
 144}
 145
 146void cred_init(FsCred *credp)
 147{
 148    credp->fc_uid = -1;
 149    credp->fc_gid = -1;
 150    credp->fc_mode = -1;
 151    credp->fc_rdev = -1;
 152}
 153
 154static int get_dotl_openflags(V9fsState *s, int oflags)
 155{
 156    int flags;
 157    /*
 158     * Filter the client open flags
 159     */
 160    flags = dotl_to_open_flags(oflags);
 161    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 162    /*
 163     * Ignore direct disk access hint until the server supports it.
 164     */
 165    flags &= ~O_DIRECT;
 166    return flags;
 167}
 168
 169void v9fs_path_init(V9fsPath *path)
 170{
 171    path->data = NULL;
 172    path->size = 0;
 173}
 174
 175void v9fs_path_free(V9fsPath *path)
 176{
 177    g_free(path->data);
 178    path->data = NULL;
 179    path->size = 0;
 180}
 181
 182
 183void GCC_FMT_ATTR(2, 3)
 184v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 185{
 186    va_list ap;
 187
 188    v9fs_path_free(path);
 189
 190    va_start(ap, fmt);
 191    /* Bump the size for including terminating NULL */
 192    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
 193    va_end(ap);
 194}
 195
 196void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
 197{
 198    v9fs_path_free(dst);
 199    dst->size = src->size;
 200    dst->data = g_memdup(src->data, src->size);
 201}
 202
 203int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 204                      const char *name, V9fsPath *path)
 205{
 206    int err;
 207    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 208    if (err < 0) {
 209        err = -errno;
 210    }
 211    return err;
 212}
 213
 214/*
 215 * Return TRUE if s1 is an ancestor of s2.
 216 *
 217 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 218 * As a special case, We treat s1 as ancestor of s2 if they are same!
 219 */
 220static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 221{
 222    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 223        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 224            return 1;
 225        }
 226    }
 227    return 0;
 228}
 229
 230static size_t v9fs_string_size(V9fsString *str)
 231{
 232    return str->size;
 233}
 234
 235/*
 236 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 237static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 238{
 239    int err = 1;
 240    if (f->fid_type == P9_FID_FILE) {
 241        if (f->fs.fd == -1) {
 242            do {
 243                err = v9fs_co_open(pdu, f, f->open_flags);
 244            } while (err == -EINTR && !pdu->cancelled);
 245        }
 246    } else if (f->fid_type == P9_FID_DIR) {
 247        if (f->fs.dir.stream == NULL) {
 248            do {
 249                err = v9fs_co_opendir(pdu, f);
 250            } while (err == -EINTR && !pdu->cancelled);
 251        }
 252    }
 253    return err;
 254}
 255
 256static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
 257{
 258    int err;
 259    V9fsFidState *f;
 260    V9fsState *s = pdu->s;
 261
 262    for (f = s->fid_list; f; f = f->next) {
 263        BUG_ON(f->clunked);
 264        if (f->fid == fid) {
 265            /*
 266             * Update the fid ref upfront so that
 267             * we don't get reclaimed when we yield
 268             * in open later.
 269             */
 270            f->ref++;
 271            /*
 272             * check whether we need to reopen the
 273             * file. We might have closed the fd
 274             * while trying to free up some file
 275             * descriptors.
 276             */
 277            err = v9fs_reopen_fid(pdu, f);
 278            if (err < 0) {
 279                f->ref--;
 280                return NULL;
 281            }
 282            /*
 283             * Mark the fid as referenced so that the LRU
 284             * reclaim won't close the file descriptor
 285             */
 286            f->flags |= FID_REFERENCED;
 287            return f;
 288        }
 289    }
 290    return NULL;
 291}
 292
 293static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 294{
 295    V9fsFidState *f;
 296
 297    for (f = s->fid_list; f; f = f->next) {
 298        /* If fid is already there return NULL */
 299        BUG_ON(f->clunked);
 300        if (f->fid == fid) {
 301            return NULL;
 302        }
 303    }
 304    f = g_malloc0(sizeof(V9fsFidState));
 305    f->fid = fid;
 306    f->fid_type = P9_FID_NONE;
 307    f->ref = 1;
 308    /*
 309     * Mark the fid as referenced so that the LRU
 310     * reclaim won't close the file descriptor
 311     */
 312    f->flags |= FID_REFERENCED;
 313    f->next = s->fid_list;
 314    s->fid_list = f;
 315
 316    v9fs_readdir_init(&f->fs.dir);
 317    v9fs_readdir_init(&f->fs_reclaim.dir);
 318
 319    return f;
 320}
 321
 322static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 323{
 324    int retval = 0;
 325
 326    if (fidp->fs.xattr.xattrwalk_fid) {
 327        /* getxattr/listxattr fid */
 328        goto free_value;
 329    }
 330    /*
 331     * if this is fid for setxattr. clunk should
 332     * result in setxattr localcall
 333     */
 334    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 335        /* clunk after partial write */
 336        retval = -EINVAL;
 337        goto free_out;
 338    }
 339    if (fidp->fs.xattr.len) {
 340        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 341                                   fidp->fs.xattr.value,
 342                                   fidp->fs.xattr.len,
 343                                   fidp->fs.xattr.flags);
 344    } else {
 345        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 346    }
 347free_out:
 348    v9fs_string_free(&fidp->fs.xattr.name);
 349free_value:
 350    g_free(fidp->fs.xattr.value);
 351    return retval;
 352}
 353
 354static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 355{
 356    int retval = 0;
 357
 358    if (fidp->fid_type == P9_FID_FILE) {
 359        /* If we reclaimed the fd no need to close */
 360        if (fidp->fs.fd != -1) {
 361            retval = v9fs_co_close(pdu, &fidp->fs);
 362        }
 363    } else if (fidp->fid_type == P9_FID_DIR) {
 364        if (fidp->fs.dir.stream != NULL) {
 365            retval = v9fs_co_closedir(pdu, &fidp->fs);
 366        }
 367    } else if (fidp->fid_type == P9_FID_XATTR) {
 368        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 369    }
 370    v9fs_path_free(&fidp->path);
 371    g_free(fidp);
 372    return retval;
 373}
 374
 375static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 376{
 377    BUG_ON(!fidp->ref);
 378    fidp->ref--;
 379    /*
 380     * Don't free the fid if it is in reclaim list
 381     */
 382    if (!fidp->ref && fidp->clunked) {
 383        if (fidp->fid == pdu->s->root_fid) {
 384            /*
 385             * if the clunked fid is root fid then we
 386             * have unmounted the fs on the client side.
 387             * delete the migration blocker. Ideally, this
 388             * should be hooked to transport close notification
 389             */
 390            if (pdu->s->migration_blocker) {
 391                migrate_del_blocker(pdu->s->migration_blocker);
 392                error_free(pdu->s->migration_blocker);
 393                pdu->s->migration_blocker = NULL;
 394            }
 395        }
 396        return free_fid(pdu, fidp);
 397    }
 398    return 0;
 399}
 400
 401static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 402{
 403    V9fsFidState **fidpp, *fidp;
 404
 405    for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
 406        if ((*fidpp)->fid == fid) {
 407            break;
 408        }
 409    }
 410    if (*fidpp == NULL) {
 411        return NULL;
 412    }
 413    fidp = *fidpp;
 414    *fidpp = fidp->next;
 415    fidp->clunked = 1;
 416    return fidp;
 417}
 418
 419void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
 420{
 421    int reclaim_count = 0;
 422    V9fsState *s = pdu->s;
 423    V9fsFidState *f, *reclaim_list = NULL;
 424
 425    for (f = s->fid_list; f; f = f->next) {
 426        /*
 427         * Unlink fids cannot be reclaimed. Check
 428         * for them and skip them. Also skip fids
 429         * currently being operated on.
 430         */
 431        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
 432            continue;
 433        }
 434        /*
 435         * if it is a recently referenced fid
 436         * we leave the fid untouched and clear the
 437         * reference bit. We come back to it later
 438         * in the next iteration. (a simple LRU without
 439         * moving list elements around)
 440         */
 441        if (f->flags & FID_REFERENCED) {
 442            f->flags &= ~FID_REFERENCED;
 443            continue;
 444        }
 445        /*
 446         * Add fids to reclaim list.
 447         */
 448        if (f->fid_type == P9_FID_FILE) {
 449            if (f->fs.fd != -1) {
 450                /*
 451                 * Up the reference count so that
 452                 * a clunk request won't free this fid
 453                 */
 454                f->ref++;
 455                f->rclm_lst = reclaim_list;
 456                reclaim_list = f;
 457                f->fs_reclaim.fd = f->fs.fd;
 458                f->fs.fd = -1;
 459                reclaim_count++;
 460            }
 461        } else if (f->fid_type == P9_FID_DIR) {
 462            if (f->fs.dir.stream != NULL) {
 463                /*
 464                 * Up the reference count so that
 465                 * a clunk request won't free this fid
 466                 */
 467                f->ref++;
 468                f->rclm_lst = reclaim_list;
 469                reclaim_list = f;
 470                f->fs_reclaim.dir.stream = f->fs.dir.stream;
 471                f->fs.dir.stream = NULL;
 472                reclaim_count++;
 473            }
 474        }
 475        if (reclaim_count >= open_fd_rc) {
 476            break;
 477        }
 478    }
 479    /*
 480     * Now close the fid in reclaim list. Free them if they
 481     * are already clunked.
 482     */
 483    while (reclaim_list) {
 484        f = reclaim_list;
 485        reclaim_list = f->rclm_lst;
 486        if (f->fid_type == P9_FID_FILE) {
 487            v9fs_co_close(pdu, &f->fs_reclaim);
 488        } else if (f->fid_type == P9_FID_DIR) {
 489            v9fs_co_closedir(pdu, &f->fs_reclaim);
 490        }
 491        f->rclm_lst = NULL;
 492        /*
 493         * Now drop the fid reference, free it
 494         * if clunked.
 495         */
 496        put_fid(pdu, f);
 497    }
 498}
 499
 500static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 501{
 502    int err;
 503    V9fsState *s = pdu->s;
 504    V9fsFidState *fidp, head_fid;
 505
 506    head_fid.next = s->fid_list;
 507    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
 508        if (fidp->path.size != path->size) {
 509            continue;
 510        }
 511        if (!memcmp(fidp->path.data, path->data, path->size)) {
 512            /* Mark the fid non reclaimable. */
 513            fidp->flags |= FID_NON_RECLAIMABLE;
 514
 515            /* reopen the file/dir if already closed */
 516            err = v9fs_reopen_fid(pdu, fidp);
 517            if (err < 0) {
 518                return err;
 519            }
 520            /*
 521             * Go back to head of fid list because
 522             * the list could have got updated when
 523             * switched to the worker thread
 524             */
 525            if (err == 0) {
 526                fidp = &head_fid;
 527            }
 528        }
 529    }
 530    return 0;
 531}
 532
 533static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
 534{
 535    V9fsState *s = pdu->s;
 536    V9fsFidState *fidp;
 537
 538    /* Free all fids */
 539    while (s->fid_list) {
 540        /* Get fid */
 541        fidp = s->fid_list;
 542        fidp->ref++;
 543
 544        /* Clunk fid */
 545        s->fid_list = fidp->next;
 546        fidp->clunked = 1;
 547
 548        put_fid(pdu, fidp);
 549    }
 550}
 551
/* qid type bits; stat_to_qid() sets them for directories and symlinks. */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* 9P stat mode bits (their users are outside this part of the file). */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of the mode bits that describe the file type. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
 576
 577/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
 578static inline uint8_t mirror8bit(uint8_t byte)
 579{
 580    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
 581}
 582
 583/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
 584static inline uint64_t mirror64bit(uint64_t value)
 585{
 586    return ((uint64_t)mirror8bit(value         & 0xff) << 56) |
 587           ((uint64_t)mirror8bit((value >> 8)  & 0xff) << 48) |
 588           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
 589           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
 590           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
 591           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
 592           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8)  |
 593           ((uint64_t)mirror8bit((value >> 56) & 0xff));
 594}
 595
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count of the affixes
 * generated by the Exp. Golomb algorithm (at the lowest index), at the
 * price of a higher maximum bit count (at the highest index). Likewise,
 * increasing this parameter yields a smaller maximum bit count at the price
 * of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter, the value of
 * k must not change as long as the guest is still running, because that
 * would cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
 615
 616/**
 617 * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
 618 *
 619 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
 620 * with growing length and with the mathematical property of being
 621 * "prefix-free". The latter means the generated prefixes can be prepended
 622 * in front of arbitrary numbers and the resulting concatenated numbers are
 623 * guaranteed to be always unique.
 624 *
 625 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 626 * sense that lowest allowed index (@param n) starts with 1, not with zero.
 627 *
 628 * @param n - natural number (or index) of the prefix to be generated
 629 *            (1, 2, 3, ...)
 630 * @param k - parameter k of Exp. Golomb algorithm to be used
 631 *            (see comment on EXP_GOLOMB_K macro for details about k)
 632 */
 633static VariLenAffix expGolombEncode(uint64_t n, int k)
 634{
 635    const uint64_t value = n + (1 << k) - 1;
 636    const int bits = (int) log2(value) + 1;
 637    return (VariLenAffix) {
 638        .type = AffixType_Prefix,
 639        .value = value,
 640        .bits = bits + MAX((bits - 1 - k), 0)
 641    };
 642}
 643
 644/**
 645 * @brief Converts a suffix into a prefix, or a prefix into a suffix.
 646 *
 647 * Simply mirror all bits of the affix value, for the purpose to preserve
 648 * respectively the mathematical "prefix-free" or "suffix-free" property
 649 * after the conversion.
 650 *
 651 * If a passed prefix is suitable to create unique numbers, then the
 652 * returned suffix is suitable to create unique numbers as well (and vice
 653 * versa).
 654 */
 655static VariLenAffix invertAffix(const VariLenAffix *affix)
 656{
 657    return (VariLenAffix) {
 658        .type =
 659            (affix->type == AffixType_Suffix) ?
 660                AffixType_Prefix : AffixType_Suffix,
 661        .value =
 662            mirror64bit(affix->value) >>
 663            ((sizeof(affix->value) * 8) - affix->bits),
 664        .bits = affix->bits
 665    };
 666}
 667
 668/**
 669 * @brief Generates suffix numbers with "suffix-free" property.
 670 *
 671 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 672 *
 673 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 674 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 675 * which are still suitable for generating unique numbers.
 676 *
 677 * @param n - natural number (or index) of the suffix to be generated
 678 *            (1, 2, 3, ...)
 679 */
 680static VariLenAffix affixForIndex(uint64_t index)
 681{
 682    VariLenAffix prefix;
 683    prefix = expGolombEncode(index, EXP_GOLOMB_K);
 684    return invertAffix(&prefix); /* convert prefix to suffix */
 685}
 686
 687/* creative abuse of tb_hash_func7, which is based on xxhash */
 688static uint32_t qpp_hash(QppEntry e)
 689{
 690    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
 691}
 692
 693static uint32_t qpf_hash(QpfEntry e)
 694{
 695    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
 696}
 697
 698static bool qpd_cmp_func(const void *obj, const void *userp)
 699{
 700    const QpdEntry *e1 = obj, *e2 = userp;
 701    return e1->dev == e2->dev;
 702}
 703
 704static bool qpp_cmp_func(const void *obj, const void *userp)
 705{
 706    const QppEntry *e1 = obj, *e2 = userp;
 707    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
 708}
 709
 710static bool qpf_cmp_func(const void *obj, const void *userp)
 711{
 712    const QpfEntry *e1 = obj, *e2 = userp;
 713    return e1->dev == e2->dev && e1->ino == e2->ino;
 714}
 715
 716static void qp_table_remove(void *p, uint32_t h, void *up)
 717{
 718    g_free(p);
 719}
 720
 721static void qp_table_destroy(struct qht *ht)
 722{
 723    if (!ht || !ht->map) {
 724        return;
 725    }
 726    qht_iter(ht, qp_table_remove, NULL);
 727    qht_destroy(ht);
 728}
 729
 730static void qpd_table_init(struct qht *ht)
 731{
 732    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 733}
 734
 735static void qpp_table_init(struct qht *ht)
 736{
 737    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 738}
 739
 740static void qpf_table_init(struct qht *ht)
 741{
 742    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
 743}
 744
 745/*
 746 * Returns how many (high end) bits of inode numbers of the passed fs
 747 * device shall be used (in combination with the device number) to
 748 * generate hash values for qpp_table entries.
 749 *
 750 * This function is required if variable length suffixes are used for inode
 751 * number mapping on guest level. Since a device may end up having multiple
 752 * entries in qpp_table, each entry most probably with a different suffix
 753 * length, we thus need this function in conjunction with qpd_table to
 754 * "agree" about a fix amount of bits (per device) to be always used for
 755 * generating hash values for the purpose of accessing qpp_table in order
 756 * get consistent behaviour when accessing qpp_table.
 757 */
 758static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
 759{
 760    QpdEntry lookup = {
 761        .dev = dev
 762    }, *val;
 763    uint32_t hash = dev;
 764    VariLenAffix affix;
 765
 766    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
 767    if (!val) {
 768        val = g_malloc0(sizeof(QpdEntry));
 769        *val = lookup;
 770        affix = affixForIndex(pdu->s->qp_affix_next);
 771        val->prefix_bits = affix.bits;
 772        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
 773        pdu->s->qp_ndevices++;
 774    }
 775    return val->prefix_bits;
 776}
 777
 778/**
 779 * @brief Slow / full mapping host inode nr -> guest inode nr.
 780 *
 781 * This function performs a slower and much more costly remapping of an
 782 * original file inode number on host to an appropriate different inode
 783 * number on guest. For every (dev, inode) combination on host a new
 784 * sequential number is generated, cached and exposed as inode number on
 785 * guest.
 786 *
 787 * This is just a "last resort" fallback solution if the much faster/cheaper
 788 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 789 * expected ever to be used at all though.
 790 *
 791 * @see qid_path_suffixmap() for details
 792 *
 793 */
 794static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
 795                            uint64_t *path)
 796{
 797    QpfEntry lookup = {
 798        .dev = stbuf->st_dev,
 799        .ino = stbuf->st_ino
 800    }, *val;
 801    uint32_t hash = qpf_hash(lookup);
 802    VariLenAffix affix;
 803
 804    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
 805
 806    if (!val) {
 807        if (pdu->s->qp_fullpath_next == 0) {
 808            /* no more files can be mapped :'( */
 809            error_report_once(
 810                "9p: No more prefixes available for remapping inodes from "
 811                "host to guest."
 812            );
 813            return -ENFILE;
 814        }
 815
 816        val = g_malloc0(sizeof(QppEntry));
 817        *val = lookup;
 818
 819        /* new unique inode and device combo */
 820        affix = affixForIndex(
 821            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
 822        );
 823        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
 824        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
 825        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
 826    }
 827
 828    *path = val->path;
 829    return 0;
 830}
 831
 832/**
 833 * @brief Quick mapping host inode nr -> guest inode nr.
 834 *
 835 * This function performs quick remapping of an original file inode number
 836 * on host to an appropriate different inode number on guest. This remapping
 837 * of inodes is required to avoid inode nr collisions on guest which would
 838 * happen if the 9p export contains more than 1 exported file system (or
 839 * more than 1 file system data set), because unlike on host level where the
 840 * files would have different device nrs, all files exported by 9p would
 841 * share the same device nr on guest (the device nr of the virtual 9p device
 842 * that is).
 843 *
 844 * Inode remapping is performed by chopping off high end bits of the original
 845 * inode number from host, shifting the result upwards and then assigning a
 846 * generated suffix number for the low end bits, where the same suffix number
 847 * will be shared by all inodes with the same device id AND the same high end
 848 * bits that have been chopped off. That approach utilizes the fact that inode
 849 * numbers very likely share the same high end bits (i.e. due to their common
 850 * sequential generation by file systems) and hence we only have to generate
 851 * and track a very limited amount of suffixes in practice due to that.
 852 *
 853 * We generate variable size suffixes for that purpose. The 1st generated
 854 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 855 * the original inode number. The subsequent suffixes being generated will
 856 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 857 * generated will have 3 bits and hence we have to chop off 3 bits from their
 858 * original inodes, and so on. That approach of using variable length suffixes
 859 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 860 * limited amount of devices are shared by the same export (e.g. typically
 861 * less than 2 dozen devices per 9p export), so in practice we need to chop
 862 * off less bits than with fixed size prefixes and yet are flexible to add
 863 * new devices at runtime below host's export directory at any time without
 864 * having to reboot guest nor requiring to reconfigure guest for that. And due
 865 * to the very limited amount of original high end bits that we chop off that
 866 * way, the total amount of suffixes we need to generate is less than by using
 867 * fixed size prefixes and hence it also improves performance of the inode
 868 * remapping algorithm, and finally has the nice side effect that the inode
 869 * numbers on guest will be much smaller & human friendly. ;-)
 870 */
 871static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
 872                              uint64_t *path)
 873{
 874    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
 875    QppEntry lookup = {
 876        .dev = stbuf->st_dev,
 877        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
 878    }, *val;
 879    uint32_t hash = qpp_hash(lookup);
 880
 881    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
 882
 883    if (!val) {
 884        if (pdu->s->qp_affix_next == 0) {
 885            /* we ran out of affixes */
 886            warn_report_once(
 887                "9p: Potential degraded performance of inode remapping"
 888            );
 889            return -ENFILE;
 890        }
 891
 892        val = g_malloc0(sizeof(QppEntry));
 893        *val = lookup;
 894
 895        /* new unique inode affix and device combo */
 896        val->qp_affix_index = pdu->s->qp_affix_next++;
 897        val->qp_affix = affixForIndex(val->qp_affix_index);
 898        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
 899    }
 900    /* assuming generated affix to be suffix type, not prefix */
 901    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
 902    return 0;
 903}
 904
 905static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
 906{
 907    int err;
 908    size_t size;
 909
 910    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
 911        /* map inode+device to qid path (fast path) */
 912        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
 913        if (err == -ENFILE) {
 914            /* fast path didn't work, fall back to full map */
 915            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
 916        }
 917        if (err) {
 918            return err;
 919        }
 920    } else {
 921        if (pdu->s->dev_id != stbuf->st_dev) {
 922            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
 923                error_report_once(
 924                    "9p: Multiple devices detected in same VirtFS export. "
 925                    "Access of guest to additional devices is (partly) "
 926                    "denied due to virtfs option 'multidevs=forbid' being "
 927                    "effective."
 928                );
 929                return -ENODEV;
 930            } else {
 931                warn_report_once(
 932                    "9p: Multiple devices detected in same VirtFS export, "
 933                    "which might lead to file ID collisions and severe "
 934                    "misbehaviours on guest! You should either use a "
 935                    "separate export for each device shared from host or "
 936                    "use virtfs option 'multidevs=remap'!"
 937                );
 938            }
 939        }
 940        memset(&qidp->path, 0, sizeof(qidp->path));
 941        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 942        memcpy(&qidp->path, &stbuf->st_ino, size);
 943    }
 944
 945    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 946    qidp->type = 0;
 947    if (S_ISDIR(stbuf->st_mode)) {
 948        qidp->type |= P9_QID_TYPE_DIR;
 949    }
 950    if (S_ISLNK(stbuf->st_mode)) {
 951        qidp->type |= P9_QID_TYPE_SYMLINK;
 952    }
 953
 954    return 0;
 955}
 956
 957static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
 958                                   V9fsQID *qidp)
 959{
 960    struct stat stbuf;
 961    int err;
 962
 963    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 964    if (err < 0) {
 965        return err;
 966    }
 967    err = stat_to_qid(pdu, &stbuf, qidp);
 968    if (err < 0) {
 969        return err;
 970    }
 971    return 0;
 972}
 973
 974static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
 975                                      struct dirent *dent, V9fsQID *qidp)
 976{
 977    struct stat stbuf;
 978    V9fsPath path;
 979    int err;
 980
 981    v9fs_path_init(&path);
 982
 983    err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
 984    if (err < 0) {
 985        goto out;
 986    }
 987    err = v9fs_co_lstat(pdu, &path, &stbuf);
 988    if (err < 0) {
 989        goto out;
 990    }
 991    err = stat_to_qid(pdu, &stbuf, qidp);
 992
 993out:
 994    v9fs_path_free(&path);
 995    return err;
 996}
 997
 998V9fsPDU *pdu_alloc(V9fsState *s)
 999{
1000    V9fsPDU *pdu = NULL;
1001
1002    if (!QLIST_EMPTY(&s->free_list)) {
1003        pdu = QLIST_FIRST(&s->free_list);
1004        QLIST_REMOVE(pdu, next);
1005        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
1006    }
1007    return pdu;
1008}
1009
1010void pdu_free(V9fsPDU *pdu)
1011{
1012    V9fsState *s = pdu->s;
1013
1014    g_assert(!pdu->cancelled);
1015    QLIST_REMOVE(pdu, next);
1016    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1017}
1018
1019static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
1020{
1021    int8_t id = pdu->id + 1; /* Response */
1022    V9fsState *s = pdu->s;
1023    int ret;
1024
1025    /*
1026     * The 9p spec requires that successfully cancelled pdus receive no reply.
1027     * Sending a reply would confuse clients because they would
1028     * assume that any EINTR is the actual result of the operation,
1029     * rather than a consequence of the cancellation. However, if
1030     * the operation completed (succesfully or with an error other
1031     * than caused be cancellation), we do send out that reply, both
1032     * for efficiency and to avoid confusing the rest of the state machine
1033     * that assumes passing a non-error here will mean a successful
1034     * transmission of the reply.
1035     */
1036    bool discard = pdu->cancelled && len == -EINTR;
1037    if (discard) {
1038        trace_v9fs_rcancel(pdu->tag, pdu->id);
1039        pdu->size = 0;
1040        goto out_notify;
1041    }
1042
1043    if (len < 0) {
1044        int err = -len;
1045        len = 7;
1046
1047        if (s->proto_version != V9FS_PROTO_2000L) {
1048            V9fsString str;
1049
1050            str.data = strerror(err);
1051            str.size = strlen(str.data);
1052
1053            ret = pdu_marshal(pdu, len, "s", &str);
1054            if (ret < 0) {
1055                goto out_notify;
1056            }
1057            len += ret;
1058            id = P9_RERROR;
1059        }
1060
1061        ret = pdu_marshal(pdu, len, "d", err);
1062        if (ret < 0) {
1063            goto out_notify;
1064        }
1065        len += ret;
1066
1067        if (s->proto_version == V9FS_PROTO_2000L) {
1068            id = P9_RLERROR;
1069        }
1070        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1071    }
1072
1073    /* fill out the header */
1074    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1075        goto out_notify;
1076    }
1077
1078    /* keep these in sync */
1079    pdu->size = len;
1080    pdu->id = id;
1081
1082out_notify:
1083    pdu->s->transport->push_and_notify(pdu);
1084
1085    /* Now wakeup anybody waiting in flush for this request */
1086    if (!qemu_co_queue_next(&pdu->complete)) {
1087        pdu_free(pdu);
1088    }
1089}
1090
1091static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1092{
1093    mode_t ret;
1094
1095    ret = mode & 0777;
1096    if (mode & P9_STAT_MODE_DIR) {
1097        ret |= S_IFDIR;
1098    }
1099
1100    if (mode & P9_STAT_MODE_SYMLINK) {
1101        ret |= S_IFLNK;
1102    }
1103    if (mode & P9_STAT_MODE_SOCKET) {
1104        ret |= S_IFSOCK;
1105    }
1106    if (mode & P9_STAT_MODE_NAMED_PIPE) {
1107        ret |= S_IFIFO;
1108    }
1109    if (mode & P9_STAT_MODE_DEVICE) {
1110        if (extension->size && extension->data[0] == 'c') {
1111            ret |= S_IFCHR;
1112        } else {
1113            ret |= S_IFBLK;
1114        }
1115    }
1116
1117    if (!(ret&~0777)) {
1118        ret |= S_IFREG;
1119    }
1120
1121    if (mode & P9_STAT_MODE_SETUID) {
1122        ret |= S_ISUID;
1123    }
1124    if (mode & P9_STAT_MODE_SETGID) {
1125        ret |= S_ISGID;
1126    }
1127    if (mode & P9_STAT_MODE_SETVTX) {
1128        ret |= S_ISVTX;
1129    }
1130
1131    return ret;
1132}
1133
1134static int donttouch_stat(V9fsStat *stat)
1135{
1136    if (stat->type == -1 &&
1137        stat->dev == -1 &&
1138        stat->qid.type == 0xff &&
1139        stat->qid.version == (uint32_t) -1 &&
1140        stat->qid.path == (uint64_t) -1 &&
1141        stat->mode == -1 &&
1142        stat->atime == -1 &&
1143        stat->mtime == -1 &&
1144        stat->length == -1 &&
1145        !stat->name.size &&
1146        !stat->uid.size &&
1147        !stat->gid.size &&
1148        !stat->muid.size &&
1149        stat->n_uid == -1 &&
1150        stat->n_gid == -1 &&
1151        stat->n_muid == -1) {
1152        return 1;
1153    }
1154
1155    return 0;
1156}
1157
1158static void v9fs_stat_init(V9fsStat *stat)
1159{
1160    v9fs_string_init(&stat->name);
1161    v9fs_string_init(&stat->uid);
1162    v9fs_string_init(&stat->gid);
1163    v9fs_string_init(&stat->muid);
1164    v9fs_string_init(&stat->extension);
1165}
1166
1167static void v9fs_stat_free(V9fsStat *stat)
1168{
1169    v9fs_string_free(&stat->name);
1170    v9fs_string_free(&stat->uid);
1171    v9fs_string_free(&stat->gid);
1172    v9fs_string_free(&stat->muid);
1173    v9fs_string_free(&stat->extension);
1174}
1175
1176static uint32_t stat_to_v9mode(const struct stat *stbuf)
1177{
1178    uint32_t mode;
1179
1180    mode = stbuf->st_mode & 0777;
1181    if (S_ISDIR(stbuf->st_mode)) {
1182        mode |= P9_STAT_MODE_DIR;
1183    }
1184
1185    if (S_ISLNK(stbuf->st_mode)) {
1186        mode |= P9_STAT_MODE_SYMLINK;
1187    }
1188
1189    if (S_ISSOCK(stbuf->st_mode)) {
1190        mode |= P9_STAT_MODE_SOCKET;
1191    }
1192
1193    if (S_ISFIFO(stbuf->st_mode)) {
1194        mode |= P9_STAT_MODE_NAMED_PIPE;
1195    }
1196
1197    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1198        mode |= P9_STAT_MODE_DEVICE;
1199    }
1200
1201    if (stbuf->st_mode & S_ISUID) {
1202        mode |= P9_STAT_MODE_SETUID;
1203    }
1204
1205    if (stbuf->st_mode & S_ISGID) {
1206        mode |= P9_STAT_MODE_SETGID;
1207    }
1208
1209    if (stbuf->st_mode & S_ISVTX) {
1210        mode |= P9_STAT_MODE_SETVTX;
1211    }
1212
1213    return mode;
1214}
1215
1216static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1217                                       const char *basename,
1218                                       const struct stat *stbuf,
1219                                       V9fsStat *v9stat)
1220{
1221    int err;
1222
1223    memset(v9stat, 0, sizeof(*v9stat));
1224
1225    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1226    if (err < 0) {
1227        return err;
1228    }
1229    v9stat->mode = stat_to_v9mode(stbuf);
1230    v9stat->atime = stbuf->st_atime;
1231    v9stat->mtime = stbuf->st_mtime;
1232    v9stat->length = stbuf->st_size;
1233
1234    v9fs_string_free(&v9stat->uid);
1235    v9fs_string_free(&v9stat->gid);
1236    v9fs_string_free(&v9stat->muid);
1237
1238    v9stat->n_uid = stbuf->st_uid;
1239    v9stat->n_gid = stbuf->st_gid;
1240    v9stat->n_muid = 0;
1241
1242    v9fs_string_free(&v9stat->extension);
1243
1244    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1245        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1246        if (err < 0) {
1247            return err;
1248        }
1249    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1250        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1251                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1252                major(stbuf->st_rdev), minor(stbuf->st_rdev));
1253    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1254        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1255                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1256    }
1257
1258    v9fs_string_sprintf(&v9stat->name, "%s", basename);
1259
1260    v9stat->size = 61 +
1261        v9fs_string_size(&v9stat->name) +
1262        v9fs_string_size(&v9stat->uid) +
1263        v9fs_string_size(&v9stat->gid) +
1264        v9fs_string_size(&v9stat->muid) +
1265        v9fs_string_size(&v9stat->extension);
1266    return 0;
1267}
1268
/* One bit per field in the getattr request/result mask */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

#define P9_STATS_BASIC         ((1ULL << 11) - 1) /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           ((1ULL << 14) - 1) /* Mask for All fields above */
1287
1288
1289static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1290                                V9fsStatDotl *v9lstat)
1291{
1292    memset(v9lstat, 0, sizeof(*v9lstat));
1293
1294    v9lstat->st_mode = stbuf->st_mode;
1295    v9lstat->st_nlink = stbuf->st_nlink;
1296    v9lstat->st_uid = stbuf->st_uid;
1297    v9lstat->st_gid = stbuf->st_gid;
1298    v9lstat->st_rdev = stbuf->st_rdev;
1299    v9lstat->st_size = stbuf->st_size;
1300    v9lstat->st_blksize = stbuf->st_blksize;
1301    v9lstat->st_blocks = stbuf->st_blocks;
1302    v9lstat->st_atime_sec = stbuf->st_atime;
1303    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1304    v9lstat->st_mtime_sec = stbuf->st_mtime;
1305    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1306    v9lstat->st_ctime_sec = stbuf->st_ctime;
1307    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1308    /* Currently we only support BASIC fields in stat */
1309    v9lstat->st_result_mask = P9_STATS_BASIC;
1310
1311    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1312}
1313
/*
 * Debug helper: print the @cnt entries of the scatter/gather list @sg to
 * stdout as "sg[cnt]: {(base, len), ...}".
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, so the matching conversion is %zu */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1327
1328/* Will call this only for path name based fid */
1329static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1330{
1331    V9fsPath str;
1332    v9fs_path_init(&str);
1333    v9fs_path_copy(&str, dst);
1334    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1335    v9fs_path_free(&str);
1336}
1337
1338static inline bool is_ro_export(FsContext *ctx)
1339{
1340    return ctx->export_flags & V9FS_RDONLY;
1341}
1342
1343static void coroutine_fn v9fs_version(void *opaque)
1344{
1345    ssize_t err;
1346    V9fsPDU *pdu = opaque;
1347    V9fsState *s = pdu->s;
1348    V9fsString version;
1349    size_t offset = 7;
1350
1351    v9fs_string_init(&version);
1352    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1353    if (err < 0) {
1354        goto out;
1355    }
1356    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1357
1358    virtfs_reset(pdu);
1359
1360    if (!strcmp(version.data, "9P2000.u")) {
1361        s->proto_version = V9FS_PROTO_2000U;
1362    } else if (!strcmp(version.data, "9P2000.L")) {
1363        s->proto_version = V9FS_PROTO_2000L;
1364    } else {
1365        v9fs_string_sprintf(&version, "unknown");
1366    }
1367
1368    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1369    if (err < 0) {
1370        goto out;
1371    }
1372    err += offset;
1373    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1374out:
1375    pdu_complete(pdu, err);
1376    v9fs_string_free(&version);
1377}
1378
1379static void coroutine_fn v9fs_attach(void *opaque)
1380{
1381    V9fsPDU *pdu = opaque;
1382    V9fsState *s = pdu->s;
1383    int32_t fid, afid, n_uname;
1384    V9fsString uname, aname;
1385    V9fsFidState *fidp;
1386    size_t offset = 7;
1387    V9fsQID qid;
1388    ssize_t err;
1389    Error *local_err = NULL;
1390
1391    v9fs_string_init(&uname);
1392    v9fs_string_init(&aname);
1393    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1394                        &afid, &uname, &aname, &n_uname);
1395    if (err < 0) {
1396        goto out_nofid;
1397    }
1398    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1399
1400    fidp = alloc_fid(s, fid);
1401    if (fidp == NULL) {
1402        err = -EINVAL;
1403        goto out_nofid;
1404    }
1405    fidp->uid = n_uname;
1406    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1407    if (err < 0) {
1408        err = -EINVAL;
1409        clunk_fid(s, fid);
1410        goto out;
1411    }
1412    err = fid_to_qid(pdu, fidp, &qid);
1413    if (err < 0) {
1414        err = -EINVAL;
1415        clunk_fid(s, fid);
1416        goto out;
1417    }
1418
1419    /*
1420     * disable migration if we haven't done already.
1421     * attach could get called multiple times for the same export.
1422     */
1423    if (!s->migration_blocker) {
1424        error_setg(&s->migration_blocker,
1425                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1426                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1427        err = migrate_add_blocker(s->migration_blocker, &local_err);
1428        if (local_err) {
1429            error_free(local_err);
1430            error_free(s->migration_blocker);
1431            s->migration_blocker = NULL;
1432            clunk_fid(s, fid);
1433            goto out;
1434        }
1435        s->root_fid = fid;
1436    }
1437
1438    err = pdu_marshal(pdu, offset, "Q", &qid);
1439    if (err < 0) {
1440        clunk_fid(s, fid);
1441        goto out;
1442    }
1443    err += offset;
1444
1445    memcpy(&s->root_qid, &qid, sizeof(qid));
1446    trace_v9fs_attach_return(pdu->tag, pdu->id,
1447                             qid.type, qid.version, qid.path);
1448out:
1449    put_fid(pdu, fidp);
1450out_nofid:
1451    pdu_complete(pdu, err);
1452    v9fs_string_free(&uname);
1453    v9fs_string_free(&aname);
1454}
1455
1456static void coroutine_fn v9fs_stat(void *opaque)
1457{
1458    int32_t fid;
1459    V9fsStat v9stat;
1460    ssize_t err = 0;
1461    size_t offset = 7;
1462    struct stat stbuf;
1463    V9fsFidState *fidp;
1464    V9fsPDU *pdu = opaque;
1465    char *basename;
1466
1467    err = pdu_unmarshal(pdu, offset, "d", &fid);
1468    if (err < 0) {
1469        goto out_nofid;
1470    }
1471    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1472
1473    fidp = get_fid(pdu, fid);
1474    if (fidp == NULL) {
1475        err = -ENOENT;
1476        goto out_nofid;
1477    }
1478    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1479    if (err < 0) {
1480        goto out;
1481    }
1482    basename = g_path_get_basename(fidp->path.data);
1483    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1484    g_free(basename);
1485    if (err < 0) {
1486        goto out;
1487    }
1488    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1489    if (err < 0) {
1490        v9fs_stat_free(&v9stat);
1491        goto out;
1492    }
1493    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1494                           v9stat.atime, v9stat.mtime, v9stat.length);
1495    err += offset;
1496    v9fs_stat_free(&v9stat);
1497out:
1498    put_fid(pdu, fidp);
1499out_nofid:
1500    pdu_complete(pdu, err);
1501}
1502
1503static void coroutine_fn v9fs_getattr(void *opaque)
1504{
1505    int32_t fid;
1506    size_t offset = 7;
1507    ssize_t retval = 0;
1508    struct stat stbuf;
1509    V9fsFidState *fidp;
1510    uint64_t request_mask;
1511    V9fsStatDotl v9stat_dotl;
1512    V9fsPDU *pdu = opaque;
1513
1514    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1515    if (retval < 0) {
1516        goto out_nofid;
1517    }
1518    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1519
1520    fidp = get_fid(pdu, fid);
1521    if (fidp == NULL) {
1522        retval = -ENOENT;
1523        goto out_nofid;
1524    }
1525    /*
1526     * Currently we only support BASIC fields in stat, so there is no
1527     * need to look at request_mask.
1528     */
1529    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1530    if (retval < 0) {
1531        goto out;
1532    }
1533    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1534    if (retval < 0) {
1535        goto out;
1536    }
1537
1538    /*  fill st_gen if requested and supported by underlying fs */
1539    if (request_mask & P9_STATS_GEN) {
1540        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1541        switch (retval) {
1542        case 0:
1543            /* we have valid st_gen: update result mask */
1544            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1545            break;
1546        case -EINTR:
1547            /* request cancelled, e.g. by Tflush */
1548            goto out;
1549        default:
1550            /* failed to get st_gen: not fatal, ignore */
1551            break;
1552        }
1553    }
1554    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1555    if (retval < 0) {
1556        goto out;
1557    }
1558    retval += offset;
1559    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1560                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1561                              v9stat_dotl.st_gid);
1562out:
1563    put_fid(pdu, fidp);
1564out_nofid:
1565    pdu_complete(pdu, retval);
1566}
1567
/* Attribute flags: bits of the 'valid' field of a setattr request */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* covers MODE through CTIME, i.e. not the two *_SET bits */
#define P9_ATTR_MASK    0x07f
1580
1581static void coroutine_fn v9fs_setattr(void *opaque)
1582{
1583    int err = 0;
1584    int32_t fid;
1585    V9fsFidState *fidp;
1586    size_t offset = 7;
1587    V9fsIattr v9iattr;
1588    V9fsPDU *pdu = opaque;
1589
1590    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1591    if (err < 0) {
1592        goto out_nofid;
1593    }
1594
1595    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1596                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1597                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1598
1599    fidp = get_fid(pdu, fid);
1600    if (fidp == NULL) {
1601        err = -EINVAL;
1602        goto out_nofid;
1603    }
1604    if (v9iattr.valid & P9_ATTR_MODE) {
1605        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1606        if (err < 0) {
1607            goto out;
1608        }
1609    }
1610    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1611        struct timespec times[2];
1612        if (v9iattr.valid & P9_ATTR_ATIME) {
1613            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1614                times[0].tv_sec = v9iattr.atime_sec;
1615                times[0].tv_nsec = v9iattr.atime_nsec;
1616            } else {
1617                times[0].tv_nsec = UTIME_NOW;
1618            }
1619        } else {
1620            times[0].tv_nsec = UTIME_OMIT;
1621        }
1622        if (v9iattr.valid & P9_ATTR_MTIME) {
1623            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1624                times[1].tv_sec = v9iattr.mtime_sec;
1625                times[1].tv_nsec = v9iattr.mtime_nsec;
1626            } else {
1627                times[1].tv_nsec = UTIME_NOW;
1628            }
1629        } else {
1630            times[1].tv_nsec = UTIME_OMIT;
1631        }
1632        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1633        if (err < 0) {
1634            goto out;
1635        }
1636    }
1637    /*
1638     * If the only valid entry in iattr is ctime we can call
1639     * chown(-1,-1) to update the ctime of the file
1640     */
1641    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1642        ((v9iattr.valid & P9_ATTR_CTIME)
1643         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1644        if (!(v9iattr.valid & P9_ATTR_UID)) {
1645            v9iattr.uid = -1;
1646        }
1647        if (!(v9iattr.valid & P9_ATTR_GID)) {
1648            v9iattr.gid = -1;
1649        }
1650        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1651                            v9iattr.gid);
1652        if (err < 0) {
1653            goto out;
1654        }
1655    }
1656    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1657        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1658        if (err < 0) {
1659            goto out;
1660        }
1661    }
1662    err = offset;
1663    trace_v9fs_setattr_return(pdu->tag, pdu->id);
1664out:
1665    put_fid(pdu, fidp);
1666out_nofid:
1667    pdu_complete(pdu, err);
1668}
1669
1670static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1671{
1672    int i;
1673    ssize_t err;
1674    size_t offset = 7;
1675
1676    err = pdu_marshal(pdu, offset, "w", nwnames);
1677    if (err < 0) {
1678        return err;
1679    }
1680    offset += err;
1681    for (i = 0; i < nwnames; i++) {
1682        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1683        if (err < 0) {
1684            return err;
1685        }
1686        offset += err;
1687    }
1688    return offset;
1689}
1690
/* A name is illegal when it is empty or contains a '/' character. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1695
1696static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1697{
1698    return
1699        qid1->type != qid2->type ||
1700        qid1->version != qid2->version ||
1701        qid1->path != qid2->path;
1702}
1703
1704static void coroutine_fn v9fs_walk(void *opaque)
1705{
1706    int name_idx;
1707    V9fsQID *qids = NULL;
1708    int i, err = 0;
1709    V9fsPath dpath, path;
1710    uint16_t nwnames;
1711    struct stat stbuf;
1712    size_t offset = 7;
1713    int32_t fid, newfid;
1714    V9fsString *wnames = NULL;
1715    V9fsFidState *fidp;
1716    V9fsFidState *newfidp = NULL;
1717    V9fsPDU *pdu = opaque;
1718    V9fsState *s = pdu->s;
1719    V9fsQID qid;
1720
1721    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1722    if (err < 0) {
1723        pdu_complete(pdu, err);
1724        return ;
1725    }
1726    offset += err;
1727
1728    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1729
1730    if (nwnames && nwnames <= P9_MAXWELEM) {
1731        wnames = g_new0(V9fsString, nwnames);
1732        qids   = g_new0(V9fsQID, nwnames);
1733        for (i = 0; i < nwnames; i++) {
1734            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1735            if (err < 0) {
1736                goto out_nofid;
1737            }
1738            if (name_is_illegal(wnames[i].data)) {
1739                err = -ENOENT;
1740                goto out_nofid;
1741            }
1742            offset += err;
1743        }
1744    } else if (nwnames > P9_MAXWELEM) {
1745        err = -EINVAL;
1746        goto out_nofid;
1747    }
1748    fidp = get_fid(pdu, fid);
1749    if (fidp == NULL) {
1750        err = -ENOENT;
1751        goto out_nofid;
1752    }
1753
1754    v9fs_path_init(&dpath);
1755    v9fs_path_init(&path);
1756
1757    err = fid_to_qid(pdu, fidp, &qid);
1758    if (err < 0) {
1759        goto out;
1760    }
1761
1762    /*
1763     * Both dpath and path initially poin to fidp.
1764     * Needed to handle request with nwnames == 0
1765     */
1766    v9fs_path_copy(&dpath, &fidp->path);
1767    v9fs_path_copy(&path, &fidp->path);
1768    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1769        if (not_same_qid(&pdu->s->root_qid, &qid) ||
1770            strcmp("..", wnames[name_idx].data)) {
1771            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
1772                                       &path);
1773            if (err < 0) {
1774                goto out;
1775            }
1776
1777            err = v9fs_co_lstat(pdu, &path, &stbuf);
1778            if (err < 0) {
1779                goto out;
1780            }
1781            err = stat_to_qid(pdu, &stbuf, &qid);
1782            if (err < 0) {
1783                goto out;
1784            }
1785            v9fs_path_copy(&dpath, &path);
1786        }
1787        memcpy(&qids[name_idx], &qid, sizeof(qid));
1788    }
1789    if (fid == newfid) {
1790        if (fidp->fid_type != P9_FID_NONE) {
1791            err = -EINVAL;
1792            goto out;
1793        }
1794        v9fs_path_write_lock(s);
1795        v9fs_path_copy(&fidp->path, &path);
1796        v9fs_path_unlock(s);
1797    } else {
1798        newfidp = alloc_fid(s, newfid);
1799        if (newfidp == NULL) {
1800            err = -EINVAL;
1801            goto out;
1802        }
1803        newfidp->uid = fidp->uid;
1804        v9fs_path_copy(&newfidp->path, &path);
1805    }
1806    err = v9fs_walk_marshal(pdu, nwnames, qids);
1807    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1808out:
1809    put_fid(pdu, fidp);
1810    if (newfidp) {
1811        put_fid(pdu, newfidp);
1812    }
1813    v9fs_path_free(&dpath);
1814    v9fs_path_free(&path);
1815out_nofid:
1816    pdu_complete(pdu, err);
1817    if (nwnames && nwnames <= P9_MAXWELEM) {
1818        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1819            v9fs_string_free(&wnames[name_idx]);
1820        }
1821        g_free(wnames);
1822        g_free(qids);
1823    }
1824}
1825
1826static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1827{
1828    struct statfs stbuf;
1829    int32_t iounit = 0;
1830    V9fsState *s = pdu->s;
1831
1832    /*
1833     * iounit should be multiples of f_bsize (host filesystem block size
1834     * and as well as less than (client msize - P9_IOHDRSZ))
1835     */
1836    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1837        if (stbuf.f_bsize) {
1838            iounit = stbuf.f_bsize;
1839            iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1840        }
1841    }
1842    if (!iounit) {
1843        iounit = s->msize - P9_IOHDRSZ;
1844    }
1845    return iounit;
1846}
1847
1848static void coroutine_fn v9fs_open(void *opaque)
1849{
1850    int flags;
1851    int32_t fid;
1852    int32_t mode;
1853    V9fsQID qid;
1854    int iounit = 0;
1855    ssize_t err = 0;
1856    size_t offset = 7;
1857    struct stat stbuf;
1858    V9fsFidState *fidp;
1859    V9fsPDU *pdu = opaque;
1860    V9fsState *s = pdu->s;
1861
1862    if (s->proto_version == V9FS_PROTO_2000L) {
1863        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1864    } else {
1865        uint8_t modebyte;
1866        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1867        mode = modebyte;
1868    }
1869    if (err < 0) {
1870        goto out_nofid;
1871    }
1872    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1873
1874    fidp = get_fid(pdu, fid);
1875    if (fidp == NULL) {
1876        err = -ENOENT;
1877        goto out_nofid;
1878    }
1879    if (fidp->fid_type != P9_FID_NONE) {
1880        err = -EINVAL;
1881        goto out;
1882    }
1883
1884    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1885    if (err < 0) {
1886        goto out;
1887    }
1888    err = stat_to_qid(pdu, &stbuf, &qid);
1889    if (err < 0) {
1890        goto out;
1891    }
1892    if (S_ISDIR(stbuf.st_mode)) {
1893        err = v9fs_co_opendir(pdu, fidp);
1894        if (err < 0) {
1895            goto out;
1896        }
1897        fidp->fid_type = P9_FID_DIR;
1898        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1899        if (err < 0) {
1900            goto out;
1901        }
1902        err += offset;
1903    } else {
1904        if (s->proto_version == V9FS_PROTO_2000L) {
1905            flags = get_dotl_openflags(s, mode);
1906        } else {
1907            flags = omode_to_uflags(mode);
1908        }
1909        if (is_ro_export(&s->ctx)) {
1910            if (mode & O_WRONLY || mode & O_RDWR ||
1911                mode & O_APPEND || mode & O_TRUNC) {
1912                err = -EROFS;
1913                goto out;
1914            }
1915        }
1916        err = v9fs_co_open(pdu, fidp, flags);
1917        if (err < 0) {
1918            goto out;
1919        }
1920        fidp->fid_type = P9_FID_FILE;
1921        fidp->open_flags = flags;
1922        if (flags & O_EXCL) {
1923            /*
1924             * We let the host file system do O_EXCL check
1925             * We should not reclaim such fd
1926             */
1927            fidp->flags |= FID_NON_RECLAIMABLE;
1928        }
1929        iounit = get_iounit(pdu, &fidp->path);
1930        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1931        if (err < 0) {
1932            goto out;
1933        }
1934        err += offset;
1935    }
1936    trace_v9fs_open_return(pdu->tag, pdu->id,
1937                           qid.type, qid.version, qid.path, iounit);
1938out:
1939    put_fid(pdu, fidp);
1940out_nofid:
1941    pdu_complete(pdu, err);
1942}
1943
1944static void coroutine_fn v9fs_lcreate(void *opaque)
1945{
1946    int32_t dfid, flags, mode;
1947    gid_t gid;
1948    ssize_t err = 0;
1949    ssize_t offset = 7;
1950    V9fsString name;
1951    V9fsFidState *fidp;
1952    struct stat stbuf;
1953    V9fsQID qid;
1954    int32_t iounit;
1955    V9fsPDU *pdu = opaque;
1956
1957    v9fs_string_init(&name);
1958    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1959                        &name, &flags, &mode, &gid);
1960    if (err < 0) {
1961        goto out_nofid;
1962    }
1963    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1964
1965    if (name_is_illegal(name.data)) {
1966        err = -ENOENT;
1967        goto out_nofid;
1968    }
1969
1970    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1971        err = -EEXIST;
1972        goto out_nofid;
1973    }
1974
1975    fidp = get_fid(pdu, dfid);
1976    if (fidp == NULL) {
1977        err = -ENOENT;
1978        goto out_nofid;
1979    }
1980    if (fidp->fid_type != P9_FID_NONE) {
1981        err = -EINVAL;
1982        goto out;
1983    }
1984
1985    flags = get_dotl_openflags(pdu->s, flags);
1986    err = v9fs_co_open2(pdu, fidp, &name, gid,
1987                        flags | O_CREAT, mode, &stbuf);
1988    if (err < 0) {
1989        goto out;
1990    }
1991    fidp->fid_type = P9_FID_FILE;
1992    fidp->open_flags = flags;
1993    if (flags & O_EXCL) {
1994        /*
1995         * We let the host file system do O_EXCL check
1996         * We should not reclaim such fd
1997         */
1998        fidp->flags |= FID_NON_RECLAIMABLE;
1999    }
2000    iounit =  get_iounit(pdu, &fidp->path);
2001    err = stat_to_qid(pdu, &stbuf, &qid);
2002    if (err < 0) {
2003        goto out;
2004    }
2005    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2006    if (err < 0) {
2007        goto out;
2008    }
2009    err += offset;
2010    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2011                              qid.type, qid.version, qid.path, iounit);
2012out:
2013    put_fid(pdu, fidp);
2014out_nofid:
2015    pdu_complete(pdu, err);
2016    v9fs_string_free(&name);
2017}
2018
2019static void coroutine_fn v9fs_fsync(void *opaque)
2020{
2021    int err;
2022    int32_t fid;
2023    int datasync;
2024    size_t offset = 7;
2025    V9fsFidState *fidp;
2026    V9fsPDU *pdu = opaque;
2027
2028    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2029    if (err < 0) {
2030        goto out_nofid;
2031    }
2032    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2033
2034    fidp = get_fid(pdu, fid);
2035    if (fidp == NULL) {
2036        err = -ENOENT;
2037        goto out_nofid;
2038    }
2039    err = v9fs_co_fsync(pdu, fidp, datasync);
2040    if (!err) {
2041        err = offset;
2042    }
2043    put_fid(pdu, fidp);
2044out_nofid:
2045    pdu_complete(pdu, err);
2046}
2047
2048static void coroutine_fn v9fs_clunk(void *opaque)
2049{
2050    int err;
2051    int32_t fid;
2052    size_t offset = 7;
2053    V9fsFidState *fidp;
2054    V9fsPDU *pdu = opaque;
2055    V9fsState *s = pdu->s;
2056
2057    err = pdu_unmarshal(pdu, offset, "d", &fid);
2058    if (err < 0) {
2059        goto out_nofid;
2060    }
2061    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2062
2063    fidp = clunk_fid(s, fid);
2064    if (fidp == NULL) {
2065        err = -ENOENT;
2066        goto out_nofid;
2067    }
2068    /*
2069     * Bump the ref so that put_fid will
2070     * free the fid.
2071     */
2072    fidp->ref++;
2073    err = put_fid(pdu, fidp);
2074    if (!err) {
2075        err = offset;
2076    }
2077out_nofid:
2078    pdu_complete(pdu, err);
2079}
2080
2081/*
2082 * Create a QEMUIOVector for a sub-region of PDU iovecs
2083 *
2084 * @qiov:       uninitialized QEMUIOVector
2085 * @skip:       number of bytes to skip from beginning of PDU
2086 * @size:       number of bytes to include
2087 * @is_write:   true - write, false - read
2088 *
2089 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2090 * with qemu_iovec_destroy().
2091 */
2092static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2093                                    size_t skip, size_t size,
2094                                    bool is_write)
2095{
2096    QEMUIOVector elem;
2097    struct iovec *iov;
2098    unsigned int niov;
2099
2100    if (is_write) {
2101        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2102    } else {
2103        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2104    }
2105
2106    qemu_iovec_init_external(&elem, iov, niov);
2107    qemu_iovec_init(qiov, niov);
2108    qemu_iovec_concat(qiov, &elem, skip, size);
2109}
2110
2111static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2112                           uint64_t off, uint32_t max_count)
2113{
2114    ssize_t err;
2115    size_t offset = 7;
2116    uint64_t read_count;
2117    QEMUIOVector qiov_full;
2118
2119    if (fidp->fs.xattr.len < off) {
2120        read_count = 0;
2121    } else {
2122        read_count = fidp->fs.xattr.len - off;
2123    }
2124    if (read_count > max_count) {
2125        read_count = max_count;
2126    }
2127    err = pdu_marshal(pdu, offset, "d", read_count);
2128    if (err < 0) {
2129        return err;
2130    }
2131    offset += err;
2132
2133    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2134    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2135                    ((char *)fidp->fs.xattr.value) + off,
2136                    read_count);
2137    qemu_iovec_destroy(&qiov_full);
2138    if (err < 0) {
2139        return err;
2140    }
2141    offset += err;
2142    return offset;
2143}
2144
2145static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2146                                                  V9fsFidState *fidp,
2147                                                  uint32_t max_count)
2148{
2149    V9fsPath path;
2150    V9fsStat v9stat;
2151    int len, err = 0;
2152    int32_t count = 0;
2153    struct stat stbuf;
2154    off_t saved_dir_pos;
2155    struct dirent *dent;
2156
2157    /* save the directory position */
2158    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2159    if (saved_dir_pos < 0) {
2160        return saved_dir_pos;
2161    }
2162
2163    while (1) {
2164        v9fs_path_init(&path);
2165
2166        v9fs_readdir_lock(&fidp->fs.dir);
2167
2168        err = v9fs_co_readdir(pdu, fidp, &dent);
2169        if (err || !dent) {
2170            break;
2171        }
2172        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2173        if (err < 0) {
2174            break;
2175        }
2176        err = v9fs_co_lstat(pdu, &path, &stbuf);
2177        if (err < 0) {
2178            break;
2179        }
2180        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2181        if (err < 0) {
2182            break;
2183        }
2184        if ((count + v9stat.size + 2) > max_count) {
2185            v9fs_readdir_unlock(&fidp->fs.dir);
2186
2187            /* Ran out of buffer. Set dir back to old position and return */
2188            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2189            v9fs_stat_free(&v9stat);
2190            v9fs_path_free(&path);
2191            return count;
2192        }
2193
2194        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2195        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2196
2197        v9fs_readdir_unlock(&fidp->fs.dir);
2198
2199        if (len < 0) {
2200            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2201            v9fs_stat_free(&v9stat);
2202            v9fs_path_free(&path);
2203            return len;
2204        }
2205        count += len;
2206        v9fs_stat_free(&v9stat);
2207        v9fs_path_free(&path);
2208        saved_dir_pos = dent->d_off;
2209    }
2210
2211    v9fs_readdir_unlock(&fidp->fs.dir);
2212
2213    v9fs_path_free(&path);
2214    if (err < 0) {
2215        return err;
2216    }
2217    return count;
2218}
2219
2220static void coroutine_fn v9fs_read(void *opaque)
2221{
2222    int32_t fid;
2223    uint64_t off;
2224    ssize_t err = 0;
2225    int32_t count = 0;
2226    size_t offset = 7;
2227    uint32_t max_count;
2228    V9fsFidState *fidp;
2229    V9fsPDU *pdu = opaque;
2230    V9fsState *s = pdu->s;
2231
2232    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2233    if (err < 0) {
2234        goto out_nofid;
2235    }
2236    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2237
2238    fidp = get_fid(pdu, fid);
2239    if (fidp == NULL) {
2240        err = -EINVAL;
2241        goto out_nofid;
2242    }
2243    if (fidp->fid_type == P9_FID_DIR) {
2244
2245        if (off == 0) {
2246            v9fs_co_rewinddir(pdu, fidp);
2247        }
2248        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2249        if (count < 0) {
2250            err = count;
2251            goto out;
2252        }
2253        err = pdu_marshal(pdu, offset, "d", count);
2254        if (err < 0) {
2255            goto out;
2256        }
2257        err += offset + count;
2258    } else if (fidp->fid_type == P9_FID_FILE) {
2259        QEMUIOVector qiov_full;
2260        QEMUIOVector qiov;
2261        int32_t len;
2262
2263        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2264        qemu_iovec_init(&qiov, qiov_full.niov);
2265        do {
2266            qemu_iovec_reset(&qiov);
2267            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2268            if (0) {
2269                print_sg(qiov.iov, qiov.niov);
2270            }
2271            /* Loop in case of EINTR */
2272            do {
2273                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2274                if (len >= 0) {
2275                    off   += len;
2276                    count += len;
2277                }
2278            } while (len == -EINTR && !pdu->cancelled);
2279            if (len < 0) {
2280                /* IO error return the error */
2281                err = len;
2282                goto out_free_iovec;
2283            }
2284        } while (count < max_count && len > 0);
2285        err = pdu_marshal(pdu, offset, "d", count);
2286        if (err < 0) {
2287            goto out_free_iovec;
2288        }
2289        err += offset + count;
2290out_free_iovec:
2291        qemu_iovec_destroy(&qiov);
2292        qemu_iovec_destroy(&qiov_full);
2293    } else if (fidp->fid_type == P9_FID_XATTR) {
2294        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2295    } else {
2296        err = -EINVAL;
2297    }
2298    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2299out:
2300    put_fid(pdu, fidp);
2301out_nofid:
2302    pdu_complete(pdu, err);
2303}
2304
2305static size_t v9fs_readdir_data_size(V9fsString *name)
2306{
2307    /*
2308     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2309     * size of type (1) + size of name.size (2) + strlen(name.data)
2310     */
2311    return 24 + v9fs_string_size(name);
2312}
2313
2314static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2315                                        int32_t max_count)
2316{
2317    size_t size;
2318    V9fsQID qid;
2319    V9fsString name;
2320    int len, err = 0;
2321    int32_t count = 0;
2322    off_t saved_dir_pos;
2323    struct dirent *dent;
2324
2325    /* save the directory position */
2326    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2327    if (saved_dir_pos < 0) {
2328        return saved_dir_pos;
2329    }
2330
2331    while (1) {
2332        v9fs_readdir_lock(&fidp->fs.dir);
2333
2334        err = v9fs_co_readdir(pdu, fidp, &dent);
2335        if (err || !dent) {
2336            break;
2337        }
2338        v9fs_string_init(&name);
2339        v9fs_string_sprintf(&name, "%s", dent->d_name);
2340        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
2341            v9fs_readdir_unlock(&fidp->fs.dir);
2342
2343            /* Ran out of buffer. Set dir back to old position and return */
2344            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2345            v9fs_string_free(&name);
2346            return count;
2347        }
2348
2349        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2350            /*
2351             * dirent_to_qid() implies expensive stat call for each entry,
2352             * we must do that here though since inode remapping requires
2353             * the device id, which in turn might be different for
2354             * different entries; we cannot make any assumption to avoid
2355             * that here.
2356             */
2357            err = dirent_to_qid(pdu, fidp, dent, &qid);
2358            if (err < 0) {
2359                v9fs_readdir_unlock(&fidp->fs.dir);
2360                v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2361                v9fs_string_free(&name);
2362                return err;
2363            }
2364        } else {
2365            /*
2366             * Fill up just the path field of qid because the client uses
2367             * only that. To fill the entire qid structure we will have
2368             * to stat each dirent found, which is expensive. For the
2369             * latter reason we don't call dirent_to_qid() here. Only drawback
2370             * is that no multi-device export detection of stat_to_qid()
2371             * would be done and provided as error to the user here. But
2372             * user would get that error anyway when accessing those
2373             * files/dirs through other ways.
2374             */
2375            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2376            memcpy(&qid.path, &dent->d_ino, size);
2377            /* Fill the other fields with dummy values */
2378            qid.type = 0;
2379            qid.version = 0;
2380        }
2381
2382        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2383        len = pdu_marshal(pdu, 11 + count, "Qqbs",
2384                          &qid, dent->d_off,
2385                          dent->d_type, &name);
2386
2387        v9fs_readdir_unlock(&fidp->fs.dir);
2388
2389        if (len < 0) {
2390            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2391            v9fs_string_free(&name);
2392            return len;
2393        }
2394        count += len;
2395        v9fs_string_free(&name);
2396        saved_dir_pos = dent->d_off;
2397    }
2398
2399    v9fs_readdir_unlock(&fidp->fs.dir);
2400
2401    if (err < 0) {
2402        return err;
2403    }
2404    return count;
2405}
2406
2407static void coroutine_fn v9fs_readdir(void *opaque)
2408{
2409    int32_t fid;
2410    V9fsFidState *fidp;
2411    ssize_t retval = 0;
2412    size_t offset = 7;
2413    uint64_t initial_offset;
2414    int32_t count;
2415    uint32_t max_count;
2416    V9fsPDU *pdu = opaque;
2417
2418    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2419                           &initial_offset, &max_count);
2420    if (retval < 0) {
2421        goto out_nofid;
2422    }
2423    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2424
2425    fidp = get_fid(pdu, fid);
2426    if (fidp == NULL) {
2427        retval = -EINVAL;
2428        goto out_nofid;
2429    }
2430    if (!fidp->fs.dir.stream) {
2431        retval = -EINVAL;
2432        goto out;
2433    }
2434    if (initial_offset == 0) {
2435        v9fs_co_rewinddir(pdu, fidp);
2436    } else {
2437        v9fs_co_seekdir(pdu, fidp, initial_offset);
2438    }
2439    count = v9fs_do_readdir(pdu, fidp, max_count);
2440    if (count < 0) {
2441        retval = count;
2442        goto out;
2443    }
2444    retval = pdu_marshal(pdu, offset, "d", count);
2445    if (retval < 0) {
2446        goto out;
2447    }
2448    retval += count + offset;
2449    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2450out:
2451    put_fid(pdu, fidp);
2452out_nofid:
2453    pdu_complete(pdu, retval);
2454}
2455
2456static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2457                            uint64_t off, uint32_t count,
2458                            struct iovec *sg, int cnt)
2459{
2460    int i, to_copy;
2461    ssize_t err = 0;
2462    uint64_t write_count;
2463    size_t offset = 7;
2464
2465
2466    if (fidp->fs.xattr.len < off) {
2467        err = -ENOSPC;
2468        goto out;
2469    }
2470    write_count = fidp->fs.xattr.len - off;
2471    if (write_count > count) {
2472        write_count = count;
2473    }
2474    err = pdu_marshal(pdu, offset, "d", write_count);
2475    if (err < 0) {
2476        return err;
2477    }
2478    err += offset;
2479    fidp->fs.xattr.copied_len += write_count;
2480    /*
2481     * Now copy the content from sg list
2482     */
2483    for (i = 0; i < cnt; i++) {
2484        if (write_count > sg[i].iov_len) {
2485            to_copy = sg[i].iov_len;
2486        } else {
2487            to_copy = write_count;
2488        }
2489        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2490        /* updating vs->off since we are not using below */
2491        off += to_copy;
2492        write_count -= to_copy;
2493    }
2494out:
2495    return err;
2496}
2497
2498static void coroutine_fn v9fs_write(void *opaque)
2499{
2500    ssize_t err;
2501    int32_t fid;
2502    uint64_t off;
2503    uint32_t count;
2504    int32_t len = 0;
2505    int32_t total = 0;
2506    size_t offset = 7;
2507    V9fsFidState *fidp;
2508    V9fsPDU *pdu = opaque;
2509    V9fsState *s = pdu->s;
2510    QEMUIOVector qiov_full;
2511    QEMUIOVector qiov;
2512
2513    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2514    if (err < 0) {
2515        pdu_complete(pdu, err);
2516        return;
2517    }
2518    offset += err;
2519    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2520    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2521
2522    fidp = get_fid(pdu, fid);
2523    if (fidp == NULL) {
2524        err = -EINVAL;
2525        goto out_nofid;
2526    }
2527    if (fidp->fid_type == P9_FID_FILE) {
2528        if (fidp->fs.fd == -1) {
2529            err = -EINVAL;
2530            goto out;
2531        }
2532    } else if (fidp->fid_type == P9_FID_XATTR) {
2533        /*
2534         * setxattr operation
2535         */
2536        err = v9fs_xattr_write(s, pdu, fidp, off, count,
2537                               qiov_full.iov, qiov_full.niov);
2538        goto out;
2539    } else {
2540        err = -EINVAL;
2541        goto out;
2542    }
2543    qemu_iovec_init(&qiov, qiov_full.niov);
2544    do {
2545        qemu_iovec_reset(&qiov);
2546        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2547        if (0) {
2548            print_sg(qiov.iov, qiov.niov);
2549        }
2550        /* Loop in case of EINTR */
2551        do {
2552            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2553            if (len >= 0) {
2554                off   += len;
2555                total += len;
2556            }
2557        } while (len == -EINTR && !pdu->cancelled);
2558        if (len < 0) {
2559            /* IO error return the error */
2560            err = len;
2561            goto out_qiov;
2562        }
2563    } while (total < count && len > 0);
2564
2565    offset = 7;
2566    err = pdu_marshal(pdu, offset, "d", total);
2567    if (err < 0) {
2568        goto out_qiov;
2569    }
2570    err += offset;
2571    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2572out_qiov:
2573    qemu_iovec_destroy(&qiov);
2574out:
2575    put_fid(pdu, fidp);
2576out_nofid:
2577    qemu_iovec_destroy(&qiov_full);
2578    pdu_complete(pdu, err);
2579}
2580
2581static void coroutine_fn v9fs_create(void *opaque)
2582{
2583    int32_t fid;
2584    int err = 0;
2585    size_t offset = 7;
2586    V9fsFidState *fidp;
2587    V9fsQID qid;
2588    int32_t perm;
2589    int8_t mode;
2590    V9fsPath path;
2591    struct stat stbuf;
2592    V9fsString name;
2593    V9fsString extension;
2594    int iounit;
2595    V9fsPDU *pdu = opaque;
2596    V9fsState *s = pdu->s;
2597
2598    v9fs_path_init(&path);
2599    v9fs_string_init(&name);
2600    v9fs_string_init(&extension);
2601    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2602                        &perm, &mode, &extension);
2603    if (err < 0) {
2604        goto out_nofid;
2605    }
2606    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2607
2608    if (name_is_illegal(name.data)) {
2609        err = -ENOENT;
2610        goto out_nofid;
2611    }
2612
2613    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2614        err = -EEXIST;
2615        goto out_nofid;
2616    }
2617
2618    fidp = get_fid(pdu, fid);
2619    if (fidp == NULL) {
2620        err = -EINVAL;
2621        goto out_nofid;
2622    }
2623    if (fidp->fid_type != P9_FID_NONE) {
2624        err = -EINVAL;
2625        goto out;
2626    }
2627    if (perm & P9_STAT_MODE_DIR) {
2628        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2629                            fidp->uid, -1, &stbuf);
2630        if (err < 0) {
2631            goto out;
2632        }
2633        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2634        if (err < 0) {
2635            goto out;
2636        }
2637        v9fs_path_write_lock(s);
2638        v9fs_path_copy(&fidp->path, &path);
2639        v9fs_path_unlock(s);
2640        err = v9fs_co_opendir(pdu, fidp);
2641        if (err < 0) {
2642            goto out;
2643        }
2644        fidp->fid_type = P9_FID_DIR;
2645    } else if (perm & P9_STAT_MODE_SYMLINK) {
2646        err = v9fs_co_symlink(pdu, fidp, &name,
2647                              extension.data, -1 , &stbuf);
2648        if (err < 0) {
2649            goto out;
2650        }
2651        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2652        if (err < 0) {
2653            goto out;
2654        }
2655        v9fs_path_write_lock(s);
2656        v9fs_path_copy(&fidp->path, &path);
2657        v9fs_path_unlock(s);
2658    } else if (perm & P9_STAT_MODE_LINK) {
2659        int32_t ofid = atoi(extension.data);
2660        V9fsFidState *ofidp = get_fid(pdu, ofid);
2661        if (ofidp == NULL) {
2662            err = -EINVAL;
2663            goto out;
2664        }
2665        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2666        put_fid(pdu, ofidp);
2667        if (err < 0) {
2668            goto out;
2669        }
2670        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2671        if (err < 0) {
2672            fidp->fid_type = P9_FID_NONE;
2673            goto out;
2674        }
2675        v9fs_path_write_lock(s);
2676        v9fs_path_copy(&fidp->path, &path);
2677        v9fs_path_unlock(s);
2678        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2679        if (err < 0) {
2680            fidp->fid_type = P9_FID_NONE;
2681            goto out;
2682        }
2683    } else if (perm & P9_STAT_MODE_DEVICE) {
2684        char ctype;
2685        uint32_t major, minor;
2686        mode_t nmode = 0;
2687
2688        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2689            err = -errno;
2690            goto out;
2691        }
2692
2693        switch (ctype) {
2694        case 'c':
2695            nmode = S_IFCHR;
2696            break;
2697        case 'b':
2698            nmode = S_IFBLK;
2699            break;
2700        default:
2701            err = -EIO;
2702            goto out;
2703        }
2704
2705        nmode |= perm & 0777;
2706        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2707                            makedev(major, minor), nmode, &stbuf);
2708        if (err < 0) {
2709            goto out;
2710        }
2711        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2712        if (err < 0) {
2713            goto out;
2714        }
2715        v9fs_path_write_lock(s);
2716        v9fs_path_copy(&fidp->path, &path);
2717        v9fs_path_unlock(s);
2718    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2719        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2720                            0, S_IFIFO | (perm & 0777), &stbuf);
2721        if (err < 0) {
2722            goto out;
2723        }
2724        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2725        if (err < 0) {
2726            goto out;
2727        }
2728        v9fs_path_write_lock(s);
2729        v9fs_path_copy(&fidp->path, &path);
2730        v9fs_path_unlock(s);
2731    } else if (perm & P9_STAT_MODE_SOCKET) {
2732        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2733                            0, S_IFSOCK | (perm & 0777), &stbuf);
2734        if (err < 0) {
2735            goto out;
2736        }
2737        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2738        if (err < 0) {
2739            goto out;
2740        }
2741        v9fs_path_write_lock(s);
2742        v9fs_path_copy(&fidp->path, &path);
2743        v9fs_path_unlock(s);
2744    } else {
2745        err = v9fs_co_open2(pdu, fidp, &name, -1,
2746                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2747        if (err < 0) {
2748            goto out;
2749        }
2750        fidp->fid_type = P9_FID_FILE;
2751        fidp->open_flags = omode_to_uflags(mode);
2752        if (fidp->open_flags & O_EXCL) {
2753            /*
2754             * We let the host file system do O_EXCL check
2755             * We should not reclaim such fd
2756             */
2757            fidp->flags |= FID_NON_RECLAIMABLE;
2758        }
2759    }
2760    iounit = get_iounit(pdu, &fidp->path);
2761    err = stat_to_qid(pdu, &stbuf, &qid);
2762    if (err < 0) {
2763        goto out;
2764    }
2765    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2766    if (err < 0) {
2767        goto out;
2768    }
2769    err += offset;
2770    trace_v9fs_create_return(pdu->tag, pdu->id,
2771                             qid.type, qid.version, qid.path, iounit);
2772out:
2773    put_fid(pdu, fidp);
2774out_nofid:
2775   pdu_complete(pdu, err);
2776   v9fs_string_free(&name);
2777   v9fs_string_free(&extension);
2778   v9fs_path_free(&path);
2779}
2780
2781static void coroutine_fn v9fs_symlink(void *opaque)
2782{
2783    V9fsPDU *pdu = opaque;
2784    V9fsString name;
2785    V9fsString symname;
2786    V9fsFidState *dfidp;
2787    V9fsQID qid;
2788    struct stat stbuf;
2789    int32_t dfid;
2790    int err = 0;
2791    gid_t gid;
2792    size_t offset = 7;
2793
2794    v9fs_string_init(&name);
2795    v9fs_string_init(&symname);
2796    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2797    if (err < 0) {
2798        goto out_nofid;
2799    }
2800    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2801
2802    if (name_is_illegal(name.data)) {
2803        err = -ENOENT;
2804        goto out_nofid;
2805    }
2806
2807    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2808        err = -EEXIST;
2809        goto out_nofid;
2810    }
2811
2812    dfidp = get_fid(pdu, dfid);
2813    if (dfidp == NULL) {
2814        err = -EINVAL;
2815        goto out_nofid;
2816    }
2817    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2818    if (err < 0) {
2819        goto out;
2820    }
2821    err = stat_to_qid(pdu, &stbuf, &qid);
2822    if (err < 0) {
2823        goto out;
2824    }
2825    err =  pdu_marshal(pdu, offset, "Q", &qid);
2826    if (err < 0) {
2827        goto out;
2828    }
2829    err += offset;
2830    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2831                              qid.type, qid.version, qid.path);
2832out:
2833    put_fid(pdu, dfidp);
2834out_nofid:
2835    pdu_complete(pdu, err);
2836    v9fs_string_free(&name);
2837    v9fs_string_free(&symname);
2838}
2839
2840static void coroutine_fn v9fs_flush(void *opaque)
2841{
2842    ssize_t err;
2843    int16_t tag;
2844    size_t offset = 7;
2845    V9fsPDU *cancel_pdu = NULL;
2846    V9fsPDU *pdu = opaque;
2847    V9fsState *s = pdu->s;
2848
2849    err = pdu_unmarshal(pdu, offset, "w", &tag);
2850    if (err < 0) {
2851        pdu_complete(pdu, err);
2852        return;
2853    }
2854    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2855
2856    if (pdu->tag == tag) {
2857        warn_report("the guest sent a self-referencing 9P flush request");
2858    } else {
2859        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2860            if (cancel_pdu->tag == tag) {
2861                break;
2862            }
2863        }
2864    }
2865    if (cancel_pdu) {
2866        cancel_pdu->cancelled = 1;
2867        /*
2868         * Wait for pdu to complete.
2869         */
2870        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2871        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2872            cancel_pdu->cancelled = 0;
2873            pdu_free(cancel_pdu);
2874        }
2875    }
2876    pdu_complete(pdu, 7);
2877}
2878
2879static void coroutine_fn v9fs_link(void *opaque)
2880{
2881    V9fsPDU *pdu = opaque;
2882    int32_t dfid, oldfid;
2883    V9fsFidState *dfidp, *oldfidp;
2884    V9fsString name;
2885    size_t offset = 7;
2886    int err = 0;
2887
2888    v9fs_string_init(&name);
2889    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2890    if (err < 0) {
2891        goto out_nofid;
2892    }
2893    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2894
2895    if (name_is_illegal(name.data)) {
2896        err = -ENOENT;
2897        goto out_nofid;
2898    }
2899
2900    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2901        err = -EEXIST;
2902        goto out_nofid;
2903    }
2904
2905    dfidp = get_fid(pdu, dfid);
2906    if (dfidp == NULL) {
2907        err = -ENOENT;
2908        goto out_nofid;
2909    }
2910
2911    oldfidp = get_fid(pdu, oldfid);
2912    if (oldfidp == NULL) {
2913        err = -ENOENT;
2914        goto out;
2915    }
2916    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2917    if (!err) {
2918        err = offset;
2919    }
2920    put_fid(pdu, oldfidp);
2921out:
2922    put_fid(pdu, dfidp);
2923out_nofid:
2924    v9fs_string_free(&name);
2925    pdu_complete(pdu, err);
2926}
2927
2928/* Only works with path name based fid */
2929static void coroutine_fn v9fs_remove(void *opaque)
2930{
2931    int32_t fid;
2932    int err = 0;
2933    size_t offset = 7;
2934    V9fsFidState *fidp;
2935    V9fsPDU *pdu = opaque;
2936
2937    err = pdu_unmarshal(pdu, offset, "d", &fid);
2938    if (err < 0) {
2939        goto out_nofid;
2940    }
2941    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2942
2943    fidp = get_fid(pdu, fid);
2944    if (fidp == NULL) {
2945        err = -EINVAL;
2946        goto out_nofid;
2947    }
2948    /* if fs driver is not path based, return EOPNOTSUPP */
2949    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2950        err = -EOPNOTSUPP;
2951        goto out_err;
2952    }
2953    /*
2954     * IF the file is unlinked, we cannot reopen
2955     * the file later. So don't reclaim fd
2956     */
2957    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2958    if (err < 0) {
2959        goto out_err;
2960    }
2961    err = v9fs_co_remove(pdu, &fidp->path);
2962    if (!err) {
2963        err = offset;
2964    }
2965out_err:
2966    /* For TREMOVE we need to clunk the fid even on failed remove */
2967    clunk_fid(pdu->s, fidp->fid);
2968    put_fid(pdu, fidp);
2969out_nofid:
2970    pdu_complete(pdu, err);
2971}
2972
2973static void coroutine_fn v9fs_unlinkat(void *opaque)
2974{
2975    int err = 0;
2976    V9fsString name;
2977    int32_t dfid, flags, rflags = 0;
2978    size_t offset = 7;
2979    V9fsPath path;
2980    V9fsFidState *dfidp;
2981    V9fsPDU *pdu = opaque;
2982
2983    v9fs_string_init(&name);
2984    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2985    if (err < 0) {
2986        goto out_nofid;
2987    }
2988
2989    if (name_is_illegal(name.data)) {
2990        err = -ENOENT;
2991        goto out_nofid;
2992    }
2993
2994    if (!strcmp(".", name.data)) {
2995        err = -EINVAL;
2996        goto out_nofid;
2997    }
2998
2999    if (!strcmp("..", name.data)) {
3000        err = -ENOTEMPTY;
3001        goto out_nofid;
3002    }
3003
3004    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3005        err = -EINVAL;
3006        goto out_nofid;
3007    }
3008
3009    if (flags & P9_DOTL_AT_REMOVEDIR) {
3010        rflags |= AT_REMOVEDIR;
3011    }
3012
3013    dfidp = get_fid(pdu, dfid);
3014    if (dfidp == NULL) {
3015        err = -EINVAL;
3016        goto out_nofid;
3017    }
3018    /*
3019     * IF the file is unlinked, we cannot reopen
3020     * the file later. So don't reclaim fd
3021     */
3022    v9fs_path_init(&path);
3023    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3024    if (err < 0) {
3025        goto out_err;
3026    }
3027    err = v9fs_mark_fids_unreclaim(pdu, &path);
3028    if (err < 0) {
3029        goto out_err;
3030    }
3031    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3032    if (!err) {
3033        err = offset;
3034    }
3035out_err:
3036    put_fid(pdu, dfidp);
3037    v9fs_path_free(&path);
3038out_nofid:
3039    pdu_complete(pdu, err);
3040    v9fs_string_free(&name);
3041}
3042
3043
3044/* Only works with path name based fid */
3045static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3046                                             int32_t newdirfid,
3047                                             V9fsString *name)
3048{
3049    int err = 0;
3050    V9fsPath new_path;
3051    V9fsFidState *tfidp;
3052    V9fsState *s = pdu->s;
3053    V9fsFidState *dirfidp = NULL;
3054
3055    v9fs_path_init(&new_path);
3056    if (newdirfid != -1) {
3057        dirfidp = get_fid(pdu, newdirfid);
3058        if (dirfidp == NULL) {
3059            err = -ENOENT;
3060            goto out_nofid;
3061        }
3062        if (fidp->fid_type != P9_FID_NONE) {
3063            err = -EINVAL;
3064            goto out;
3065        }
3066        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3067        if (err < 0) {
3068            goto out;
3069        }
3070    } else {
3071        char *dir_name = g_path_get_dirname(fidp->path.data);
3072        V9fsPath dir_path;
3073
3074        v9fs_path_init(&dir_path);
3075        v9fs_path_sprintf(&dir_path, "%s", dir_name);
3076        g_free(dir_name);
3077
3078        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3079        v9fs_path_free(&dir_path);
3080        if (err < 0) {
3081            goto out;
3082        }
3083    }
3084    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3085    if (err < 0) {
3086        goto out;
3087    }
3088    /*
3089     * Fixup fid's pointing to the old name to
3090     * start pointing to the new name
3091     */
3092    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3093        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3094            /* replace the name */
3095            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3096        }
3097    }
3098out:
3099    if (dirfidp) {
3100        put_fid(pdu, dirfidp);
3101    }
3102    v9fs_path_free(&new_path);
3103out_nofid:
3104    return err;
3105}
3106
3107/* Only works with path name based fid */
3108static void coroutine_fn v9fs_rename(void *opaque)
3109{
3110    int32_t fid;
3111    ssize_t err = 0;
3112    size_t offset = 7;
3113    V9fsString name;
3114    int32_t newdirfid;
3115    V9fsFidState *fidp;
3116    V9fsPDU *pdu = opaque;
3117    V9fsState *s = pdu->s;
3118
3119    v9fs_string_init(&name);
3120    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3121    if (err < 0) {
3122        goto out_nofid;
3123    }
3124
3125    if (name_is_illegal(name.data)) {
3126        err = -ENOENT;
3127        goto out_nofid;
3128    }
3129
3130    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3131        err = -EISDIR;
3132        goto out_nofid;
3133    }
3134
3135    fidp = get_fid(pdu, fid);
3136    if (fidp == NULL) {
3137        err = -ENOENT;
3138        goto out_nofid;
3139    }
3140    if (fidp->fid_type != P9_FID_NONE) {
3141        err = -EINVAL;
3142        goto out;
3143    }
3144    /* if fs driver is not path based, return EOPNOTSUPP */
3145    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3146        err = -EOPNOTSUPP;
3147        goto out;
3148    }
3149    v9fs_path_write_lock(s);
3150    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3151    v9fs_path_unlock(s);
3152    if (!err) {
3153        err = offset;
3154    }
3155out:
3156    put_fid(pdu, fidp);
3157out_nofid:
3158    pdu_complete(pdu, err);
3159    v9fs_string_free(&name);
3160}
3161
3162static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3163                                           V9fsString *old_name,
3164                                           V9fsPath *newdir,
3165                                           V9fsString *new_name)
3166{
3167    V9fsFidState *tfidp;
3168    V9fsPath oldpath, newpath;
3169    V9fsState *s = pdu->s;
3170    int err;
3171
3172    v9fs_path_init(&oldpath);
3173    v9fs_path_init(&newpath);
3174    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3175    if (err < 0) {
3176        goto out;
3177    }
3178    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3179    if (err < 0) {
3180        goto out;
3181    }
3182
3183    /*
3184     * Fixup fid's pointing to the old name to
3185     * start pointing to the new name
3186     */
3187    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3188        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3189            /* replace the name */
3190            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3191        }
3192    }
3193out:
3194    v9fs_path_free(&oldpath);
3195    v9fs_path_free(&newpath);
3196    return err;
3197}
3198
3199static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3200                                               V9fsString *old_name,
3201                                               int32_t newdirfid,
3202                                               V9fsString *new_name)
3203{
3204    int err = 0;
3205    V9fsState *s = pdu->s;
3206    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3207
3208    olddirfidp = get_fid(pdu, olddirfid);
3209    if (olddirfidp == NULL) {
3210        err = -ENOENT;
3211        goto out;
3212    }
3213    if (newdirfid != -1) {
3214        newdirfidp = get_fid(pdu, newdirfid);
3215        if (newdirfidp == NULL) {
3216            err = -ENOENT;
3217            goto out;
3218        }
3219    } else {
3220        newdirfidp = get_fid(pdu, olddirfid);
3221    }
3222
3223    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3224                           &newdirfidp->path, new_name);
3225    if (err < 0) {
3226        goto out;
3227    }
3228    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3229        /* Only for path based fid  we need to do the below fixup */
3230        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3231                                 &newdirfidp->path, new_name);
3232    }
3233out:
3234    if (olddirfidp) {
3235        put_fid(pdu, olddirfidp);
3236    }
3237    if (newdirfidp) {
3238        put_fid(pdu, newdirfidp);
3239    }
3240    return err;
3241}
3242
3243static void coroutine_fn v9fs_renameat(void *opaque)
3244{
3245    ssize_t err = 0;
3246    size_t offset = 7;
3247    V9fsPDU *pdu = opaque;
3248    V9fsState *s = pdu->s;
3249    int32_t olddirfid, newdirfid;
3250    V9fsString old_name, new_name;
3251
3252    v9fs_string_init(&old_name);
3253    v9fs_string_init(&new_name);
3254    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3255                        &old_name, &newdirfid, &new_name);
3256    if (err < 0) {
3257        goto out_err;
3258    }
3259
3260    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3261        err = -ENOENT;
3262        goto out_err;
3263    }
3264
3265    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3266        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3267        err = -EISDIR;
3268        goto out_err;
3269    }
3270
3271    v9fs_path_write_lock(s);
3272    err = v9fs_complete_renameat(pdu, olddirfid,
3273                                 &old_name, newdirfid, &new_name);
3274    v9fs_path_unlock(s);
3275    if (!err) {
3276        err = offset;
3277    }
3278
3279out_err:
3280    pdu_complete(pdu, err);
3281    v9fs_string_free(&old_name);
3282    v9fs_string_free(&new_name);
3283}
3284
3285static void coroutine_fn v9fs_wstat(void *opaque)
3286{
3287    int32_t fid;
3288    int err = 0;
3289    int16_t unused;
3290    V9fsStat v9stat;
3291    size_t offset = 7;
3292    struct stat stbuf;
3293    V9fsFidState *fidp;
3294    V9fsPDU *pdu = opaque;
3295    V9fsState *s = pdu->s;
3296
3297    v9fs_stat_init(&v9stat);
3298    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3299    if (err < 0) {
3300        goto out_nofid;
3301    }
3302    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3303                     v9stat.mode, v9stat.atime, v9stat.mtime);
3304
3305    fidp = get_fid(pdu, fid);
3306    if (fidp == NULL) {
3307        err = -EINVAL;
3308        goto out_nofid;
3309    }
3310    /* do we need to sync the file? */
3311    if (donttouch_stat(&v9stat)) {
3312        err = v9fs_co_fsync(pdu, fidp, 0);
3313        goto out;
3314    }
3315    if (v9stat.mode != -1) {
3316        uint32_t v9_mode;
3317        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3318        if (err < 0) {
3319            goto out;
3320        }
3321        v9_mode = stat_to_v9mode(&stbuf);
3322        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3323            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3324            /* Attempting to change the type */
3325            err = -EIO;
3326            goto out;
3327        }
3328        err = v9fs_co_chmod(pdu, &fidp->path,
3329                            v9mode_to_mode(v9stat.mode,
3330                                           &v9stat.extension));
3331        if (err < 0) {
3332            goto out;
3333        }
3334    }
3335    if (v9stat.mtime != -1 || v9stat.atime != -1) {
3336        struct timespec times[2];
3337        if (v9stat.atime != -1) {
3338            times[0].tv_sec = v9stat.atime;
3339            times[0].tv_nsec = 0;
3340        } else {
3341            times[0].tv_nsec = UTIME_OMIT;
3342        }
3343        if (v9stat.mtime != -1) {
3344            times[1].tv_sec = v9stat.mtime;
3345            times[1].tv_nsec = 0;
3346        } else {
3347            times[1].tv_nsec = UTIME_OMIT;
3348        }
3349        err = v9fs_co_utimensat(pdu, &fidp->path, times);
3350        if (err < 0) {
3351            goto out;
3352        }
3353    }
3354    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3355        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3356        if (err < 0) {
3357            goto out;
3358        }
3359    }
3360    if (v9stat.name.size != 0) {
3361        v9fs_path_write_lock(s);
3362        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3363        v9fs_path_unlock(s);
3364        if (err < 0) {
3365            goto out;
3366        }
3367    }
3368    if (v9stat.length != -1) {
3369        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3370        if (err < 0) {
3371            goto out;
3372        }
3373    }
3374    err = offset;
3375out:
3376    put_fid(pdu, fidp);
3377out_nofid:
3378    v9fs_stat_free(&v9stat);
3379    pdu_complete(pdu, err);
3380}
3381
3382static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3383{
3384    uint32_t f_type;
3385    uint32_t f_bsize;
3386    uint64_t f_blocks;
3387    uint64_t f_bfree;
3388    uint64_t f_bavail;
3389    uint64_t f_files;
3390    uint64_t f_ffree;
3391    uint64_t fsid_val;
3392    uint32_t f_namelen;
3393    size_t offset = 7;
3394    int32_t bsize_factor;
3395
3396    /*
3397     * compute bsize factor based on host file system block size
3398     * and client msize
3399     */
3400    bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
3401    if (!bsize_factor) {
3402        bsize_factor = 1;
3403    }
3404    f_type  = stbuf->f_type;
3405    f_bsize = stbuf->f_bsize;
3406    f_bsize *= bsize_factor;
3407    /*
3408     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3409     * adjust(divide) the number of blocks, free blocks and available
3410     * blocks by bsize factor
3411     */
3412    f_blocks = stbuf->f_blocks/bsize_factor;
3413    f_bfree  = stbuf->f_bfree/bsize_factor;
3414    f_bavail = stbuf->f_bavail/bsize_factor;
3415    f_files  = stbuf->f_files;
3416    f_ffree  = stbuf->f_ffree;
3417    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3418               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3419    f_namelen = stbuf->f_namelen;
3420
3421    return pdu_marshal(pdu, offset, "ddqqqqqqd",
3422                       f_type, f_bsize, f_blocks, f_bfree,
3423                       f_bavail, f_files, f_ffree,
3424                       fsid_val, f_namelen);
3425}
3426
3427static void coroutine_fn v9fs_statfs(void *opaque)
3428{
3429    int32_t fid;
3430    ssize_t retval = 0;
3431    size_t offset = 7;
3432    V9fsFidState *fidp;
3433    struct statfs stbuf;
3434    V9fsPDU *pdu = opaque;
3435    V9fsState *s = pdu->s;
3436
3437    retval = pdu_unmarshal(pdu, offset, "d", &fid);
3438    if (retval < 0) {
3439        goto out_nofid;
3440    }
3441    fidp = get_fid(pdu, fid);
3442    if (fidp == NULL) {
3443        retval = -ENOENT;
3444        goto out_nofid;
3445    }
3446    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3447    if (retval < 0) {
3448        goto out;
3449    }
3450    retval = v9fs_fill_statfs(s, pdu, &stbuf);
3451    if (retval < 0) {
3452        goto out;
3453    }
3454    retval += offset;
3455out:
3456    put_fid(pdu, fidp);
3457out_nofid:
3458    pdu_complete(pdu, retval);
3459}
3460
3461static void coroutine_fn v9fs_mknod(void *opaque)
3462{
3463
3464    int mode;
3465    gid_t gid;
3466    int32_t fid;
3467    V9fsQID qid;
3468    int err = 0;
3469    int major, minor;
3470    size_t offset = 7;
3471    V9fsString name;
3472    struct stat stbuf;
3473    V9fsFidState *fidp;
3474    V9fsPDU *pdu = opaque;
3475
3476    v9fs_string_init(&name);
3477    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3478                        &major, &minor, &gid);
3479    if (err < 0) {
3480        goto out_nofid;
3481    }
3482    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3483
3484    if (name_is_illegal(name.data)) {
3485        err = -ENOENT;
3486        goto out_nofid;
3487    }
3488
3489    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3490        err = -EEXIST;
3491        goto out_nofid;
3492    }
3493
3494    fidp = get_fid(pdu, fid);
3495    if (fidp == NULL) {
3496        err = -ENOENT;
3497        goto out_nofid;
3498    }
3499    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3500                        makedev(major, minor), mode, &stbuf);
3501    if (err < 0) {
3502        goto out;
3503    }
3504    err = stat_to_qid(pdu, &stbuf, &qid);
3505    if (err < 0) {
3506        goto out;
3507    }
3508    err = pdu_marshal(pdu, offset, "Q", &qid);
3509    if (err < 0) {
3510        goto out;
3511    }
3512    err += offset;
3513    trace_v9fs_mknod_return(pdu->tag, pdu->id,
3514                            qid.type, qid.version, qid.path);
3515out:
3516    put_fid(pdu, fidp);
3517out_nofid:
3518    pdu_complete(pdu, err);
3519    v9fs_string_free(&name);
3520}
3521
3522/*
3523 * Implement posix byte range locking code
3524 * Server side handling of locking code is very simple, because 9p server in
3525 * QEMU can handle only one client. And most of the lock handling
3526 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3527 * do any thing in * qemu 9p server side lock code path.
3528 * So when a TLOCK request comes, always return success
3529 */
3530static void coroutine_fn v9fs_lock(void *opaque)
3531{
3532    V9fsFlock flock;
3533    size_t offset = 7;
3534    struct stat stbuf;
3535    V9fsFidState *fidp;
3536    int32_t fid, err = 0;
3537    V9fsPDU *pdu = opaque;
3538
3539    v9fs_string_init(&flock.client_id);
3540    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3541                        &flock.flags, &flock.start, &flock.length,
3542                        &flock.proc_id, &flock.client_id);
3543    if (err < 0) {
3544        goto out_nofid;
3545    }
3546    trace_v9fs_lock(pdu->tag, pdu->id, fid,
3547                    flock.type, flock.start, flock.length);
3548
3549
3550    /* We support only block flag now (that too ignored currently) */
3551    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3552        err = -EINVAL;
3553        goto out_nofid;
3554    }
3555    fidp = get_fid(pdu, fid);
3556    if (fidp == NULL) {
3557        err = -ENOENT;
3558        goto out_nofid;
3559    }
3560    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3561    if (err < 0) {
3562        goto out;
3563    }
3564    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3565    if (err < 0) {
3566        goto out;
3567    }
3568    err += offset;
3569    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3570out:
3571    put_fid(pdu, fidp);
3572out_nofid:
3573    pdu_complete(pdu, err);
3574    v9fs_string_free(&flock.client_id);
3575}
3576
3577/*
3578 * When a TGETLOCK request comes, always return success because all lock
3579 * handling is done by client's VFS layer.
3580 */
3581static void coroutine_fn v9fs_getlock(void *opaque)
3582{
3583    size_t offset = 7;
3584    struct stat stbuf;
3585    V9fsFidState *fidp;
3586    V9fsGetlock glock;
3587    int32_t fid, err = 0;
3588    V9fsPDU *pdu = opaque;
3589
3590    v9fs_string_init(&glock.client_id);
3591    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3592                        &glock.start, &glock.length, &glock.proc_id,
3593                        &glock.client_id);
3594    if (err < 0) {
3595        goto out_nofid;
3596    }
3597    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3598                       glock.type, glock.start, glock.length);
3599
3600    fidp = get_fid(pdu, fid);
3601    if (fidp == NULL) {
3602        err = -ENOENT;
3603        goto out_nofid;
3604    }
3605    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3606    if (err < 0) {
3607        goto out;
3608    }
3609    glock.type = P9_LOCK_TYPE_UNLCK;
3610    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3611                          glock.start, glock.length, glock.proc_id,
3612                          &glock.client_id);
3613    if (err < 0) {
3614        goto out;
3615    }
3616    err += offset;
3617    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3618                              glock.length, glock.proc_id);
3619out:
3620    put_fid(pdu, fidp);
3621out_nofid:
3622    pdu_complete(pdu, err);
3623    v9fs_string_free(&glock.client_id);
3624}
3625
3626static void coroutine_fn v9fs_mkdir(void *opaque)
3627{
3628    V9fsPDU *pdu = opaque;
3629    size_t offset = 7;
3630    int32_t fid;
3631    struct stat stbuf;
3632    V9fsQID qid;
3633    V9fsString name;
3634    V9fsFidState *fidp;
3635    gid_t gid;
3636    int mode;
3637    int err = 0;
3638
3639    v9fs_string_init(&name);
3640    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3641    if (err < 0) {
3642        goto out_nofid;
3643    }
3644    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3645
3646    if (name_is_illegal(name.data)) {
3647        err = -ENOENT;
3648        goto out_nofid;
3649    }
3650
3651    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3652        err = -EEXIST;
3653        goto out_nofid;
3654    }
3655
3656    fidp = get_fid(pdu, fid);
3657    if (fidp == NULL) {
3658        err = -ENOENT;
3659        goto out_nofid;
3660    }
3661    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3662    if (err < 0) {
3663        goto out;
3664    }
3665    err = stat_to_qid(pdu, &stbuf, &qid);
3666    if (err < 0) {
3667        goto out;
3668    }
3669    err = pdu_marshal(pdu, offset, "Q", &qid);
3670    if (err < 0) {
3671        goto out;
3672    }
3673    err += offset;
3674    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3675                            qid.type, qid.version, qid.path, err);
3676out:
3677    put_fid(pdu, fidp);
3678out_nofid:
3679    pdu_complete(pdu, err);
3680    v9fs_string_free(&name);
3681}
3682
3683static void coroutine_fn v9fs_xattrwalk(void *opaque)
3684{
3685    int64_t size;
3686    V9fsString name;
3687    ssize_t err = 0;
3688    size_t offset = 7;
3689    int32_t fid, newfid;
3690    V9fsFidState *file_fidp;
3691    V9fsFidState *xattr_fidp = NULL;
3692    V9fsPDU *pdu = opaque;
3693    V9fsState *s = pdu->s;
3694
3695    v9fs_string_init(&name);
3696    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3697    if (err < 0) {
3698        goto out_nofid;
3699    }
3700    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3701
3702    file_fidp = get_fid(pdu, fid);
3703    if (file_fidp == NULL) {
3704        err = -ENOENT;
3705        goto out_nofid;
3706    }
3707    xattr_fidp = alloc_fid(s, newfid);
3708    if (xattr_fidp == NULL) {
3709        err = -EINVAL;
3710        goto out;
3711    }
3712    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3713    if (!v9fs_string_size(&name)) {
3714        /*
3715         * listxattr request. Get the size first
3716         */
3717        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3718        if (size < 0) {
3719            err = size;
3720            clunk_fid(s, xattr_fidp->fid);
3721            goto out;
3722        }
3723        /*
3724         * Read the xattr value
3725         */
3726        xattr_fidp->fs.xattr.len = size;
3727        xattr_fidp->fid_type = P9_FID_XATTR;
3728        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3729        xattr_fidp->fs.xattr.value = g_malloc0(size);
3730        if (size) {
3731            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3732                                     xattr_fidp->fs.xattr.value,
3733                                     xattr_fidp->fs.xattr.len);
3734            if (err < 0) {
3735                clunk_fid(s, xattr_fidp->fid);
3736                goto out;
3737            }
3738        }
3739        err = pdu_marshal(pdu, offset, "q", size);
3740        if (err < 0) {
3741            goto out;
3742        }
3743        err += offset;
3744    } else {
3745        /*
3746         * specific xattr fid. We check for xattr
3747         * presence also collect the xattr size
3748         */
3749        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3750                                 &name, NULL, 0);
3751        if (size < 0) {
3752            err = size;
3753            clunk_fid(s, xattr_fidp->fid);
3754            goto out;
3755        }
3756        /*
3757         * Read the xattr value
3758         */
3759        xattr_fidp->fs.xattr.len = size;
3760        xattr_fidp->fid_type = P9_FID_XATTR;
3761        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3762        xattr_fidp->fs.xattr.value = g_malloc0(size);
3763        if (size) {
3764            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3765                                    &name, xattr_fidp->fs.xattr.value,
3766                                    xattr_fidp->fs.xattr.len);
3767            if (err < 0) {
3768                clunk_fid(s, xattr_fidp->fid);
3769                goto out;
3770            }
3771        }
3772        err = pdu_marshal(pdu, offset, "q", size);
3773        if (err < 0) {
3774            goto out;
3775        }
3776        err += offset;
3777    }
3778    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3779out:
3780    put_fid(pdu, file_fidp);
3781    if (xattr_fidp) {
3782        put_fid(pdu, xattr_fidp);
3783    }
3784out_nofid:
3785    pdu_complete(pdu, err);
3786    v9fs_string_free(&name);
3787}
3788
3789static void coroutine_fn v9fs_xattrcreate(void *opaque)
3790{
3791    int flags, rflags = 0;
3792    int32_t fid;
3793    uint64_t size;
3794    ssize_t err = 0;
3795    V9fsString name;
3796    size_t offset = 7;
3797    V9fsFidState *file_fidp;
3798    V9fsFidState *xattr_fidp;
3799    V9fsPDU *pdu = opaque;
3800
3801    v9fs_string_init(&name);
3802    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3803    if (err < 0) {
3804        goto out_nofid;
3805    }
3806    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3807
3808    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3809        err = -EINVAL;
3810        goto out_nofid;
3811    }
3812
3813    if (flags & P9_XATTR_CREATE) {
3814        rflags |= XATTR_CREATE;
3815    }
3816
3817    if (flags & P9_XATTR_REPLACE) {
3818        rflags |= XATTR_REPLACE;
3819    }
3820
3821    if (size > XATTR_SIZE_MAX) {
3822        err = -E2BIG;
3823        goto out_nofid;
3824    }
3825
3826    file_fidp = get_fid(pdu, fid);
3827    if (file_fidp == NULL) {
3828        err = -EINVAL;
3829        goto out_nofid;
3830    }
3831    if (file_fidp->fid_type != P9_FID_NONE) {
3832        err = -EINVAL;
3833        goto out_put_fid;
3834    }
3835
3836    /* Make the file fid point to xattr */
3837    xattr_fidp = file_fidp;
3838    xattr_fidp->fid_type = P9_FID_XATTR;
3839    xattr_fidp->fs.xattr.copied_len = 0;
3840    xattr_fidp->fs.xattr.xattrwalk_fid = false;
3841    xattr_fidp->fs.xattr.len = size;
3842    xattr_fidp->fs.xattr.flags = rflags;
3843    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3844    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3845    xattr_fidp->fs.xattr.value = g_malloc0(size);
3846    err = offset;
3847out_put_fid:
3848    put_fid(pdu, file_fidp);
3849out_nofid:
3850    pdu_complete(pdu, err);
3851    v9fs_string_free(&name);
3852}
3853
3854static void coroutine_fn v9fs_readlink(void *opaque)
3855{
3856    V9fsPDU *pdu = opaque;
3857    size_t offset = 7;
3858    V9fsString target;
3859    int32_t fid;
3860    int err = 0;
3861    V9fsFidState *fidp;
3862
3863    err = pdu_unmarshal(pdu, offset, "d", &fid);
3864    if (err < 0) {
3865        goto out_nofid;
3866    }
3867    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3868    fidp = get_fid(pdu, fid);
3869    if (fidp == NULL) {
3870        err = -ENOENT;
3871        goto out_nofid;
3872    }
3873
3874    v9fs_string_init(&target);
3875    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3876    if (err < 0) {
3877        goto out;
3878    }
3879    err = pdu_marshal(pdu, offset, "s", &target);
3880    if (err < 0) {
3881        v9fs_string_free(&target);
3882        goto out;
3883    }
3884    err += offset;
3885    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3886    v9fs_string_free(&target);
3887out:
3888    put_fid(pdu, fidp);
3889out_nofid:
3890    pdu_complete(pdu, err);
3891}
3892
3893static CoroutineEntry *pdu_co_handlers[] = {
3894    [P9_TREADDIR] = v9fs_readdir,
3895    [P9_TSTATFS] = v9fs_statfs,
3896    [P9_TGETATTR] = v9fs_getattr,
3897    [P9_TSETATTR] = v9fs_setattr,
3898    [P9_TXATTRWALK] = v9fs_xattrwalk,
3899    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3900    [P9_TMKNOD] = v9fs_mknod,
3901    [P9_TRENAME] = v9fs_rename,
3902    [P9_TLOCK] = v9fs_lock,
3903    [P9_TGETLOCK] = v9fs_getlock,
3904    [P9_TRENAMEAT] = v9fs_renameat,
3905    [P9_TREADLINK] = v9fs_readlink,
3906    [P9_TUNLINKAT] = v9fs_unlinkat,
3907    [P9_TMKDIR] = v9fs_mkdir,
3908    [P9_TVERSION] = v9fs_version,
3909    [P9_TLOPEN] = v9fs_open,
3910    [P9_TATTACH] = v9fs_attach,
3911    [P9_TSTAT] = v9fs_stat,
3912    [P9_TWALK] = v9fs_walk,
3913    [P9_TCLUNK] = v9fs_clunk,
3914    [P9_TFSYNC] = v9fs_fsync,
3915    [P9_TOPEN] = v9fs_open,
3916    [P9_TREAD] = v9fs_read,
3917#if 0
3918    [P9_TAUTH] = v9fs_auth,
3919#endif
3920    [P9_TFLUSH] = v9fs_flush,
3921    [P9_TLINK] = v9fs_link,
3922    [P9_TSYMLINK] = v9fs_symlink,
3923    [P9_TCREATE] = v9fs_create,
3924    [P9_TLCREATE] = v9fs_lcreate,
3925    [P9_TWRITE] = v9fs_write,
3926    [P9_TWSTAT] = v9fs_wstat,
3927    [P9_TREMOVE] = v9fs_remove,
3928};
3929
3930static void coroutine_fn v9fs_op_not_supp(void *opaque)
3931{
3932    V9fsPDU *pdu = opaque;
3933    pdu_complete(pdu, -EOPNOTSUPP);
3934}
3935
3936static void coroutine_fn v9fs_fs_ro(void *opaque)
3937{
3938    V9fsPDU *pdu = opaque;
3939    pdu_complete(pdu, -EROFS);
3940}
3941
3942static inline bool is_read_only_op(V9fsPDU *pdu)
3943{
3944    switch (pdu->id) {
3945    case P9_TREADDIR:
3946    case P9_TSTATFS:
3947    case P9_TGETATTR:
3948    case P9_TXATTRWALK:
3949    case P9_TLOCK:
3950    case P9_TGETLOCK:
3951    case P9_TREADLINK:
3952    case P9_TVERSION:
3953    case P9_TLOPEN:
3954    case P9_TATTACH:
3955    case P9_TSTAT:
3956    case P9_TWALK:
3957    case P9_TCLUNK:
3958    case P9_TFSYNC:
3959    case P9_TOPEN:
3960    case P9_TREAD:
3961    case P9_TAUTH:
3962    case P9_TFLUSH:
3963        return 1;
3964    default:
3965        return 0;
3966    }
3967}
3968
3969void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
3970{
3971    Coroutine *co;
3972    CoroutineEntry *handler;
3973    V9fsState *s = pdu->s;
3974
3975    pdu->size = le32_to_cpu(hdr->size_le);
3976    pdu->id = hdr->id;
3977    pdu->tag = le16_to_cpu(hdr->tag_le);
3978
3979    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3980        (pdu_co_handlers[pdu->id] == NULL)) {
3981        handler = v9fs_op_not_supp;
3982    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3983        handler = v9fs_fs_ro;
3984    } else {
3985        handler = pdu_co_handlers[pdu->id];
3986    }
3987
3988    qemu_co_queue_init(&pdu->complete);
3989    co = qemu_coroutine_create(handler, pdu);
3990    qemu_coroutine_enter(co);
3991}
3992
3993/* Returns 0 on success, 1 on failure. */
3994int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
3995                               Error **errp)
3996{
3997    int i, len;
3998    struct stat stat;
3999    FsDriverEntry *fse;
4000    V9fsPath path;
4001    int rc = 1;
4002
4003    assert(!s->transport);
4004    s->transport = t;
4005
4006    /* initialize pdu allocator */
4007    QLIST_INIT(&s->free_list);
4008    QLIST_INIT(&s->active_list);
4009    for (i = 0; i < MAX_REQ; i++) {
4010        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4011        s->pdus[i].s = s;
4012        s->pdus[i].idx = i;
4013    }
4014
4015    v9fs_path_init(&path);
4016
4017    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4018
4019    if (!fse) {
4020        /* We don't have a fsdev identified by fsdev_id */
4021        error_setg(errp, "9pfs device couldn't find fsdev with the "
4022                   "id = %s",
4023                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4024        goto out;
4025    }
4026
4027    if (!s->fsconf.tag) {
4028        /* we haven't specified a mount_tag */
4029        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4030                   s->fsconf.fsdev_id);
4031        goto out;
4032    }
4033
4034    s->ctx.export_flags = fse->export_flags;
4035    s->ctx.fs_root = g_strdup(fse->path);
4036    s->ctx.exops.get_st_gen = NULL;
4037    len = strlen(s->fsconf.tag);
4038    if (len > MAX_TAG_LEN - 1) {
4039        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4040                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4041        goto out;
4042    }
4043
4044    s->tag = g_strdup(s->fsconf.tag);
4045    s->ctx.uid = -1;
4046
4047    s->ops = fse->ops;
4048
4049    s->ctx.fmode = fse->fmode;
4050    s->ctx.dmode = fse->dmode;
4051
4052    s->fid_list = NULL;
4053    qemu_co_rwlock_init(&s->rename_lock);
4054
4055    if (s->ops->init(&s->ctx, errp) < 0) {
4056        error_prepend(errp, "cannot initialize fsdev '%s': ",
4057                      s->fsconf.fsdev_id);
4058        goto out;
4059    }
4060
4061    /*
4062     * Check details of export path, We need to use fs driver
4063     * call back to do that. Since we are in the init path, we don't
4064     * use co-routines here.
4065     */
4066    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4067        error_setg(errp,
4068                   "error in converting name to path %s", strerror(errno));
4069        goto out;
4070    }
4071    if (s->ops->lstat(&s->ctx, &path, &stat)) {
4072        error_setg(errp, "share path %s does not exist", fse->path);
4073        goto out;
4074    } else if (!S_ISDIR(stat.st_mode)) {
4075        error_setg(errp, "share path %s is not a directory", fse->path);
4076        goto out;
4077    }
4078
4079    s->dev_id = stat.st_dev;
4080
4081    /* init inode remapping : */
4082    /* hash table for variable length inode suffixes */
4083    qpd_table_init(&s->qpd_table);
4084    /* hash table for slow/full inode remapping (most users won't need it) */
4085    qpf_table_init(&s->qpf_table);
4086    /* hash table for quick inode remapping */
4087    qpp_table_init(&s->qpp_table);
4088    s->qp_ndevices = 0;
4089    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4090    s->qp_fullpath_next = 1;
4091
4092    s->ctx.fst = &fse->fst;
4093    fsdev_throttle_init(s->ctx.fst);
4094
4095    rc = 0;
4096out:
4097    if (rc) {
4098        v9fs_device_unrealize_common(s, NULL);
4099    }
4100    v9fs_path_free(&path);
4101    return rc;
4102}
4103
4104void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
4105{
4106    if (s->ops && s->ops->cleanup) {
4107        s->ops->cleanup(&s->ctx);
4108    }
4109    if (s->ctx.fst) {
4110        fsdev_throttle_cleanup(s->ctx.fst);
4111    }
4112    g_free(s->tag);
4113    qp_table_destroy(&s->qpd_table);
4114    qp_table_destroy(&s->qpp_table);
4115    qp_table_destroy(&s->qpf_table);
4116    g_free(s->ctx.fs_root);
4117}
4118
4119typedef struct VirtfsCoResetData {
4120    V9fsPDU pdu;
4121    bool done;
4122} VirtfsCoResetData;
4123
4124static void coroutine_fn virtfs_co_reset(void *opaque)
4125{
4126    VirtfsCoResetData *data = opaque;
4127
4128    virtfs_reset(&data->pdu);
4129    data->done = true;
4130}
4131
4132void v9fs_reset(V9fsState *s)
4133{
4134    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4135    Coroutine *co;
4136
4137    while (!QLIST_EMPTY(&s->active_list)) {
4138        aio_poll(qemu_get_aio_context(), true);
4139    }
4140
4141    co = qemu_coroutine_create(virtfs_co_reset, &data);
4142    qemu_coroutine_enter(co);
4143
4144    while (!data.done) {
4145        aio_poll(qemu_get_aio_context(), true);
4146    }
4147}
4148
4149static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4150{
4151    struct rlimit rlim;
4152    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4153        error_report("Failed to get the resource limit");
4154        exit(1);
4155    }
4156    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
4157    open_fd_rc = rlim.rlim_cur/2;
4158}
4159