qemu/hw/9pfs/9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include <glib/gprintf.h>
  16#include "hw/virtio/virtio.h"
  17#include "qapi/error.h"
  18#include "qemu/error-report.h"
  19#include "qemu/iov.h"
  20#include "qemu/main-loop.h"
  21#include "qemu/sockets.h"
  22#include "virtio-9p.h"
  23#include "fsdev/qemu-fsdev.h"
  24#include "9p-xattr.h"
  25#include "coth.h"
  26#include "trace.h"
  27#include "migration/blocker.h"
  28#include "sysemu/qtest.h"
  29#include "qemu/xxhash.h"
  30#include <math.h>
  31
/*
 * File-descriptor accounting globals. In the visible part of this file only
 * open_fd_rc is read: v9fs_reclaim_fd() stops collecting fids once it has
 * gathered that many.
 * NOTE(review): open_fd_hw looks like a high-water mark and total_open_fd
 * like a running count of open descriptors -- confirm against the code that
 * updates them.
 */
int open_fd_hw;
int total_open_fd;
static int open_fd_rc;
  35
/*
 * Open-mode bits as decoded by omode_to_uflags(): the two low bits select
 * the access mode (Oread, Owrite, Ordwr, Oexec); Oexcl, Otrunc and Oappend
 * are modifier flags tested individually. Orexec and Orclose are defined
 * here but not consulted by omode_to_uflags().
 */
enum {
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
  47
  48static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  49{
  50    ssize_t ret;
  51    va_list ap;
  52
  53    va_start(ap, fmt);
  54    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
  55    va_end(ap);
  56
  57    return ret;
  58}
  59
  60static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  61{
  62    ssize_t ret;
  63    va_list ap;
  64
  65    va_start(ap, fmt);
  66    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
  67    va_end(ap);
  68
  69    return ret;
  70}
  71
  72static int omode_to_uflags(int8_t mode)
  73{
  74    int ret = 0;
  75
  76    switch (mode & 3) {
  77    case Oread:
  78        ret = O_RDONLY;
  79        break;
  80    case Ordwr:
  81        ret = O_RDWR;
  82        break;
  83    case Owrite:
  84        ret = O_WRONLY;
  85        break;
  86    case Oexec:
  87        ret = O_RDONLY;
  88        break;
  89    }
  90
  91    if (mode & Otrunc) {
  92        ret |= O_TRUNC;
  93    }
  94
  95    if (mode & Oappend) {
  96        ret |= O_APPEND;
  97    }
  98
  99    if (mode & Oexcl) {
 100        ret |= O_EXCL;
 101    }
 102
 103    return ret;
 104}
 105
/*
 * One row of the translation table used by dotl_to_open_flags(): when
 * dotl_flag is set in the incoming flags, open_flag is OR-ed into the result.
 */
typedef struct DotlOpenflagMap {
    int dotl_flag;
    int open_flag;
} DotlOpenflagMap;
 110
 111static int dotl_to_open_flags(int flags)
 112{
 113    int i;
 114    /*
 115     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
 116     * and P9_DOTL_NOACCESS
 117     */
 118    int oflags = flags & O_ACCMODE;
 119
 120    DotlOpenflagMap dotl_oflag_map[] = {
 121        { P9_DOTL_CREATE, O_CREAT },
 122        { P9_DOTL_EXCL, O_EXCL },
 123        { P9_DOTL_NOCTTY , O_NOCTTY },
 124        { P9_DOTL_TRUNC, O_TRUNC },
 125        { P9_DOTL_APPEND, O_APPEND },
 126        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 127        { P9_DOTL_DSYNC, O_DSYNC },
 128        { P9_DOTL_FASYNC, FASYNC },
 129        { P9_DOTL_DIRECT, O_DIRECT },
 130        { P9_DOTL_LARGEFILE, O_LARGEFILE },
 131        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 132        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 133        { P9_DOTL_NOATIME, O_NOATIME },
 134        { P9_DOTL_SYNC, O_SYNC },
 135    };
 136
 137    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 138        if (flags & dotl_oflag_map[i].dotl_flag) {
 139            oflags |= dotl_oflag_map[i].open_flag;
 140        }
 141    }
 142
 143    return oflags;
 144}
 145
 146void cred_init(FsCred *credp)
 147{
 148    credp->fc_uid = -1;
 149    credp->fc_gid = -1;
 150    credp->fc_mode = -1;
 151    credp->fc_rdev = -1;
 152}
 153
 154static int get_dotl_openflags(V9fsState *s, int oflags)
 155{
 156    int flags;
 157    /*
 158     * Filter the client open flags
 159     */
 160    flags = dotl_to_open_flags(oflags);
 161    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 162    /*
 163     * Ignore direct disk access hint until the server supports it.
 164     */
 165    flags &= ~O_DIRECT;
 166    return flags;
 167}
 168
 169void v9fs_path_init(V9fsPath *path)
 170{
 171    path->data = NULL;
 172    path->size = 0;
 173}
 174
 175void v9fs_path_free(V9fsPath *path)
 176{
 177    g_free(path->data);
 178    path->data = NULL;
 179    path->size = 0;
 180}
 181
 182
 183void GCC_FMT_ATTR(2, 3)
 184v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 185{
 186    va_list ap;
 187
 188    v9fs_path_free(path);
 189
 190    va_start(ap, fmt);
 191    /* Bump the size for including terminating NULL */
 192    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
 193    va_end(ap);
 194}
 195
 196void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
 197{
 198    v9fs_path_free(dst);
 199    dst->size = src->size;
 200    dst->data = g_memdup(src->data, src->size);
 201}
 202
 203int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 204                      const char *name, V9fsPath *path)
 205{
 206    int err;
 207    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 208    if (err < 0) {
 209        err = -errno;
 210    }
 211    return err;
 212}
 213
 214/*
 215 * Return TRUE if s1 is an ancestor of s2.
 216 *
 217 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 218 * As a special case, We treat s1 as ancestor of s2 if they are same!
 219 */
 220static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 221{
 222    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 223        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 224            return 1;
 225        }
 226    }
 227    return 0;
 228}
 229
 230static size_t v9fs_string_size(V9fsString *str)
 231{
 232    return str->size;
 233}
 234
 235/*
 236 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 237static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 238{
 239    int err = 1;
 240    if (f->fid_type == P9_FID_FILE) {
 241        if (f->fs.fd == -1) {
 242            do {
 243                err = v9fs_co_open(pdu, f, f->open_flags);
 244            } while (err == -EINTR && !pdu->cancelled);
 245        }
 246    } else if (f->fid_type == P9_FID_DIR) {
 247        if (f->fs.dir.stream == NULL) {
 248            do {
 249                err = v9fs_co_opendir(pdu, f);
 250            } while (err == -EINTR && !pdu->cancelled);
 251        }
 252    }
 253    return err;
 254}
 255
 256static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
 257{
 258    int err;
 259    V9fsFidState *f;
 260    V9fsState *s = pdu->s;
 261
 262    for (f = s->fid_list; f; f = f->next) {
 263        BUG_ON(f->clunked);
 264        if (f->fid == fid) {
 265            /*
 266             * Update the fid ref upfront so that
 267             * we don't get reclaimed when we yield
 268             * in open later.
 269             */
 270            f->ref++;
 271            /*
 272             * check whether we need to reopen the
 273             * file. We might have closed the fd
 274             * while trying to free up some file
 275             * descriptors.
 276             */
 277            err = v9fs_reopen_fid(pdu, f);
 278            if (err < 0) {
 279                f->ref--;
 280                return NULL;
 281            }
 282            /*
 283             * Mark the fid as referenced so that the LRU
 284             * reclaim won't close the file descriptor
 285             */
 286            f->flags |= FID_REFERENCED;
 287            return f;
 288        }
 289    }
 290    return NULL;
 291}
 292
 293static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 294{
 295    V9fsFidState *f;
 296
 297    for (f = s->fid_list; f; f = f->next) {
 298        /* If fid is already there return NULL */
 299        BUG_ON(f->clunked);
 300        if (f->fid == fid) {
 301            return NULL;
 302        }
 303    }
 304    f = g_malloc0(sizeof(V9fsFidState));
 305    f->fid = fid;
 306    f->fid_type = P9_FID_NONE;
 307    f->ref = 1;
 308    /*
 309     * Mark the fid as referenced so that the LRU
 310     * reclaim won't close the file descriptor
 311     */
 312    f->flags |= FID_REFERENCED;
 313    f->next = s->fid_list;
 314    s->fid_list = f;
 315
 316    v9fs_readdir_init(&f->fs.dir);
 317    v9fs_readdir_init(&f->fs_reclaim.dir);
 318
 319    return f;
 320}
 321
 322static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 323{
 324    int retval = 0;
 325
 326    if (fidp->fs.xattr.xattrwalk_fid) {
 327        /* getxattr/listxattr fid */
 328        goto free_value;
 329    }
 330    /*
 331     * if this is fid for setxattr. clunk should
 332     * result in setxattr localcall
 333     */
 334    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 335        /* clunk after partial write */
 336        retval = -EINVAL;
 337        goto free_out;
 338    }
 339    if (fidp->fs.xattr.len) {
 340        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 341                                   fidp->fs.xattr.value,
 342                                   fidp->fs.xattr.len,
 343                                   fidp->fs.xattr.flags);
 344    } else {
 345        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 346    }
 347free_out:
 348    v9fs_string_free(&fidp->fs.xattr.name);
 349free_value:
 350    g_free(fidp->fs.xattr.value);
 351    return retval;
 352}
 353
 354static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 355{
 356    int retval = 0;
 357
 358    if (fidp->fid_type == P9_FID_FILE) {
 359        /* If we reclaimed the fd no need to close */
 360        if (fidp->fs.fd != -1) {
 361            retval = v9fs_co_close(pdu, &fidp->fs);
 362        }
 363    } else if (fidp->fid_type == P9_FID_DIR) {
 364        if (fidp->fs.dir.stream != NULL) {
 365            retval = v9fs_co_closedir(pdu, &fidp->fs);
 366        }
 367    } else if (fidp->fid_type == P9_FID_XATTR) {
 368        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 369    }
 370    v9fs_path_free(&fidp->path);
 371    g_free(fidp);
 372    return retval;
 373}
 374
 375static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 376{
 377    BUG_ON(!fidp->ref);
 378    fidp->ref--;
 379    /*
 380     * Don't free the fid if it is in reclaim list
 381     */
 382    if (!fidp->ref && fidp->clunked) {
 383        if (fidp->fid == pdu->s->root_fid) {
 384            /*
 385             * if the clunked fid is root fid then we
 386             * have unmounted the fs on the client side.
 387             * delete the migration blocker. Ideally, this
 388             * should be hooked to transport close notification
 389             */
 390            if (pdu->s->migration_blocker) {
 391                migrate_del_blocker(pdu->s->migration_blocker);
 392                error_free(pdu->s->migration_blocker);
 393                pdu->s->migration_blocker = NULL;
 394            }
 395        }
 396        return free_fid(pdu, fidp);
 397    }
 398    return 0;
 399}
 400
 401static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 402{
 403    V9fsFidState **fidpp, *fidp;
 404
 405    for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
 406        if ((*fidpp)->fid == fid) {
 407            break;
 408        }
 409    }
 410    if (*fidpp == NULL) {
 411        return NULL;
 412    }
 413    fidp = *fidpp;
 414    *fidpp = fidp->next;
 415    fidp->clunked = 1;
 416    return fidp;
 417}
 418
/*
 * Close the host descriptors of idle fids to free up file descriptors.
 *
 * Works in two phases. The first phase walks the fid list and collects up to
 * open_fd_rc candidates on a private reclaim list: each candidate gets an
 * extra reference and its fd / directory stream is moved from fs to
 * fs_reclaim, so the fid looks closed from then on. The second phase closes
 * the collected descriptors and drops the extra references again.
 */
void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
{
    int reclaim_count = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *f, *reclaim_list = NULL;

    for (f = s->fid_list; f; f = f->next) {
        /*
         * Unlink fids cannot be reclaimed. Check
         * for them and skip them. Also skip fids
         * currently being operated on.
         */
        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
            continue;
        }
        /*
         * if it is a recently referenced fid
         * we leave the fid untouched and clear the
         * reference bit. We come back to it later
         * in the next iteration. (a simple LRU without
         * moving list elements around)
         */
        if (f->flags & FID_REFERENCED) {
            f->flags &= ~FID_REFERENCED;
            continue;
        }
        /*
         * Add fids to reclaim list.
         */
        if (f->fid_type == P9_FID_FILE) {
            if (f->fs.fd != -1) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                f->rclm_lst = reclaim_list;
                reclaim_list = f;
                /* park the fd in fs_reclaim; fs.fd == -1 marks it closed */
                f->fs_reclaim.fd = f->fs.fd;
                f->fs.fd = -1;
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
            if (f->fs.dir.stream != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                f->rclm_lst = reclaim_list;
                reclaim_list = f;
                /* park the stream in fs_reclaim; NULL marks it closed */
                f->fs_reclaim.dir.stream = f->fs.dir.stream;
                f->fs.dir.stream = NULL;
                reclaim_count++;
            }
        }
        /* stop collecting once the per-run limit is reached */
        if (reclaim_count >= open_fd_rc) {
            break;
        }
    }
    /*
     * Now close the fid in reclaim list. Free them if they
     * are already clunked.
     */
    while (reclaim_list) {
        f = reclaim_list;
        reclaim_list = f->rclm_lst;
        if (f->fid_type == P9_FID_FILE) {
            v9fs_co_close(pdu, &f->fs_reclaim);
        } else if (f->fid_type == P9_FID_DIR) {
            v9fs_co_closedir(pdu, &f->fs_reclaim);
        }
        f->rclm_lst = NULL;
        /*
         * Now drop the fid reference, free it
         * if clunked.
         */
        put_fid(pdu, f);
    }
}
 499
 500static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 501{
 502    int err;
 503    V9fsState *s = pdu->s;
 504    V9fsFidState *fidp, head_fid;
 505
 506    head_fid.next = s->fid_list;
 507    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
 508        if (fidp->path.size != path->size) {
 509            continue;
 510        }
 511        if (!memcmp(fidp->path.data, path->data, path->size)) {
 512            /* Mark the fid non reclaimable. */
 513            fidp->flags |= FID_NON_RECLAIMABLE;
 514
 515            /* reopen the file/dir if already closed */
 516            err = v9fs_reopen_fid(pdu, fidp);
 517            if (err < 0) {
 518                return err;
 519            }
 520            /*
 521             * Go back to head of fid list because
 522             * the list could have got updated when
 523             * switched to the worker thread
 524             */
 525            if (err == 0) {
 526                fidp = &head_fid;
 527            }
 528        }
 529    }
 530    return 0;
 531}
 532
 533static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
 534{
 535    V9fsState *s = pdu->s;
 536    V9fsFidState *fidp;
 537
 538    /* Free all fids */
 539    while (s->fid_list) {
 540        /* Get fid */
 541        fidp = s->fid_list;
 542        fidp->ref++;
 543
 544        /* Clunk fid */
 545        s->fid_list = fidp->next;
 546        fidp->clunked = 1;
 547
 548        put_fid(pdu, fidp);
 549    }
 550}
 551
/* Bits OR-ed into V9fsQID.type by stat_to_qid() for directories/symlinks. */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* Individual P9_STAT_MODE_* flag bits. */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Mask of the DIR, SYMLINK, LINK, DEVICE, NAMED_PIPE and SOCKET mode bits. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
 576
 577/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
 578static inline uint8_t mirror8bit(uint8_t byte)
 579{
 580    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
 581}
 582
 583/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
 584static inline uint64_t mirror64bit(uint64_t value)
 585{
 586    return ((uint64_t)mirror8bit(value         & 0xff) << 56) |
 587           ((uint64_t)mirror8bit((value >> 8)  & 0xff) << 48) |
 588           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
 589           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
 590           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
 591           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
 592           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8)  |
 593           ((uint64_t)mirror8bit((value >> 56) & 0xff));
 594}
 595
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index) however for the
 * price of having higher maximum bit count of generated affixes (at highest
 * index). Likewise increasing this parameter yields in smaller maximum bit
 * count for the price of having higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected amount
 * of devices to be exposed by one export. For a small amount of devices k
 * should be small, for a large amount of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
 615
 616/**
 617 * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
 618 *
 619 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
 620 * with growing length and with the mathematical property of being
 621 * "prefix-free". The latter means the generated prefixes can be prepended
 622 * in front of arbitrary numbers and the resulting concatenated numbers are
 623 * guaranteed to be always unique.
 624 *
 625 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 626 * sense that lowest allowed index (@param n) starts with 1, not with zero.
 627 *
 628 * @param n - natural number (or index) of the prefix to be generated
 629 *            (1, 2, 3, ...)
 630 * @param k - parameter k of Exp. Golomb algorithm to be used
 631 *            (see comment on EXP_GOLOMB_K macro for details about k)
 632 */
 633static VariLenAffix expGolombEncode(uint64_t n, int k)
 634{
 635    const uint64_t value = n + (1 << k) - 1;
 636    const int bits = (int) log2(value) + 1;
 637    return (VariLenAffix) {
 638        .type = AffixType_Prefix,
 639        .value = value,
 640        .bits = bits + MAX((bits - 1 - k), 0)
 641    };
 642}
 643
 644/**
 645 * @brief Converts a suffix into a prefix, or a prefix into a suffix.
 646 *
 647 * Simply mirror all bits of the affix value, for the purpose to preserve
 648 * respectively the mathematical "prefix-free" or "suffix-free" property
 649 * after the conversion.
 650 *
 651 * If a passed prefix is suitable to create unique numbers, then the
 652 * returned suffix is suitable to create unique numbers as well (and vice
 653 * versa).
 654 */
 655static VariLenAffix invertAffix(const VariLenAffix *affix)
 656{
 657    return (VariLenAffix) {
 658        .type =
 659            (affix->type == AffixType_Suffix) ?
 660                AffixType_Prefix : AffixType_Suffix,
 661        .value =
 662            mirror64bit(affix->value) >>
 663            ((sizeof(affix->value) * 8) - affix->bits),
 664        .bits = affix->bits
 665    };
 666}
 667
 668/**
 669 * @brief Generates suffix numbers with "suffix-free" property.
 670 *
 671 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 672 *
 673 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 674 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 675 * which are still suitable for generating unique numbers.
 676 *
 677 * @param n - natural number (or index) of the suffix to be generated
 678 *            (1, 2, 3, ...)
 679 */
 680static VariLenAffix affixForIndex(uint64_t index)
 681{
 682    VariLenAffix prefix;
 683    prefix = expGolombEncode(index, EXP_GOLOMB_K);
 684    return invertAffix(&prefix); /* convert prefix to suffix */
 685}
 686
 687/* creative abuse of tb_hash_func7, which is based on xxhash */
 688static uint32_t qpp_hash(QppEntry e)
 689{
 690    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
 691}
 692
 693static uint32_t qpf_hash(QpfEntry e)
 694{
 695    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
 696}
 697
 698static bool qpd_cmp_func(const void *obj, const void *userp)
 699{
 700    const QpdEntry *e1 = obj, *e2 = userp;
 701    return e1->dev == e2->dev;
 702}
 703
 704static bool qpp_cmp_func(const void *obj, const void *userp)
 705{
 706    const QppEntry *e1 = obj, *e2 = userp;
 707    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
 708}
 709
 710static bool qpf_cmp_func(const void *obj, const void *userp)
 711{
 712    const QpfEntry *e1 = obj, *e2 = userp;
 713    return e1->dev == e2->dev && e1->ino == e2->ino;
 714}
 715
 716static void qp_table_remove(void *p, uint32_t h, void *up)
 717{
 718    g_free(p);
 719}
 720
 721static void qp_table_destroy(struct qht *ht)
 722{
 723    if (!ht || !ht->map) {
 724        return;
 725    }
 726    qht_iter(ht, qp_table_remove, NULL);
 727    qht_destroy(ht);
 728}
 729
 730static void qpd_table_init(struct qht *ht)
 731{
 732    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 733}
 734
 735static void qpp_table_init(struct qht *ht)
 736{
 737    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 738}
 739
 740static void qpf_table_init(struct qht *ht)
 741{
 742    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
 743}
 744
 745/*
 746 * Returns how many (high end) bits of inode numbers of the passed fs
 747 * device shall be used (in combination with the device number) to
 748 * generate hash values for qpp_table entries.
 749 *
 750 * This function is required if variable length suffixes are used for inode
 751 * number mapping on guest level. Since a device may end up having multiple
 752 * entries in qpp_table, each entry most probably with a different suffix
 753 * length, we thus need this function in conjunction with qpd_table to
 754 * "agree" about a fix amount of bits (per device) to be always used for
 755 * generating hash values for the purpose of accessing qpp_table in order
 756 * get consistent behaviour when accessing qpp_table.
 757 */
 758static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
 759{
 760    QpdEntry lookup = {
 761        .dev = dev
 762    }, *val;
 763    uint32_t hash = dev;
 764    VariLenAffix affix;
 765
 766    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
 767    if (!val) {
 768        val = g_malloc0(sizeof(QpdEntry));
 769        *val = lookup;
 770        affix = affixForIndex(pdu->s->qp_affix_next);
 771        val->prefix_bits = affix.bits;
 772        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
 773        pdu->s->qp_ndevices++;
 774    }
 775    return val->prefix_bits;
 776}
 777
 778/**
 779 * @brief Slow / full mapping host inode nr -> guest inode nr.
 780 *
 781 * This function performs a slower and much more costly remapping of an
 782 * original file inode number on host to an appropriate different inode
 783 * number on guest. For every (dev, inode) combination on host a new
 784 * sequential number is generated, cached and exposed as inode number on
 785 * guest.
 786 *
 787 * This is just a "last resort" fallback solution if the much faster/cheaper
 788 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 789 * expected ever to be used at all though.
 790 *
 791 * @see qid_path_suffixmap() for details
 792 *
 793 */
 794static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
 795                            uint64_t *path)
 796{
 797    QpfEntry lookup = {
 798        .dev = stbuf->st_dev,
 799        .ino = stbuf->st_ino
 800    }, *val;
 801    uint32_t hash = qpf_hash(lookup);
 802    VariLenAffix affix;
 803
 804    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
 805
 806    if (!val) {
 807        if (pdu->s->qp_fullpath_next == 0) {
 808            /* no more files can be mapped :'( */
 809            error_report_once(
 810                "9p: No more prefixes available for remapping inodes from "
 811                "host to guest."
 812            );
 813            return -ENFILE;
 814        }
 815
 816        val = g_malloc0(sizeof(QppEntry));
 817        *val = lookup;
 818
 819        /* new unique inode and device combo */
 820        affix = affixForIndex(
 821            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
 822        );
 823        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
 824        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
 825        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
 826    }
 827
 828    *path = val->path;
 829    return 0;
 830}
 831
 832/**
 833 * @brief Quick mapping host inode nr -> guest inode nr.
 834 *
 835 * This function performs quick remapping of an original file inode number
 836 * on host to an appropriate different inode number on guest. This remapping
 837 * of inodes is required to avoid inode nr collisions on guest which would
 838 * happen if the 9p export contains more than 1 exported file system (or
 839 * more than 1 file system data set), because unlike on host level where the
 840 * files would have different device nrs, all files exported by 9p would
 841 * share the same device nr on guest (the device nr of the virtual 9p device
 842 * that is).
 843 *
 844 * Inode remapping is performed by chopping off high end bits of the original
 845 * inode number from host, shifting the result upwards and then assigning a
 846 * generated suffix number for the low end bits, where the same suffix number
 847 * will be shared by all inodes with the same device id AND the same high end
 848 * bits that have been chopped off. That approach utilizes the fact that inode
 849 * numbers very likely share the same high end bits (i.e. due to their common
 850 * sequential generation by file systems) and hence we only have to generate
 851 * and track a very limited amount of suffixes in practice due to that.
 852 *
 853 * We generate variable size suffixes for that purpose. The 1st generated
 854 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 855 * the original inode number. The subsequent suffixes being generated will
 856 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 857 * generated will have 3 bits and hence we have to chop off 3 bits from their
 858 * original inodes, and so on. That approach of using variable length suffixes
 859 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 860 * limited amount of devices are shared by the same export (e.g. typically
 861 * less than 2 dozen devices per 9p export), so in practice we need to chop
 862 * off less bits than with fixed size prefixes and yet are flexible to add
 863 * new devices at runtime below host's export directory at any time without
 864 * having to reboot guest nor requiring to reconfigure guest for that. And due
 865 * to the very limited amount of original high end bits that we chop off that
 866 * way, the total amount of suffixes we need to generate is less than by using
 867 * fixed size prefixes and hence it also improves performance of the inode
 868 * remapping algorithm, and finally has the nice side effect that the inode
 869 * numbers on guest will be much smaller & human friendly. ;-)
 870 */
 871static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
 872                              uint64_t *path)
 873{
 874    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
 875    QppEntry lookup = {
 876        .dev = stbuf->st_dev,
 877        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
 878    }, *val;
 879    uint32_t hash = qpp_hash(lookup);
 880
 881    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
 882
 883    if (!val) {
 884        if (pdu->s->qp_affix_next == 0) {
 885            /* we ran out of affixes */
 886            warn_report_once(
 887                "9p: Potential degraded performance of inode remapping"
 888            );
 889            return -ENFILE;
 890        }
 891
 892        val = g_malloc0(sizeof(QppEntry));
 893        *val = lookup;
 894
 895        /* new unique inode affix and device combo */
 896        val->qp_affix_index = pdu->s->qp_affix_next++;
 897        val->qp_affix = affixForIndex(val->qp_affix_index);
 898        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
 899    }
 900    /* assuming generated affix to be suffix type, not prefix */
 901    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
 902    return 0;
 903}
 904
 905static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
 906{
 907    int err;
 908    size_t size;
 909
 910    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
 911        /* map inode+device to qid path (fast path) */
 912        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
 913        if (err == -ENFILE) {
 914            /* fast path didn't work, fall back to full map */
 915            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
 916        }
 917        if (err) {
 918            return err;
 919        }
 920    } else {
 921        if (pdu->s->dev_id != stbuf->st_dev) {
 922            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
 923                error_report_once(
 924                    "9p: Multiple devices detected in same VirtFS export. "
 925                    "Access of guest to additional devices is (partly) "
 926                    "denied due to virtfs option 'multidevs=forbid' being "
 927                    "effective."
 928                );
 929                return -ENODEV;
 930            } else {
 931                warn_report_once(
 932                    "9p: Multiple devices detected in same VirtFS export, "
 933                    "which might lead to file ID collisions and severe "
 934                    "misbehaviours on guest! You should either use a "
 935                    "separate export for each device shared from host or "
 936                    "use virtfs option 'multidevs=remap'!"
 937                );
 938            }
 939        }
 940        memset(&qidp->path, 0, sizeof(qidp->path));
 941        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 942        memcpy(&qidp->path, &stbuf->st_ino, size);
 943    }
 944
 945    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 946    qidp->type = 0;
 947    if (S_ISDIR(stbuf->st_mode)) {
 948        qidp->type |= P9_QID_TYPE_DIR;
 949    }
 950    if (S_ISLNK(stbuf->st_mode)) {
 951        qidp->type |= P9_QID_TYPE_SYMLINK;
 952    }
 953
 954    return 0;
 955}
 956
 957static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
 958                                   V9fsQID *qidp)
 959{
 960    struct stat stbuf;
 961    int err;
 962
 963    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 964    if (err < 0) {
 965        return err;
 966    }
 967    err = stat_to_qid(pdu, &stbuf, qidp);
 968    if (err < 0) {
 969        return err;
 970    }
 971    return 0;
 972}
 973
 974static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
 975                                      struct dirent *dent, V9fsQID *qidp)
 976{
 977    struct stat stbuf;
 978    V9fsPath path;
 979    int err;
 980
 981    v9fs_path_init(&path);
 982
 983    err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
 984    if (err < 0) {
 985        goto out;
 986    }
 987    err = v9fs_co_lstat(pdu, &path, &stbuf);
 988    if (err < 0) {
 989        goto out;
 990    }
 991    err = stat_to_qid(pdu, &stbuf, qidp);
 992
 993out:
 994    v9fs_path_free(&path);
 995    return err;
 996}
 997
 998V9fsPDU *pdu_alloc(V9fsState *s)
 999{
1000    V9fsPDU *pdu = NULL;
1001
1002    if (!QLIST_EMPTY(&s->free_list)) {
1003        pdu = QLIST_FIRST(&s->free_list);
1004        QLIST_REMOVE(pdu, next);
1005        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
1006    }
1007    return pdu;
1008}
1009
1010void pdu_free(V9fsPDU *pdu)
1011{
1012    V9fsState *s = pdu->s;
1013
1014    g_assert(!pdu->cancelled);
1015    QLIST_REMOVE(pdu, next);
1016    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1017}
1018
1019static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
1020{
1021    int8_t id = pdu->id + 1; /* Response */
1022    V9fsState *s = pdu->s;
1023    int ret;
1024
1025    /*
1026     * The 9p spec requires that successfully cancelled pdus receive no reply.
1027     * Sending a reply would confuse clients because they would
1028     * assume that any EINTR is the actual result of the operation,
1029     * rather than a consequence of the cancellation. However, if
1030     * the operation completed (succesfully or with an error other
1031     * than caused be cancellation), we do send out that reply, both
1032     * for efficiency and to avoid confusing the rest of the state machine
1033     * that assumes passing a non-error here will mean a successful
1034     * transmission of the reply.
1035     */
1036    bool discard = pdu->cancelled && len == -EINTR;
1037    if (discard) {
1038        trace_v9fs_rcancel(pdu->tag, pdu->id);
1039        pdu->size = 0;
1040        goto out_notify;
1041    }
1042
1043    if (len < 0) {
1044        int err = -len;
1045        len = 7;
1046
1047        if (s->proto_version != V9FS_PROTO_2000L) {
1048            V9fsString str;
1049
1050            str.data = strerror(err);
1051            str.size = strlen(str.data);
1052
1053            ret = pdu_marshal(pdu, len, "s", &str);
1054            if (ret < 0) {
1055                goto out_notify;
1056            }
1057            len += ret;
1058            id = P9_RERROR;
1059        }
1060
1061        ret = pdu_marshal(pdu, len, "d", err);
1062        if (ret < 0) {
1063            goto out_notify;
1064        }
1065        len += ret;
1066
1067        if (s->proto_version == V9FS_PROTO_2000L) {
1068            id = P9_RLERROR;
1069        }
1070        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1071    }
1072
1073    /* fill out the header */
1074    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1075        goto out_notify;
1076    }
1077
1078    /* keep these in sync */
1079    pdu->size = len;
1080    pdu->id = id;
1081
1082out_notify:
1083    pdu->s->transport->push_and_notify(pdu);
1084
1085    /* Now wakeup anybody waiting in flush for this request */
1086    if (!qemu_co_queue_next(&pdu->complete)) {
1087        pdu_free(pdu);
1088    }
1089}
1090
1091static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1092{
1093    mode_t ret;
1094
1095    ret = mode & 0777;
1096    if (mode & P9_STAT_MODE_DIR) {
1097        ret |= S_IFDIR;
1098    }
1099
1100    if (mode & P9_STAT_MODE_SYMLINK) {
1101        ret |= S_IFLNK;
1102    }
1103    if (mode & P9_STAT_MODE_SOCKET) {
1104        ret |= S_IFSOCK;
1105    }
1106    if (mode & P9_STAT_MODE_NAMED_PIPE) {
1107        ret |= S_IFIFO;
1108    }
1109    if (mode & P9_STAT_MODE_DEVICE) {
1110        if (extension->size && extension->data[0] == 'c') {
1111            ret |= S_IFCHR;
1112        } else {
1113            ret |= S_IFBLK;
1114        }
1115    }
1116
1117    if (!(ret&~0777)) {
1118        ret |= S_IFREG;
1119    }
1120
1121    if (mode & P9_STAT_MODE_SETUID) {
1122        ret |= S_ISUID;
1123    }
1124    if (mode & P9_STAT_MODE_SETGID) {
1125        ret |= S_ISGID;
1126    }
1127    if (mode & P9_STAT_MODE_SETVTX) {
1128        ret |= S_ISVTX;
1129    }
1130
1131    return ret;
1132}
1133
1134static int donttouch_stat(V9fsStat *stat)
1135{
1136    if (stat->type == -1 &&
1137        stat->dev == -1 &&
1138        stat->qid.type == 0xff &&
1139        stat->qid.version == (uint32_t) -1 &&
1140        stat->qid.path == (uint64_t) -1 &&
1141        stat->mode == -1 &&
1142        stat->atime == -1 &&
1143        stat->mtime == -1 &&
1144        stat->length == -1 &&
1145        !stat->name.size &&
1146        !stat->uid.size &&
1147        !stat->gid.size &&
1148        !stat->muid.size &&
1149        stat->n_uid == -1 &&
1150        stat->n_gid == -1 &&
1151        stat->n_muid == -1) {
1152        return 1;
1153    }
1154
1155    return 0;
1156}
1157
1158static void v9fs_stat_init(V9fsStat *stat)
1159{
1160    v9fs_string_init(&stat->name);
1161    v9fs_string_init(&stat->uid);
1162    v9fs_string_init(&stat->gid);
1163    v9fs_string_init(&stat->muid);
1164    v9fs_string_init(&stat->extension);
1165}
1166
1167static void v9fs_stat_free(V9fsStat *stat)
1168{
1169    v9fs_string_free(&stat->name);
1170    v9fs_string_free(&stat->uid);
1171    v9fs_string_free(&stat->gid);
1172    v9fs_string_free(&stat->muid);
1173    v9fs_string_free(&stat->extension);
1174}
1175
1176static uint32_t stat_to_v9mode(const struct stat *stbuf)
1177{
1178    uint32_t mode;
1179
1180    mode = stbuf->st_mode & 0777;
1181    if (S_ISDIR(stbuf->st_mode)) {
1182        mode |= P9_STAT_MODE_DIR;
1183    }
1184
1185    if (S_ISLNK(stbuf->st_mode)) {
1186        mode |= P9_STAT_MODE_SYMLINK;
1187    }
1188
1189    if (S_ISSOCK(stbuf->st_mode)) {
1190        mode |= P9_STAT_MODE_SOCKET;
1191    }
1192
1193    if (S_ISFIFO(stbuf->st_mode)) {
1194        mode |= P9_STAT_MODE_NAMED_PIPE;
1195    }
1196
1197    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1198        mode |= P9_STAT_MODE_DEVICE;
1199    }
1200
1201    if (stbuf->st_mode & S_ISUID) {
1202        mode |= P9_STAT_MODE_SETUID;
1203    }
1204
1205    if (stbuf->st_mode & S_ISGID) {
1206        mode |= P9_STAT_MODE_SETGID;
1207    }
1208
1209    if (stbuf->st_mode & S_ISVTX) {
1210        mode |= P9_STAT_MODE_SETVTX;
1211    }
1212
1213    return mode;
1214}
1215
1216static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1217                                       const char *basename,
1218                                       const struct stat *stbuf,
1219                                       V9fsStat *v9stat)
1220{
1221    int err;
1222
1223    memset(v9stat, 0, sizeof(*v9stat));
1224
1225    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1226    if (err < 0) {
1227        return err;
1228    }
1229    v9stat->mode = stat_to_v9mode(stbuf);
1230    v9stat->atime = stbuf->st_atime;
1231    v9stat->mtime = stbuf->st_mtime;
1232    v9stat->length = stbuf->st_size;
1233
1234    v9fs_string_free(&v9stat->uid);
1235    v9fs_string_free(&v9stat->gid);
1236    v9fs_string_free(&v9stat->muid);
1237
1238    v9stat->n_uid = stbuf->st_uid;
1239    v9stat->n_gid = stbuf->st_gid;
1240    v9stat->n_muid = 0;
1241
1242    v9fs_string_free(&v9stat->extension);
1243
1244    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1245        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1246        if (err < 0) {
1247            return err;
1248        }
1249    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1250        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1251                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1252                major(stbuf->st_rdev), minor(stbuf->st_rdev));
1253    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1254        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1255                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1256    }
1257
1258    v9fs_string_sprintf(&v9stat->name, "%s", basename);
1259
1260    v9stat->size = 61 +
1261        v9fs_string_size(&v9stat->name) +
1262        v9fs_string_size(&v9stat->uid) +
1263        v9fs_string_size(&v9stat->gid) +
1264        v9fs_string_size(&v9stat->muid) +
1265        v9fs_string_size(&v9stat->extension);
1266    return 0;
1267}
1268
/* Bits used in the getattr request mask and in st_result_mask */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

/* Mask for fields up to BLOCKS (bits 0..10) */
#define P9_STATS_BASIC         ((1ULL << 11) - 1)
/* Mask for All fields above (bits 0..13) */
#define P9_STATS_ALL           ((1ULL << 14) - 1)
1287
1288
1289static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1290                                V9fsStatDotl *v9lstat)
1291{
1292    memset(v9lstat, 0, sizeof(*v9lstat));
1293
1294    v9lstat->st_mode = stbuf->st_mode;
1295    v9lstat->st_nlink = stbuf->st_nlink;
1296    v9lstat->st_uid = stbuf->st_uid;
1297    v9lstat->st_gid = stbuf->st_gid;
1298    v9lstat->st_rdev = stbuf->st_rdev;
1299    v9lstat->st_size = stbuf->st_size;
1300    v9lstat->st_blksize = stbuf->st_blksize;
1301    v9lstat->st_blocks = stbuf->st_blocks;
1302    v9lstat->st_atime_sec = stbuf->st_atime;
1303    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1304    v9lstat->st_mtime_sec = stbuf->st_mtime;
1305    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1306    v9lstat->st_ctime_sec = stbuf->st_ctime;
1307    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1308    /* Currently we only support BASIC fields in stat */
1309    v9lstat->st_result_mask = P9_STATS_BASIC;
1310
1311    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1312}
1313
/*
 * Debug helper: print the @cnt entries of the scatter-gather list @sg to
 * stdout in the form "sg[<cnt>]: {(<base>, <len>), ...}" followed by a
 * newline.
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /*
         * iov_len is a size_t, so it has to be printed with %zu; the signed
         * %zd conversion mismatches the argument type and shows large
         * lengths as negative numbers.
         */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1327
1328/* Will call this only for path name based fid */
1329static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1330{
1331    V9fsPath str;
1332    v9fs_path_init(&str);
1333    v9fs_path_copy(&str, dst);
1334    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1335    v9fs_path_free(&str);
1336}
1337
1338static inline bool is_ro_export(FsContext *ctx)
1339{
1340    return ctx->export_flags & V9FS_RDONLY;
1341}
1342
1343static void coroutine_fn v9fs_version(void *opaque)
1344{
1345    ssize_t err;
1346    V9fsPDU *pdu = opaque;
1347    V9fsState *s = pdu->s;
1348    V9fsString version;
1349    size_t offset = 7;
1350
1351    v9fs_string_init(&version);
1352    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1353    if (err < 0) {
1354        goto out;
1355    }
1356    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1357
1358    virtfs_reset(pdu);
1359
1360    if (!strcmp(version.data, "9P2000.u")) {
1361        s->proto_version = V9FS_PROTO_2000U;
1362    } else if (!strcmp(version.data, "9P2000.L")) {
1363        s->proto_version = V9FS_PROTO_2000L;
1364    } else {
1365        v9fs_string_sprintf(&version, "unknown");
1366        /* skip min. msize check, reporting invalid version has priority */
1367        goto marshal;
1368    }
1369
1370    if (s->msize < P9_MIN_MSIZE) {
1371        err = -EMSGSIZE;
1372        error_report(
1373            "9pfs: Client requested msize < minimum msize ("
1374            stringify(P9_MIN_MSIZE) ") supported by this server."
1375        );
1376        goto out;
1377    }
1378
1379marshal:
1380    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1381    if (err < 0) {
1382        goto out;
1383    }
1384    err += offset;
1385    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1386out:
1387    pdu_complete(pdu, err);
1388    v9fs_string_free(&version);
1389}
1390
1391static void coroutine_fn v9fs_attach(void *opaque)
1392{
1393    V9fsPDU *pdu = opaque;
1394    V9fsState *s = pdu->s;
1395    int32_t fid, afid, n_uname;
1396    V9fsString uname, aname;
1397    V9fsFidState *fidp;
1398    size_t offset = 7;
1399    V9fsQID qid;
1400    ssize_t err;
1401    Error *local_err = NULL;
1402
1403    v9fs_string_init(&uname);
1404    v9fs_string_init(&aname);
1405    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1406                        &afid, &uname, &aname, &n_uname);
1407    if (err < 0) {
1408        goto out_nofid;
1409    }
1410    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1411
1412    fidp = alloc_fid(s, fid);
1413    if (fidp == NULL) {
1414        err = -EINVAL;
1415        goto out_nofid;
1416    }
1417    fidp->uid = n_uname;
1418    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1419    if (err < 0) {
1420        err = -EINVAL;
1421        clunk_fid(s, fid);
1422        goto out;
1423    }
1424    err = fid_to_qid(pdu, fidp, &qid);
1425    if (err < 0) {
1426        err = -EINVAL;
1427        clunk_fid(s, fid);
1428        goto out;
1429    }
1430
1431    /*
1432     * disable migration if we haven't done already.
1433     * attach could get called multiple times for the same export.
1434     */
1435    if (!s->migration_blocker) {
1436        error_setg(&s->migration_blocker,
1437                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1438                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1439        err = migrate_add_blocker(s->migration_blocker, &local_err);
1440        if (local_err) {
1441            error_free(local_err);
1442            error_free(s->migration_blocker);
1443            s->migration_blocker = NULL;
1444            clunk_fid(s, fid);
1445            goto out;
1446        }
1447        s->root_fid = fid;
1448    }
1449
1450    err = pdu_marshal(pdu, offset, "Q", &qid);
1451    if (err < 0) {
1452        clunk_fid(s, fid);
1453        goto out;
1454    }
1455    err += offset;
1456
1457    memcpy(&s->root_qid, &qid, sizeof(qid));
1458    trace_v9fs_attach_return(pdu->tag, pdu->id,
1459                             qid.type, qid.version, qid.path);
1460out:
1461    put_fid(pdu, fidp);
1462out_nofid:
1463    pdu_complete(pdu, err);
1464    v9fs_string_free(&uname);
1465    v9fs_string_free(&aname);
1466}
1467
1468static void coroutine_fn v9fs_stat(void *opaque)
1469{
1470    int32_t fid;
1471    V9fsStat v9stat;
1472    ssize_t err = 0;
1473    size_t offset = 7;
1474    struct stat stbuf;
1475    V9fsFidState *fidp;
1476    V9fsPDU *pdu = opaque;
1477    char *basename;
1478
1479    err = pdu_unmarshal(pdu, offset, "d", &fid);
1480    if (err < 0) {
1481        goto out_nofid;
1482    }
1483    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1484
1485    fidp = get_fid(pdu, fid);
1486    if (fidp == NULL) {
1487        err = -ENOENT;
1488        goto out_nofid;
1489    }
1490    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1491    if (err < 0) {
1492        goto out;
1493    }
1494    basename = g_path_get_basename(fidp->path.data);
1495    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1496    g_free(basename);
1497    if (err < 0) {
1498        goto out;
1499    }
1500    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1501    if (err < 0) {
1502        v9fs_stat_free(&v9stat);
1503        goto out;
1504    }
1505    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1506                           v9stat.atime, v9stat.mtime, v9stat.length);
1507    err += offset;
1508    v9fs_stat_free(&v9stat);
1509out:
1510    put_fid(pdu, fidp);
1511out_nofid:
1512    pdu_complete(pdu, err);
1513}
1514
1515static void coroutine_fn v9fs_getattr(void *opaque)
1516{
1517    int32_t fid;
1518    size_t offset = 7;
1519    ssize_t retval = 0;
1520    struct stat stbuf;
1521    V9fsFidState *fidp;
1522    uint64_t request_mask;
1523    V9fsStatDotl v9stat_dotl;
1524    V9fsPDU *pdu = opaque;
1525
1526    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1527    if (retval < 0) {
1528        goto out_nofid;
1529    }
1530    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1531
1532    fidp = get_fid(pdu, fid);
1533    if (fidp == NULL) {
1534        retval = -ENOENT;
1535        goto out_nofid;
1536    }
1537    /*
1538     * Currently we only support BASIC fields in stat, so there is no
1539     * need to look at request_mask.
1540     */
1541    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1542    if (retval < 0) {
1543        goto out;
1544    }
1545    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1546    if (retval < 0) {
1547        goto out;
1548    }
1549
1550    /*  fill st_gen if requested and supported by underlying fs */
1551    if (request_mask & P9_STATS_GEN) {
1552        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1553        switch (retval) {
1554        case 0:
1555            /* we have valid st_gen: update result mask */
1556            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1557            break;
1558        case -EINTR:
1559            /* request cancelled, e.g. by Tflush */
1560            goto out;
1561        default:
1562            /* failed to get st_gen: not fatal, ignore */
1563            break;
1564        }
1565    }
1566    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1567    if (retval < 0) {
1568        goto out;
1569    }
1570    retval += offset;
1571    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1572                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1573                              v9stat_dotl.st_gid);
1574out:
1575    put_fid(pdu, fidp);
1576out_nofid:
1577    pdu_complete(pdu, retval);
1578}
1579
/* Attribute flags: bits of the "valid" field of a setattr request */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* MODE through CTIME (127); the two *_SET modifiers are not included */
#define P9_ATTR_MASK    (P9_ATTR_MODE | P9_ATTR_UID | P9_ATTR_GID | \
                         P9_ATTR_SIZE | P9_ATTR_ATIME | P9_ATTR_MTIME | \
                         P9_ATTR_CTIME)
1592
1593static void coroutine_fn v9fs_setattr(void *opaque)
1594{
1595    int err = 0;
1596    int32_t fid;
1597    V9fsFidState *fidp;
1598    size_t offset = 7;
1599    V9fsIattr v9iattr;
1600    V9fsPDU *pdu = opaque;
1601
1602    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1603    if (err < 0) {
1604        goto out_nofid;
1605    }
1606
1607    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1608                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1609                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1610
1611    fidp = get_fid(pdu, fid);
1612    if (fidp == NULL) {
1613        err = -EINVAL;
1614        goto out_nofid;
1615    }
1616    if (v9iattr.valid & P9_ATTR_MODE) {
1617        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1618        if (err < 0) {
1619            goto out;
1620        }
1621    }
1622    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1623        struct timespec times[2];
1624        if (v9iattr.valid & P9_ATTR_ATIME) {
1625            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1626                times[0].tv_sec = v9iattr.atime_sec;
1627                times[0].tv_nsec = v9iattr.atime_nsec;
1628            } else {
1629                times[0].tv_nsec = UTIME_NOW;
1630            }
1631        } else {
1632            times[0].tv_nsec = UTIME_OMIT;
1633        }
1634        if (v9iattr.valid & P9_ATTR_MTIME) {
1635            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1636                times[1].tv_sec = v9iattr.mtime_sec;
1637                times[1].tv_nsec = v9iattr.mtime_nsec;
1638            } else {
1639                times[1].tv_nsec = UTIME_NOW;
1640            }
1641        } else {
1642            times[1].tv_nsec = UTIME_OMIT;
1643        }
1644        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1645        if (err < 0) {
1646            goto out;
1647        }
1648    }
1649    /*
1650     * If the only valid entry in iattr is ctime we can call
1651     * chown(-1,-1) to update the ctime of the file
1652     */
1653    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1654        ((v9iattr.valid & P9_ATTR_CTIME)
1655         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1656        if (!(v9iattr.valid & P9_ATTR_UID)) {
1657            v9iattr.uid = -1;
1658        }
1659        if (!(v9iattr.valid & P9_ATTR_GID)) {
1660            v9iattr.gid = -1;
1661        }
1662        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1663                            v9iattr.gid);
1664        if (err < 0) {
1665            goto out;
1666        }
1667    }
1668    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1669        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1670        if (err < 0) {
1671            goto out;
1672        }
1673    }
1674    err = offset;
1675    trace_v9fs_setattr_return(pdu->tag, pdu->id);
1676out:
1677    put_fid(pdu, fidp);
1678out_nofid:
1679    pdu_complete(pdu, err);
1680}
1681
1682static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1683{
1684    int i;
1685    ssize_t err;
1686    size_t offset = 7;
1687
1688    err = pdu_marshal(pdu, offset, "w", nwnames);
1689    if (err < 0) {
1690        return err;
1691    }
1692    offset += err;
1693    for (i = 0; i < nwnames; i++) {
1694        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1695        if (err < 0) {
1696            return err;
1697        }
1698        offset += err;
1699    }
1700    return offset;
1701}
1702
/*
 * A name is illegal if it is the empty string or contains a '/'.
 */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1707
1708static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1709{
1710    return
1711        qid1->type != qid2->type ||
1712        qid1->version != qid2->version ||
1713        qid1->path != qid2->path;
1714}
1715
1716static void coroutine_fn v9fs_walk(void *opaque)
1717{
1718    int name_idx;
1719    V9fsQID *qids = NULL;
1720    int i, err = 0;
1721    V9fsPath dpath, path;
1722    uint16_t nwnames;
1723    struct stat stbuf;
1724    size_t offset = 7;
1725    int32_t fid, newfid;
1726    V9fsString *wnames = NULL;
1727    V9fsFidState *fidp;
1728    V9fsFidState *newfidp = NULL;
1729    V9fsPDU *pdu = opaque;
1730    V9fsState *s = pdu->s;
1731    V9fsQID qid;
1732
1733    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1734    if (err < 0) {
1735        pdu_complete(pdu, err);
1736        return ;
1737    }
1738    offset += err;
1739
1740    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1741
1742    if (nwnames && nwnames <= P9_MAXWELEM) {
1743        wnames = g_new0(V9fsString, nwnames);
1744        qids   = g_new0(V9fsQID, nwnames);
1745        for (i = 0; i < nwnames; i++) {
1746            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1747            if (err < 0) {
1748                goto out_nofid;
1749            }
1750            if (name_is_illegal(wnames[i].data)) {
1751                err = -ENOENT;
1752                goto out_nofid;
1753            }
1754            offset += err;
1755        }
1756    } else if (nwnames > P9_MAXWELEM) {
1757        err = -EINVAL;
1758        goto out_nofid;
1759    }
1760    fidp = get_fid(pdu, fid);
1761    if (fidp == NULL) {
1762        err = -ENOENT;
1763        goto out_nofid;
1764    }
1765
1766    v9fs_path_init(&dpath);
1767    v9fs_path_init(&path);
1768
1769    err = fid_to_qid(pdu, fidp, &qid);
1770    if (err < 0) {
1771        goto out;
1772    }
1773
1774    /*
1775     * Both dpath and path initially poin to fidp.
1776     * Needed to handle request with nwnames == 0
1777     */
1778    v9fs_path_copy(&dpath, &fidp->path);
1779    v9fs_path_copy(&path, &fidp->path);
1780    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1781        if (not_same_qid(&pdu->s->root_qid, &qid) ||
1782            strcmp("..", wnames[name_idx].data)) {
1783            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
1784                                       &path);
1785            if (err < 0) {
1786                goto out;
1787            }
1788
1789            err = v9fs_co_lstat(pdu, &path, &stbuf);
1790            if (err < 0) {
1791                goto out;
1792            }
1793            err = stat_to_qid(pdu, &stbuf, &qid);
1794            if (err < 0) {
1795                goto out;
1796            }
1797            v9fs_path_copy(&dpath, &path);
1798        }
1799        memcpy(&qids[name_idx], &qid, sizeof(qid));
1800    }
1801    if (fid == newfid) {
1802        if (fidp->fid_type != P9_FID_NONE) {
1803            err = -EINVAL;
1804            goto out;
1805        }
1806        v9fs_path_write_lock(s);
1807        v9fs_path_copy(&fidp->path, &path);
1808        v9fs_path_unlock(s);
1809    } else {
1810        newfidp = alloc_fid(s, newfid);
1811        if (newfidp == NULL) {
1812            err = -EINVAL;
1813            goto out;
1814        }
1815        newfidp->uid = fidp->uid;
1816        v9fs_path_copy(&newfidp->path, &path);
1817    }
1818    err = v9fs_walk_marshal(pdu, nwnames, qids);
1819    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1820out:
1821    put_fid(pdu, fidp);
1822    if (newfidp) {
1823        put_fid(pdu, newfidp);
1824    }
1825    v9fs_path_free(&dpath);
1826    v9fs_path_free(&path);
1827out_nofid:
1828    pdu_complete(pdu, err);
1829    if (nwnames && nwnames <= P9_MAXWELEM) {
1830        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1831            v9fs_string_free(&wnames[name_idx]);
1832        }
1833        g_free(wnames);
1834        g_free(qids);
1835    }
1836}
1837
1838static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1839{
1840    struct statfs stbuf;
1841    int32_t iounit = 0;
1842    V9fsState *s = pdu->s;
1843
1844    /*
1845     * iounit should be multiples of f_bsize (host filesystem block size
1846     * and as well as less than (client msize - P9_IOHDRSZ))
1847     */
1848    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1849        if (stbuf.f_bsize) {
1850            iounit = stbuf.f_bsize;
1851            iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1852        }
1853    }
1854    if (!iounit) {
1855        iounit = s->msize - P9_IOHDRSZ;
1856    }
1857    return iounit;
1858}
1859
1860static void coroutine_fn v9fs_open(void *opaque)
1861{
1862    int flags;
1863    int32_t fid;
1864    int32_t mode;
1865    V9fsQID qid;
1866    int iounit = 0;
1867    ssize_t err = 0;
1868    size_t offset = 7;
1869    struct stat stbuf;
1870    V9fsFidState *fidp;
1871    V9fsPDU *pdu = opaque;
1872    V9fsState *s = pdu->s;
1873
1874    if (s->proto_version == V9FS_PROTO_2000L) {
1875        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1876    } else {
1877        uint8_t modebyte;
1878        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1879        mode = modebyte;
1880    }
1881    if (err < 0) {
1882        goto out_nofid;
1883    }
1884    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1885
1886    fidp = get_fid(pdu, fid);
1887    if (fidp == NULL) {
1888        err = -ENOENT;
1889        goto out_nofid;
1890    }
1891    if (fidp->fid_type != P9_FID_NONE) {
1892        err = -EINVAL;
1893        goto out;
1894    }
1895
1896    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1897    if (err < 0) {
1898        goto out;
1899    }
1900    err = stat_to_qid(pdu, &stbuf, &qid);
1901    if (err < 0) {
1902        goto out;
1903    }
1904    if (S_ISDIR(stbuf.st_mode)) {
1905        err = v9fs_co_opendir(pdu, fidp);
1906        if (err < 0) {
1907            goto out;
1908        }
1909        fidp->fid_type = P9_FID_DIR;
1910        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1911        if (err < 0) {
1912            goto out;
1913        }
1914        err += offset;
1915    } else {
1916        if (s->proto_version == V9FS_PROTO_2000L) {
1917            flags = get_dotl_openflags(s, mode);
1918        } else {
1919            flags = omode_to_uflags(mode);
1920        }
1921        if (is_ro_export(&s->ctx)) {
1922            if (mode & O_WRONLY || mode & O_RDWR ||
1923                mode & O_APPEND || mode & O_TRUNC) {
1924                err = -EROFS;
1925                goto out;
1926            }
1927        }
1928        err = v9fs_co_open(pdu, fidp, flags);
1929        if (err < 0) {
1930            goto out;
1931        }
1932        fidp->fid_type = P9_FID_FILE;
1933        fidp->open_flags = flags;
1934        if (flags & O_EXCL) {
1935            /*
1936             * We let the host file system do O_EXCL check
1937             * We should not reclaim such fd
1938             */
1939            fidp->flags |= FID_NON_RECLAIMABLE;
1940        }
1941        iounit = get_iounit(pdu, &fidp->path);
1942        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1943        if (err < 0) {
1944            goto out;
1945        }
1946        err += offset;
1947    }
1948    trace_v9fs_open_return(pdu->tag, pdu->id,
1949                           qid.type, qid.version, qid.path, iounit);
1950out:
1951    put_fid(pdu, fidp);
1952out_nofid:
1953    pdu_complete(pdu, err);
1954}
1955
1956static void coroutine_fn v9fs_lcreate(void *opaque)
1957{
1958    int32_t dfid, flags, mode;
1959    gid_t gid;
1960    ssize_t err = 0;
1961    ssize_t offset = 7;
1962    V9fsString name;
1963    V9fsFidState *fidp;
1964    struct stat stbuf;
1965    V9fsQID qid;
1966    int32_t iounit;
1967    V9fsPDU *pdu = opaque;
1968
1969    v9fs_string_init(&name);
1970    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1971                        &name, &flags, &mode, &gid);
1972    if (err < 0) {
1973        goto out_nofid;
1974    }
1975    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1976
1977    if (name_is_illegal(name.data)) {
1978        err = -ENOENT;
1979        goto out_nofid;
1980    }
1981
1982    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1983        err = -EEXIST;
1984        goto out_nofid;
1985    }
1986
1987    fidp = get_fid(pdu, dfid);
1988    if (fidp == NULL) {
1989        err = -ENOENT;
1990        goto out_nofid;
1991    }
1992    if (fidp->fid_type != P9_FID_NONE) {
1993        err = -EINVAL;
1994        goto out;
1995    }
1996
1997    flags = get_dotl_openflags(pdu->s, flags);
1998    err = v9fs_co_open2(pdu, fidp, &name, gid,
1999                        flags | O_CREAT, mode, &stbuf);
2000    if (err < 0) {
2001        goto out;
2002    }
2003    fidp->fid_type = P9_FID_FILE;
2004    fidp->open_flags = flags;
2005    if (flags & O_EXCL) {
2006        /*
2007         * We let the host file system do O_EXCL check
2008         * We should not reclaim such fd
2009         */
2010        fidp->flags |= FID_NON_RECLAIMABLE;
2011    }
2012    iounit =  get_iounit(pdu, &fidp->path);
2013    err = stat_to_qid(pdu, &stbuf, &qid);
2014    if (err < 0) {
2015        goto out;
2016    }
2017    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2018    if (err < 0) {
2019        goto out;
2020    }
2021    err += offset;
2022    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2023                              qid.type, qid.version, qid.path, iounit);
2024out:
2025    put_fid(pdu, fidp);
2026out_nofid:
2027    pdu_complete(pdu, err);
2028    v9fs_string_free(&name);
2029}
2030
2031static void coroutine_fn v9fs_fsync(void *opaque)
2032{
2033    int err;
2034    int32_t fid;
2035    int datasync;
2036    size_t offset = 7;
2037    V9fsFidState *fidp;
2038    V9fsPDU *pdu = opaque;
2039
2040    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2041    if (err < 0) {
2042        goto out_nofid;
2043    }
2044    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2045
2046    fidp = get_fid(pdu, fid);
2047    if (fidp == NULL) {
2048        err = -ENOENT;
2049        goto out_nofid;
2050    }
2051    err = v9fs_co_fsync(pdu, fidp, datasync);
2052    if (!err) {
2053        err = offset;
2054    }
2055    put_fid(pdu, fidp);
2056out_nofid:
2057    pdu_complete(pdu, err);
2058}
2059
2060static void coroutine_fn v9fs_clunk(void *opaque)
2061{
2062    int err;
2063    int32_t fid;
2064    size_t offset = 7;
2065    V9fsFidState *fidp;
2066    V9fsPDU *pdu = opaque;
2067    V9fsState *s = pdu->s;
2068
2069    err = pdu_unmarshal(pdu, offset, "d", &fid);
2070    if (err < 0) {
2071        goto out_nofid;
2072    }
2073    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2074
2075    fidp = clunk_fid(s, fid);
2076    if (fidp == NULL) {
2077        err = -ENOENT;
2078        goto out_nofid;
2079    }
2080    /*
2081     * Bump the ref so that put_fid will
2082     * free the fid.
2083     */
2084    fidp->ref++;
2085    err = put_fid(pdu, fidp);
2086    if (!err) {
2087        err = offset;
2088    }
2089out_nofid:
2090    pdu_complete(pdu, err);
2091}
2092
2093/*
2094 * Create a QEMUIOVector for a sub-region of PDU iovecs
2095 *
2096 * @qiov:       uninitialized QEMUIOVector
2097 * @skip:       number of bytes to skip from beginning of PDU
2098 * @size:       number of bytes to include
2099 * @is_write:   true - write, false - read
2100 *
2101 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2102 * with qemu_iovec_destroy().
2103 */
2104static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2105                                    size_t skip, size_t *size,
2106                                    bool is_write)
2107{
2108    QEMUIOVector elem;
2109    struct iovec *iov;
2110    unsigned int niov;
2111    size_t alloc_size = *size + skip;
2112
2113    if (is_write) {
2114        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, alloc_size);
2115    } else {
2116        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, &alloc_size);
2117    }
2118
2119    if (alloc_size < skip) {
2120        *size = 0;
2121    } else {
2122        *size = alloc_size - skip;
2123    }
2124
2125    qemu_iovec_init_external(&elem, iov, niov);
2126    qemu_iovec_init(qiov, niov);
2127    qemu_iovec_concat(qiov, &elem, skip, *size);
2128}
2129
2130static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2131                           uint64_t off, uint32_t max_count)
2132{
2133    ssize_t err;
2134    size_t offset = 7;
2135    size_t read_count;
2136    QEMUIOVector qiov_full;
2137
2138    if (fidp->fs.xattr.len < off) {
2139        read_count = 0;
2140    } else if (fidp->fs.xattr.len - off < max_count) {
2141        read_count = fidp->fs.xattr.len - off;
2142    } else {
2143        read_count = max_count;
2144    }
2145    err = pdu_marshal(pdu, offset, "d", read_count);
2146    if (err < 0) {
2147        return err;
2148    }
2149    offset += err;
2150
2151    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &read_count, false);
2152    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2153                    ((char *)fidp->fs.xattr.value) + off,
2154                    read_count);
2155    qemu_iovec_destroy(&qiov_full);
2156    if (err < 0) {
2157        return err;
2158    }
2159    offset += err;
2160    return offset;
2161}
2162
2163static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2164                                                  V9fsFidState *fidp,
2165                                                  uint32_t max_count)
2166{
2167    V9fsPath path;
2168    V9fsStat v9stat;
2169    int len, err = 0;
2170    int32_t count = 0;
2171    struct stat stbuf;
2172    off_t saved_dir_pos;
2173    struct dirent *dent;
2174
2175    /* save the directory position */
2176    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2177    if (saved_dir_pos < 0) {
2178        return saved_dir_pos;
2179    }
2180
2181    while (1) {
2182        v9fs_path_init(&path);
2183
2184        v9fs_readdir_lock(&fidp->fs.dir);
2185
2186        err = v9fs_co_readdir(pdu, fidp, &dent);
2187        if (err || !dent) {
2188            break;
2189        }
2190        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2191        if (err < 0) {
2192            break;
2193        }
2194        err = v9fs_co_lstat(pdu, &path, &stbuf);
2195        if (err < 0) {
2196            break;
2197        }
2198        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2199        if (err < 0) {
2200            break;
2201        }
2202        if ((count + v9stat.size + 2) > max_count) {
2203            v9fs_readdir_unlock(&fidp->fs.dir);
2204
2205            /* Ran out of buffer. Set dir back to old position and return */
2206            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2207            v9fs_stat_free(&v9stat);
2208            v9fs_path_free(&path);
2209            return count;
2210        }
2211
2212        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2213        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2214
2215        v9fs_readdir_unlock(&fidp->fs.dir);
2216
2217        if (len < 0) {
2218            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2219            v9fs_stat_free(&v9stat);
2220            v9fs_path_free(&path);
2221            return len;
2222        }
2223        count += len;
2224        v9fs_stat_free(&v9stat);
2225        v9fs_path_free(&path);
2226        saved_dir_pos = dent->d_off;
2227    }
2228
2229    v9fs_readdir_unlock(&fidp->fs.dir);
2230
2231    v9fs_path_free(&path);
2232    if (err < 0) {
2233        return err;
2234    }
2235    return count;
2236}
2237
2238static void coroutine_fn v9fs_read(void *opaque)
2239{
2240    int32_t fid;
2241    uint64_t off;
2242    ssize_t err = 0;
2243    int32_t count = 0;
2244    size_t offset = 7;
2245    uint32_t max_count;
2246    V9fsFidState *fidp;
2247    V9fsPDU *pdu = opaque;
2248    V9fsState *s = pdu->s;
2249
2250    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2251    if (err < 0) {
2252        goto out_nofid;
2253    }
2254    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2255
2256    fidp = get_fid(pdu, fid);
2257    if (fidp == NULL) {
2258        err = -EINVAL;
2259        goto out_nofid;
2260    }
2261    if (fidp->fid_type == P9_FID_DIR) {
2262
2263        if (off == 0) {
2264            v9fs_co_rewinddir(pdu, fidp);
2265        }
2266        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2267        if (count < 0) {
2268            err = count;
2269            goto out;
2270        }
2271        err = pdu_marshal(pdu, offset, "d", count);
2272        if (err < 0) {
2273            goto out;
2274        }
2275        err += offset + count;
2276    } else if (fidp->fid_type == P9_FID_FILE) {
2277        QEMUIOVector qiov_full;
2278        QEMUIOVector qiov;
2279        int32_t len;
2280        size_t size = max_count;
2281
2282        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, &size, false);
2283        qemu_iovec_init(&qiov, qiov_full.niov);
2284        max_count = size;
2285        do {
2286            qemu_iovec_reset(&qiov);
2287            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2288            if (0) {
2289                print_sg(qiov.iov, qiov.niov);
2290            }
2291            /* Loop in case of EINTR */
2292            do {
2293                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2294                if (len >= 0) {
2295                    off   += len;
2296                    count += len;
2297                }
2298            } while (len == -EINTR && !pdu->cancelled);
2299            if (len < 0) {
2300                /* IO error return the error */
2301                err = len;
2302                goto out_free_iovec;
2303            }
2304        } while (count < max_count && len > 0);
2305        err = pdu_marshal(pdu, offset, "d", count);
2306        if (err < 0) {
2307            goto out_free_iovec;
2308        }
2309        err += offset + count;
2310out_free_iovec:
2311        qemu_iovec_destroy(&qiov);
2312        qemu_iovec_destroy(&qiov_full);
2313    } else if (fidp->fid_type == P9_FID_XATTR) {
2314        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2315    } else {
2316        err = -EINVAL;
2317    }
2318    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2319out:
2320    put_fid(pdu, fidp);
2321out_nofid:
2322    pdu_complete(pdu, err);
2323}
2324
2325static size_t v9fs_readdir_data_size(V9fsString *name)
2326{
2327    /*
2328     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2329     * size of type (1) + size of name.size (2) + strlen(name.data)
2330     */
2331    return 24 + v9fs_string_size(name);
2332}
2333
2334static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2335                                        int32_t max_count)
2336{
2337    size_t size;
2338    V9fsQID qid;
2339    V9fsString name;
2340    int len, err = 0;
2341    int32_t count = 0;
2342    off_t saved_dir_pos;
2343    struct dirent *dent;
2344
2345    /* save the directory position */
2346    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2347    if (saved_dir_pos < 0) {
2348        return saved_dir_pos;
2349    }
2350
2351    while (1) {
2352        v9fs_readdir_lock(&fidp->fs.dir);
2353
2354        err = v9fs_co_readdir(pdu, fidp, &dent);
2355        if (err || !dent) {
2356            break;
2357        }
2358        v9fs_string_init(&name);
2359        v9fs_string_sprintf(&name, "%s", dent->d_name);
2360        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
2361            v9fs_readdir_unlock(&fidp->fs.dir);
2362
2363            /* Ran out of buffer. Set dir back to old position and return */
2364            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2365            v9fs_string_free(&name);
2366            return count;
2367        }
2368
2369        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2370            /*
2371             * dirent_to_qid() implies expensive stat call for each entry,
2372             * we must do that here though since inode remapping requires
2373             * the device id, which in turn might be different for
2374             * different entries; we cannot make any assumption to avoid
2375             * that here.
2376             */
2377            err = dirent_to_qid(pdu, fidp, dent, &qid);
2378            if (err < 0) {
2379                v9fs_readdir_unlock(&fidp->fs.dir);
2380                v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2381                v9fs_string_free(&name);
2382                return err;
2383            }
2384        } else {
2385            /*
2386             * Fill up just the path field of qid because the client uses
2387             * only that. To fill the entire qid structure we will have
2388             * to stat each dirent found, which is expensive. For the
2389             * latter reason we don't call dirent_to_qid() here. Only drawback
2390             * is that no multi-device export detection of stat_to_qid()
2391             * would be done and provided as error to the user here. But
2392             * user would get that error anyway when accessing those
2393             * files/dirs through other ways.
2394             */
2395            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2396            memcpy(&qid.path, &dent->d_ino, size);
2397            /* Fill the other fields with dummy values */
2398            qid.type = 0;
2399            qid.version = 0;
2400        }
2401
2402        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2403        len = pdu_marshal(pdu, 11 + count, "Qqbs",
2404                          &qid, dent->d_off,
2405                          dent->d_type, &name);
2406
2407        v9fs_readdir_unlock(&fidp->fs.dir);
2408
2409        if (len < 0) {
2410            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2411            v9fs_string_free(&name);
2412            return len;
2413        }
2414        count += len;
2415        v9fs_string_free(&name);
2416        saved_dir_pos = dent->d_off;
2417    }
2418
2419    v9fs_readdir_unlock(&fidp->fs.dir);
2420
2421    if (err < 0) {
2422        return err;
2423    }
2424    return count;
2425}
2426
2427static void coroutine_fn v9fs_readdir(void *opaque)
2428{
2429    int32_t fid;
2430    V9fsFidState *fidp;
2431    ssize_t retval = 0;
2432    size_t offset = 7;
2433    uint64_t initial_offset;
2434    int32_t count;
2435    uint32_t max_count;
2436    V9fsPDU *pdu = opaque;
2437    V9fsState *s = pdu->s;
2438
2439    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2440                           &initial_offset, &max_count);
2441    if (retval < 0) {
2442        goto out_nofid;
2443    }
2444    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2445
2446    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2447    if (max_count > s->msize - 11) {
2448        max_count = s->msize - 11;
2449        warn_report_once(
2450            "9p: bad client: T_readdir with count > msize - 11"
2451        );
2452    }
2453
2454    fidp = get_fid(pdu, fid);
2455    if (fidp == NULL) {
2456        retval = -EINVAL;
2457        goto out_nofid;
2458    }
2459    if (!fidp->fs.dir.stream) {
2460        retval = -EINVAL;
2461        goto out;
2462    }
2463    if (initial_offset == 0) {
2464        v9fs_co_rewinddir(pdu, fidp);
2465    } else {
2466        v9fs_co_seekdir(pdu, fidp, initial_offset);
2467    }
2468    count = v9fs_do_readdir(pdu, fidp, max_count);
2469    if (count < 0) {
2470        retval = count;
2471        goto out;
2472    }
2473    retval = pdu_marshal(pdu, offset, "d", count);
2474    if (retval < 0) {
2475        goto out;
2476    }
2477    retval += count + offset;
2478    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2479out:
2480    put_fid(pdu, fidp);
2481out_nofid:
2482    pdu_complete(pdu, retval);
2483}
2484
2485static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2486                            uint64_t off, uint32_t count,
2487                            struct iovec *sg, int cnt)
2488{
2489    int i, to_copy;
2490    ssize_t err = 0;
2491    uint64_t write_count;
2492    size_t offset = 7;
2493
2494
2495    if (fidp->fs.xattr.len < off) {
2496        return -ENOSPC;
2497    }
2498    write_count = fidp->fs.xattr.len - off;
2499    if (write_count > count) {
2500        write_count = count;
2501    }
2502    err = pdu_marshal(pdu, offset, "d", write_count);
2503    if (err < 0) {
2504        return err;
2505    }
2506    err += offset;
2507    fidp->fs.xattr.copied_len += write_count;
2508    /*
2509     * Now copy the content from sg list
2510     */
2511    for (i = 0; i < cnt; i++) {
2512        if (write_count > sg[i].iov_len) {
2513            to_copy = sg[i].iov_len;
2514        } else {
2515            to_copy = write_count;
2516        }
2517        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2518        /* updating vs->off since we are not using below */
2519        off += to_copy;
2520        write_count -= to_copy;
2521    }
2522
2523    return err;
2524}
2525
2526static void coroutine_fn v9fs_write(void *opaque)
2527{
2528    ssize_t err;
2529    int32_t fid;
2530    uint64_t off;
2531    uint32_t count;
2532    int32_t len = 0;
2533    int32_t total = 0;
2534    size_t offset = 7;
2535    size_t size;
2536    V9fsFidState *fidp;
2537    V9fsPDU *pdu = opaque;
2538    V9fsState *s = pdu->s;
2539    QEMUIOVector qiov_full;
2540    QEMUIOVector qiov;
2541
2542    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2543    if (err < 0) {
2544        pdu_complete(pdu, err);
2545        return;
2546    }
2547    offset += err;
2548    size = count;
2549    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &size, true);
2550    count = size;
2551    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2552
2553    fidp = get_fid(pdu, fid);
2554    if (fidp == NULL) {
2555        err = -EINVAL;
2556        goto out_nofid;
2557    }
2558    if (fidp->fid_type == P9_FID_FILE) {
2559        if (fidp->fs.fd == -1) {
2560            err = -EINVAL;
2561            goto out;
2562        }
2563    } else if (fidp->fid_type == P9_FID_XATTR) {
2564        /*
2565         * setxattr operation
2566         */
2567        err = v9fs_xattr_write(s, pdu, fidp, off, count,
2568                               qiov_full.iov, qiov_full.niov);
2569        goto out;
2570    } else {
2571        err = -EINVAL;
2572        goto out;
2573    }
2574    qemu_iovec_init(&qiov, qiov_full.niov);
2575    do {
2576        qemu_iovec_reset(&qiov);
2577        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2578        if (0) {
2579            print_sg(qiov.iov, qiov.niov);
2580        }
2581        /* Loop in case of EINTR */
2582        do {
2583            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2584            if (len >= 0) {
2585                off   += len;
2586                total += len;
2587            }
2588        } while (len == -EINTR && !pdu->cancelled);
2589        if (len < 0) {
2590            /* IO error return the error */
2591            err = len;
2592            goto out_qiov;
2593        }
2594    } while (total < count && len > 0);
2595
2596    offset = 7;
2597    err = pdu_marshal(pdu, offset, "d", total);
2598    if (err < 0) {
2599        goto out_qiov;
2600    }
2601    err += offset;
2602    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2603out_qiov:
2604    qemu_iovec_destroy(&qiov);
2605out:
2606    put_fid(pdu, fidp);
2607out_nofid:
2608    qemu_iovec_destroy(&qiov_full);
2609    pdu_complete(pdu, err);
2610}
2611
2612static void coroutine_fn v9fs_create(void *opaque)
2613{
2614    int32_t fid;
2615    int err = 0;
2616    size_t offset = 7;
2617    V9fsFidState *fidp;
2618    V9fsQID qid;
2619    int32_t perm;
2620    int8_t mode;
2621    V9fsPath path;
2622    struct stat stbuf;
2623    V9fsString name;
2624    V9fsString extension;
2625    int iounit;
2626    V9fsPDU *pdu = opaque;
2627    V9fsState *s = pdu->s;
2628
2629    v9fs_path_init(&path);
2630    v9fs_string_init(&name);
2631    v9fs_string_init(&extension);
2632    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2633                        &perm, &mode, &extension);
2634    if (err < 0) {
2635        goto out_nofid;
2636    }
2637    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2638
2639    if (name_is_illegal(name.data)) {
2640        err = -ENOENT;
2641        goto out_nofid;
2642    }
2643
2644    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2645        err = -EEXIST;
2646        goto out_nofid;
2647    }
2648
2649    fidp = get_fid(pdu, fid);
2650    if (fidp == NULL) {
2651        err = -EINVAL;
2652        goto out_nofid;
2653    }
2654    if (fidp->fid_type != P9_FID_NONE) {
2655        err = -EINVAL;
2656        goto out;
2657    }
2658    if (perm & P9_STAT_MODE_DIR) {
2659        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2660                            fidp->uid, -1, &stbuf);
2661        if (err < 0) {
2662            goto out;
2663        }
2664        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2665        if (err < 0) {
2666            goto out;
2667        }
2668        v9fs_path_write_lock(s);
2669        v9fs_path_copy(&fidp->path, &path);
2670        v9fs_path_unlock(s);
2671        err = v9fs_co_opendir(pdu, fidp);
2672        if (err < 0) {
2673            goto out;
2674        }
2675        fidp->fid_type = P9_FID_DIR;
2676    } else if (perm & P9_STAT_MODE_SYMLINK) {
2677        err = v9fs_co_symlink(pdu, fidp, &name,
2678                              extension.data, -1 , &stbuf);
2679        if (err < 0) {
2680            goto out;
2681        }
2682        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2683        if (err < 0) {
2684            goto out;
2685        }
2686        v9fs_path_write_lock(s);
2687        v9fs_path_copy(&fidp->path, &path);
2688        v9fs_path_unlock(s);
2689    } else if (perm & P9_STAT_MODE_LINK) {
2690        int32_t ofid = atoi(extension.data);
2691        V9fsFidState *ofidp = get_fid(pdu, ofid);
2692        if (ofidp == NULL) {
2693            err = -EINVAL;
2694            goto out;
2695        }
2696        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2697        put_fid(pdu, ofidp);
2698        if (err < 0) {
2699            goto out;
2700        }
2701        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2702        if (err < 0) {
2703            fidp->fid_type = P9_FID_NONE;
2704            goto out;
2705        }
2706        v9fs_path_write_lock(s);
2707        v9fs_path_copy(&fidp->path, &path);
2708        v9fs_path_unlock(s);
2709        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2710        if (err < 0) {
2711            fidp->fid_type = P9_FID_NONE;
2712            goto out;
2713        }
2714    } else if (perm & P9_STAT_MODE_DEVICE) {
2715        char ctype;
2716        uint32_t major, minor;
2717        mode_t nmode = 0;
2718
2719        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2720            err = -errno;
2721            goto out;
2722        }
2723
2724        switch (ctype) {
2725        case 'c':
2726            nmode = S_IFCHR;
2727            break;
2728        case 'b':
2729            nmode = S_IFBLK;
2730            break;
2731        default:
2732            err = -EIO;
2733            goto out;
2734        }
2735
2736        nmode |= perm & 0777;
2737        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2738                            makedev(major, minor), nmode, &stbuf);
2739        if (err < 0) {
2740            goto out;
2741        }
2742        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2743        if (err < 0) {
2744            goto out;
2745        }
2746        v9fs_path_write_lock(s);
2747        v9fs_path_copy(&fidp->path, &path);
2748        v9fs_path_unlock(s);
2749    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2750        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2751                            0, S_IFIFO | (perm & 0777), &stbuf);
2752        if (err < 0) {
2753            goto out;
2754        }
2755        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2756        if (err < 0) {
2757            goto out;
2758        }
2759        v9fs_path_write_lock(s);
2760        v9fs_path_copy(&fidp->path, &path);
2761        v9fs_path_unlock(s);
2762    } else if (perm & P9_STAT_MODE_SOCKET) {
2763        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2764                            0, S_IFSOCK | (perm & 0777), &stbuf);
2765        if (err < 0) {
2766            goto out;
2767        }
2768        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2769        if (err < 0) {
2770            goto out;
2771        }
2772        v9fs_path_write_lock(s);
2773        v9fs_path_copy(&fidp->path, &path);
2774        v9fs_path_unlock(s);
2775    } else {
2776        err = v9fs_co_open2(pdu, fidp, &name, -1,
2777                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2778        if (err < 0) {
2779            goto out;
2780        }
2781        fidp->fid_type = P9_FID_FILE;
2782        fidp->open_flags = omode_to_uflags(mode);
2783        if (fidp->open_flags & O_EXCL) {
2784            /*
2785             * We let the host file system do O_EXCL check
2786             * We should not reclaim such fd
2787             */
2788            fidp->flags |= FID_NON_RECLAIMABLE;
2789        }
2790    }
2791    iounit = get_iounit(pdu, &fidp->path);
2792    err = stat_to_qid(pdu, &stbuf, &qid);
2793    if (err < 0) {
2794        goto out;
2795    }
2796    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2797    if (err < 0) {
2798        goto out;
2799    }
2800    err += offset;
2801    trace_v9fs_create_return(pdu->tag, pdu->id,
2802                             qid.type, qid.version, qid.path, iounit);
2803out:
2804    put_fid(pdu, fidp);
2805out_nofid:
2806   pdu_complete(pdu, err);
2807   v9fs_string_free(&name);
2808   v9fs_string_free(&extension);
2809   v9fs_path_free(&path);
2810}
2811
2812static void coroutine_fn v9fs_symlink(void *opaque)
2813{
2814    V9fsPDU *pdu = opaque;
2815    V9fsString name;
2816    V9fsString symname;
2817    V9fsFidState *dfidp;
2818    V9fsQID qid;
2819    struct stat stbuf;
2820    int32_t dfid;
2821    int err = 0;
2822    gid_t gid;
2823    size_t offset = 7;
2824
2825    v9fs_string_init(&name);
2826    v9fs_string_init(&symname);
2827    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2828    if (err < 0) {
2829        goto out_nofid;
2830    }
2831    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2832
2833    if (name_is_illegal(name.data)) {
2834        err = -ENOENT;
2835        goto out_nofid;
2836    }
2837
2838    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2839        err = -EEXIST;
2840        goto out_nofid;
2841    }
2842
2843    dfidp = get_fid(pdu, dfid);
2844    if (dfidp == NULL) {
2845        err = -EINVAL;
2846        goto out_nofid;
2847    }
2848    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2849    if (err < 0) {
2850        goto out;
2851    }
2852    err = stat_to_qid(pdu, &stbuf, &qid);
2853    if (err < 0) {
2854        goto out;
2855    }
2856    err =  pdu_marshal(pdu, offset, "Q", &qid);
2857    if (err < 0) {
2858        goto out;
2859    }
2860    err += offset;
2861    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2862                              qid.type, qid.version, qid.path);
2863out:
2864    put_fid(pdu, dfidp);
2865out_nofid:
2866    pdu_complete(pdu, err);
2867    v9fs_string_free(&name);
2868    v9fs_string_free(&symname);
2869}
2870
2871static void coroutine_fn v9fs_flush(void *opaque)
2872{
2873    ssize_t err;
2874    int16_t tag;
2875    size_t offset = 7;
2876    V9fsPDU *cancel_pdu = NULL;
2877    V9fsPDU *pdu = opaque;
2878    V9fsState *s = pdu->s;
2879
2880    err = pdu_unmarshal(pdu, offset, "w", &tag);
2881    if (err < 0) {
2882        pdu_complete(pdu, err);
2883        return;
2884    }
2885    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2886
2887    if (pdu->tag == tag) {
2888        warn_report("the guest sent a self-referencing 9P flush request");
2889    } else {
2890        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2891            if (cancel_pdu->tag == tag) {
2892                break;
2893            }
2894        }
2895    }
2896    if (cancel_pdu) {
2897        cancel_pdu->cancelled = 1;
2898        /*
2899         * Wait for pdu to complete.
2900         */
2901        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2902        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2903            cancel_pdu->cancelled = 0;
2904            pdu_free(cancel_pdu);
2905        }
2906    }
2907    pdu_complete(pdu, 7);
2908}
2909
2910static void coroutine_fn v9fs_link(void *opaque)
2911{
2912    V9fsPDU *pdu = opaque;
2913    int32_t dfid, oldfid;
2914    V9fsFidState *dfidp, *oldfidp;
2915    V9fsString name;
2916    size_t offset = 7;
2917    int err = 0;
2918
2919    v9fs_string_init(&name);
2920    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2921    if (err < 0) {
2922        goto out_nofid;
2923    }
2924    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2925
2926    if (name_is_illegal(name.data)) {
2927        err = -ENOENT;
2928        goto out_nofid;
2929    }
2930
2931    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2932        err = -EEXIST;
2933        goto out_nofid;
2934    }
2935
2936    dfidp = get_fid(pdu, dfid);
2937    if (dfidp == NULL) {
2938        err = -ENOENT;
2939        goto out_nofid;
2940    }
2941
2942    oldfidp = get_fid(pdu, oldfid);
2943    if (oldfidp == NULL) {
2944        err = -ENOENT;
2945        goto out;
2946    }
2947    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2948    if (!err) {
2949        err = offset;
2950    }
2951    put_fid(pdu, oldfidp);
2952out:
2953    put_fid(pdu, dfidp);
2954out_nofid:
2955    v9fs_string_free(&name);
2956    pdu_complete(pdu, err);
2957}
2958
2959/* Only works with path name based fid */
2960static void coroutine_fn v9fs_remove(void *opaque)
2961{
2962    int32_t fid;
2963    int err = 0;
2964    size_t offset = 7;
2965    V9fsFidState *fidp;
2966    V9fsPDU *pdu = opaque;
2967
2968    err = pdu_unmarshal(pdu, offset, "d", &fid);
2969    if (err < 0) {
2970        goto out_nofid;
2971    }
2972    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2973
2974    fidp = get_fid(pdu, fid);
2975    if (fidp == NULL) {
2976        err = -EINVAL;
2977        goto out_nofid;
2978    }
2979    /* if fs driver is not path based, return EOPNOTSUPP */
2980    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2981        err = -EOPNOTSUPP;
2982        goto out_err;
2983    }
2984    /*
2985     * IF the file is unlinked, we cannot reopen
2986     * the file later. So don't reclaim fd
2987     */
2988    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2989    if (err < 0) {
2990        goto out_err;
2991    }
2992    err = v9fs_co_remove(pdu, &fidp->path);
2993    if (!err) {
2994        err = offset;
2995    }
2996out_err:
2997    /* For TREMOVE we need to clunk the fid even on failed remove */
2998    clunk_fid(pdu->s, fidp->fid);
2999    put_fid(pdu, fidp);
3000out_nofid:
3001    pdu_complete(pdu, err);
3002}
3003
3004static void coroutine_fn v9fs_unlinkat(void *opaque)
3005{
3006    int err = 0;
3007    V9fsString name;
3008    int32_t dfid, flags, rflags = 0;
3009    size_t offset = 7;
3010    V9fsPath path;
3011    V9fsFidState *dfidp;
3012    V9fsPDU *pdu = opaque;
3013
3014    v9fs_string_init(&name);
3015    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3016    if (err < 0) {
3017        goto out_nofid;
3018    }
3019
3020    if (name_is_illegal(name.data)) {
3021        err = -ENOENT;
3022        goto out_nofid;
3023    }
3024
3025    if (!strcmp(".", name.data)) {
3026        err = -EINVAL;
3027        goto out_nofid;
3028    }
3029
3030    if (!strcmp("..", name.data)) {
3031        err = -ENOTEMPTY;
3032        goto out_nofid;
3033    }
3034
3035    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3036        err = -EINVAL;
3037        goto out_nofid;
3038    }
3039
3040    if (flags & P9_DOTL_AT_REMOVEDIR) {
3041        rflags |= AT_REMOVEDIR;
3042    }
3043
3044    dfidp = get_fid(pdu, dfid);
3045    if (dfidp == NULL) {
3046        err = -EINVAL;
3047        goto out_nofid;
3048    }
3049    /*
3050     * IF the file is unlinked, we cannot reopen
3051     * the file later. So don't reclaim fd
3052     */
3053    v9fs_path_init(&path);
3054    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3055    if (err < 0) {
3056        goto out_err;
3057    }
3058    err = v9fs_mark_fids_unreclaim(pdu, &path);
3059    if (err < 0) {
3060        goto out_err;
3061    }
3062    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3063    if (!err) {
3064        err = offset;
3065    }
3066out_err:
3067    put_fid(pdu, dfidp);
3068    v9fs_path_free(&path);
3069out_nofid:
3070    pdu_complete(pdu, err);
3071    v9fs_string_free(&name);
3072}
3073
3074
3075/* Only works with path name based fid */
3076static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3077                                             int32_t newdirfid,
3078                                             V9fsString *name)
3079{
3080    int err = 0;
3081    V9fsPath new_path;
3082    V9fsFidState *tfidp;
3083    V9fsState *s = pdu->s;
3084    V9fsFidState *dirfidp = NULL;
3085
3086    v9fs_path_init(&new_path);
3087    if (newdirfid != -1) {
3088        dirfidp = get_fid(pdu, newdirfid);
3089        if (dirfidp == NULL) {
3090            return -ENOENT;
3091        }
3092        if (fidp->fid_type != P9_FID_NONE) {
3093            err = -EINVAL;
3094            goto out;
3095        }
3096        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3097        if (err < 0) {
3098            goto out;
3099        }
3100    } else {
3101        char *dir_name = g_path_get_dirname(fidp->path.data);
3102        V9fsPath dir_path;
3103
3104        v9fs_path_init(&dir_path);
3105        v9fs_path_sprintf(&dir_path, "%s", dir_name);
3106        g_free(dir_name);
3107
3108        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3109        v9fs_path_free(&dir_path);
3110        if (err < 0) {
3111            goto out;
3112        }
3113    }
3114    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3115    if (err < 0) {
3116        goto out;
3117    }
3118    /*
3119     * Fixup fid's pointing to the old name to
3120     * start pointing to the new name
3121     */
3122    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3123        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3124            /* replace the name */
3125            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3126        }
3127    }
3128out:
3129    if (dirfidp) {
3130        put_fid(pdu, dirfidp);
3131    }
3132    v9fs_path_free(&new_path);
3133    return err;
3134}
3135
3136/* Only works with path name based fid */
3137static void coroutine_fn v9fs_rename(void *opaque)
3138{
3139    int32_t fid;
3140    ssize_t err = 0;
3141    size_t offset = 7;
3142    V9fsString name;
3143    int32_t newdirfid;
3144    V9fsFidState *fidp;
3145    V9fsPDU *pdu = opaque;
3146    V9fsState *s = pdu->s;
3147
3148    v9fs_string_init(&name);
3149    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3150    if (err < 0) {
3151        goto out_nofid;
3152    }
3153
3154    if (name_is_illegal(name.data)) {
3155        err = -ENOENT;
3156        goto out_nofid;
3157    }
3158
3159    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3160        err = -EISDIR;
3161        goto out_nofid;
3162    }
3163
3164    fidp = get_fid(pdu, fid);
3165    if (fidp == NULL) {
3166        err = -ENOENT;
3167        goto out_nofid;
3168    }
3169    if (fidp->fid_type != P9_FID_NONE) {
3170        err = -EINVAL;
3171        goto out;
3172    }
3173    /* if fs driver is not path based, return EOPNOTSUPP */
3174    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3175        err = -EOPNOTSUPP;
3176        goto out;
3177    }
3178    v9fs_path_write_lock(s);
3179    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3180    v9fs_path_unlock(s);
3181    if (!err) {
3182        err = offset;
3183    }
3184out:
3185    put_fid(pdu, fidp);
3186out_nofid:
3187    pdu_complete(pdu, err);
3188    v9fs_string_free(&name);
3189}
3190
3191static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3192                                           V9fsString *old_name,
3193                                           V9fsPath *newdir,
3194                                           V9fsString *new_name)
3195{
3196    V9fsFidState *tfidp;
3197    V9fsPath oldpath, newpath;
3198    V9fsState *s = pdu->s;
3199    int err;
3200
3201    v9fs_path_init(&oldpath);
3202    v9fs_path_init(&newpath);
3203    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3204    if (err < 0) {
3205        goto out;
3206    }
3207    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3208    if (err < 0) {
3209        goto out;
3210    }
3211
3212    /*
3213     * Fixup fid's pointing to the old name to
3214     * start pointing to the new name
3215     */
3216    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3217        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3218            /* replace the name */
3219            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3220        }
3221    }
3222out:
3223    v9fs_path_free(&oldpath);
3224    v9fs_path_free(&newpath);
3225    return err;
3226}
3227
3228static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3229                                               V9fsString *old_name,
3230                                               int32_t newdirfid,
3231                                               V9fsString *new_name)
3232{
3233    int err = 0;
3234    V9fsState *s = pdu->s;
3235    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3236
3237    olddirfidp = get_fid(pdu, olddirfid);
3238    if (olddirfidp == NULL) {
3239        err = -ENOENT;
3240        goto out;
3241    }
3242    if (newdirfid != -1) {
3243        newdirfidp = get_fid(pdu, newdirfid);
3244        if (newdirfidp == NULL) {
3245            err = -ENOENT;
3246            goto out;
3247        }
3248    } else {
3249        newdirfidp = get_fid(pdu, olddirfid);
3250    }
3251
3252    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3253                           &newdirfidp->path, new_name);
3254    if (err < 0) {
3255        goto out;
3256    }
3257    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3258        /* Only for path based fid  we need to do the below fixup */
3259        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3260                                 &newdirfidp->path, new_name);
3261    }
3262out:
3263    if (olddirfidp) {
3264        put_fid(pdu, olddirfidp);
3265    }
3266    if (newdirfidp) {
3267        put_fid(pdu, newdirfidp);
3268    }
3269    return err;
3270}
3271
3272static void coroutine_fn v9fs_renameat(void *opaque)
3273{
3274    ssize_t err = 0;
3275    size_t offset = 7;
3276    V9fsPDU *pdu = opaque;
3277    V9fsState *s = pdu->s;
3278    int32_t olddirfid, newdirfid;
3279    V9fsString old_name, new_name;
3280
3281    v9fs_string_init(&old_name);
3282    v9fs_string_init(&new_name);
3283    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3284                        &old_name, &newdirfid, &new_name);
3285    if (err < 0) {
3286        goto out_err;
3287    }
3288
3289    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3290        err = -ENOENT;
3291        goto out_err;
3292    }
3293
3294    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3295        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3296        err = -EISDIR;
3297        goto out_err;
3298    }
3299
3300    v9fs_path_write_lock(s);
3301    err = v9fs_complete_renameat(pdu, olddirfid,
3302                                 &old_name, newdirfid, &new_name);
3303    v9fs_path_unlock(s);
3304    if (!err) {
3305        err = offset;
3306    }
3307
3308out_err:
3309    pdu_complete(pdu, err);
3310    v9fs_string_free(&old_name);
3311    v9fs_string_free(&new_name);
3312}
3313
3314static void coroutine_fn v9fs_wstat(void *opaque)
3315{
3316    int32_t fid;
3317    int err = 0;
3318    int16_t unused;
3319    V9fsStat v9stat;
3320    size_t offset = 7;
3321    struct stat stbuf;
3322    V9fsFidState *fidp;
3323    V9fsPDU *pdu = opaque;
3324    V9fsState *s = pdu->s;
3325
3326    v9fs_stat_init(&v9stat);
3327    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3328    if (err < 0) {
3329        goto out_nofid;
3330    }
3331    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3332                     v9stat.mode, v9stat.atime, v9stat.mtime);
3333
3334    fidp = get_fid(pdu, fid);
3335    if (fidp == NULL) {
3336        err = -EINVAL;
3337        goto out_nofid;
3338    }
3339    /* do we need to sync the file? */
3340    if (donttouch_stat(&v9stat)) {
3341        err = v9fs_co_fsync(pdu, fidp, 0);
3342        goto out;
3343    }
3344    if (v9stat.mode != -1) {
3345        uint32_t v9_mode;
3346        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3347        if (err < 0) {
3348            goto out;
3349        }
3350        v9_mode = stat_to_v9mode(&stbuf);
3351        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3352            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3353            /* Attempting to change the type */
3354            err = -EIO;
3355            goto out;
3356        }
3357        err = v9fs_co_chmod(pdu, &fidp->path,
3358                            v9mode_to_mode(v9stat.mode,
3359                                           &v9stat.extension));
3360        if (err < 0) {
3361            goto out;
3362        }
3363    }
3364    if (v9stat.mtime != -1 || v9stat.atime != -1) {
3365        struct timespec times[2];
3366        if (v9stat.atime != -1) {
3367            times[0].tv_sec = v9stat.atime;
3368            times[0].tv_nsec = 0;
3369        } else {
3370            times[0].tv_nsec = UTIME_OMIT;
3371        }
3372        if (v9stat.mtime != -1) {
3373            times[1].tv_sec = v9stat.mtime;
3374            times[1].tv_nsec = 0;
3375        } else {
3376            times[1].tv_nsec = UTIME_OMIT;
3377        }
3378        err = v9fs_co_utimensat(pdu, &fidp->path, times);
3379        if (err < 0) {
3380            goto out;
3381        }
3382    }
3383    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3384        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3385        if (err < 0) {
3386            goto out;
3387        }
3388    }
3389    if (v9stat.name.size != 0) {
3390        v9fs_path_write_lock(s);
3391        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3392        v9fs_path_unlock(s);
3393        if (err < 0) {
3394            goto out;
3395        }
3396    }
3397    if (v9stat.length != -1) {
3398        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3399        if (err < 0) {
3400            goto out;
3401        }
3402    }
3403    err = offset;
3404out:
3405    put_fid(pdu, fidp);
3406out_nofid:
3407    v9fs_stat_free(&v9stat);
3408    pdu_complete(pdu, err);
3409}
3410
3411static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3412{
3413    uint32_t f_type;
3414    uint32_t f_bsize;
3415    uint64_t f_blocks;
3416    uint64_t f_bfree;
3417    uint64_t f_bavail;
3418    uint64_t f_files;
3419    uint64_t f_ffree;
3420    uint64_t fsid_val;
3421    uint32_t f_namelen;
3422    size_t offset = 7;
3423    int32_t bsize_factor;
3424
3425    /*
3426     * compute bsize factor based on host file system block size
3427     * and client msize
3428     */
3429    bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
3430    if (!bsize_factor) {
3431        bsize_factor = 1;
3432    }
3433    f_type  = stbuf->f_type;
3434    f_bsize = stbuf->f_bsize;
3435    f_bsize *= bsize_factor;
3436    /*
3437     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3438     * adjust(divide) the number of blocks, free blocks and available
3439     * blocks by bsize factor
3440     */
3441    f_blocks = stbuf->f_blocks/bsize_factor;
3442    f_bfree  = stbuf->f_bfree/bsize_factor;
3443    f_bavail = stbuf->f_bavail/bsize_factor;
3444    f_files  = stbuf->f_files;
3445    f_ffree  = stbuf->f_ffree;
3446    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3447               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3448    f_namelen = stbuf->f_namelen;
3449
3450    return pdu_marshal(pdu, offset, "ddqqqqqqd",
3451                       f_type, f_bsize, f_blocks, f_bfree,
3452                       f_bavail, f_files, f_ffree,
3453                       fsid_val, f_namelen);
3454}
3455
3456static void coroutine_fn v9fs_statfs(void *opaque)
3457{
3458    int32_t fid;
3459    ssize_t retval = 0;
3460    size_t offset = 7;
3461    V9fsFidState *fidp;
3462    struct statfs stbuf;
3463    V9fsPDU *pdu = opaque;
3464    V9fsState *s = pdu->s;
3465
3466    retval = pdu_unmarshal(pdu, offset, "d", &fid);
3467    if (retval < 0) {
3468        goto out_nofid;
3469    }
3470    fidp = get_fid(pdu, fid);
3471    if (fidp == NULL) {
3472        retval = -ENOENT;
3473        goto out_nofid;
3474    }
3475    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3476    if (retval < 0) {
3477        goto out;
3478    }
3479    retval = v9fs_fill_statfs(s, pdu, &stbuf);
3480    if (retval < 0) {
3481        goto out;
3482    }
3483    retval += offset;
3484out:
3485    put_fid(pdu, fidp);
3486out_nofid:
3487    pdu_complete(pdu, retval);
3488}
3489
3490static void coroutine_fn v9fs_mknod(void *opaque)
3491{
3492
3493    int mode;
3494    gid_t gid;
3495    int32_t fid;
3496    V9fsQID qid;
3497    int err = 0;
3498    int major, minor;
3499    size_t offset = 7;
3500    V9fsString name;
3501    struct stat stbuf;
3502    V9fsFidState *fidp;
3503    V9fsPDU *pdu = opaque;
3504
3505    v9fs_string_init(&name);
3506    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3507                        &major, &minor, &gid);
3508    if (err < 0) {
3509        goto out_nofid;
3510    }
3511    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3512
3513    if (name_is_illegal(name.data)) {
3514        err = -ENOENT;
3515        goto out_nofid;
3516    }
3517
3518    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3519        err = -EEXIST;
3520        goto out_nofid;
3521    }
3522
3523    fidp = get_fid(pdu, fid);
3524    if (fidp == NULL) {
3525        err = -ENOENT;
3526        goto out_nofid;
3527    }
3528    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3529                        makedev(major, minor), mode, &stbuf);
3530    if (err < 0) {
3531        goto out;
3532    }
3533    err = stat_to_qid(pdu, &stbuf, &qid);
3534    if (err < 0) {
3535        goto out;
3536    }
3537    err = pdu_marshal(pdu, offset, "Q", &qid);
3538    if (err < 0) {
3539        goto out;
3540    }
3541    err += offset;
3542    trace_v9fs_mknod_return(pdu->tag, pdu->id,
3543                            qid.type, qid.version, qid.path);
3544out:
3545    put_fid(pdu, fidp);
3546out_nofid:
3547    pdu_complete(pdu, err);
3548    v9fs_string_free(&name);
3549}
3550
3551/*
3552 * Implement posix byte range locking code
3553 * Server side handling of locking code is very simple, because 9p server in
3554 * QEMU can handle only one client. And most of the lock handling
3555 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3556 * do any thing in * qemu 9p server side lock code path.
3557 * So when a TLOCK request comes, always return success
3558 */
3559static void coroutine_fn v9fs_lock(void *opaque)
3560{
3561    V9fsFlock flock;
3562    size_t offset = 7;
3563    struct stat stbuf;
3564    V9fsFidState *fidp;
3565    int32_t fid, err = 0;
3566    V9fsPDU *pdu = opaque;
3567
3568    v9fs_string_init(&flock.client_id);
3569    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3570                        &flock.flags, &flock.start, &flock.length,
3571                        &flock.proc_id, &flock.client_id);
3572    if (err < 0) {
3573        goto out_nofid;
3574    }
3575    trace_v9fs_lock(pdu->tag, pdu->id, fid,
3576                    flock.type, flock.start, flock.length);
3577
3578
3579    /* We support only block flag now (that too ignored currently) */
3580    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3581        err = -EINVAL;
3582        goto out_nofid;
3583    }
3584    fidp = get_fid(pdu, fid);
3585    if (fidp == NULL) {
3586        err = -ENOENT;
3587        goto out_nofid;
3588    }
3589    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3590    if (err < 0) {
3591        goto out;
3592    }
3593    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3594    if (err < 0) {
3595        goto out;
3596    }
3597    err += offset;
3598    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3599out:
3600    put_fid(pdu, fidp);
3601out_nofid:
3602    pdu_complete(pdu, err);
3603    v9fs_string_free(&flock.client_id);
3604}
3605
3606/*
3607 * When a TGETLOCK request comes, always return success because all lock
3608 * handling is done by client's VFS layer.
3609 */
3610static void coroutine_fn v9fs_getlock(void *opaque)
3611{
3612    size_t offset = 7;
3613    struct stat stbuf;
3614    V9fsFidState *fidp;
3615    V9fsGetlock glock;
3616    int32_t fid, err = 0;
3617    V9fsPDU *pdu = opaque;
3618
3619    v9fs_string_init(&glock.client_id);
3620    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3621                        &glock.start, &glock.length, &glock.proc_id,
3622                        &glock.client_id);
3623    if (err < 0) {
3624        goto out_nofid;
3625    }
3626    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3627                       glock.type, glock.start, glock.length);
3628
3629    fidp = get_fid(pdu, fid);
3630    if (fidp == NULL) {
3631        err = -ENOENT;
3632        goto out_nofid;
3633    }
3634    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3635    if (err < 0) {
3636        goto out;
3637    }
3638    glock.type = P9_LOCK_TYPE_UNLCK;
3639    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3640                          glock.start, glock.length, glock.proc_id,
3641                          &glock.client_id);
3642    if (err < 0) {
3643        goto out;
3644    }
3645    err += offset;
3646    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3647                              glock.length, glock.proc_id);
3648out:
3649    put_fid(pdu, fidp);
3650out_nofid:
3651    pdu_complete(pdu, err);
3652    v9fs_string_free(&glock.client_id);
3653}
3654
3655static void coroutine_fn v9fs_mkdir(void *opaque)
3656{
3657    V9fsPDU *pdu = opaque;
3658    size_t offset = 7;
3659    int32_t fid;
3660    struct stat stbuf;
3661    V9fsQID qid;
3662    V9fsString name;
3663    V9fsFidState *fidp;
3664    gid_t gid;
3665    int mode;
3666    int err = 0;
3667
3668    v9fs_string_init(&name);
3669    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3670    if (err < 0) {
3671        goto out_nofid;
3672    }
3673    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3674
3675    if (name_is_illegal(name.data)) {
3676        err = -ENOENT;
3677        goto out_nofid;
3678    }
3679
3680    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3681        err = -EEXIST;
3682        goto out_nofid;
3683    }
3684
3685    fidp = get_fid(pdu, fid);
3686    if (fidp == NULL) {
3687        err = -ENOENT;
3688        goto out_nofid;
3689    }
3690    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3691    if (err < 0) {
3692        goto out;
3693    }
3694    err = stat_to_qid(pdu, &stbuf, &qid);
3695    if (err < 0) {
3696        goto out;
3697    }
3698    err = pdu_marshal(pdu, offset, "Q", &qid);
3699    if (err < 0) {
3700        goto out;
3701    }
3702    err += offset;
3703    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3704                            qid.type, qid.version, qid.path, err);
3705out:
3706    put_fid(pdu, fidp);
3707out_nofid:
3708    pdu_complete(pdu, err);
3709    v9fs_string_free(&name);
3710}
3711
3712static void coroutine_fn v9fs_xattrwalk(void *opaque)
3713{
3714    int64_t size;
3715    V9fsString name;
3716    ssize_t err = 0;
3717    size_t offset = 7;
3718    int32_t fid, newfid;
3719    V9fsFidState *file_fidp;
3720    V9fsFidState *xattr_fidp = NULL;
3721    V9fsPDU *pdu = opaque;
3722    V9fsState *s = pdu->s;
3723
3724    v9fs_string_init(&name);
3725    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3726    if (err < 0) {
3727        goto out_nofid;
3728    }
3729    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3730
3731    file_fidp = get_fid(pdu, fid);
3732    if (file_fidp == NULL) {
3733        err = -ENOENT;
3734        goto out_nofid;
3735    }
3736    xattr_fidp = alloc_fid(s, newfid);
3737    if (xattr_fidp == NULL) {
3738        err = -EINVAL;
3739        goto out;
3740    }
3741    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3742    if (!v9fs_string_size(&name)) {
3743        /*
3744         * listxattr request. Get the size first
3745         */
3746        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3747        if (size < 0) {
3748            err = size;
3749            clunk_fid(s, xattr_fidp->fid);
3750            goto out;
3751        }
3752        /*
3753         * Read the xattr value
3754         */
3755        xattr_fidp->fs.xattr.len = size;
3756        xattr_fidp->fid_type = P9_FID_XATTR;
3757        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3758        xattr_fidp->fs.xattr.value = g_malloc0(size);
3759        if (size) {
3760            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3761                                     xattr_fidp->fs.xattr.value,
3762                                     xattr_fidp->fs.xattr.len);
3763            if (err < 0) {
3764                clunk_fid(s, xattr_fidp->fid);
3765                goto out;
3766            }
3767        }
3768        err = pdu_marshal(pdu, offset, "q", size);
3769        if (err < 0) {
3770            goto out;
3771        }
3772        err += offset;
3773    } else {
3774        /*
3775         * specific xattr fid. We check for xattr
3776         * presence also collect the xattr size
3777         */
3778        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3779                                 &name, NULL, 0);
3780        if (size < 0) {
3781            err = size;
3782            clunk_fid(s, xattr_fidp->fid);
3783            goto out;
3784        }
3785        /*
3786         * Read the xattr value
3787         */
3788        xattr_fidp->fs.xattr.len = size;
3789        xattr_fidp->fid_type = P9_FID_XATTR;
3790        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3791        xattr_fidp->fs.xattr.value = g_malloc0(size);
3792        if (size) {
3793            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3794                                    &name, xattr_fidp->fs.xattr.value,
3795                                    xattr_fidp->fs.xattr.len);
3796            if (err < 0) {
3797                clunk_fid(s, xattr_fidp->fid);
3798                goto out;
3799            }
3800        }
3801        err = pdu_marshal(pdu, offset, "q", size);
3802        if (err < 0) {
3803            goto out;
3804        }
3805        err += offset;
3806    }
3807    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3808out:
3809    put_fid(pdu, file_fidp);
3810    if (xattr_fidp) {
3811        put_fid(pdu, xattr_fidp);
3812    }
3813out_nofid:
3814    pdu_complete(pdu, err);
3815    v9fs_string_free(&name);
3816}
3817
3818static void coroutine_fn v9fs_xattrcreate(void *opaque)
3819{
3820    int flags, rflags = 0;
3821    int32_t fid;
3822    uint64_t size;
3823    ssize_t err = 0;
3824    V9fsString name;
3825    size_t offset = 7;
3826    V9fsFidState *file_fidp;
3827    V9fsFidState *xattr_fidp;
3828    V9fsPDU *pdu = opaque;
3829
3830    v9fs_string_init(&name);
3831    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3832    if (err < 0) {
3833        goto out_nofid;
3834    }
3835    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3836
3837    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3838        err = -EINVAL;
3839        goto out_nofid;
3840    }
3841
3842    if (flags & P9_XATTR_CREATE) {
3843        rflags |= XATTR_CREATE;
3844    }
3845
3846    if (flags & P9_XATTR_REPLACE) {
3847        rflags |= XATTR_REPLACE;
3848    }
3849
3850    if (size > XATTR_SIZE_MAX) {
3851        err = -E2BIG;
3852        goto out_nofid;
3853    }
3854
3855    file_fidp = get_fid(pdu, fid);
3856    if (file_fidp == NULL) {
3857        err = -EINVAL;
3858        goto out_nofid;
3859    }
3860    if (file_fidp->fid_type != P9_FID_NONE) {
3861        err = -EINVAL;
3862        goto out_put_fid;
3863    }
3864
3865    /* Make the file fid point to xattr */
3866    xattr_fidp = file_fidp;
3867    xattr_fidp->fid_type = P9_FID_XATTR;
3868    xattr_fidp->fs.xattr.copied_len = 0;
3869    xattr_fidp->fs.xattr.xattrwalk_fid = false;
3870    xattr_fidp->fs.xattr.len = size;
3871    xattr_fidp->fs.xattr.flags = rflags;
3872    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3873    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3874    xattr_fidp->fs.xattr.value = g_malloc0(size);
3875    err = offset;
3876out_put_fid:
3877    put_fid(pdu, file_fidp);
3878out_nofid:
3879    pdu_complete(pdu, err);
3880    v9fs_string_free(&name);
3881}
3882
3883static void coroutine_fn v9fs_readlink(void *opaque)
3884{
3885    V9fsPDU *pdu = opaque;
3886    size_t offset = 7;
3887    V9fsString target;
3888    int32_t fid;
3889    int err = 0;
3890    V9fsFidState *fidp;
3891
3892    err = pdu_unmarshal(pdu, offset, "d", &fid);
3893    if (err < 0) {
3894        goto out_nofid;
3895    }
3896    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3897    fidp = get_fid(pdu, fid);
3898    if (fidp == NULL) {
3899        err = -ENOENT;
3900        goto out_nofid;
3901    }
3902
3903    v9fs_string_init(&target);
3904    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3905    if (err < 0) {
3906        goto out;
3907    }
3908    err = pdu_marshal(pdu, offset, "s", &target);
3909    if (err < 0) {
3910        v9fs_string_free(&target);
3911        goto out;
3912    }
3913    err += offset;
3914    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3915    v9fs_string_free(&target);
3916out:
3917    put_fid(pdu, fidp);
3918out_nofid:
3919    pdu_complete(pdu, err);
3920}
3921
3922static CoroutineEntry *pdu_co_handlers[] = {
3923    [P9_TREADDIR] = v9fs_readdir,
3924    [P9_TSTATFS] = v9fs_statfs,
3925    [P9_TGETATTR] = v9fs_getattr,
3926    [P9_TSETATTR] = v9fs_setattr,
3927    [P9_TXATTRWALK] = v9fs_xattrwalk,
3928    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3929    [P9_TMKNOD] = v9fs_mknod,
3930    [P9_TRENAME] = v9fs_rename,
3931    [P9_TLOCK] = v9fs_lock,
3932    [P9_TGETLOCK] = v9fs_getlock,
3933    [P9_TRENAMEAT] = v9fs_renameat,
3934    [P9_TREADLINK] = v9fs_readlink,
3935    [P9_TUNLINKAT] = v9fs_unlinkat,
3936    [P9_TMKDIR] = v9fs_mkdir,
3937    [P9_TVERSION] = v9fs_version,
3938    [P9_TLOPEN] = v9fs_open,
3939    [P9_TATTACH] = v9fs_attach,
3940    [P9_TSTAT] = v9fs_stat,
3941    [P9_TWALK] = v9fs_walk,
3942    [P9_TCLUNK] = v9fs_clunk,
3943    [P9_TFSYNC] = v9fs_fsync,
3944    [P9_TOPEN] = v9fs_open,
3945    [P9_TREAD] = v9fs_read,
3946#if 0
3947    [P9_TAUTH] = v9fs_auth,
3948#endif
3949    [P9_TFLUSH] = v9fs_flush,
3950    [P9_TLINK] = v9fs_link,
3951    [P9_TSYMLINK] = v9fs_symlink,
3952    [P9_TCREATE] = v9fs_create,
3953    [P9_TLCREATE] = v9fs_lcreate,
3954    [P9_TWRITE] = v9fs_write,
3955    [P9_TWSTAT] = v9fs_wstat,
3956    [P9_TREMOVE] = v9fs_remove,
3957};
3958
3959static void coroutine_fn v9fs_op_not_supp(void *opaque)
3960{
3961    V9fsPDU *pdu = opaque;
3962    pdu_complete(pdu, -EOPNOTSUPP);
3963}
3964
3965static void coroutine_fn v9fs_fs_ro(void *opaque)
3966{
3967    V9fsPDU *pdu = opaque;
3968    pdu_complete(pdu, -EROFS);
3969}
3970
3971static inline bool is_read_only_op(V9fsPDU *pdu)
3972{
3973    switch (pdu->id) {
3974    case P9_TREADDIR:
3975    case P9_TSTATFS:
3976    case P9_TGETATTR:
3977    case P9_TXATTRWALK:
3978    case P9_TLOCK:
3979    case P9_TGETLOCK:
3980    case P9_TREADLINK:
3981    case P9_TVERSION:
3982    case P9_TLOPEN:
3983    case P9_TATTACH:
3984    case P9_TSTAT:
3985    case P9_TWALK:
3986    case P9_TCLUNK:
3987    case P9_TFSYNC:
3988    case P9_TOPEN:
3989    case P9_TREAD:
3990    case P9_TAUTH:
3991    case P9_TFLUSH:
3992        return 1;
3993    default:
3994        return 0;
3995    }
3996}
3997
3998void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
3999{
4000    Coroutine *co;
4001    CoroutineEntry *handler;
4002    V9fsState *s = pdu->s;
4003
4004    pdu->size = le32_to_cpu(hdr->size_le);
4005    pdu->id = hdr->id;
4006    pdu->tag = le16_to_cpu(hdr->tag_le);
4007
4008    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4009        (pdu_co_handlers[pdu->id] == NULL)) {
4010        handler = v9fs_op_not_supp;
4011    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4012        handler = v9fs_fs_ro;
4013    } else {
4014        handler = pdu_co_handlers[pdu->id];
4015    }
4016
4017    qemu_co_queue_init(&pdu->complete);
4018    co = qemu_coroutine_create(handler, pdu);
4019    qemu_coroutine_enter(co);
4020}
4021
4022/* Returns 0 on success, 1 on failure. */
4023int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4024                               Error **errp)
4025{
4026    int i, len;
4027    struct stat stat;
4028    FsDriverEntry *fse;
4029    V9fsPath path;
4030    int rc = 1;
4031
4032    assert(!s->transport);
4033    s->transport = t;
4034
4035    /* initialize pdu allocator */
4036    QLIST_INIT(&s->free_list);
4037    QLIST_INIT(&s->active_list);
4038    for (i = 0; i < MAX_REQ; i++) {
4039        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4040        s->pdus[i].s = s;
4041        s->pdus[i].idx = i;
4042    }
4043
4044    v9fs_path_init(&path);
4045
4046    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4047
4048    if (!fse) {
4049        /* We don't have a fsdev identified by fsdev_id */
4050        error_setg(errp, "9pfs device couldn't find fsdev with the "
4051                   "id = %s",
4052                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4053        goto out;
4054    }
4055
4056    if (!s->fsconf.tag) {
4057        /* we haven't specified a mount_tag */
4058        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4059                   s->fsconf.fsdev_id);
4060        goto out;
4061    }
4062
4063    s->ctx.export_flags = fse->export_flags;
4064    s->ctx.fs_root = g_strdup(fse->path);
4065    s->ctx.exops.get_st_gen = NULL;
4066    len = strlen(s->fsconf.tag);
4067    if (len > MAX_TAG_LEN - 1) {
4068        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4069                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4070        goto out;
4071    }
4072
4073    s->tag = g_strdup(s->fsconf.tag);
4074    s->ctx.uid = -1;
4075
4076    s->ops = fse->ops;
4077
4078    s->ctx.fmode = fse->fmode;
4079    s->ctx.dmode = fse->dmode;
4080
4081    s->fid_list = NULL;
4082    qemu_co_rwlock_init(&s->rename_lock);
4083
4084    if (s->ops->init(&s->ctx, errp) < 0) {
4085        error_prepend(errp, "cannot initialize fsdev '%s': ",
4086                      s->fsconf.fsdev_id);
4087        goto out;
4088    }
4089
4090    /*
4091     * Check details of export path, We need to use fs driver
4092     * call back to do that. Since we are in the init path, we don't
4093     * use co-routines here.
4094     */
4095    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4096        error_setg(errp,
4097                   "error in converting name to path %s", strerror(errno));
4098        goto out;
4099    }
4100    if (s->ops->lstat(&s->ctx, &path, &stat)) {
4101        error_setg(errp, "share path %s does not exist", fse->path);
4102        goto out;
4103    } else if (!S_ISDIR(stat.st_mode)) {
4104        error_setg(errp, "share path %s is not a directory", fse->path);
4105        goto out;
4106    }
4107
4108    s->dev_id = stat.st_dev;
4109
4110    /* init inode remapping : */
4111    /* hash table for variable length inode suffixes */
4112    qpd_table_init(&s->qpd_table);
4113    /* hash table for slow/full inode remapping (most users won't need it) */
4114    qpf_table_init(&s->qpf_table);
4115    /* hash table for quick inode remapping */
4116    qpp_table_init(&s->qpp_table);
4117    s->qp_ndevices = 0;
4118    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4119    s->qp_fullpath_next = 1;
4120
4121    s->ctx.fst = &fse->fst;
4122    fsdev_throttle_init(s->ctx.fst);
4123
4124    rc = 0;
4125out:
4126    if (rc) {
4127        v9fs_device_unrealize_common(s, NULL);
4128    }
4129    v9fs_path_free(&path);
4130    return rc;
4131}
4132
4133void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
4134{
4135    if (s->ops && s->ops->cleanup) {
4136        s->ops->cleanup(&s->ctx);
4137    }
4138    if (s->ctx.fst) {
4139        fsdev_throttle_cleanup(s->ctx.fst);
4140    }
4141    g_free(s->tag);
4142    qp_table_destroy(&s->qpd_table);
4143    qp_table_destroy(&s->qpp_table);
4144    qp_table_destroy(&s->qpf_table);
4145    g_free(s->ctx.fs_root);
4146}
4147
4148typedef struct VirtfsCoResetData {
4149    V9fsPDU pdu;
4150    bool done;
4151} VirtfsCoResetData;
4152
4153static void coroutine_fn virtfs_co_reset(void *opaque)
4154{
4155    VirtfsCoResetData *data = opaque;
4156
4157    virtfs_reset(&data->pdu);
4158    data->done = true;
4159}
4160
4161void v9fs_reset(V9fsState *s)
4162{
4163    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4164    Coroutine *co;
4165
4166    while (!QLIST_EMPTY(&s->active_list)) {
4167        aio_poll(qemu_get_aio_context(), true);
4168    }
4169
4170    co = qemu_coroutine_create(virtfs_co_reset, &data);
4171    qemu_coroutine_enter(co);
4172
4173    while (!data.done) {
4174        aio_poll(qemu_get_aio_context(), true);
4175    }
4176}
4177
4178static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4179{
4180    struct rlimit rlim;
4181    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4182        error_report("Failed to get the resource limit");
4183        exit(1);
4184    }
4185    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
4186    open_fd_rc = rlim.rlim_cur/2;
4187}
4188