qemu/hw/9pfs/9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14/*
  15 * Not so fast! You might want to read the 9p developer docs first:
  16 * https://wiki.qemu.org/Documentation/9p
  17 */
  18
  19#include "qemu/osdep.h"
  20#include <glib/gprintf.h>
  21#include "hw/virtio/virtio.h"
  22#include "qapi/error.h"
  23#include "qemu/error-report.h"
  24#include "qemu/iov.h"
  25#include "qemu/main-loop.h"
  26#include "qemu/sockets.h"
  27#include "virtio-9p.h"
  28#include "fsdev/qemu-fsdev.h"
  29#include "9p-xattr.h"
  30#include "coth.h"
  31#include "trace.h"
  32#include "migration/blocker.h"
  33#include "qemu/xxhash.h"
  34#include <math.h>
  35#include <linux/limits.h>
  36
  37int open_fd_hw;      /* not referenced in the visible code; presumably an fd high-water mark -- TODO confirm */
  38int total_open_fd;   /* not referenced in the visible code; presumably the count of open host fds -- TODO confirm */
  39static int open_fd_rc; /* v9fs_reclaim_fd() stops collecting fids once it has picked this many */
  40
  41enum {
    /*
     * Open-mode encoding decoded by omode_to_uflags(): the low two bits
     * select the access mode, the other values are independent flag bits.
     */
  42    Oread   = 0x00,    /* mapped to O_RDONLY */
  43    Owrite  = 0x01,    /* mapped to O_WRONLY */
  44    Ordwr   = 0x02,    /* mapped to O_RDWR */
  45    Oexec   = 0x03,    /* mapped to O_RDONLY as well */
  46    Oexcl   = 0x04,    /* adds O_EXCL */
  47    Otrunc  = 0x10,    /* adds O_TRUNC */
  48    Orexec  = 0x20,    /* not translated by omode_to_uflags() */
  49    Orclose = 0x40,    /* not translated by omode_to_uflags() */
  50    Oappend = 0x80,    /* adds O_APPEND */
  51};
  52
  53static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  54{
  55    ssize_t ret;
  56    va_list ap;
  57
  58    va_start(ap, fmt);
  59    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
  60    va_end(ap);
  61
  62    return ret;
  63}
  64
  65static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
  66{
  67    ssize_t ret;
  68    va_list ap;
  69
  70    va_start(ap, fmt);
  71    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
  72    va_end(ap);
  73
  74    return ret;
  75}
  76
  77static int omode_to_uflags(int8_t mode)
  78{
  79    int ret = 0;
  80
  81    switch (mode & 3) {
  82    case Oread:
  83        ret = O_RDONLY;
  84        break;
  85    case Ordwr:
  86        ret = O_RDWR;
  87        break;
  88    case Owrite:
  89        ret = O_WRONLY;
  90        break;
  91    case Oexec:
  92        ret = O_RDONLY;
  93        break;
  94    }
  95
  96    if (mode & Otrunc) {
  97        ret |= O_TRUNC;
  98    }
  99
 100    if (mode & Oappend) {
 101        ret |= O_APPEND;
 102    }
 103
 104    if (mode & Oexcl) {
 105        ret |= O_EXCL;
 106    }
 107
 108    return ret;
 109}
 110
 111typedef struct DotlOpenflagMap { /* one row of the table walked by dotl_to_open_flags() */
 112    int dotl_flag;   /* bit tested in the flags value passed to dotl_to_open_flags() */
 113    int open_flag;   /* host open flag OR-ed into the result when dotl_flag is set */
 114} DotlOpenflagMap;
 115
 116static int dotl_to_open_flags(int flags)
 117{
 118    int i;
 119    /*
 120     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
 121     * and P9_DOTL_NOACCESS
 122     */
 123    int oflags = flags & O_ACCMODE;
 124
 125    DotlOpenflagMap dotl_oflag_map[] = {
 126        { P9_DOTL_CREATE, O_CREAT },
 127        { P9_DOTL_EXCL, O_EXCL },
 128        { P9_DOTL_NOCTTY , O_NOCTTY },
 129        { P9_DOTL_TRUNC, O_TRUNC },
 130        { P9_DOTL_APPEND, O_APPEND },
 131        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 132        { P9_DOTL_DSYNC, O_DSYNC },
 133        { P9_DOTL_FASYNC, FASYNC },
 134        { P9_DOTL_DIRECT, O_DIRECT },
 135        { P9_DOTL_LARGEFILE, O_LARGEFILE },
 136        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 137        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 138        { P9_DOTL_NOATIME, O_NOATIME },
 139        { P9_DOTL_SYNC, O_SYNC },
 140    };
 141
 142    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 143        if (flags & dotl_oflag_map[i].dotl_flag) {
 144            oflags |= dotl_oflag_map[i].open_flag;
 145        }
 146    }
 147
 148    return oflags;
 149}
 150
 151void cred_init(FsCred *credp)
 152{
 153    credp->fc_uid = -1;
 154    credp->fc_gid = -1;
 155    credp->fc_mode = -1;
 156    credp->fc_rdev = -1;
 157}
 158
 159static int get_dotl_openflags(V9fsState *s, int oflags)
 160{
 161    int flags;
 162    /*
 163     * Filter the client open flags
 164     */
 165    flags = dotl_to_open_flags(oflags);
 166    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 167    /*
 168     * Ignore direct disk access hint until the server supports it.
 169     */
 170    flags &= ~O_DIRECT;
 171    return flags;
 172}
 173
 174void v9fs_path_init(V9fsPath *path)
 175{
 176    path->data = NULL;
 177    path->size = 0;
 178}
 179
 180void v9fs_path_free(V9fsPath *path)
 181{
 182    g_free(path->data);
 183    path->data = NULL;
 184    path->size = 0;
 185}
 186
 187
 188void GCC_FMT_ATTR(2, 3)
 189v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 190{
 191    va_list ap;
 192
 193    v9fs_path_free(path);
 194
 195    va_start(ap, fmt);
 196    /* Bump the size for including terminating NULL */
 197    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
 198    va_end(ap);
 199}
 200
 201void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
 202{
 203    v9fs_path_free(dst);
 204    dst->size = src->size;
 205    dst->data = g_memdup(src->data, src->size);
 206}
 207
 208int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 209                      const char *name, V9fsPath *path)
 210{
 211    int err;
 212    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 213    if (err < 0) {
 214        err = -errno;
 215    }
 216    return err;
 217}
 218
 219/*
 220 * Return TRUE if s1 is an ancestor of s2.
 221 *
 222 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 223 * As a special case, We treat s1 as ancestor of s2 if they are same!
 224 */
 225static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 226{
 227    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 228        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 229            return 1;
 230        }
 231    }
 232    return 0;
 233}
 234
 235static size_t v9fs_string_size(V9fsString *str)
 236{
 237    return str->size;
 238}
 239
 240/*
 241 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 242static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 243{
 244    int err = 1;
 245    if (f->fid_type == P9_FID_FILE) {
 246        if (f->fs.fd == -1) {
 247            do {
 248                err = v9fs_co_open(pdu, f, f->open_flags);
 249            } while (err == -EINTR && !pdu->cancelled);
 250        }
 251    } else if (f->fid_type == P9_FID_DIR) {
 252        if (f->fs.dir.stream == NULL) {
 253            do {
 254                err = v9fs_co_opendir(pdu, f);
 255            } while (err == -EINTR && !pdu->cancelled);
 256        }
 257    }
 258    return err;
 259}
 260
 261static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
 262{
 263    int err;
 264    V9fsFidState *f;
 265    V9fsState *s = pdu->s;
 266
 267    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
 268        BUG_ON(f->clunked);
 269        if (f->fid == fid) {
 270            /*
 271             * Update the fid ref upfront so that
 272             * we don't get reclaimed when we yield
 273             * in open later.
 274             */
 275            f->ref++;
 276            /*
 277             * check whether we need to reopen the
 278             * file. We might have closed the fd
 279             * while trying to free up some file
 280             * descriptors.
 281             */
 282            err = v9fs_reopen_fid(pdu, f);
 283            if (err < 0) {
 284                f->ref--;
 285                return NULL;
 286            }
 287            /*
 288             * Mark the fid as referenced so that the LRU
 289             * reclaim won't close the file descriptor
 290             */
 291            f->flags |= FID_REFERENCED;
 292            return f;
 293        }
 294    }
 295    return NULL;
 296}
 297
 298static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 299{
 300    V9fsFidState *f;
 301
 302    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
 303        /* If fid is already there return NULL */
 304        BUG_ON(f->clunked);
 305        if (f->fid == fid) {
 306            return NULL;
 307        }
 308    }
 309    f = g_malloc0(sizeof(V9fsFidState));
 310    f->fid = fid;
 311    f->fid_type = P9_FID_NONE;
 312    f->ref = 1;
 313    /*
 314     * Mark the fid as referenced so that the LRU
 315     * reclaim won't close the file descriptor
 316     */
 317    f->flags |= FID_REFERENCED;
 318    QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next);
 319
 320    v9fs_readdir_init(s->proto_version, &f->fs.dir);
 321    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
 322
 323    return f;
 324}
 325
 326static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 327{
 328    int retval = 0;
 329
 330    if (fidp->fs.xattr.xattrwalk_fid) {
 331        /* getxattr/listxattr fid */
 332        goto free_value;
 333    }
 334    /*
 335     * if this is fid for setxattr. clunk should
 336     * result in setxattr localcall
 337     */
 338    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 339        /* clunk after partial write */
 340        retval = -EINVAL;
 341        goto free_out;
 342    }
 343    if (fidp->fs.xattr.len) {
 344        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 345                                   fidp->fs.xattr.value,
 346                                   fidp->fs.xattr.len,
 347                                   fidp->fs.xattr.flags);
 348    } else {
 349        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 350    }
 351free_out:
 352    v9fs_string_free(&fidp->fs.xattr.name);
 353free_value:
 354    g_free(fidp->fs.xattr.value);
 355    return retval;
 356}
 357
 358static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 359{
 360    int retval = 0;
 361
 362    if (fidp->fid_type == P9_FID_FILE) {
 363        /* If we reclaimed the fd no need to close */
 364        if (fidp->fs.fd != -1) {
 365            retval = v9fs_co_close(pdu, &fidp->fs);
 366        }
 367    } else if (fidp->fid_type == P9_FID_DIR) {
 368        if (fidp->fs.dir.stream != NULL) {
 369            retval = v9fs_co_closedir(pdu, &fidp->fs);
 370        }
 371    } else if (fidp->fid_type == P9_FID_XATTR) {
 372        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 373    }
 374    v9fs_path_free(&fidp->path);
 375    g_free(fidp);
 376    return retval;
 377}
 378
 379static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 380{
 381    BUG_ON(!fidp->ref);
 382    fidp->ref--;
 383    /*
 384     * Don't free the fid if it is in reclaim list
 385     */
 386    if (!fidp->ref && fidp->clunked) {
 387        if (fidp->fid == pdu->s->root_fid) {
 388            /*
 389             * if the clunked fid is root fid then we
 390             * have unmounted the fs on the client side.
 391             * delete the migration blocker. Ideally, this
 392             * should be hooked to transport close notification
 393             */
 394            if (pdu->s->migration_blocker) {
 395                migrate_del_blocker(pdu->s->migration_blocker);
 396                error_free(pdu->s->migration_blocker);
 397                pdu->s->migration_blocker = NULL;
 398            }
 399        }
 400        return free_fid(pdu, fidp);
 401    }
 402    return 0;
 403}
 404
 405static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 406{
 407    V9fsFidState *fidp;
 408
 409    QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
 410        if (fidp->fid == fid) {
 411            QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
 412            fidp->clunked = true;
 413            return fidp;
 414        }
 415    }
 416    return NULL;
 417}
 418
 419void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
 420{
 421    int reclaim_count = 0;
 422    V9fsState *s = pdu->s;
 423    V9fsFidState *f;
 424    QSLIST_HEAD(, V9fsFidState) reclaim_list =
 425        QSLIST_HEAD_INITIALIZER(reclaim_list);
 426
 427    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
 428        /*
 429         * Unlink fids cannot be reclaimed. Check
 430         * for them and skip them. Also skip fids
 431         * currently being operated on.
 432         */
 433        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
 434            continue;
 435        }
 436        /*
 437         * if it is a recently referenced fid
 438         * we leave the fid untouched and clear the
 439         * reference bit. We come back to it later
 440         * in the next iteration. (a simple LRU without
 441         * moving list elements around)
 442         */
 443        if (f->flags & FID_REFERENCED) {
 444            f->flags &= ~FID_REFERENCED;
 445            continue;
 446        }
 447        /*
 448         * Add fids to reclaim list.
 449         */
 450        if (f->fid_type == P9_FID_FILE) {
 451            if (f->fs.fd != -1) {
 452                /*
 453                 * Up the reference count so that
 454                 * a clunk request won't free this fid
 455                 */
 456                f->ref++;
 457                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
 458                f->fs_reclaim.fd = f->fs.fd;
 459                f->fs.fd = -1;
 460                reclaim_count++;
 461            }
 462        } else if (f->fid_type == P9_FID_DIR) {
 463            if (f->fs.dir.stream != NULL) {
 464                /*
 465                 * Up the reference count so that
 466                 * a clunk request won't free this fid
 467                 */
 468                f->ref++;
 469                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
 470                f->fs_reclaim.dir.stream = f->fs.dir.stream;
 471                f->fs.dir.stream = NULL;
 472                reclaim_count++;
 473            }
 474        }
 475        if (reclaim_count >= open_fd_rc) {
 476            break;
 477        }
 478    }
 479    /*
 480     * Now close the fid in reclaim list. Free them if they
 481     * are already clunked.
 482     */
 483    while (!QSLIST_EMPTY(&reclaim_list)) {
 484        f = QSLIST_FIRST(&reclaim_list);
 485        QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
 486        if (f->fid_type == P9_FID_FILE) {
 487            v9fs_co_close(pdu, &f->fs_reclaim);
 488        } else if (f->fid_type == P9_FID_DIR) {
 489            v9fs_co_closedir(pdu, &f->fs_reclaim);
 490        }
 491        /*
 492         * Now drop the fid reference, free it
 493         * if clunked.
 494         */
 495        put_fid(pdu, f);
 496    }
 497}
 498
 499static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 500{
 501    int err;
 502    V9fsState *s = pdu->s;
 503    V9fsFidState *fidp, *fidp_next;
 504
 505    fidp = QSIMPLEQ_FIRST(&s->fid_list);
 506    if (!fidp) {
 507        return 0;
 508    }
 509
 510    /*
 511     * v9fs_reopen_fid() can yield : a reference on the fid must be held
 512     * to ensure its pointer remains valid and we can safely pass it to
 513     * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so
 514     * we must keep a reference on the next fid as well. So the logic here
 515     * is to get a reference on a fid and only put it back during the next
 516     * iteration after we could get a reference on the next fid. Start with
 517     * the first one.
 518     */
 519    for (fidp->ref++; fidp; fidp = fidp_next) {
 520        if (fidp->path.size == path->size &&
 521            !memcmp(fidp->path.data, path->data, path->size)) {
 522            /* Mark the fid non reclaimable. */
 523            fidp->flags |= FID_NON_RECLAIMABLE;
 524
 525            /* reopen the file/dir if already closed */
 526            err = v9fs_reopen_fid(pdu, fidp);
 527            if (err < 0) {
 528                put_fid(pdu, fidp);
 529                return err;
 530            }
 531        }
 532
 533        fidp_next = QSIMPLEQ_NEXT(fidp, next);
 534
 535        if (fidp_next) {
 536            /*
 537             * Ensure the next fid survives a potential clunk request during
 538             * put_fid() below and v9fs_reopen_fid() in the next iteration.
 539             */
 540            fidp_next->ref++;
 541        }
 542
 543        /* We're done with this fid */
 544        put_fid(pdu, fidp);
 545    }
 546
 547    return 0;
 548}
 549
 550static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
 551{
 552    V9fsState *s = pdu->s;
 553    V9fsFidState *fidp;
 554
 555    /* Free all fids */
 556    while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
 557        /* Get fid */
 558        fidp = QSIMPLEQ_FIRST(&s->fid_list);
 559        fidp->ref++;
 560
 561        /* Clunk fid */
 562        QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
 563        fidp->clunked = true;
 564
 565        put_fid(pdu, fidp);
 566    }
 567}
 568
 569#define P9_QID_TYPE_DIR         0x80 /* set in qid type by stat_to_qid() for directories */
 570#define P9_QID_TYPE_SYMLINK     0x02 /* set in qid type by stat_to_qid() for symlinks */
 571
    /* Mode bit constants; their users are outside the visible part of this file. */
 572#define P9_STAT_MODE_DIR        0x80000000
 573#define P9_STAT_MODE_APPEND     0x40000000
 574#define P9_STAT_MODE_EXCL       0x20000000
 575#define P9_STAT_MODE_MOUNT      0x10000000
 576#define P9_STAT_MODE_AUTH       0x08000000
 577#define P9_STAT_MODE_TMP        0x04000000
 578#define P9_STAT_MODE_SYMLINK    0x02000000
 579#define P9_STAT_MODE_LINK       0x01000000
 580#define P9_STAT_MODE_DEVICE     0x00800000
 581#define P9_STAT_MODE_NAMED_PIPE 0x00200000
 582#define P9_STAT_MODE_SOCKET     0x00100000
 583#define P9_STAT_MODE_SETUID     0x00080000
 584#define P9_STAT_MODE_SETGID     0x00040000
 585#define P9_STAT_MODE_SETVTX     0x00010000
 586
    /* Union of the DIR, SYMLINK, LINK, DEVICE, NAMED_PIPE and SOCKET bits. */
 587#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
 588                                P9_STAT_MODE_SYMLINK |      \
 589                                P9_STAT_MODE_LINK |         \
 590                                P9_STAT_MODE_DEVICE |       \
 591                                P9_STAT_MODE_NAMED_PIPE |   \
 592                                P9_STAT_MODE_SOCKET)
 593
 594/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
 595static inline uint8_t mirror8bit(uint8_t byte)
 596{
 597    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
 598}
 599
 600/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
 601static inline uint64_t mirror64bit(uint64_t value)
 602{
 603    return ((uint64_t)mirror8bit(value         & 0xff) << 56) |
 604           ((uint64_t)mirror8bit((value >> 8)  & 0xff) << 48) |
 605           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
 606           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
 607           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
 608           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
 609           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8)  |
 610           ((uint64_t)mirror8bit((value >> 56) & 0xff));
 611}
 612
 613/**
 614 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 615 *
 616 * The smaller this value, the smaller the minimum bit count for the Exp.
 617 * Golomb generated affixes will be (at lowest index), however for the
 618 * price of having higher maximum bit count of generated affixes (at highest
 619 * index). Likewise increasing this parameter yields a smaller maximum bit
 620 * count for the price of having higher minimum bit count.
 621 *
 622 * In practice that means: a good value for k depends on the expected amount
 623 * of devices to be exposed by one export. For a small amount of devices k
 624 * should be small, for a large amount of devices k might be increased
 625 * instead. The default of k=0 should be fine for most users though.
 626 *
 627 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
 628 * k should not change as long as guest is still running! Because that would
 629 * cause completely different inode numbers to be generated on guest.
 630 */
 631#define EXP_GOLOMB_K    0
 632
 633/**
 634 * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
 635 *
 636 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
 637 * with growing length and with the mathematical property of being
 638 * "prefix-free". The latter means the generated prefixes can be prepended
 639 * in front of arbitrary numbers and the resulting concatenated numbers are
 640 * guaranteed to be always unique.
 641 *
 642 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 643 * sense that lowest allowed index (@param n) starts with 1, not with zero.
 644 *
 645 * @param n - natural number (or index) of the prefix to be generated
 646 *            (1, 2, 3, ...)
 647 * @param k - parameter k of Exp. Golomb algorithm to be used
 648 *            (see comment on EXP_GOLOMB_K macro for details about k)
 649 */
 650static VariLenAffix expGolombEncode(uint64_t n, int k)
 651{
 652    const uint64_t value = n + (1 << k) - 1;
 653    const int bits = (int) log2(value) + 1;
 654    return (VariLenAffix) {
 655        .type = AffixType_Prefix,
 656        .value = value,
 657        .bits = bits + MAX((bits - 1 - k), 0)
 658    };
 659}
 660
 661/**
 662 * @brief Converts a suffix into a prefix, or a prefix into a suffix.
 663 *
 664 * Simply mirror all bits of the affix value, for the purpose to preserve
 665 * respectively the mathematical "prefix-free" or "suffix-free" property
 666 * after the conversion.
 667 *
 668 * If a passed prefix is suitable to create unique numbers, then the
 669 * returned suffix is suitable to create unique numbers as well (and vice
 670 * versa).
 671 */
 672static VariLenAffix invertAffix(const VariLenAffix *affix)
 673{
 674    return (VariLenAffix) {
 675        .type =
 676            (affix->type == AffixType_Suffix) ?
 677                AffixType_Prefix : AffixType_Suffix,
 678        .value =
 679            mirror64bit(affix->value) >>
 680            ((sizeof(affix->value) * 8) - affix->bits),
 681        .bits = affix->bits
 682    };
 683}
 684
 685/**
 686 * @brief Generates suffix numbers with "suffix-free" property.
 687 *
 688 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 689 *
 690 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 691 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 692 * which are still suitable for generating unique numbers.
 693 *
 694 * @param n - natural number (or index) of the suffix to be generated
 695 *            (1, 2, 3, ...)
 696 */
 697static VariLenAffix affixForIndex(uint64_t index)
 698{
 699    VariLenAffix prefix;
 700    prefix = expGolombEncode(index, EXP_GOLOMB_K);
 701    return invertAffix(&prefix); /* convert prefix to suffix */
 702}
 703
 704/* creative abuse of tb_hash_func7, which is based on xxhash */
 705static uint32_t qpp_hash(QppEntry e)
 706{
 707    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
 708}
 709
 710static uint32_t qpf_hash(QpfEntry e)
 711{
 712    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
 713}
 714
 715static bool qpd_cmp_func(const void *obj, const void *userp)
 716{
 717    const QpdEntry *e1 = obj, *e2 = userp;
 718    return e1->dev == e2->dev;
 719}
 720
 721static bool qpp_cmp_func(const void *obj, const void *userp)
 722{
 723    const QppEntry *e1 = obj, *e2 = userp;
 724    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
 725}
 726
 727static bool qpf_cmp_func(const void *obj, const void *userp)
 728{
 729    const QpfEntry *e1 = obj, *e2 = userp;
 730    return e1->dev == e2->dev && e1->ino == e2->ino;
 731}
 732
 733static void qp_table_remove(void *p, uint32_t h, void *up)
 734{
 735    g_free(p);
 736}
 737
 738static void qp_table_destroy(struct qht *ht)
 739{
 740    if (!ht || !ht->map) {
 741        return;
 742    }
 743    qht_iter(ht, qp_table_remove, NULL);
 744    qht_destroy(ht);
 745}
 746
 747static void qpd_table_init(struct qht *ht)
 748{
 749    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 750}
 751
 752static void qpp_table_init(struct qht *ht)
 753{
 754    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
 755}
 756
 757static void qpf_table_init(struct qht *ht)
 758{
 759    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
 760}
 761
 762/*
 763 * Returns how many (high end) bits of inode numbers of the passed fs
 764 * device shall be used (in combination with the device number) to
 765 * generate hash values for qpp_table entries.
 766 *
 767 * This function is required if variable length suffixes are used for inode
 768 * number mapping on guest level. Since a device may end up having multiple
 769 * entries in qpp_table, each entry most probably with a different suffix
 770 * length, we thus need this function in conjunction with qpd_table to
 771 * "agree" about a fix amount of bits (per device) to be always used for
 772 * generating hash values for the purpose of accessing qpp_table in order
 773 * get consistent behaviour when accessing qpp_table.
 774 */
 775static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
 776{
 777    QpdEntry lookup = {
 778        .dev = dev
 779    }, *val;
 780    uint32_t hash = dev;
 781    VariLenAffix affix;
 782
 783    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
 784    if (!val) {
 785        val = g_malloc0(sizeof(QpdEntry));
 786        *val = lookup;
 787        affix = affixForIndex(pdu->s->qp_affix_next);
 788        val->prefix_bits = affix.bits;
 789        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
 790        pdu->s->qp_ndevices++;
 791    }
 792    return val->prefix_bits;
 793}
 794
 795/**
 796 * @brief Slow / full mapping host inode nr -> guest inode nr.
 797 *
 798 * This function performs a slower and much more costly remapping of an
 799 * original file inode number on host to an appropriate different inode
 800 * number on guest. For every (dev, inode) combination on host a new
 801 * sequential number is generated, cached and exposed as inode number on
 802 * guest.
 803 *
 804 * This is just a "last resort" fallback solution if the much faster/cheaper
 805 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 806 * expected ever to be used at all though.
 807 *
 808 * @see qid_path_suffixmap() for details
 809 *
 810 */
 811static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
 812                            uint64_t *path)
 813{
 814    QpfEntry lookup = {
 815        .dev = stbuf->st_dev,
 816        .ino = stbuf->st_ino
 817    }, *val;
 818    uint32_t hash = qpf_hash(lookup);
 819    VariLenAffix affix;
 820
 821    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
 822
 823    if (!val) {
 824        if (pdu->s->qp_fullpath_next == 0) {
 825            /* no more files can be mapped :'( */
 826            error_report_once(
 827                "9p: No more prefixes available for remapping inodes from "
 828                "host to guest."
 829            );
 830            return -ENFILE;
 831        }
 832
 833        val = g_malloc0(sizeof(QppEntry));
 834        *val = lookup;
 835
 836        /* new unique inode and device combo */
 837        affix = affixForIndex(
 838            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
 839        );
 840        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
 841        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
 842        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
 843    }
 844
 845    *path = val->path;
 846    return 0;
 847}
 848
 849/**
 850 * @brief Quick mapping host inode nr -> guest inode nr.
 851 *
 852 * This function performs quick remapping of an original file inode number
 853 * on host to an appropriate different inode number on guest. This remapping
 854 * of inodes is required to avoid inode nr collisions on guest which would
 855 * happen if the 9p export contains more than 1 exported file system (or
 856 * more than 1 file system data set), because unlike on host level where the
 857 * files would have different device nrs, all files exported by 9p would
 858 * share the same device nr on guest (the device nr of the virtual 9p device
 859 * that is).
 860 *
 861 * Inode remapping is performed by chopping off high end bits of the original
 862 * inode number from host, shifting the result upwards and then assigning a
 863 * generated suffix number for the low end bits, where the same suffix number
 864 * will be shared by all inodes with the same device id AND the same high end
 865 * bits that have been chopped off. That approach utilizes the fact that inode
 866 * numbers very likely share the same high end bits (i.e. due to their common
 867 * sequential generation by file systems) and hence we only have to generate
 868 * and track a very limited amount of suffixes in practice due to that.
 869 *
 870 * We generate variable size suffixes for that purpose. The 1st generated
 871 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 872 * the original inode number. The subsequent suffixes being generated will
 873 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 874 * generated will have 3 bits and hence we have to chop off 3 bits from their
 875 * original inodes, and so on. That approach of using variable length suffixes
 876 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 877 * limited amount of devices are shared by the same export (e.g. typically
 878 * less than 2 dozen devices per 9p export), so in practice we need to chop
 879 * off less bits than with fixed size prefixes and yet are flexible to add
 880 * new devices at runtime below host's export directory at any time without
 881 * having to reboot guest nor requiring to reconfigure guest for that. And due
 882 * to the very limited amount of original high end bits that we chop off that
 883 * way, the total amount of suffixes we need to generate is less than by using
 884 * fixed size prefixes and hence it also improves performance of the inode
 885 * remapping algorithm, and finally has the nice side effect that the inode
 886 * numbers on guest will be much smaller & human friendly. ;-)
 887 */
 888static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
 889                              uint64_t *path)
 890{
 891    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
 892    QppEntry lookup = {
 893        .dev = stbuf->st_dev,
 894        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
 895    }, *val;
 896    uint32_t hash = qpp_hash(lookup);
 897
 898    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
 899
 900    if (!val) {
 901        if (pdu->s->qp_affix_next == 0) {
 902            /* we ran out of affixes */
 903            warn_report_once(
 904                "9p: Potential degraded performance of inode remapping"
 905            );
 906            return -ENFILE;
 907        }
 908
 909        val = g_malloc0(sizeof(QppEntry));
 910        *val = lookup;
 911
 912        /* new unique inode affix and device combo */
 913        val->qp_affix_index = pdu->s->qp_affix_next++;
 914        val->qp_affix = affixForIndex(val->qp_affix_index);
 915        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
 916    }
 917    /* assuming generated affix to be suffix type, not prefix */
 918    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
 919    return 0;
 920}
 921
 922static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
 923{
 924    int err;
 925    size_t size;
 926
 927    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
 928        /* map inode+device to qid path (fast path) */
 929        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
 930        if (err == -ENFILE) {
 931            /* fast path didn't work, fall back to full map */
 932            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
 933        }
 934        if (err) {
 935            return err;
 936        }
 937    } else {
 938        if (pdu->s->dev_id != stbuf->st_dev) {
 939            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
 940                error_report_once(
 941                    "9p: Multiple devices detected in same VirtFS export. "
 942                    "Access of guest to additional devices is (partly) "
 943                    "denied due to virtfs option 'multidevs=forbid' being "
 944                    "effective."
 945                );
 946                return -ENODEV;
 947            } else {
 948                warn_report_once(
 949                    "9p: Multiple devices detected in same VirtFS export, "
 950                    "which might lead to file ID collisions and severe "
 951                    "misbehaviours on guest! You should either use a "
 952                    "separate export for each device shared from host or "
 953                    "use virtfs option 'multidevs=remap'!"
 954                );
 955            }
 956        }
 957        memset(&qidp->path, 0, sizeof(qidp->path));
 958        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 959        memcpy(&qidp->path, &stbuf->st_ino, size);
 960    }
 961
 962    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 963    qidp->type = 0;
 964    if (S_ISDIR(stbuf->st_mode)) {
 965        qidp->type |= P9_QID_TYPE_DIR;
 966    }
 967    if (S_ISLNK(stbuf->st_mode)) {
 968        qidp->type |= P9_QID_TYPE_SYMLINK;
 969    }
 970
 971    return 0;
 972}
 973
 974V9fsPDU *pdu_alloc(V9fsState *s)
 975{
 976    V9fsPDU *pdu = NULL;
 977
 978    if (!QLIST_EMPTY(&s->free_list)) {
 979        pdu = QLIST_FIRST(&s->free_list);
 980        QLIST_REMOVE(pdu, next);
 981        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
 982    }
 983    return pdu;
 984}
 985
 986void pdu_free(V9fsPDU *pdu)
 987{
 988    V9fsState *s = pdu->s;
 989
 990    g_assert(!pdu->cancelled);
 991    QLIST_REMOVE(pdu, next);
 992    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
 993}
 994
 995static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
 996{
 997    int8_t id = pdu->id + 1; /* Response */
 998    V9fsState *s = pdu->s;
 999    int ret;
1000
1001    /*
1002     * The 9p spec requires that successfully cancelled pdus receive no reply.
1003     * Sending a reply would confuse clients because they would
1004     * assume that any EINTR is the actual result of the operation,
1005     * rather than a consequence of the cancellation. However, if
1006     * the operation completed (succesfully or with an error other
1007     * than caused be cancellation), we do send out that reply, both
1008     * for efficiency and to avoid confusing the rest of the state machine
1009     * that assumes passing a non-error here will mean a successful
1010     * transmission of the reply.
1011     */
1012    bool discard = pdu->cancelled && len == -EINTR;
1013    if (discard) {
1014        trace_v9fs_rcancel(pdu->tag, pdu->id);
1015        pdu->size = 0;
1016        goto out_notify;
1017    }
1018
1019    if (len < 0) {
1020        int err = -len;
1021        len = 7;
1022
1023        if (s->proto_version != V9FS_PROTO_2000L) {
1024            V9fsString str;
1025
1026            str.data = strerror(err);
1027            str.size = strlen(str.data);
1028
1029            ret = pdu_marshal(pdu, len, "s", &str);
1030            if (ret < 0) {
1031                goto out_notify;
1032            }
1033            len += ret;
1034            id = P9_RERROR;
1035        }
1036
1037        ret = pdu_marshal(pdu, len, "d", err);
1038        if (ret < 0) {
1039            goto out_notify;
1040        }
1041        len += ret;
1042
1043        if (s->proto_version == V9FS_PROTO_2000L) {
1044            id = P9_RLERROR;
1045        }
1046        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1047    }
1048
1049    /* fill out the header */
1050    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1051        goto out_notify;
1052    }
1053
1054    /* keep these in sync */
1055    pdu->size = len;
1056    pdu->id = id;
1057
1058out_notify:
1059    pdu->s->transport->push_and_notify(pdu);
1060
1061    /* Now wakeup anybody waiting in flush for this request */
1062    if (!qemu_co_queue_next(&pdu->complete)) {
1063        pdu_free(pdu);
1064    }
1065}
1066
1067static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1068{
1069    mode_t ret;
1070
1071    ret = mode & 0777;
1072    if (mode & P9_STAT_MODE_DIR) {
1073        ret |= S_IFDIR;
1074    }
1075
1076    if (mode & P9_STAT_MODE_SYMLINK) {
1077        ret |= S_IFLNK;
1078    }
1079    if (mode & P9_STAT_MODE_SOCKET) {
1080        ret |= S_IFSOCK;
1081    }
1082    if (mode & P9_STAT_MODE_NAMED_PIPE) {
1083        ret |= S_IFIFO;
1084    }
1085    if (mode & P9_STAT_MODE_DEVICE) {
1086        if (extension->size && extension->data[0] == 'c') {
1087            ret |= S_IFCHR;
1088        } else {
1089            ret |= S_IFBLK;
1090        }
1091    }
1092
1093    if (!(ret & ~0777)) {
1094        ret |= S_IFREG;
1095    }
1096
1097    if (mode & P9_STAT_MODE_SETUID) {
1098        ret |= S_ISUID;
1099    }
1100    if (mode & P9_STAT_MODE_SETGID) {
1101        ret |= S_ISGID;
1102    }
1103    if (mode & P9_STAT_MODE_SETVTX) {
1104        ret |= S_ISVTX;
1105    }
1106
1107    return ret;
1108}
1109
1110static int donttouch_stat(V9fsStat *stat)
1111{
1112    if (stat->type == -1 &&
1113        stat->dev == -1 &&
1114        stat->qid.type == 0xff &&
1115        stat->qid.version == (uint32_t) -1 &&
1116        stat->qid.path == (uint64_t) -1 &&
1117        stat->mode == -1 &&
1118        stat->atime == -1 &&
1119        stat->mtime == -1 &&
1120        stat->length == -1 &&
1121        !stat->name.size &&
1122        !stat->uid.size &&
1123        !stat->gid.size &&
1124        !stat->muid.size &&
1125        stat->n_uid == -1 &&
1126        stat->n_gid == -1 &&
1127        stat->n_muid == -1) {
1128        return 1;
1129    }
1130
1131    return 0;
1132}
1133
1134static void v9fs_stat_init(V9fsStat *stat)
1135{
1136    v9fs_string_init(&stat->name);
1137    v9fs_string_init(&stat->uid);
1138    v9fs_string_init(&stat->gid);
1139    v9fs_string_init(&stat->muid);
1140    v9fs_string_init(&stat->extension);
1141}
1142
1143static void v9fs_stat_free(V9fsStat *stat)
1144{
1145    v9fs_string_free(&stat->name);
1146    v9fs_string_free(&stat->uid);
1147    v9fs_string_free(&stat->gid);
1148    v9fs_string_free(&stat->muid);
1149    v9fs_string_free(&stat->extension);
1150}
1151
1152static uint32_t stat_to_v9mode(const struct stat *stbuf)
1153{
1154    uint32_t mode;
1155
1156    mode = stbuf->st_mode & 0777;
1157    if (S_ISDIR(stbuf->st_mode)) {
1158        mode |= P9_STAT_MODE_DIR;
1159    }
1160
1161    if (S_ISLNK(stbuf->st_mode)) {
1162        mode |= P9_STAT_MODE_SYMLINK;
1163    }
1164
1165    if (S_ISSOCK(stbuf->st_mode)) {
1166        mode |= P9_STAT_MODE_SOCKET;
1167    }
1168
1169    if (S_ISFIFO(stbuf->st_mode)) {
1170        mode |= P9_STAT_MODE_NAMED_PIPE;
1171    }
1172
1173    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1174        mode |= P9_STAT_MODE_DEVICE;
1175    }
1176
1177    if (stbuf->st_mode & S_ISUID) {
1178        mode |= P9_STAT_MODE_SETUID;
1179    }
1180
1181    if (stbuf->st_mode & S_ISGID) {
1182        mode |= P9_STAT_MODE_SETGID;
1183    }
1184
1185    if (stbuf->st_mode & S_ISVTX) {
1186        mode |= P9_STAT_MODE_SETVTX;
1187    }
1188
1189    return mode;
1190}
1191
1192static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1193                                       const char *basename,
1194                                       const struct stat *stbuf,
1195                                       V9fsStat *v9stat)
1196{
1197    int err;
1198
1199    memset(v9stat, 0, sizeof(*v9stat));
1200
1201    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1202    if (err < 0) {
1203        return err;
1204    }
1205    v9stat->mode = stat_to_v9mode(stbuf);
1206    v9stat->atime = stbuf->st_atime;
1207    v9stat->mtime = stbuf->st_mtime;
1208    v9stat->length = stbuf->st_size;
1209
1210    v9fs_string_free(&v9stat->uid);
1211    v9fs_string_free(&v9stat->gid);
1212    v9fs_string_free(&v9stat->muid);
1213
1214    v9stat->n_uid = stbuf->st_uid;
1215    v9stat->n_gid = stbuf->st_gid;
1216    v9stat->n_muid = 0;
1217
1218    v9fs_string_free(&v9stat->extension);
1219
1220    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1221        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1222        if (err < 0) {
1223            return err;
1224        }
1225    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1226        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1227                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1228                major(stbuf->st_rdev), minor(stbuf->st_rdev));
1229    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1230        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1231                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1232    }
1233
1234    v9fs_string_sprintf(&v9stat->name, "%s", basename);
1235
1236    v9stat->size = 61 +
1237        v9fs_string_size(&v9stat->name) +
1238        v9fs_string_size(&v9stat->uid) +
1239        v9fs_string_size(&v9stat->gid) +
1240        v9fs_string_size(&v9stat->muid) +
1241        v9fs_string_size(&v9stat->extension);
1242    return 0;
1243}
1244
/* Bits of the getattr request mask / result mask, one per stat field */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
1263
1264
1265static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1266                                V9fsStatDotl *v9lstat)
1267{
1268    memset(v9lstat, 0, sizeof(*v9lstat));
1269
1270    v9lstat->st_mode = stbuf->st_mode;
1271    v9lstat->st_nlink = stbuf->st_nlink;
1272    v9lstat->st_uid = stbuf->st_uid;
1273    v9lstat->st_gid = stbuf->st_gid;
1274    v9lstat->st_rdev = stbuf->st_rdev;
1275    v9lstat->st_size = stbuf->st_size;
1276    v9lstat->st_blksize = stbuf->st_blksize;
1277    v9lstat->st_blocks = stbuf->st_blocks;
1278    v9lstat->st_atime_sec = stbuf->st_atime;
1279    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1280    v9lstat->st_mtime_sec = stbuf->st_mtime;
1281    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1282    v9lstat->st_ctime_sec = stbuf->st_ctime;
1283    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1284    /* Currently we only support BASIC fields in stat */
1285    v9lstat->st_result_mask = P9_STATS_BASIC;
1286
1287    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1288}
1289
/*
 * Debugging helper: print the @cnt entries of the scatter/gather list @sg
 * to stdout as "sg[cnt]: {(base, len), ...}".
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, which takes the unsigned %zu conversion */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1303
1304/* Will call this only for path name based fid */
1305static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1306{
1307    V9fsPath str;
1308    v9fs_path_init(&str);
1309    v9fs_path_copy(&str, dst);
1310    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1311    v9fs_path_free(&str);
1312}
1313
1314static inline bool is_ro_export(FsContext *ctx)
1315{
1316    return ctx->export_flags & V9FS_RDONLY;
1317}
1318
1319static void coroutine_fn v9fs_version(void *opaque)
1320{
1321    ssize_t err;
1322    V9fsPDU *pdu = opaque;
1323    V9fsState *s = pdu->s;
1324    V9fsString version;
1325    size_t offset = 7;
1326
1327    v9fs_string_init(&version);
1328    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1329    if (err < 0) {
1330        goto out;
1331    }
1332    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1333
1334    virtfs_reset(pdu);
1335
1336    if (!strcmp(version.data, "9P2000.u")) {
1337        s->proto_version = V9FS_PROTO_2000U;
1338    } else if (!strcmp(version.data, "9P2000.L")) {
1339        s->proto_version = V9FS_PROTO_2000L;
1340    } else {
1341        v9fs_string_sprintf(&version, "unknown");
1342        /* skip min. msize check, reporting invalid version has priority */
1343        goto marshal;
1344    }
1345
1346    if (s->msize < P9_MIN_MSIZE) {
1347        err = -EMSGSIZE;
1348        error_report(
1349            "9pfs: Client requested msize < minimum msize ("
1350            stringify(P9_MIN_MSIZE) ") supported by this server."
1351        );
1352        goto out;
1353    }
1354
1355    /* 8192 is the default msize of Linux clients */
1356    if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
1357        warn_report_once(
1358            "9p: degraded performance: a reasonable high msize should be "
1359            "chosen on client/guest side (chosen msize is <= 8192). See "
1360            "https://wiki.qemu.org/Documentation/9psetup#msize for details."
1361        );
1362    }
1363
1364marshal:
1365    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1366    if (err < 0) {
1367        goto out;
1368    }
1369    err += offset;
1370    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1371out:
1372    pdu_complete(pdu, err);
1373    v9fs_string_free(&version);
1374}
1375
1376static void coroutine_fn v9fs_attach(void *opaque)
1377{
1378    V9fsPDU *pdu = opaque;
1379    V9fsState *s = pdu->s;
1380    int32_t fid, afid, n_uname;
1381    V9fsString uname, aname;
1382    V9fsFidState *fidp;
1383    size_t offset = 7;
1384    V9fsQID qid;
1385    ssize_t err;
1386    struct stat stbuf;
1387
1388    v9fs_string_init(&uname);
1389    v9fs_string_init(&aname);
1390    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1391                        &afid, &uname, &aname, &n_uname);
1392    if (err < 0) {
1393        goto out_nofid;
1394    }
1395    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1396
1397    fidp = alloc_fid(s, fid);
1398    if (fidp == NULL) {
1399        err = -EINVAL;
1400        goto out_nofid;
1401    }
1402    fidp->uid = n_uname;
1403    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1404    if (err < 0) {
1405        err = -EINVAL;
1406        clunk_fid(s, fid);
1407        goto out;
1408    }
1409    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1410    if (err < 0) {
1411        err = -EINVAL;
1412        clunk_fid(s, fid);
1413        goto out;
1414    }
1415    err = stat_to_qid(pdu, &stbuf, &qid);
1416    if (err < 0) {
1417        err = -EINVAL;
1418        clunk_fid(s, fid);
1419        goto out;
1420    }
1421
1422    /*
1423     * disable migration if we haven't done already.
1424     * attach could get called multiple times for the same export.
1425     */
1426    if (!s->migration_blocker) {
1427        error_setg(&s->migration_blocker,
1428                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1429                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1430        err = migrate_add_blocker(s->migration_blocker, NULL);
1431        if (err < 0) {
1432            error_free(s->migration_blocker);
1433            s->migration_blocker = NULL;
1434            clunk_fid(s, fid);
1435            goto out;
1436        }
1437        s->root_fid = fid;
1438    }
1439
1440    err = pdu_marshal(pdu, offset, "Q", &qid);
1441    if (err < 0) {
1442        clunk_fid(s, fid);
1443        goto out;
1444    }
1445    err += offset;
1446
1447    memcpy(&s->root_st, &stbuf, sizeof(stbuf));
1448    trace_v9fs_attach_return(pdu->tag, pdu->id,
1449                             qid.type, qid.version, qid.path);
1450out:
1451    put_fid(pdu, fidp);
1452out_nofid:
1453    pdu_complete(pdu, err);
1454    v9fs_string_free(&uname);
1455    v9fs_string_free(&aname);
1456}
1457
1458static void coroutine_fn v9fs_stat(void *opaque)
1459{
1460    int32_t fid;
1461    V9fsStat v9stat;
1462    ssize_t err = 0;
1463    size_t offset = 7;
1464    struct stat stbuf;
1465    V9fsFidState *fidp;
1466    V9fsPDU *pdu = opaque;
1467    char *basename;
1468
1469    err = pdu_unmarshal(pdu, offset, "d", &fid);
1470    if (err < 0) {
1471        goto out_nofid;
1472    }
1473    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1474
1475    fidp = get_fid(pdu, fid);
1476    if (fidp == NULL) {
1477        err = -ENOENT;
1478        goto out_nofid;
1479    }
1480    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1481    if (err < 0) {
1482        goto out;
1483    }
1484    basename = g_path_get_basename(fidp->path.data);
1485    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1486    g_free(basename);
1487    if (err < 0) {
1488        goto out;
1489    }
1490    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1491    if (err < 0) {
1492        v9fs_stat_free(&v9stat);
1493        goto out;
1494    }
1495    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1496                           v9stat.atime, v9stat.mtime, v9stat.length);
1497    err += offset;
1498    v9fs_stat_free(&v9stat);
1499out:
1500    put_fid(pdu, fidp);
1501out_nofid:
1502    pdu_complete(pdu, err);
1503}
1504
1505static void coroutine_fn v9fs_getattr(void *opaque)
1506{
1507    int32_t fid;
1508    size_t offset = 7;
1509    ssize_t retval = 0;
1510    struct stat stbuf;
1511    V9fsFidState *fidp;
1512    uint64_t request_mask;
1513    V9fsStatDotl v9stat_dotl;
1514    V9fsPDU *pdu = opaque;
1515
1516    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1517    if (retval < 0) {
1518        goto out_nofid;
1519    }
1520    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1521
1522    fidp = get_fid(pdu, fid);
1523    if (fidp == NULL) {
1524        retval = -ENOENT;
1525        goto out_nofid;
1526    }
1527    /*
1528     * Currently we only support BASIC fields in stat, so there is no
1529     * need to look at request_mask.
1530     */
1531    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1532    if (retval < 0) {
1533        goto out;
1534    }
1535    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1536    if (retval < 0) {
1537        goto out;
1538    }
1539
1540    /*  fill st_gen if requested and supported by underlying fs */
1541    if (request_mask & P9_STATS_GEN) {
1542        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1543        switch (retval) {
1544        case 0:
1545            /* we have valid st_gen: update result mask */
1546            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1547            break;
1548        case -EINTR:
1549            /* request cancelled, e.g. by Tflush */
1550            goto out;
1551        default:
1552            /* failed to get st_gen: not fatal, ignore */
1553            break;
1554        }
1555    }
1556    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1557    if (retval < 0) {
1558        goto out;
1559    }
1560    retval += offset;
1561    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1562                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1563                              v9stat_dotl.st_gid);
1564out:
1565    put_fid(pdu, fidp);
1566out_nofid:
1567    pdu_complete(pdu, retval);
1568}
1569
/* Attribute flags: bits of the 'valid' member of a setattr request */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* covers MODE up to CTIME, but not the two *_SET flags */
#define P9_ATTR_MASK    0x7f
1582
1583static void coroutine_fn v9fs_setattr(void *opaque)
1584{
1585    int err = 0;
1586    int32_t fid;
1587    V9fsFidState *fidp;
1588    size_t offset = 7;
1589    V9fsIattr v9iattr;
1590    V9fsPDU *pdu = opaque;
1591
1592    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1593    if (err < 0) {
1594        goto out_nofid;
1595    }
1596
1597    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1598                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1599                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1600
1601    fidp = get_fid(pdu, fid);
1602    if (fidp == NULL) {
1603        err = -EINVAL;
1604        goto out_nofid;
1605    }
1606    if (v9iattr.valid & P9_ATTR_MODE) {
1607        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1608        if (err < 0) {
1609            goto out;
1610        }
1611    }
1612    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1613        struct timespec times[2];
1614        if (v9iattr.valid & P9_ATTR_ATIME) {
1615            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1616                times[0].tv_sec = v9iattr.atime_sec;
1617                times[0].tv_nsec = v9iattr.atime_nsec;
1618            } else {
1619                times[0].tv_nsec = UTIME_NOW;
1620            }
1621        } else {
1622            times[0].tv_nsec = UTIME_OMIT;
1623        }
1624        if (v9iattr.valid & P9_ATTR_MTIME) {
1625            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1626                times[1].tv_sec = v9iattr.mtime_sec;
1627                times[1].tv_nsec = v9iattr.mtime_nsec;
1628            } else {
1629                times[1].tv_nsec = UTIME_NOW;
1630            }
1631        } else {
1632            times[1].tv_nsec = UTIME_OMIT;
1633        }
1634        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1635        if (err < 0) {
1636            goto out;
1637        }
1638    }
1639    /*
1640     * If the only valid entry in iattr is ctime we can call
1641     * chown(-1,-1) to update the ctime of the file
1642     */
1643    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1644        ((v9iattr.valid & P9_ATTR_CTIME)
1645         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1646        if (!(v9iattr.valid & P9_ATTR_UID)) {
1647            v9iattr.uid = -1;
1648        }
1649        if (!(v9iattr.valid & P9_ATTR_GID)) {
1650            v9iattr.gid = -1;
1651        }
1652        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1653                            v9iattr.gid);
1654        if (err < 0) {
1655            goto out;
1656        }
1657    }
1658    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1659        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1660        if (err < 0) {
1661            goto out;
1662        }
1663    }
1664    err = offset;
1665    trace_v9fs_setattr_return(pdu->tag, pdu->id);
1666out:
1667    put_fid(pdu, fidp);
1668out_nofid:
1669    pdu_complete(pdu, err);
1670}
1671
1672static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1673{
1674    int i;
1675    ssize_t err;
1676    size_t offset = 7;
1677
1678    err = pdu_marshal(pdu, offset, "w", nwnames);
1679    if (err < 0) {
1680        return err;
1681    }
1682    offset += err;
1683    for (i = 0; i < nwnames; i++) {
1684        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1685        if (err < 0) {
1686            return err;
1687        }
1688        offset += err;
1689    }
1690    return offset;
1691}
1692
/* A name is illegal if it is empty or contains a '/'. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1697
/* Returns true if @a and @b have the same device and inode number. */
static bool same_stat_id(const struct stat *a, const struct stat *b)
{
    if (a->st_dev != b->st_dev) {
        return false;
    }
    return a->st_ino == b->st_ino;
}
1702
1703static void coroutine_fn v9fs_walk(void *opaque)
1704{
1705    int name_idx;
1706    V9fsQID *qids = NULL;
1707    int i, err = 0;
1708    V9fsPath dpath, path, *pathes = NULL;
1709    uint16_t nwnames;
1710    struct stat stbuf, fidst, *stbufs = NULL;
1711    size_t offset = 7;
1712    int32_t fid, newfid;
1713    V9fsString *wnames = NULL;
1714    V9fsFidState *fidp;
1715    V9fsFidState *newfidp = NULL;
1716    V9fsPDU *pdu = opaque;
1717    V9fsState *s = pdu->s;
1718    V9fsQID qid;
1719
1720    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1721    if (err < 0) {
1722        pdu_complete(pdu, err);
1723        return ;
1724    }
1725    offset += err;
1726
1727    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1728
1729    if (nwnames > P9_MAXWELEM) {
1730        err = -EINVAL;
1731        goto out_nofid;
1732    }
1733    if (nwnames) {
1734        wnames = g_new0(V9fsString, nwnames);
1735        qids   = g_new0(V9fsQID, nwnames);
1736        stbufs = g_new0(struct stat, nwnames);
1737        pathes = g_new0(V9fsPath, nwnames);
1738        for (i = 0; i < nwnames; i++) {
1739            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1740            if (err < 0) {
1741                goto out_nofid;
1742            }
1743            if (name_is_illegal(wnames[i].data)) {
1744                err = -ENOENT;
1745                goto out_nofid;
1746            }
1747            offset += err;
1748        }
1749    }
1750    fidp = get_fid(pdu, fid);
1751    if (fidp == NULL) {
1752        err = -ENOENT;
1753        goto out_nofid;
1754    }
1755
1756    v9fs_path_init(&dpath);
1757    v9fs_path_init(&path);
1758    /*
1759     * Both dpath and path initially point to fidp.
1760     * Needed to handle request with nwnames == 0
1761     */
1762    v9fs_path_copy(&dpath, &fidp->path);
1763    v9fs_path_copy(&path, &fidp->path);
1764
1765    /*
1766     * To keep latency (i.e. overall execution time for processing this
1767     * Twalk client request) as small as possible, run all the required fs
1768     * driver code altogether inside the following block.
1769     */
1770    v9fs_co_run_in_worker({
1771        if (v9fs_request_cancelled(pdu)) {
1772            err = -EINTR;
1773            break;
1774        }
1775        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
1776        if (err < 0) {
1777            err = -errno;
1778            break;
1779        }
1780        stbuf = fidst;
1781        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1782            if (v9fs_request_cancelled(pdu)) {
1783                err = -EINTR;
1784                break;
1785            }
1786            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
1787                strcmp("..", wnames[name_idx].data))
1788            {
1789                err = s->ops->name_to_path(&s->ctx, &dpath,
1790                                        wnames[name_idx].data, &path);
1791                if (err < 0) {
1792                    err = -errno;
1793                    break;
1794                }
1795                if (v9fs_request_cancelled(pdu)) {
1796                    err = -EINTR;
1797                    break;
1798                }
1799                err = s->ops->lstat(&s->ctx, &path, &stbuf);
1800                if (err < 0) {
1801                    err = -errno;
1802                    break;
1803                }
1804                stbufs[name_idx] = stbuf;
1805                v9fs_path_copy(&dpath, &path);
1806                v9fs_path_copy(&pathes[name_idx], &path);
1807            }
1808        }
1809    });
1810    /*
1811     * Handle all the rest of this Twalk request on main thread ...
1812     */
1813    if (err < 0) {
1814        goto out;
1815    }
1816
1817    err = stat_to_qid(pdu, &fidst, &qid);
1818    if (err < 0) {
1819        goto out;
1820    }
1821    stbuf = fidst;
1822
1823    /* reset dpath and path */
1824    v9fs_path_copy(&dpath, &fidp->path);
1825    v9fs_path_copy(&path, &fidp->path);
1826
1827    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1828        if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
1829            strcmp("..", wnames[name_idx].data))
1830        {
1831            stbuf = stbufs[name_idx];
1832            err = stat_to_qid(pdu, &stbuf, &qid);
1833            if (err < 0) {
1834                goto out;
1835            }
1836            v9fs_path_copy(&path, &pathes[name_idx]);
1837            v9fs_path_copy(&dpath, &path);
1838        }
1839        memcpy(&qids[name_idx], &qid, sizeof(qid));
1840    }
1841    if (fid == newfid) {
1842        if (fidp->fid_type != P9_FID_NONE) {
1843            err = -EINVAL;
1844            goto out;
1845        }
1846        v9fs_path_write_lock(s);
1847        v9fs_path_copy(&fidp->path, &path);
1848        v9fs_path_unlock(s);
1849    } else {
1850        newfidp = alloc_fid(s, newfid);
1851        if (newfidp == NULL) {
1852            err = -EINVAL;
1853            goto out;
1854        }
1855        newfidp->uid = fidp->uid;
1856        v9fs_path_copy(&newfidp->path, &path);
1857    }
1858    err = v9fs_walk_marshal(pdu, nwnames, qids);
1859    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1860out:
1861    put_fid(pdu, fidp);
1862    if (newfidp) {
1863        put_fid(pdu, newfidp);
1864    }
1865    v9fs_path_free(&dpath);
1866    v9fs_path_free(&path);
1867out_nofid:
1868    pdu_complete(pdu, err);
1869    if (nwnames && nwnames <= P9_MAXWELEM) {
1870        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1871            v9fs_string_free(&wnames[name_idx]);
1872            v9fs_path_free(&pathes[name_idx]);
1873        }
1874        g_free(wnames);
1875        g_free(qids);
1876        g_free(stbufs);
1877        g_free(pathes);
1878    }
1879}
1880
1881static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1882{
1883    struct statfs stbuf;
1884    int32_t iounit = 0;
1885    V9fsState *s = pdu->s;
1886
1887    /*
1888     * iounit should be multiples of f_bsize (host filesystem block size
1889     * and as well as less than (client msize - P9_IOHDRSZ))
1890     */
1891    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1892        if (stbuf.f_bsize) {
1893            iounit = stbuf.f_bsize;
1894            iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1895        }
1896    }
1897    if (!iounit) {
1898        iounit = s->msize - P9_IOHDRSZ;
1899    }
1900    return iounit;
1901}
1902
1903static void coroutine_fn v9fs_open(void *opaque)
1904{
1905    int flags;
1906    int32_t fid;
1907    int32_t mode;
1908    V9fsQID qid;
1909    int iounit = 0;
1910    ssize_t err = 0;
1911    size_t offset = 7;
1912    struct stat stbuf;
1913    V9fsFidState *fidp;
1914    V9fsPDU *pdu = opaque;
1915    V9fsState *s = pdu->s;
1916
1917    if (s->proto_version == V9FS_PROTO_2000L) {
1918        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1919    } else {
1920        uint8_t modebyte;
1921        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1922        mode = modebyte;
1923    }
1924    if (err < 0) {
1925        goto out_nofid;
1926    }
1927    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1928
1929    fidp = get_fid(pdu, fid);
1930    if (fidp == NULL) {
1931        err = -ENOENT;
1932        goto out_nofid;
1933    }
1934    if (fidp->fid_type != P9_FID_NONE) {
1935        err = -EINVAL;
1936        goto out;
1937    }
1938
1939    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1940    if (err < 0) {
1941        goto out;
1942    }
1943    err = stat_to_qid(pdu, &stbuf, &qid);
1944    if (err < 0) {
1945        goto out;
1946    }
1947    if (S_ISDIR(stbuf.st_mode)) {
1948        err = v9fs_co_opendir(pdu, fidp);
1949        if (err < 0) {
1950            goto out;
1951        }
1952        fidp->fid_type = P9_FID_DIR;
1953        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1954        if (err < 0) {
1955            goto out;
1956        }
1957        err += offset;
1958    } else {
1959        if (s->proto_version == V9FS_PROTO_2000L) {
1960            flags = get_dotl_openflags(s, mode);
1961        } else {
1962            flags = omode_to_uflags(mode);
1963        }
1964        if (is_ro_export(&s->ctx)) {
1965            if (mode & O_WRONLY || mode & O_RDWR ||
1966                mode & O_APPEND || mode & O_TRUNC) {
1967                err = -EROFS;
1968                goto out;
1969            }
1970        }
1971        err = v9fs_co_open(pdu, fidp, flags);
1972        if (err < 0) {
1973            goto out;
1974        }
1975        fidp->fid_type = P9_FID_FILE;
1976        fidp->open_flags = flags;
1977        if (flags & O_EXCL) {
1978            /*
1979             * We let the host file system do O_EXCL check
1980             * We should not reclaim such fd
1981             */
1982            fidp->flags |= FID_NON_RECLAIMABLE;
1983        }
1984        iounit = get_iounit(pdu, &fidp->path);
1985        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1986        if (err < 0) {
1987            goto out;
1988        }
1989        err += offset;
1990    }
1991    trace_v9fs_open_return(pdu->tag, pdu->id,
1992                           qid.type, qid.version, qid.path, iounit);
1993out:
1994    put_fid(pdu, fidp);
1995out_nofid:
1996    pdu_complete(pdu, err);
1997}
1998
1999static void coroutine_fn v9fs_lcreate(void *opaque)
2000{
2001    int32_t dfid, flags, mode;
2002    gid_t gid;
2003    ssize_t err = 0;
2004    ssize_t offset = 7;
2005    V9fsString name;
2006    V9fsFidState *fidp;
2007    struct stat stbuf;
2008    V9fsQID qid;
2009    int32_t iounit;
2010    V9fsPDU *pdu = opaque;
2011
2012    v9fs_string_init(&name);
2013    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
2014                        &name, &flags, &mode, &gid);
2015    if (err < 0) {
2016        goto out_nofid;
2017    }
2018    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
2019
2020    if (name_is_illegal(name.data)) {
2021        err = -ENOENT;
2022        goto out_nofid;
2023    }
2024
2025    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2026        err = -EEXIST;
2027        goto out_nofid;
2028    }
2029
2030    fidp = get_fid(pdu, dfid);
2031    if (fidp == NULL) {
2032        err = -ENOENT;
2033        goto out_nofid;
2034    }
2035    if (fidp->fid_type != P9_FID_NONE) {
2036        err = -EINVAL;
2037        goto out;
2038    }
2039
2040    flags = get_dotl_openflags(pdu->s, flags);
2041    err = v9fs_co_open2(pdu, fidp, &name, gid,
2042                        flags | O_CREAT, mode, &stbuf);
2043    if (err < 0) {
2044        goto out;
2045    }
2046    fidp->fid_type = P9_FID_FILE;
2047    fidp->open_flags = flags;
2048    if (flags & O_EXCL) {
2049        /*
2050         * We let the host file system do O_EXCL check
2051         * We should not reclaim such fd
2052         */
2053        fidp->flags |= FID_NON_RECLAIMABLE;
2054    }
2055    iounit =  get_iounit(pdu, &fidp->path);
2056    err = stat_to_qid(pdu, &stbuf, &qid);
2057    if (err < 0) {
2058        goto out;
2059    }
2060    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2061    if (err < 0) {
2062        goto out;
2063    }
2064    err += offset;
2065    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2066                              qid.type, qid.version, qid.path, iounit);
2067out:
2068    put_fid(pdu, fidp);
2069out_nofid:
2070    pdu_complete(pdu, err);
2071    v9fs_string_free(&name);
2072}
2073
2074static void coroutine_fn v9fs_fsync(void *opaque)
2075{
2076    int err;
2077    int32_t fid;
2078    int datasync;
2079    size_t offset = 7;
2080    V9fsFidState *fidp;
2081    V9fsPDU *pdu = opaque;
2082
2083    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2084    if (err < 0) {
2085        goto out_nofid;
2086    }
2087    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2088
2089    fidp = get_fid(pdu, fid);
2090    if (fidp == NULL) {
2091        err = -ENOENT;
2092        goto out_nofid;
2093    }
2094    err = v9fs_co_fsync(pdu, fidp, datasync);
2095    if (!err) {
2096        err = offset;
2097    }
2098    put_fid(pdu, fidp);
2099out_nofid:
2100    pdu_complete(pdu, err);
2101}
2102
2103static void coroutine_fn v9fs_clunk(void *opaque)
2104{
2105    int err;
2106    int32_t fid;
2107    size_t offset = 7;
2108    V9fsFidState *fidp;
2109    V9fsPDU *pdu = opaque;
2110    V9fsState *s = pdu->s;
2111
2112    err = pdu_unmarshal(pdu, offset, "d", &fid);
2113    if (err < 0) {
2114        goto out_nofid;
2115    }
2116    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2117
2118    fidp = clunk_fid(s, fid);
2119    if (fidp == NULL) {
2120        err = -ENOENT;
2121        goto out_nofid;
2122    }
2123    /*
2124     * Bump the ref so that put_fid will
2125     * free the fid.
2126     */
2127    fidp->ref++;
2128    err = put_fid(pdu, fidp);
2129    if (!err) {
2130        err = offset;
2131    }
2132out_nofid:
2133    pdu_complete(pdu, err);
2134}
2135
2136/*
2137 * Create a QEMUIOVector for a sub-region of PDU iovecs
2138 *
2139 * @qiov:       uninitialized QEMUIOVector
2140 * @skip:       number of bytes to skip from beginning of PDU
2141 * @size:       number of bytes to include
2142 * @is_write:   true - write, false - read
2143 *
2144 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2145 * with qemu_iovec_destroy().
2146 */
2147static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2148                                    size_t skip, size_t size,
2149                                    bool is_write)
2150{
2151    QEMUIOVector elem;
2152    struct iovec *iov;
2153    unsigned int niov;
2154
2155    if (is_write) {
2156        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2157    } else {
2158        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2159    }
2160
2161    qemu_iovec_init_external(&elem, iov, niov);
2162    qemu_iovec_init(qiov, niov);
2163    qemu_iovec_concat(qiov, &elem, skip, size);
2164}
2165
2166static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2167                           uint64_t off, uint32_t max_count)
2168{
2169    ssize_t err;
2170    size_t offset = 7;
2171    uint64_t read_count;
2172    QEMUIOVector qiov_full;
2173
2174    if (fidp->fs.xattr.len < off) {
2175        read_count = 0;
2176    } else {
2177        read_count = fidp->fs.xattr.len - off;
2178    }
2179    if (read_count > max_count) {
2180        read_count = max_count;
2181    }
2182    err = pdu_marshal(pdu, offset, "d", read_count);
2183    if (err < 0) {
2184        return err;
2185    }
2186    offset += err;
2187
2188    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2189    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2190                    ((char *)fidp->fs.xattr.value) + off,
2191                    read_count);
2192    qemu_iovec_destroy(&qiov_full);
2193    if (err < 0) {
2194        return err;
2195    }
2196    offset += err;
2197    return offset;
2198}
2199
2200static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2201                                                  V9fsFidState *fidp,
2202                                                  uint32_t max_count)
2203{
2204    V9fsPath path;
2205    V9fsStat v9stat;
2206    int len, err = 0;
2207    int32_t count = 0;
2208    struct stat stbuf;
2209    off_t saved_dir_pos;
2210    struct dirent *dent;
2211
2212    /* save the directory position */
2213    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2214    if (saved_dir_pos < 0) {
2215        return saved_dir_pos;
2216    }
2217
2218    while (1) {
2219        v9fs_path_init(&path);
2220
2221        v9fs_readdir_lock(&fidp->fs.dir);
2222
2223        err = v9fs_co_readdir(pdu, fidp, &dent);
2224        if (err || !dent) {
2225            break;
2226        }
2227        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2228        if (err < 0) {
2229            break;
2230        }
2231        err = v9fs_co_lstat(pdu, &path, &stbuf);
2232        if (err < 0) {
2233            break;
2234        }
2235        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2236        if (err < 0) {
2237            break;
2238        }
2239        if ((count + v9stat.size + 2) > max_count) {
2240            v9fs_readdir_unlock(&fidp->fs.dir);
2241
2242            /* Ran out of buffer. Set dir back to old position and return */
2243            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2244            v9fs_stat_free(&v9stat);
2245            v9fs_path_free(&path);
2246            return count;
2247        }
2248
2249        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2250        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2251
2252        v9fs_readdir_unlock(&fidp->fs.dir);
2253
2254        if (len < 0) {
2255            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2256            v9fs_stat_free(&v9stat);
2257            v9fs_path_free(&path);
2258            return len;
2259        }
2260        count += len;
2261        v9fs_stat_free(&v9stat);
2262        v9fs_path_free(&path);
2263        saved_dir_pos = dent->d_off;
2264    }
2265
2266    v9fs_readdir_unlock(&fidp->fs.dir);
2267
2268    v9fs_path_free(&path);
2269    if (err < 0) {
2270        return err;
2271    }
2272    return count;
2273}
2274
2275static void coroutine_fn v9fs_read(void *opaque)
2276{
2277    int32_t fid;
2278    uint64_t off;
2279    ssize_t err = 0;
2280    int32_t count = 0;
2281    size_t offset = 7;
2282    uint32_t max_count;
2283    V9fsFidState *fidp;
2284    V9fsPDU *pdu = opaque;
2285    V9fsState *s = pdu->s;
2286
2287    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2288    if (err < 0) {
2289        goto out_nofid;
2290    }
2291    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2292
2293    fidp = get_fid(pdu, fid);
2294    if (fidp == NULL) {
2295        err = -EINVAL;
2296        goto out_nofid;
2297    }
2298    if (fidp->fid_type == P9_FID_DIR) {
2299        if (s->proto_version != V9FS_PROTO_2000U) {
2300            warn_report_once(
2301                "9p: bad client: T_read request on directory only expected "
2302                "with 9P2000.u protocol version"
2303            );
2304            err = -EOPNOTSUPP;
2305            goto out;
2306        }
2307        if (off == 0) {
2308            v9fs_co_rewinddir(pdu, fidp);
2309        }
2310        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2311        if (count < 0) {
2312            err = count;
2313            goto out;
2314        }
2315        err = pdu_marshal(pdu, offset, "d", count);
2316        if (err < 0) {
2317            goto out;
2318        }
2319        err += offset + count;
2320    } else if (fidp->fid_type == P9_FID_FILE) {
2321        QEMUIOVector qiov_full;
2322        QEMUIOVector qiov;
2323        int32_t len;
2324
2325        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2326        qemu_iovec_init(&qiov, qiov_full.niov);
2327        do {
2328            qemu_iovec_reset(&qiov);
2329            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2330            if (0) {
2331                print_sg(qiov.iov, qiov.niov);
2332            }
2333            /* Loop in case of EINTR */
2334            do {
2335                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2336                if (len >= 0) {
2337                    off   += len;
2338                    count += len;
2339                }
2340            } while (len == -EINTR && !pdu->cancelled);
2341            if (len < 0) {
2342                /* IO error return the error */
2343                err = len;
2344                goto out_free_iovec;
2345            }
2346        } while (count < max_count && len > 0);
2347        err = pdu_marshal(pdu, offset, "d", count);
2348        if (err < 0) {
2349            goto out_free_iovec;
2350        }
2351        err += offset + count;
2352out_free_iovec:
2353        qemu_iovec_destroy(&qiov);
2354        qemu_iovec_destroy(&qiov_full);
2355    } else if (fidp->fid_type == P9_FID_XATTR) {
2356        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2357    } else {
2358        err = -EINVAL;
2359    }
2360    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2361out:
2362    put_fid(pdu, fidp);
2363out_nofid:
2364    pdu_complete(pdu, err);
2365}
2366
2367/**
2368 * Returns size required in Rreaddir response for the passed dirent @p name.
2369 *
2370 * @param name - directory entry's name (i.e. file name, directory name)
2371 * @returns required size in bytes
2372 */
2373size_t v9fs_readdir_response_size(V9fsString *name)
2374{
2375    /*
2376     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2377     * size of type (1) + size of name.size (2) + strlen(name.data)
2378     */
2379    return 24 + v9fs_string_size(name);
2380}
2381
2382static void v9fs_free_dirents(struct V9fsDirEnt *e)
2383{
2384    struct V9fsDirEnt *next = NULL;
2385
2386    for (; e; e = next) {
2387        next = e->next;
2388        g_free(e->dent);
2389        g_free(e->st);
2390        g_free(e);
2391    }
2392}
2393
2394static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2395                                        off_t offset, int32_t max_count)
2396{
2397    size_t size;
2398    V9fsQID qid;
2399    V9fsString name;
2400    int len, err = 0;
2401    int32_t count = 0;
2402    struct dirent *dent;
2403    struct stat *st;
2404    struct V9fsDirEnt *entries = NULL;
2405
2406    /*
2407     * inode remapping requires the device id, which in turn might be
2408     * different for different directory entries, so if inode remapping is
2409     * enabled we have to make a full stat for each directory entry
2410     */
2411    const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
2412
2413    /*
2414     * Fetch all required directory entries altogether on a background IO
2415     * thread from fs driver. We don't want to do that for each entry
2416     * individually, because hopping between threads (this main IO thread
2417     * and background IO driver thread) would sum up to huge latencies.
2418     */
2419    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
2420                                 dostat);
2421    if (count < 0) {
2422        err = count;
2423        count = 0;
2424        goto out;
2425    }
2426    count = 0;
2427
2428    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
2429        dent = e->dent;
2430
2431        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2432            st = e->st;
2433            /* e->st should never be NULL, but just to be sure */
2434            if (!st) {
2435                err = -1;
2436                break;
2437            }
2438
2439            /* remap inode */
2440            err = stat_to_qid(pdu, st, &qid);
2441            if (err < 0) {
2442                break;
2443            }
2444        } else {
2445            /*
2446             * Fill up just the path field of qid because the client uses
2447             * only that. To fill the entire qid structure we will have
2448             * to stat each dirent found, which is expensive. For the
2449             * latter reason we don't call stat_to_qid() here. Only drawback
2450             * is that no multi-device export detection of stat_to_qid()
2451             * would be done and provided as error to the user here. But
2452             * user would get that error anyway when accessing those
2453             * files/dirs through other ways.
2454             */
2455            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2456            memcpy(&qid.path, &dent->d_ino, size);
2457            /* Fill the other fields with dummy values */
2458            qid.type = 0;
2459            qid.version = 0;
2460        }
2461
2462        v9fs_string_init(&name);
2463        v9fs_string_sprintf(&name, "%s", dent->d_name);
2464
2465        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2466        len = pdu_marshal(pdu, 11 + count, "Qqbs",
2467                          &qid, dent->d_off,
2468                          dent->d_type, &name);
2469
2470        v9fs_string_free(&name);
2471
2472        if (len < 0) {
2473            err = len;
2474            break;
2475        }
2476
2477        count += len;
2478    }
2479
2480out:
2481    v9fs_free_dirents(entries);
2482    if (err < 0) {
2483        return err;
2484    }
2485    return count;
2486}
2487
2488static void coroutine_fn v9fs_readdir(void *opaque)
2489{
2490    int32_t fid;
2491    V9fsFidState *fidp;
2492    ssize_t retval = 0;
2493    size_t offset = 7;
2494    uint64_t initial_offset;
2495    int32_t count;
2496    uint32_t max_count;
2497    V9fsPDU *pdu = opaque;
2498    V9fsState *s = pdu->s;
2499
2500    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2501                           &initial_offset, &max_count);
2502    if (retval < 0) {
2503        goto out_nofid;
2504    }
2505    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2506
2507    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2508    if (max_count > s->msize - 11) {
2509        max_count = s->msize - 11;
2510        warn_report_once(
2511            "9p: bad client: T_readdir with count > msize - 11"
2512        );
2513    }
2514
2515    fidp = get_fid(pdu, fid);
2516    if (fidp == NULL) {
2517        retval = -EINVAL;
2518        goto out_nofid;
2519    }
2520    if (!fidp->fs.dir.stream) {
2521        retval = -EINVAL;
2522        goto out;
2523    }
2524    if (s->proto_version != V9FS_PROTO_2000L) {
2525        warn_report_once(
2526            "9p: bad client: T_readdir request only expected with 9P2000.L "
2527            "protocol version"
2528        );
2529        retval = -EOPNOTSUPP;
2530        goto out;
2531    }
2532    count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
2533    if (count < 0) {
2534        retval = count;
2535        goto out;
2536    }
2537    retval = pdu_marshal(pdu, offset, "d", count);
2538    if (retval < 0) {
2539        goto out;
2540    }
2541    retval += count + offset;
2542    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2543out:
2544    put_fid(pdu, fidp);
2545out_nofid:
2546    pdu_complete(pdu, retval);
2547}
2548
2549static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2550                            uint64_t off, uint32_t count,
2551                            struct iovec *sg, int cnt)
2552{
2553    int i, to_copy;
2554    ssize_t err = 0;
2555    uint64_t write_count;
2556    size_t offset = 7;
2557
2558
2559    if (fidp->fs.xattr.len < off) {
2560        return -ENOSPC;
2561    }
2562    write_count = fidp->fs.xattr.len - off;
2563    if (write_count > count) {
2564        write_count = count;
2565    }
2566    err = pdu_marshal(pdu, offset, "d", write_count);
2567    if (err < 0) {
2568        return err;
2569    }
2570    err += offset;
2571    fidp->fs.xattr.copied_len += write_count;
2572    /*
2573     * Now copy the content from sg list
2574     */
2575    for (i = 0; i < cnt; i++) {
2576        if (write_count > sg[i].iov_len) {
2577            to_copy = sg[i].iov_len;
2578        } else {
2579            to_copy = write_count;
2580        }
2581        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2582        /* updating vs->off since we are not using below */
2583        off += to_copy;
2584        write_count -= to_copy;
2585    }
2586
2587    return err;
2588}
2589
2590static void coroutine_fn v9fs_write(void *opaque)
2591{
2592    ssize_t err;
2593    int32_t fid;
2594    uint64_t off;
2595    uint32_t count;
2596    int32_t len = 0;
2597    int32_t total = 0;
2598    size_t offset = 7;
2599    V9fsFidState *fidp;
2600    V9fsPDU *pdu = opaque;
2601    V9fsState *s = pdu->s;
2602    QEMUIOVector qiov_full;
2603    QEMUIOVector qiov;
2604
2605    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2606    if (err < 0) {
2607        pdu_complete(pdu, err);
2608        return;
2609    }
2610    offset += err;
2611    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2612    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2613
2614    fidp = get_fid(pdu, fid);
2615    if (fidp == NULL) {
2616        err = -EINVAL;
2617        goto out_nofid;
2618    }
2619    if (fidp->fid_type == P9_FID_FILE) {
2620        if (fidp->fs.fd == -1) {
2621            err = -EINVAL;
2622            goto out;
2623        }
2624    } else if (fidp->fid_type == P9_FID_XATTR) {
2625        /*
2626         * setxattr operation
2627         */
2628        err = v9fs_xattr_write(s, pdu, fidp, off, count,
2629                               qiov_full.iov, qiov_full.niov);
2630        goto out;
2631    } else {
2632        err = -EINVAL;
2633        goto out;
2634    }
2635    qemu_iovec_init(&qiov, qiov_full.niov);
2636    do {
2637        qemu_iovec_reset(&qiov);
2638        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2639        if (0) {
2640            print_sg(qiov.iov, qiov.niov);
2641        }
2642        /* Loop in case of EINTR */
2643        do {
2644            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2645            if (len >= 0) {
2646                off   += len;
2647                total += len;
2648            }
2649        } while (len == -EINTR && !pdu->cancelled);
2650        if (len < 0) {
2651            /* IO error return the error */
2652            err = len;
2653            goto out_qiov;
2654        }
2655    } while (total < count && len > 0);
2656
2657    offset = 7;
2658    err = pdu_marshal(pdu, offset, "d", total);
2659    if (err < 0) {
2660        goto out_qiov;
2661    }
2662    err += offset;
2663    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2664out_qiov:
2665    qemu_iovec_destroy(&qiov);
2666out:
2667    put_fid(pdu, fidp);
2668out_nofid:
2669    qemu_iovec_destroy(&qiov_full);
2670    pdu_complete(pdu, err);
2671}
2672
2673static void coroutine_fn v9fs_create(void *opaque)
2674{
2675    int32_t fid;
2676    int err = 0;
2677    size_t offset = 7;
2678    V9fsFidState *fidp;
2679    V9fsQID qid;
2680    int32_t perm;
2681    int8_t mode;
2682    V9fsPath path;
2683    struct stat stbuf;
2684    V9fsString name;
2685    V9fsString extension;
2686    int iounit;
2687    V9fsPDU *pdu = opaque;
2688    V9fsState *s = pdu->s;
2689
2690    v9fs_path_init(&path);
2691    v9fs_string_init(&name);
2692    v9fs_string_init(&extension);
2693    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2694                        &perm, &mode, &extension);
2695    if (err < 0) {
2696        goto out_nofid;
2697    }
2698    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2699
2700    if (name_is_illegal(name.data)) {
2701        err = -ENOENT;
2702        goto out_nofid;
2703    }
2704
2705    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2706        err = -EEXIST;
2707        goto out_nofid;
2708    }
2709
2710    fidp = get_fid(pdu, fid);
2711    if (fidp == NULL) {
2712        err = -EINVAL;
2713        goto out_nofid;
2714    }
2715    if (fidp->fid_type != P9_FID_NONE) {
2716        err = -EINVAL;
2717        goto out;
2718    }
2719    if (perm & P9_STAT_MODE_DIR) {
2720        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2721                            fidp->uid, -1, &stbuf);
2722        if (err < 0) {
2723            goto out;
2724        }
2725        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2726        if (err < 0) {
2727            goto out;
2728        }
2729        v9fs_path_write_lock(s);
2730        v9fs_path_copy(&fidp->path, &path);
2731        v9fs_path_unlock(s);
2732        err = v9fs_co_opendir(pdu, fidp);
2733        if (err < 0) {
2734            goto out;
2735        }
2736        fidp->fid_type = P9_FID_DIR;
2737    } else if (perm & P9_STAT_MODE_SYMLINK) {
2738        err = v9fs_co_symlink(pdu, fidp, &name,
2739                              extension.data, -1 , &stbuf);
2740        if (err < 0) {
2741            goto out;
2742        }
2743        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2744        if (err < 0) {
2745            goto out;
2746        }
2747        v9fs_path_write_lock(s);
2748        v9fs_path_copy(&fidp->path, &path);
2749        v9fs_path_unlock(s);
2750    } else if (perm & P9_STAT_MODE_LINK) {
2751        int32_t ofid = atoi(extension.data);
2752        V9fsFidState *ofidp = get_fid(pdu, ofid);
2753        if (ofidp == NULL) {
2754            err = -EINVAL;
2755            goto out;
2756        }
2757        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2758        put_fid(pdu, ofidp);
2759        if (err < 0) {
2760            goto out;
2761        }
2762        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2763        if (err < 0) {
2764            fidp->fid_type = P9_FID_NONE;
2765            goto out;
2766        }
2767        v9fs_path_write_lock(s);
2768        v9fs_path_copy(&fidp->path, &path);
2769        v9fs_path_unlock(s);
2770        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2771        if (err < 0) {
2772            fidp->fid_type = P9_FID_NONE;
2773            goto out;
2774        }
2775    } else if (perm & P9_STAT_MODE_DEVICE) {
2776        char ctype;
2777        uint32_t major, minor;
2778        mode_t nmode = 0;
2779
2780        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2781            err = -errno;
2782            goto out;
2783        }
2784
2785        switch (ctype) {
2786        case 'c':
2787            nmode = S_IFCHR;
2788            break;
2789        case 'b':
2790            nmode = S_IFBLK;
2791            break;
2792        default:
2793            err = -EIO;
2794            goto out;
2795        }
2796
2797        nmode |= perm & 0777;
2798        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2799                            makedev(major, minor), nmode, &stbuf);
2800        if (err < 0) {
2801            goto out;
2802        }
2803        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2804        if (err < 0) {
2805            goto out;
2806        }
2807        v9fs_path_write_lock(s);
2808        v9fs_path_copy(&fidp->path, &path);
2809        v9fs_path_unlock(s);
2810    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2811        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2812                            0, S_IFIFO | (perm & 0777), &stbuf);
2813        if (err < 0) {
2814            goto out;
2815        }
2816        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2817        if (err < 0) {
2818            goto out;
2819        }
2820        v9fs_path_write_lock(s);
2821        v9fs_path_copy(&fidp->path, &path);
2822        v9fs_path_unlock(s);
2823    } else if (perm & P9_STAT_MODE_SOCKET) {
2824        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2825                            0, S_IFSOCK | (perm & 0777), &stbuf);
2826        if (err < 0) {
2827            goto out;
2828        }
2829        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2830        if (err < 0) {
2831            goto out;
2832        }
2833        v9fs_path_write_lock(s);
2834        v9fs_path_copy(&fidp->path, &path);
2835        v9fs_path_unlock(s);
2836    } else {
2837        err = v9fs_co_open2(pdu, fidp, &name, -1,
2838                            omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
2839        if (err < 0) {
2840            goto out;
2841        }
2842        fidp->fid_type = P9_FID_FILE;
2843        fidp->open_flags = omode_to_uflags(mode);
2844        if (fidp->open_flags & O_EXCL) {
2845            /*
2846             * We let the host file system do O_EXCL check
2847             * We should not reclaim such fd
2848             */
2849            fidp->flags |= FID_NON_RECLAIMABLE;
2850        }
2851    }
2852    iounit = get_iounit(pdu, &fidp->path);
2853    err = stat_to_qid(pdu, &stbuf, &qid);
2854    if (err < 0) {
2855        goto out;
2856    }
2857    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2858    if (err < 0) {
2859        goto out;
2860    }
2861    err += offset;
2862    trace_v9fs_create_return(pdu->tag, pdu->id,
2863                             qid.type, qid.version, qid.path, iounit);
2864out:
2865    put_fid(pdu, fidp);
2866out_nofid:
2867   pdu_complete(pdu, err);
2868   v9fs_string_free(&name);
2869   v9fs_string_free(&extension);
2870   v9fs_path_free(&path);
2871}
2872
2873static void coroutine_fn v9fs_symlink(void *opaque)
2874{
2875    V9fsPDU *pdu = opaque;
2876    V9fsString name;
2877    V9fsString symname;
2878    V9fsFidState *dfidp;
2879    V9fsQID qid;
2880    struct stat stbuf;
2881    int32_t dfid;
2882    int err = 0;
2883    gid_t gid;
2884    size_t offset = 7;
2885
2886    v9fs_string_init(&name);
2887    v9fs_string_init(&symname);
2888    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2889    if (err < 0) {
2890        goto out_nofid;
2891    }
2892    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2893
2894    if (name_is_illegal(name.data)) {
2895        err = -ENOENT;
2896        goto out_nofid;
2897    }
2898
2899    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2900        err = -EEXIST;
2901        goto out_nofid;
2902    }
2903
2904    dfidp = get_fid(pdu, dfid);
2905    if (dfidp == NULL) {
2906        err = -EINVAL;
2907        goto out_nofid;
2908    }
2909    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2910    if (err < 0) {
2911        goto out;
2912    }
2913    err = stat_to_qid(pdu, &stbuf, &qid);
2914    if (err < 0) {
2915        goto out;
2916    }
2917    err =  pdu_marshal(pdu, offset, "Q", &qid);
2918    if (err < 0) {
2919        goto out;
2920    }
2921    err += offset;
2922    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2923                              qid.type, qid.version, qid.path);
2924out:
2925    put_fid(pdu, dfidp);
2926out_nofid:
2927    pdu_complete(pdu, err);
2928    v9fs_string_free(&name);
2929    v9fs_string_free(&symname);
2930}
2931
2932static void coroutine_fn v9fs_flush(void *opaque)
2933{
2934    ssize_t err;
2935    int16_t tag;
2936    size_t offset = 7;
2937    V9fsPDU *cancel_pdu = NULL;
2938    V9fsPDU *pdu = opaque;
2939    V9fsState *s = pdu->s;
2940
2941    err = pdu_unmarshal(pdu, offset, "w", &tag);
2942    if (err < 0) {
2943        pdu_complete(pdu, err);
2944        return;
2945    }
2946    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2947
2948    if (pdu->tag == tag) {
2949        warn_report("the guest sent a self-referencing 9P flush request");
2950    } else {
2951        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2952            if (cancel_pdu->tag == tag) {
2953                break;
2954            }
2955        }
2956    }
2957    if (cancel_pdu) {
2958        cancel_pdu->cancelled = 1;
2959        /*
2960         * Wait for pdu to complete.
2961         */
2962        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2963        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2964            cancel_pdu->cancelled = 0;
2965            pdu_free(cancel_pdu);
2966        }
2967    }
2968    pdu_complete(pdu, 7);
2969}
2970
2971static void coroutine_fn v9fs_link(void *opaque)
2972{
2973    V9fsPDU *pdu = opaque;
2974    int32_t dfid, oldfid;
2975    V9fsFidState *dfidp, *oldfidp;
2976    V9fsString name;
2977    size_t offset = 7;
2978    int err = 0;
2979
2980    v9fs_string_init(&name);
2981    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2982    if (err < 0) {
2983        goto out_nofid;
2984    }
2985    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2986
2987    if (name_is_illegal(name.data)) {
2988        err = -ENOENT;
2989        goto out_nofid;
2990    }
2991
2992    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2993        err = -EEXIST;
2994        goto out_nofid;
2995    }
2996
2997    dfidp = get_fid(pdu, dfid);
2998    if (dfidp == NULL) {
2999        err = -ENOENT;
3000        goto out_nofid;
3001    }
3002
3003    oldfidp = get_fid(pdu, oldfid);
3004    if (oldfidp == NULL) {
3005        err = -ENOENT;
3006        goto out;
3007    }
3008    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
3009    if (!err) {
3010        err = offset;
3011    }
3012    put_fid(pdu, oldfidp);
3013out:
3014    put_fid(pdu, dfidp);
3015out_nofid:
3016    v9fs_string_free(&name);
3017    pdu_complete(pdu, err);
3018}
3019
3020/* Only works with path name based fid */
3021static void coroutine_fn v9fs_remove(void *opaque)
3022{
3023    int32_t fid;
3024    int err = 0;
3025    size_t offset = 7;
3026    V9fsFidState *fidp;
3027    V9fsPDU *pdu = opaque;
3028
3029    err = pdu_unmarshal(pdu, offset, "d", &fid);
3030    if (err < 0) {
3031        goto out_nofid;
3032    }
3033    trace_v9fs_remove(pdu->tag, pdu->id, fid);
3034
3035    fidp = get_fid(pdu, fid);
3036    if (fidp == NULL) {
3037        err = -EINVAL;
3038        goto out_nofid;
3039    }
3040    /* if fs driver is not path based, return EOPNOTSUPP */
3041    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3042        err = -EOPNOTSUPP;
3043        goto out_err;
3044    }
3045    /*
3046     * IF the file is unlinked, we cannot reopen
3047     * the file later. So don't reclaim fd
3048     */
3049    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
3050    if (err < 0) {
3051        goto out_err;
3052    }
3053    err = v9fs_co_remove(pdu, &fidp->path);
3054    if (!err) {
3055        err = offset;
3056    }
3057out_err:
3058    /* For TREMOVE we need to clunk the fid even on failed remove */
3059    clunk_fid(pdu->s, fidp->fid);
3060    put_fid(pdu, fidp);
3061out_nofid:
3062    pdu_complete(pdu, err);
3063}
3064
3065static void coroutine_fn v9fs_unlinkat(void *opaque)
3066{
3067    int err = 0;
3068    V9fsString name;
3069    int32_t dfid, flags, rflags = 0;
3070    size_t offset = 7;
3071    V9fsPath path;
3072    V9fsFidState *dfidp;
3073    V9fsPDU *pdu = opaque;
3074
3075    v9fs_string_init(&name);
3076    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3077    if (err < 0) {
3078        goto out_nofid;
3079    }
3080
3081    if (name_is_illegal(name.data)) {
3082        err = -ENOENT;
3083        goto out_nofid;
3084    }
3085
3086    if (!strcmp(".", name.data)) {
3087        err = -EINVAL;
3088        goto out_nofid;
3089    }
3090
3091    if (!strcmp("..", name.data)) {
3092        err = -ENOTEMPTY;
3093        goto out_nofid;
3094    }
3095
3096    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3097        err = -EINVAL;
3098        goto out_nofid;
3099    }
3100
3101    if (flags & P9_DOTL_AT_REMOVEDIR) {
3102        rflags |= AT_REMOVEDIR;
3103    }
3104
3105    dfidp = get_fid(pdu, dfid);
3106    if (dfidp == NULL) {
3107        err = -EINVAL;
3108        goto out_nofid;
3109    }
3110    /*
3111     * IF the file is unlinked, we cannot reopen
3112     * the file later. So don't reclaim fd
3113     */
3114    v9fs_path_init(&path);
3115    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3116    if (err < 0) {
3117        goto out_err;
3118    }
3119    err = v9fs_mark_fids_unreclaim(pdu, &path);
3120    if (err < 0) {
3121        goto out_err;
3122    }
3123    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3124    if (!err) {
3125        err = offset;
3126    }
3127out_err:
3128    put_fid(pdu, dfidp);
3129    v9fs_path_free(&path);
3130out_nofid:
3131    pdu_complete(pdu, err);
3132    v9fs_string_free(&name);
3133}
3134
3135
3136/* Only works with path name based fid */
3137static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3138                                             int32_t newdirfid,
3139                                             V9fsString *name)
3140{
3141    int err = 0;
3142    V9fsPath new_path;
3143    V9fsFidState *tfidp;
3144    V9fsState *s = pdu->s;
3145    V9fsFidState *dirfidp = NULL;
3146
3147    v9fs_path_init(&new_path);
3148    if (newdirfid != -1) {
3149        dirfidp = get_fid(pdu, newdirfid);
3150        if (dirfidp == NULL) {
3151            return -ENOENT;
3152        }
3153        if (fidp->fid_type != P9_FID_NONE) {
3154            err = -EINVAL;
3155            goto out;
3156        }
3157        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3158        if (err < 0) {
3159            goto out;
3160        }
3161    } else {
3162        char *dir_name = g_path_get_dirname(fidp->path.data);
3163        V9fsPath dir_path;
3164
3165        v9fs_path_init(&dir_path);
3166        v9fs_path_sprintf(&dir_path, "%s", dir_name);
3167        g_free(dir_name);
3168
3169        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3170        v9fs_path_free(&dir_path);
3171        if (err < 0) {
3172            goto out;
3173        }
3174    }
3175    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3176    if (err < 0) {
3177        goto out;
3178    }
3179    /*
3180     * Fixup fid's pointing to the old name to
3181     * start pointing to the new name
3182     */
3183    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3184        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3185            /* replace the name */
3186            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3187        }
3188    }
3189out:
3190    if (dirfidp) {
3191        put_fid(pdu, dirfidp);
3192    }
3193    v9fs_path_free(&new_path);
3194    return err;
3195}
3196
3197/* Only works with path name based fid */
3198static void coroutine_fn v9fs_rename(void *opaque)
3199{
3200    int32_t fid;
3201    ssize_t err = 0;
3202    size_t offset = 7;
3203    V9fsString name;
3204    int32_t newdirfid;
3205    V9fsFidState *fidp;
3206    V9fsPDU *pdu = opaque;
3207    V9fsState *s = pdu->s;
3208
3209    v9fs_string_init(&name);
3210    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3211    if (err < 0) {
3212        goto out_nofid;
3213    }
3214
3215    if (name_is_illegal(name.data)) {
3216        err = -ENOENT;
3217        goto out_nofid;
3218    }
3219
3220    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3221        err = -EISDIR;
3222        goto out_nofid;
3223    }
3224
3225    fidp = get_fid(pdu, fid);
3226    if (fidp == NULL) {
3227        err = -ENOENT;
3228        goto out_nofid;
3229    }
3230    if (fidp->fid_type != P9_FID_NONE) {
3231        err = -EINVAL;
3232        goto out;
3233    }
3234    /* if fs driver is not path based, return EOPNOTSUPP */
3235    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3236        err = -EOPNOTSUPP;
3237        goto out;
3238    }
3239    v9fs_path_write_lock(s);
3240    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3241    v9fs_path_unlock(s);
3242    if (!err) {
3243        err = offset;
3244    }
3245out:
3246    put_fid(pdu, fidp);
3247out_nofid:
3248    pdu_complete(pdu, err);
3249    v9fs_string_free(&name);
3250}
3251
3252static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3253                                           V9fsString *old_name,
3254                                           V9fsPath *newdir,
3255                                           V9fsString *new_name)
3256{
3257    V9fsFidState *tfidp;
3258    V9fsPath oldpath, newpath;
3259    V9fsState *s = pdu->s;
3260    int err;
3261
3262    v9fs_path_init(&oldpath);
3263    v9fs_path_init(&newpath);
3264    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3265    if (err < 0) {
3266        goto out;
3267    }
3268    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3269    if (err < 0) {
3270        goto out;
3271    }
3272
3273    /*
3274     * Fixup fid's pointing to the old name to
3275     * start pointing to the new name
3276     */
3277    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3278        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3279            /* replace the name */
3280            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3281        }
3282    }
3283out:
3284    v9fs_path_free(&oldpath);
3285    v9fs_path_free(&newpath);
3286    return err;
3287}
3288
3289static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3290                                               V9fsString *old_name,
3291                                               int32_t newdirfid,
3292                                               V9fsString *new_name)
3293{
3294    int err = 0;
3295    V9fsState *s = pdu->s;
3296    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3297
3298    olddirfidp = get_fid(pdu, olddirfid);
3299    if (olddirfidp == NULL) {
3300        err = -ENOENT;
3301        goto out;
3302    }
3303    if (newdirfid != -1) {
3304        newdirfidp = get_fid(pdu, newdirfid);
3305        if (newdirfidp == NULL) {
3306            err = -ENOENT;
3307            goto out;
3308        }
3309    } else {
3310        newdirfidp = get_fid(pdu, olddirfid);
3311    }
3312
3313    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3314                           &newdirfidp->path, new_name);
3315    if (err < 0) {
3316        goto out;
3317    }
3318    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3319        /* Only for path based fid  we need to do the below fixup */
3320        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3321                                 &newdirfidp->path, new_name);
3322    }
3323out:
3324    if (olddirfidp) {
3325        put_fid(pdu, olddirfidp);
3326    }
3327    if (newdirfidp) {
3328        put_fid(pdu, newdirfidp);
3329    }
3330    return err;
3331}
3332
3333static void coroutine_fn v9fs_renameat(void *opaque)
3334{
3335    ssize_t err = 0;
3336    size_t offset = 7;
3337    V9fsPDU *pdu = opaque;
3338    V9fsState *s = pdu->s;
3339    int32_t olddirfid, newdirfid;
3340    V9fsString old_name, new_name;
3341
3342    v9fs_string_init(&old_name);
3343    v9fs_string_init(&new_name);
3344    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3345                        &old_name, &newdirfid, &new_name);
3346    if (err < 0) {
3347        goto out_err;
3348    }
3349
3350    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3351        err = -ENOENT;
3352        goto out_err;
3353    }
3354
3355    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3356        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3357        err = -EISDIR;
3358        goto out_err;
3359    }
3360
3361    v9fs_path_write_lock(s);
3362    err = v9fs_complete_renameat(pdu, olddirfid,
3363                                 &old_name, newdirfid, &new_name);
3364    v9fs_path_unlock(s);
3365    if (!err) {
3366        err = offset;
3367    }
3368
3369out_err:
3370    pdu_complete(pdu, err);
3371    v9fs_string_free(&old_name);
3372    v9fs_string_free(&new_name);
3373}
3374
3375static void coroutine_fn v9fs_wstat(void *opaque)
3376{
3377    int32_t fid;
3378    int err = 0;
3379    int16_t unused;
3380    V9fsStat v9stat;
3381    size_t offset = 7;
3382    struct stat stbuf;
3383    V9fsFidState *fidp;
3384    V9fsPDU *pdu = opaque;
3385    V9fsState *s = pdu->s;
3386
3387    v9fs_stat_init(&v9stat);
3388    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3389    if (err < 0) {
3390        goto out_nofid;
3391    }
3392    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3393                     v9stat.mode, v9stat.atime, v9stat.mtime);
3394
3395    fidp = get_fid(pdu, fid);
3396    if (fidp == NULL) {
3397        err = -EINVAL;
3398        goto out_nofid;
3399    }
3400    /* do we need to sync the file? */
3401    if (donttouch_stat(&v9stat)) {
3402        err = v9fs_co_fsync(pdu, fidp, 0);
3403        goto out;
3404    }
3405    if (v9stat.mode != -1) {
3406        uint32_t v9_mode;
3407        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3408        if (err < 0) {
3409            goto out;
3410        }
3411        v9_mode = stat_to_v9mode(&stbuf);
3412        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3413            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3414            /* Attempting to change the type */
3415            err = -EIO;
3416            goto out;
3417        }
3418        err = v9fs_co_chmod(pdu, &fidp->path,
3419                            v9mode_to_mode(v9stat.mode,
3420                                           &v9stat.extension));
3421        if (err < 0) {
3422            goto out;
3423        }
3424    }
3425    if (v9stat.mtime != -1 || v9stat.atime != -1) {
3426        struct timespec times[2];
3427        if (v9stat.atime != -1) {
3428            times[0].tv_sec = v9stat.atime;
3429            times[0].tv_nsec = 0;
3430        } else {
3431            times[0].tv_nsec = UTIME_OMIT;
3432        }
3433        if (v9stat.mtime != -1) {
3434            times[1].tv_sec = v9stat.mtime;
3435            times[1].tv_nsec = 0;
3436        } else {
3437            times[1].tv_nsec = UTIME_OMIT;
3438        }
3439        err = v9fs_co_utimensat(pdu, &fidp->path, times);
3440        if (err < 0) {
3441            goto out;
3442        }
3443    }
3444    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3445        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3446        if (err < 0) {
3447            goto out;
3448        }
3449    }
3450    if (v9stat.name.size != 0) {
3451        v9fs_path_write_lock(s);
3452        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3453        v9fs_path_unlock(s);
3454        if (err < 0) {
3455            goto out;
3456        }
3457    }
3458    if (v9stat.length != -1) {
3459        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3460        if (err < 0) {
3461            goto out;
3462        }
3463    }
3464    err = offset;
3465out:
3466    put_fid(pdu, fidp);
3467out_nofid:
3468    v9fs_stat_free(&v9stat);
3469    pdu_complete(pdu, err);
3470}
3471
3472static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3473{
3474    uint32_t f_type;
3475    uint32_t f_bsize;
3476    uint64_t f_blocks;
3477    uint64_t f_bfree;
3478    uint64_t f_bavail;
3479    uint64_t f_files;
3480    uint64_t f_ffree;
3481    uint64_t fsid_val;
3482    uint32_t f_namelen;
3483    size_t offset = 7;
3484    int32_t bsize_factor;
3485
3486    /*
3487     * compute bsize factor based on host file system block size
3488     * and client msize
3489     */
3490    bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
3491    if (!bsize_factor) {
3492        bsize_factor = 1;
3493    }
3494    f_type  = stbuf->f_type;
3495    f_bsize = stbuf->f_bsize;
3496    f_bsize *= bsize_factor;
3497    /*
3498     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3499     * adjust(divide) the number of blocks, free blocks and available
3500     * blocks by bsize factor
3501     */
3502    f_blocks = stbuf->f_blocks / bsize_factor;
3503    f_bfree  = stbuf->f_bfree / bsize_factor;
3504    f_bavail = stbuf->f_bavail / bsize_factor;
3505    f_files  = stbuf->f_files;
3506    f_ffree  = stbuf->f_ffree;
3507    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3508               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3509    f_namelen = stbuf->f_namelen;
3510
3511    return pdu_marshal(pdu, offset, "ddqqqqqqd",
3512                       f_type, f_bsize, f_blocks, f_bfree,
3513                       f_bavail, f_files, f_ffree,
3514                       fsid_val, f_namelen);
3515}
3516
3517static void coroutine_fn v9fs_statfs(void *opaque)
3518{
3519    int32_t fid;
3520    ssize_t retval = 0;
3521    size_t offset = 7;
3522    V9fsFidState *fidp;
3523    struct statfs stbuf;
3524    V9fsPDU *pdu = opaque;
3525    V9fsState *s = pdu->s;
3526
3527    retval = pdu_unmarshal(pdu, offset, "d", &fid);
3528    if (retval < 0) {
3529        goto out_nofid;
3530    }
3531    fidp = get_fid(pdu, fid);
3532    if (fidp == NULL) {
3533        retval = -ENOENT;
3534        goto out_nofid;
3535    }
3536    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3537    if (retval < 0) {
3538        goto out;
3539    }
3540    retval = v9fs_fill_statfs(s, pdu, &stbuf);
3541    if (retval < 0) {
3542        goto out;
3543    }
3544    retval += offset;
3545out:
3546    put_fid(pdu, fidp);
3547out_nofid:
3548    pdu_complete(pdu, retval);
3549}
3550
3551static void coroutine_fn v9fs_mknod(void *opaque)
3552{
3553
3554    int mode;
3555    gid_t gid;
3556    int32_t fid;
3557    V9fsQID qid;
3558    int err = 0;
3559    int major, minor;
3560    size_t offset = 7;
3561    V9fsString name;
3562    struct stat stbuf;
3563    V9fsFidState *fidp;
3564    V9fsPDU *pdu = opaque;
3565
3566    v9fs_string_init(&name);
3567    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3568                        &major, &minor, &gid);
3569    if (err < 0) {
3570        goto out_nofid;
3571    }
3572    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3573
3574    if (name_is_illegal(name.data)) {
3575        err = -ENOENT;
3576        goto out_nofid;
3577    }
3578
3579    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3580        err = -EEXIST;
3581        goto out_nofid;
3582    }
3583
3584    fidp = get_fid(pdu, fid);
3585    if (fidp == NULL) {
3586        err = -ENOENT;
3587        goto out_nofid;
3588    }
3589    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3590                        makedev(major, minor), mode, &stbuf);
3591    if (err < 0) {
3592        goto out;
3593    }
3594    err = stat_to_qid(pdu, &stbuf, &qid);
3595    if (err < 0) {
3596        goto out;
3597    }
3598    err = pdu_marshal(pdu, offset, "Q", &qid);
3599    if (err < 0) {
3600        goto out;
3601    }
3602    err += offset;
3603    trace_v9fs_mknod_return(pdu->tag, pdu->id,
3604                            qid.type, qid.version, qid.path);
3605out:
3606    put_fid(pdu, fidp);
3607out_nofid:
3608    pdu_complete(pdu, err);
3609    v9fs_string_free(&name);
3610}
3611
3612/*
3613 * Implement posix byte range locking code
3614 * Server side handling of locking code is very simple, because 9p server in
3615 * QEMU can handle only one client. And most of the lock handling
3616 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3617 * do any thing in * qemu 9p server side lock code path.
3618 * So when a TLOCK request comes, always return success
3619 */
3620static void coroutine_fn v9fs_lock(void *opaque)
3621{
3622    V9fsFlock flock;
3623    size_t offset = 7;
3624    struct stat stbuf;
3625    V9fsFidState *fidp;
3626    int32_t fid, err = 0;
3627    V9fsPDU *pdu = opaque;
3628
3629    v9fs_string_init(&flock.client_id);
3630    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3631                        &flock.flags, &flock.start, &flock.length,
3632                        &flock.proc_id, &flock.client_id);
3633    if (err < 0) {
3634        goto out_nofid;
3635    }
3636    trace_v9fs_lock(pdu->tag, pdu->id, fid,
3637                    flock.type, flock.start, flock.length);
3638
3639
3640    /* We support only block flag now (that too ignored currently) */
3641    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3642        err = -EINVAL;
3643        goto out_nofid;
3644    }
3645    fidp = get_fid(pdu, fid);
3646    if (fidp == NULL) {
3647        err = -ENOENT;
3648        goto out_nofid;
3649    }
3650    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3651    if (err < 0) {
3652        goto out;
3653    }
3654    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3655    if (err < 0) {
3656        goto out;
3657    }
3658    err += offset;
3659    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3660out:
3661    put_fid(pdu, fidp);
3662out_nofid:
3663    pdu_complete(pdu, err);
3664    v9fs_string_free(&flock.client_id);
3665}
3666
3667/*
3668 * When a TGETLOCK request comes, always return success because all lock
3669 * handling is done by client's VFS layer.
3670 */
3671static void coroutine_fn v9fs_getlock(void *opaque)
3672{
3673    size_t offset = 7;
3674    struct stat stbuf;
3675    V9fsFidState *fidp;
3676    V9fsGetlock glock;
3677    int32_t fid, err = 0;
3678    V9fsPDU *pdu = opaque;
3679
3680    v9fs_string_init(&glock.client_id);
3681    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3682                        &glock.start, &glock.length, &glock.proc_id,
3683                        &glock.client_id);
3684    if (err < 0) {
3685        goto out_nofid;
3686    }
3687    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3688                       glock.type, glock.start, glock.length);
3689
3690    fidp = get_fid(pdu, fid);
3691    if (fidp == NULL) {
3692        err = -ENOENT;
3693        goto out_nofid;
3694    }
3695    err = v9fs_co_fstat(pdu, fidp, &stbuf);
3696    if (err < 0) {
3697        goto out;
3698    }
3699    glock.type = P9_LOCK_TYPE_UNLCK;
3700    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3701                          glock.start, glock.length, glock.proc_id,
3702                          &glock.client_id);
3703    if (err < 0) {
3704        goto out;
3705    }
3706    err += offset;
3707    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3708                              glock.length, glock.proc_id);
3709out:
3710    put_fid(pdu, fidp);
3711out_nofid:
3712    pdu_complete(pdu, err);
3713    v9fs_string_free(&glock.client_id);
3714}
3715
3716static void coroutine_fn v9fs_mkdir(void *opaque)
3717{
3718    V9fsPDU *pdu = opaque;
3719    size_t offset = 7;
3720    int32_t fid;
3721    struct stat stbuf;
3722    V9fsQID qid;
3723    V9fsString name;
3724    V9fsFidState *fidp;
3725    gid_t gid;
3726    int mode;
3727    int err = 0;
3728
3729    v9fs_string_init(&name);
3730    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3731    if (err < 0) {
3732        goto out_nofid;
3733    }
3734    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3735
3736    if (name_is_illegal(name.data)) {
3737        err = -ENOENT;
3738        goto out_nofid;
3739    }
3740
3741    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3742        err = -EEXIST;
3743        goto out_nofid;
3744    }
3745
3746    fidp = get_fid(pdu, fid);
3747    if (fidp == NULL) {
3748        err = -ENOENT;
3749        goto out_nofid;
3750    }
3751    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3752    if (err < 0) {
3753        goto out;
3754    }
3755    err = stat_to_qid(pdu, &stbuf, &qid);
3756    if (err < 0) {
3757        goto out;
3758    }
3759    err = pdu_marshal(pdu, offset, "Q", &qid);
3760    if (err < 0) {
3761        goto out;
3762    }
3763    err += offset;
3764    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3765                            qid.type, qid.version, qid.path, err);
3766out:
3767    put_fid(pdu, fidp);
3768out_nofid:
3769    pdu_complete(pdu, err);
3770    v9fs_string_free(&name);
3771}
3772
3773static void coroutine_fn v9fs_xattrwalk(void *opaque)
3774{
3775    int64_t size;
3776    V9fsString name;
3777    ssize_t err = 0;
3778    size_t offset = 7;
3779    int32_t fid, newfid;
3780    V9fsFidState *file_fidp;
3781    V9fsFidState *xattr_fidp = NULL;
3782    V9fsPDU *pdu = opaque;
3783    V9fsState *s = pdu->s;
3784
3785    v9fs_string_init(&name);
3786    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3787    if (err < 0) {
3788        goto out_nofid;
3789    }
3790    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3791
3792    file_fidp = get_fid(pdu, fid);
3793    if (file_fidp == NULL) {
3794        err = -ENOENT;
3795        goto out_nofid;
3796    }
3797    xattr_fidp = alloc_fid(s, newfid);
3798    if (xattr_fidp == NULL) {
3799        err = -EINVAL;
3800        goto out;
3801    }
3802    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3803    if (!v9fs_string_size(&name)) {
3804        /*
3805         * listxattr request. Get the size first
3806         */
3807        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3808        if (size < 0) {
3809            err = size;
3810            clunk_fid(s, xattr_fidp->fid);
3811            goto out;
3812        }
3813        /*
3814         * Read the xattr value
3815         */
3816        xattr_fidp->fs.xattr.len = size;
3817        xattr_fidp->fid_type = P9_FID_XATTR;
3818        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3819        xattr_fidp->fs.xattr.value = g_malloc0(size);
3820        if (size) {
3821            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3822                                     xattr_fidp->fs.xattr.value,
3823                                     xattr_fidp->fs.xattr.len);
3824            if (err < 0) {
3825                clunk_fid(s, xattr_fidp->fid);
3826                goto out;
3827            }
3828        }
3829        err = pdu_marshal(pdu, offset, "q", size);
3830        if (err < 0) {
3831            goto out;
3832        }
3833        err += offset;
3834    } else {
3835        /*
3836         * specific xattr fid. We check for xattr
3837         * presence also collect the xattr size
3838         */
3839        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3840                                 &name, NULL, 0);
3841        if (size < 0) {
3842            err = size;
3843            clunk_fid(s, xattr_fidp->fid);
3844            goto out;
3845        }
3846        /*
3847         * Read the xattr value
3848         */
3849        xattr_fidp->fs.xattr.len = size;
3850        xattr_fidp->fid_type = P9_FID_XATTR;
3851        xattr_fidp->fs.xattr.xattrwalk_fid = true;
3852        xattr_fidp->fs.xattr.value = g_malloc0(size);
3853        if (size) {
3854            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3855                                    &name, xattr_fidp->fs.xattr.value,
3856                                    xattr_fidp->fs.xattr.len);
3857            if (err < 0) {
3858                clunk_fid(s, xattr_fidp->fid);
3859                goto out;
3860            }
3861        }
3862        err = pdu_marshal(pdu, offset, "q", size);
3863        if (err < 0) {
3864            goto out;
3865        }
3866        err += offset;
3867    }
3868    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3869out:
3870    put_fid(pdu, file_fidp);
3871    if (xattr_fidp) {
3872        put_fid(pdu, xattr_fidp);
3873    }
3874out_nofid:
3875    pdu_complete(pdu, err);
3876    v9fs_string_free(&name);
3877}
3878
3879static void coroutine_fn v9fs_xattrcreate(void *opaque)
3880{
3881    int flags, rflags = 0;
3882    int32_t fid;
3883    uint64_t size;
3884    ssize_t err = 0;
3885    V9fsString name;
3886    size_t offset = 7;
3887    V9fsFidState *file_fidp;
3888    V9fsFidState *xattr_fidp;
3889    V9fsPDU *pdu = opaque;
3890
3891    v9fs_string_init(&name);
3892    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3893    if (err < 0) {
3894        goto out_nofid;
3895    }
3896    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3897
3898    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3899        err = -EINVAL;
3900        goto out_nofid;
3901    }
3902
3903    if (flags & P9_XATTR_CREATE) {
3904        rflags |= XATTR_CREATE;
3905    }
3906
3907    if (flags & P9_XATTR_REPLACE) {
3908        rflags |= XATTR_REPLACE;
3909    }
3910
3911    if (size > XATTR_SIZE_MAX) {
3912        err = -E2BIG;
3913        goto out_nofid;
3914    }
3915
3916    file_fidp = get_fid(pdu, fid);
3917    if (file_fidp == NULL) {
3918        err = -EINVAL;
3919        goto out_nofid;
3920    }
3921    if (file_fidp->fid_type != P9_FID_NONE) {
3922        err = -EINVAL;
3923        goto out_put_fid;
3924    }
3925
3926    /* Make the file fid point to xattr */
3927    xattr_fidp = file_fidp;
3928    xattr_fidp->fid_type = P9_FID_XATTR;
3929    xattr_fidp->fs.xattr.copied_len = 0;
3930    xattr_fidp->fs.xattr.xattrwalk_fid = false;
3931    xattr_fidp->fs.xattr.len = size;
3932    xattr_fidp->fs.xattr.flags = rflags;
3933    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3934    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3935    xattr_fidp->fs.xattr.value = g_malloc0(size);
3936    err = offset;
3937out_put_fid:
3938    put_fid(pdu, file_fidp);
3939out_nofid:
3940    pdu_complete(pdu, err);
3941    v9fs_string_free(&name);
3942}
3943
3944static void coroutine_fn v9fs_readlink(void *opaque)
3945{
3946    V9fsPDU *pdu = opaque;
3947    size_t offset = 7;
3948    V9fsString target;
3949    int32_t fid;
3950    int err = 0;
3951    V9fsFidState *fidp;
3952
3953    err = pdu_unmarshal(pdu, offset, "d", &fid);
3954    if (err < 0) {
3955        goto out_nofid;
3956    }
3957    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3958    fidp = get_fid(pdu, fid);
3959    if (fidp == NULL) {
3960        err = -ENOENT;
3961        goto out_nofid;
3962    }
3963
3964    v9fs_string_init(&target);
3965    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3966    if (err < 0) {
3967        goto out;
3968    }
3969    err = pdu_marshal(pdu, offset, "s", &target);
3970    if (err < 0) {
3971        v9fs_string_free(&target);
3972        goto out;
3973    }
3974    err += offset;
3975    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3976    v9fs_string_free(&target);
3977out:
3978    put_fid(pdu, fidp);
3979out_nofid:
3980    pdu_complete(pdu, err);
3981}
3982
3983static CoroutineEntry *pdu_co_handlers[] = {
3984    [P9_TREADDIR] = v9fs_readdir,
3985    [P9_TSTATFS] = v9fs_statfs,
3986    [P9_TGETATTR] = v9fs_getattr,
3987    [P9_TSETATTR] = v9fs_setattr,
3988    [P9_TXATTRWALK] = v9fs_xattrwalk,
3989    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3990    [P9_TMKNOD] = v9fs_mknod,
3991    [P9_TRENAME] = v9fs_rename,
3992    [P9_TLOCK] = v9fs_lock,
3993    [P9_TGETLOCK] = v9fs_getlock,
3994    [P9_TRENAMEAT] = v9fs_renameat,
3995    [P9_TREADLINK] = v9fs_readlink,
3996    [P9_TUNLINKAT] = v9fs_unlinkat,
3997    [P9_TMKDIR] = v9fs_mkdir,
3998    [P9_TVERSION] = v9fs_version,
3999    [P9_TLOPEN] = v9fs_open,
4000    [P9_TATTACH] = v9fs_attach,
4001    [P9_TSTAT] = v9fs_stat,
4002    [P9_TWALK] = v9fs_walk,
4003    [P9_TCLUNK] = v9fs_clunk,
4004    [P9_TFSYNC] = v9fs_fsync,
4005    [P9_TOPEN] = v9fs_open,
4006    [P9_TREAD] = v9fs_read,
4007#if 0
4008    [P9_TAUTH] = v9fs_auth,
4009#endif
4010    [P9_TFLUSH] = v9fs_flush,
4011    [P9_TLINK] = v9fs_link,
4012    [P9_TSYMLINK] = v9fs_symlink,
4013    [P9_TCREATE] = v9fs_create,
4014    [P9_TLCREATE] = v9fs_lcreate,
4015    [P9_TWRITE] = v9fs_write,
4016    [P9_TWSTAT] = v9fs_wstat,
4017    [P9_TREMOVE] = v9fs_remove,
4018};
4019
4020static void coroutine_fn v9fs_op_not_supp(void *opaque)
4021{
4022    V9fsPDU *pdu = opaque;
4023    pdu_complete(pdu, -EOPNOTSUPP);
4024}
4025
4026static void coroutine_fn v9fs_fs_ro(void *opaque)
4027{
4028    V9fsPDU *pdu = opaque;
4029    pdu_complete(pdu, -EROFS);
4030}
4031
4032static inline bool is_read_only_op(V9fsPDU *pdu)
4033{
4034    switch (pdu->id) {
4035    case P9_TREADDIR:
4036    case P9_TSTATFS:
4037    case P9_TGETATTR:
4038    case P9_TXATTRWALK:
4039    case P9_TLOCK:
4040    case P9_TGETLOCK:
4041    case P9_TREADLINK:
4042    case P9_TVERSION:
4043    case P9_TLOPEN:
4044    case P9_TATTACH:
4045    case P9_TSTAT:
4046    case P9_TWALK:
4047    case P9_TCLUNK:
4048    case P9_TFSYNC:
4049    case P9_TOPEN:
4050    case P9_TREAD:
4051    case P9_TAUTH:
4052    case P9_TFLUSH:
4053        return 1;
4054    default:
4055        return 0;
4056    }
4057}
4058
4059void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
4060{
4061    Coroutine *co;
4062    CoroutineEntry *handler;
4063    V9fsState *s = pdu->s;
4064
4065    pdu->size = le32_to_cpu(hdr->size_le);
4066    pdu->id = hdr->id;
4067    pdu->tag = le16_to_cpu(hdr->tag_le);
4068
4069    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4070        (pdu_co_handlers[pdu->id] == NULL)) {
4071        handler = v9fs_op_not_supp;
4072    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4073        handler = v9fs_fs_ro;
4074    } else {
4075        handler = pdu_co_handlers[pdu->id];
4076    }
4077
4078    qemu_co_queue_init(&pdu->complete);
4079    co = qemu_coroutine_create(handler, pdu);
4080    qemu_coroutine_enter(co);
4081}
4082
4083/* Returns 0 on success, 1 on failure. */
4084int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4085                               Error **errp)
4086{
4087    ERRP_GUARD();
4088    int i, len;
4089    struct stat stat;
4090    FsDriverEntry *fse;
4091    V9fsPath path;
4092    int rc = 1;
4093
4094    assert(!s->transport);
4095    s->transport = t;
4096
4097    /* initialize pdu allocator */
4098    QLIST_INIT(&s->free_list);
4099    QLIST_INIT(&s->active_list);
4100    for (i = 0; i < MAX_REQ; i++) {
4101        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4102        s->pdus[i].s = s;
4103        s->pdus[i].idx = i;
4104    }
4105
4106    v9fs_path_init(&path);
4107
4108    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4109
4110    if (!fse) {
4111        /* We don't have a fsdev identified by fsdev_id */
4112        error_setg(errp, "9pfs device couldn't find fsdev with the "
4113                   "id = %s",
4114                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4115        goto out;
4116    }
4117
4118    if (!s->fsconf.tag) {
4119        /* we haven't specified a mount_tag */
4120        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4121                   s->fsconf.fsdev_id);
4122        goto out;
4123    }
4124
4125    s->ctx.export_flags = fse->export_flags;
4126    s->ctx.fs_root = g_strdup(fse->path);
4127    s->ctx.exops.get_st_gen = NULL;
4128    len = strlen(s->fsconf.tag);
4129    if (len > MAX_TAG_LEN - 1) {
4130        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4131                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4132        goto out;
4133    }
4134
4135    s->tag = g_strdup(s->fsconf.tag);
4136    s->ctx.uid = -1;
4137
4138    s->ops = fse->ops;
4139
4140    s->ctx.fmode = fse->fmode;
4141    s->ctx.dmode = fse->dmode;
4142
4143    QSIMPLEQ_INIT(&s->fid_list);
4144    qemu_co_rwlock_init(&s->rename_lock);
4145
4146    if (s->ops->init(&s->ctx, errp) < 0) {
4147        error_prepend(errp, "cannot initialize fsdev '%s': ",
4148                      s->fsconf.fsdev_id);
4149        goto out;
4150    }
4151
4152    /*
4153     * Check details of export path, We need to use fs driver
4154     * call back to do that. Since we are in the init path, we don't
4155     * use co-routines here.
4156     */
4157    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4158        error_setg(errp,
4159                   "error in converting name to path %s", strerror(errno));
4160        goto out;
4161    }
4162    if (s->ops->lstat(&s->ctx, &path, &stat)) {
4163        error_setg(errp, "share path %s does not exist", fse->path);
4164        goto out;
4165    } else if (!S_ISDIR(stat.st_mode)) {
4166        error_setg(errp, "share path %s is not a directory", fse->path);
4167        goto out;
4168    }
4169
4170    s->dev_id = stat.st_dev;
4171
4172    /* init inode remapping : */
4173    /* hash table for variable length inode suffixes */
4174    qpd_table_init(&s->qpd_table);
4175    /* hash table for slow/full inode remapping (most users won't need it) */
4176    qpf_table_init(&s->qpf_table);
4177    /* hash table for quick inode remapping */
4178    qpp_table_init(&s->qpp_table);
4179    s->qp_ndevices = 0;
4180    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4181    s->qp_fullpath_next = 1;
4182
4183    s->ctx.fst = &fse->fst;
4184    fsdev_throttle_init(s->ctx.fst);
4185
4186    rc = 0;
4187out:
4188    if (rc) {
4189        v9fs_device_unrealize_common(s);
4190    }
4191    v9fs_path_free(&path);
4192    return rc;
4193}
4194
4195void v9fs_device_unrealize_common(V9fsState *s)
4196{
4197    if (s->ops && s->ops->cleanup) {
4198        s->ops->cleanup(&s->ctx);
4199    }
4200    if (s->ctx.fst) {
4201        fsdev_throttle_cleanup(s->ctx.fst);
4202    }
4203    g_free(s->tag);
4204    qp_table_destroy(&s->qpd_table);
4205    qp_table_destroy(&s->qpp_table);
4206    qp_table_destroy(&s->qpf_table);
4207    g_free(s->ctx.fs_root);
4208}
4209
4210typedef struct VirtfsCoResetData {
4211    V9fsPDU pdu;
4212    bool done;
4213} VirtfsCoResetData;
4214
4215static void coroutine_fn virtfs_co_reset(void *opaque)
4216{
4217    VirtfsCoResetData *data = opaque;
4218
4219    virtfs_reset(&data->pdu);
4220    data->done = true;
4221}
4222
4223void v9fs_reset(V9fsState *s)
4224{
4225    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4226    Coroutine *co;
4227
4228    while (!QLIST_EMPTY(&s->active_list)) {
4229        aio_poll(qemu_get_aio_context(), true);
4230    }
4231
4232    co = qemu_coroutine_create(virtfs_co_reset, &data);
4233    qemu_coroutine_enter(co);
4234
4235    while (!data.done) {
4236        aio_poll(qemu_get_aio_context(), true);
4237    }
4238}
4239
4240static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4241{
4242    struct rlimit rlim;
4243    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4244        error_report("Failed to get the resource limit");
4245        exit(1);
4246    }
4247    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
4248    open_fd_rc = rlim.rlim_cur / 2;
4249}
4250