qemu/hw/9pfs/virtio-9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "hw/virtio/virtio.h"
  15#include "hw/i386/pc.h"
  16#include "qemu/sockets.h"
  17#include "virtio-9p.h"
  18#include "fsdev/qemu-fsdev.h"
  19#include "virtio-9p-xattr.h"
  20#include "virtio-9p-coth.h"
  21#include "trace.h"
  22#include "migration/migration.h"
  23
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * a threshold on the number of open descriptors - confirm against its users.
 */
int open_fd_hw;
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * a running count of open descriptors - confirm against its users.
 */
int total_open_fd;
/* Upper bound on how many fids v9fs_reclaim_fd() collects in one pass. */
static int open_fd_rc;
  27
  28enum {
  29    Oread   = 0x00,
  30    Owrite  = 0x01,
  31    Ordwr   = 0x02,
  32    Oexec   = 0x03,
  33    Oexcl   = 0x04,
  34    Otrunc  = 0x10,
  35    Orexec  = 0x20,
  36    Orclose = 0x40,
  37    Oappend = 0x80,
  38};
  39
  40static int omode_to_uflags(int8_t mode)
  41{
  42    int ret = 0;
  43
  44    switch (mode & 3) {
  45    case Oread:
  46        ret = O_RDONLY;
  47        break;
  48    case Ordwr:
  49        ret = O_RDWR;
  50        break;
  51    case Owrite:
  52        ret = O_WRONLY;
  53        break;
  54    case Oexec:
  55        ret = O_RDONLY;
  56        break;
  57    }
  58
  59    if (mode & Otrunc) {
  60        ret |= O_TRUNC;
  61    }
  62
  63    if (mode & Oappend) {
  64        ret |= O_APPEND;
  65    }
  66
  67    if (mode & Oexcl) {
  68        ret |= O_EXCL;
  69    }
  70
  71    return ret;
  72}
  73
  74struct dotl_openflag_map {
  75    int dotl_flag;
  76    int open_flag;
  77};
  78
  79static int dotl_to_open_flags(int flags)
  80{
  81    int i;
  82    /*
  83     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
  84     * and P9_DOTL_NOACCESS
  85     */
  86    int oflags = flags & O_ACCMODE;
  87
  88    struct dotl_openflag_map dotl_oflag_map[] = {
  89        { P9_DOTL_CREATE, O_CREAT },
  90        { P9_DOTL_EXCL, O_EXCL },
  91        { P9_DOTL_NOCTTY , O_NOCTTY },
  92        { P9_DOTL_TRUNC, O_TRUNC },
  93        { P9_DOTL_APPEND, O_APPEND },
  94        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
  95        { P9_DOTL_DSYNC, O_DSYNC },
  96        { P9_DOTL_FASYNC, FASYNC },
  97        { P9_DOTL_DIRECT, O_DIRECT },
  98        { P9_DOTL_LARGEFILE, O_LARGEFILE },
  99        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 100        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 101        { P9_DOTL_NOATIME, O_NOATIME },
 102        { P9_DOTL_SYNC, O_SYNC },
 103    };
 104
 105    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 106        if (flags & dotl_oflag_map[i].dotl_flag) {
 107            oflags |= dotl_oflag_map[i].open_flag;
 108        }
 109    }
 110
 111    return oflags;
 112}
 113
 114void cred_init(FsCred *credp)
 115{
 116    credp->fc_uid = -1;
 117    credp->fc_gid = -1;
 118    credp->fc_mode = -1;
 119    credp->fc_rdev = -1;
 120}
 121
 122static int get_dotl_openflags(V9fsState *s, int oflags)
 123{
 124    int flags;
 125    /*
 126     * Filter the client open flags
 127     */
 128    flags = dotl_to_open_flags(oflags);
 129    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 130    /*
 131     * Ignore direct disk access hint until the server supports it.
 132     */
 133    flags &= ~O_DIRECT;
 134    return flags;
 135}
 136
 137void v9fs_path_init(V9fsPath *path)
 138{
 139    path->data = NULL;
 140    path->size = 0;
 141}
 142
 143void v9fs_path_free(V9fsPath *path)
 144{
 145    g_free(path->data);
 146    path->data = NULL;
 147    path->size = 0;
 148}
 149
 150void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs)
 151{
 152    v9fs_path_free(lhs);
 153    lhs->data = g_malloc(rhs->size);
 154    memcpy(lhs->data, rhs->data, rhs->size);
 155    lhs->size = rhs->size;
 156}
 157
 158int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 159                      const char *name, V9fsPath *path)
 160{
 161    int err;
 162    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 163    if (err < 0) {
 164        err = -errno;
 165    }
 166    return err;
 167}
 168
 169/*
 170 * Return TRUE if s1 is an ancestor of s2.
 171 *
 172 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 173 * As a special case, We treat s1 as ancestor of s2 if they are same!
 174 */
 175static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 176{
 177    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 178        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 179            return 1;
 180        }
 181    }
 182    return 0;
 183}
 184
 185static size_t v9fs_string_size(V9fsString *str)
 186{
 187    return str->size;
 188}
 189
 190/*
 191 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 192static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 193{
 194    int err = 1;
 195    if (f->fid_type == P9_FID_FILE) {
 196        if (f->fs.fd == -1) {
 197            do {
 198                err = v9fs_co_open(pdu, f, f->open_flags);
 199            } while (err == -EINTR && !pdu->cancelled);
 200        }
 201    } else if (f->fid_type == P9_FID_DIR) {
 202        if (f->fs.dir == NULL) {
 203            do {
 204                err = v9fs_co_opendir(pdu, f);
 205            } while (err == -EINTR && !pdu->cancelled);
 206        }
 207    }
 208    return err;
 209}
 210
 211static V9fsFidState *get_fid(V9fsPDU *pdu, int32_t fid)
 212{
 213    int err;
 214    V9fsFidState *f;
 215    V9fsState *s = pdu->s;
 216
 217    for (f = s->fid_list; f; f = f->next) {
 218        BUG_ON(f->clunked);
 219        if (f->fid == fid) {
 220            /*
 221             * Update the fid ref upfront so that
 222             * we don't get reclaimed when we yield
 223             * in open later.
 224             */
 225            f->ref++;
 226            /*
 227             * check whether we need to reopen the
 228             * file. We might have closed the fd
 229             * while trying to free up some file
 230             * descriptors.
 231             */
 232            err = v9fs_reopen_fid(pdu, f);
 233            if (err < 0) {
 234                f->ref--;
 235                return NULL;
 236            }
 237            /*
 238             * Mark the fid as referenced so that the LRU
 239             * reclaim won't close the file descriptor
 240             */
 241            f->flags |= FID_REFERENCED;
 242            return f;
 243        }
 244    }
 245    return NULL;
 246}
 247
 248static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 249{
 250    V9fsFidState *f;
 251
 252    for (f = s->fid_list; f; f = f->next) {
 253        /* If fid is already there return NULL */
 254        BUG_ON(f->clunked);
 255        if (f->fid == fid) {
 256            return NULL;
 257        }
 258    }
 259    f = g_malloc0(sizeof(V9fsFidState));
 260    f->fid = fid;
 261    f->fid_type = P9_FID_NONE;
 262    f->ref = 1;
 263    /*
 264     * Mark the fid as referenced so that the LRU
 265     * reclaim won't close the file descriptor
 266     */
 267    f->flags |= FID_REFERENCED;
 268    f->next = s->fid_list;
 269    s->fid_list = f;
 270
 271    return f;
 272}
 273
 274static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 275{
 276    int retval = 0;
 277
 278    if (fidp->fs.xattr.copied_len == -1) {
 279        /* getxattr/listxattr fid */
 280        goto free_value;
 281    }
 282    /*
 283     * if this is fid for setxattr. clunk should
 284     * result in setxattr localcall
 285     */
 286    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 287        /* clunk after partial write */
 288        retval = -EINVAL;
 289        goto free_out;
 290    }
 291    if (fidp->fs.xattr.len) {
 292        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 293                                   fidp->fs.xattr.value,
 294                                   fidp->fs.xattr.len,
 295                                   fidp->fs.xattr.flags);
 296    } else {
 297        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 298    }
 299free_out:
 300    v9fs_string_free(&fidp->fs.xattr.name);
 301free_value:
 302    g_free(fidp->fs.xattr.value);
 303    return retval;
 304}
 305
 306static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 307{
 308    int retval = 0;
 309
 310    if (fidp->fid_type == P9_FID_FILE) {
 311        /* If we reclaimed the fd no need to close */
 312        if (fidp->fs.fd != -1) {
 313            retval = v9fs_co_close(pdu, &fidp->fs);
 314        }
 315    } else if (fidp->fid_type == P9_FID_DIR) {
 316        if (fidp->fs.dir != NULL) {
 317            retval = v9fs_co_closedir(pdu, &fidp->fs);
 318        }
 319    } else if (fidp->fid_type == P9_FID_XATTR) {
 320        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 321    }
 322    v9fs_path_free(&fidp->path);
 323    g_free(fidp);
 324    return retval;
 325}
 326
 327static int put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 328{
 329    BUG_ON(!fidp->ref);
 330    fidp->ref--;
 331    /*
 332     * Don't free the fid if it is in reclaim list
 333     */
 334    if (!fidp->ref && fidp->clunked) {
 335        if (fidp->fid == pdu->s->root_fid) {
 336            /*
 337             * if the clunked fid is root fid then we
 338             * have unmounted the fs on the client side.
 339             * delete the migration blocker. Ideally, this
 340             * should be hooked to transport close notification
 341             */
 342            if (pdu->s->migration_blocker) {
 343                migrate_del_blocker(pdu->s->migration_blocker);
 344                error_free(pdu->s->migration_blocker);
 345                pdu->s->migration_blocker = NULL;
 346            }
 347        }
 348        return free_fid(pdu, fidp);
 349    }
 350    return 0;
 351}
 352
 353static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 354{
 355    V9fsFidState **fidpp, *fidp;
 356
 357    for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
 358        if ((*fidpp)->fid == fid) {
 359            break;
 360        }
 361    }
 362    if (*fidpp == NULL) {
 363        return NULL;
 364    }
 365    fidp = *fidpp;
 366    *fidpp = fidp->next;
 367    fidp->clunked = 1;
 368    return fidp;
 369}
 370
 371void v9fs_reclaim_fd(V9fsPDU *pdu)
 372{
 373    int reclaim_count = 0;
 374    V9fsState *s = pdu->s;
 375    V9fsFidState *f, *reclaim_list = NULL;
 376
 377    for (f = s->fid_list; f; f = f->next) {
 378        /*
 379         * Unlink fids cannot be reclaimed. Check
 380         * for them and skip them. Also skip fids
 381         * currently being operated on.
 382         */
 383        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
 384            continue;
 385        }
 386        /*
 387         * if it is a recently referenced fid
 388         * we leave the fid untouched and clear the
 389         * reference bit. We come back to it later
 390         * in the next iteration. (a simple LRU without
 391         * moving list elements around)
 392         */
 393        if (f->flags & FID_REFERENCED) {
 394            f->flags &= ~FID_REFERENCED;
 395            continue;
 396        }
 397        /*
 398         * Add fids to reclaim list.
 399         */
 400        if (f->fid_type == P9_FID_FILE) {
 401            if (f->fs.fd != -1) {
 402                /*
 403                 * Up the reference count so that
 404                 * a clunk request won't free this fid
 405                 */
 406                f->ref++;
 407                f->rclm_lst = reclaim_list;
 408                reclaim_list = f;
 409                f->fs_reclaim.fd = f->fs.fd;
 410                f->fs.fd = -1;
 411                reclaim_count++;
 412            }
 413        } else if (f->fid_type == P9_FID_DIR) {
 414            if (f->fs.dir != NULL) {
 415                /*
 416                 * Up the reference count so that
 417                 * a clunk request won't free this fid
 418                 */
 419                f->ref++;
 420                f->rclm_lst = reclaim_list;
 421                reclaim_list = f;
 422                f->fs_reclaim.dir = f->fs.dir;
 423                f->fs.dir = NULL;
 424                reclaim_count++;
 425            }
 426        }
 427        if (reclaim_count >= open_fd_rc) {
 428            break;
 429        }
 430    }
 431    /*
 432     * Now close the fid in reclaim list. Free them if they
 433     * are already clunked.
 434     */
 435    while (reclaim_list) {
 436        f = reclaim_list;
 437        reclaim_list = f->rclm_lst;
 438        if (f->fid_type == P9_FID_FILE) {
 439            v9fs_co_close(pdu, &f->fs_reclaim);
 440        } else if (f->fid_type == P9_FID_DIR) {
 441            v9fs_co_closedir(pdu, &f->fs_reclaim);
 442        }
 443        f->rclm_lst = NULL;
 444        /*
 445         * Now drop the fid reference, free it
 446         * if clunked.
 447         */
 448        put_fid(pdu, f);
 449    }
 450}
 451
 452static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 453{
 454    int err;
 455    V9fsState *s = pdu->s;
 456    V9fsFidState *fidp, head_fid;
 457
 458    head_fid.next = s->fid_list;
 459    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
 460        if (fidp->path.size != path->size) {
 461            continue;
 462        }
 463        if (!memcmp(fidp->path.data, path->data, path->size)) {
 464            /* Mark the fid non reclaimable. */
 465            fidp->flags |= FID_NON_RECLAIMABLE;
 466
 467            /* reopen the file/dir if already closed */
 468            err = v9fs_reopen_fid(pdu, fidp);
 469            if (err < 0) {
 470                return -1;
 471            }
 472            /*
 473             * Go back to head of fid list because
 474             * the list could have got updated when
 475             * switched to the worker thread
 476             */
 477            if (err == 0) {
 478                fidp = &head_fid;
 479            }
 480        }
 481    }
 482    return 0;
 483}
 484
 485static void virtfs_reset(V9fsPDU *pdu)
 486{
 487    V9fsState *s = pdu->s;
 488    V9fsFidState *fidp = NULL;
 489
 490    /* Free all fids */
 491    while (s->fid_list) {
 492        fidp = s->fid_list;
 493        s->fid_list = fidp->next;
 494
 495        if (fidp->ref) {
 496            fidp->clunked = 1;
 497        } else {
 498            free_fid(pdu, fidp);
 499        }
 500    }
 501    if (fidp) {
 502        /* One or more unclunked fids found... */
 503        error_report("9pfs:%s: One or more uncluncked fids "
 504                     "found during reset", __func__);
 505    }
 506}
 507
 508#define P9_QID_TYPE_DIR         0x80
 509#define P9_QID_TYPE_SYMLINK     0x02
 510
 511#define P9_STAT_MODE_DIR        0x80000000
 512#define P9_STAT_MODE_APPEND     0x40000000
 513#define P9_STAT_MODE_EXCL       0x20000000
 514#define P9_STAT_MODE_MOUNT      0x10000000
 515#define P9_STAT_MODE_AUTH       0x08000000
 516#define P9_STAT_MODE_TMP        0x04000000
 517#define P9_STAT_MODE_SYMLINK    0x02000000
 518#define P9_STAT_MODE_LINK       0x01000000
 519#define P9_STAT_MODE_DEVICE     0x00800000
 520#define P9_STAT_MODE_NAMED_PIPE 0x00200000
 521#define P9_STAT_MODE_SOCKET     0x00100000
 522#define P9_STAT_MODE_SETUID     0x00080000
 523#define P9_STAT_MODE_SETGID     0x00040000
 524#define P9_STAT_MODE_SETVTX     0x00010000
 525
 526#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
 527                                P9_STAT_MODE_SYMLINK |      \
 528                                P9_STAT_MODE_LINK |         \
 529                                P9_STAT_MODE_DEVICE |       \
 530                                P9_STAT_MODE_NAMED_PIPE |   \
 531                                P9_STAT_MODE_SOCKET)
 532
 533/* This is the algorithm from ufs in spfs */
 534static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
 535{
 536    size_t size;
 537
 538    memset(&qidp->path, 0, sizeof(qidp->path));
 539    size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 540    memcpy(&qidp->path, &stbuf->st_ino, size);
 541    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 542    qidp->type = 0;
 543    if (S_ISDIR(stbuf->st_mode)) {
 544        qidp->type |= P9_QID_TYPE_DIR;
 545    }
 546    if (S_ISLNK(stbuf->st_mode)) {
 547        qidp->type |= P9_QID_TYPE_SYMLINK;
 548    }
 549}
 550
 551static int fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, V9fsQID *qidp)
 552{
 553    struct stat stbuf;
 554    int err;
 555
 556    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 557    if (err < 0) {
 558        return err;
 559    }
 560    stat_to_qid(&stbuf, qidp);
 561    return 0;
 562}
 563
 564static V9fsPDU *alloc_pdu(V9fsState *s)
 565{
 566    V9fsPDU *pdu = NULL;
 567
 568    if (!QLIST_EMPTY(&s->free_list)) {
 569        pdu = QLIST_FIRST(&s->free_list);
 570        QLIST_REMOVE(pdu, next);
 571        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
 572    }
 573    return pdu;
 574}
 575
 576static void free_pdu(V9fsState *s, V9fsPDU *pdu)
 577{
 578    if (pdu) {
 579        /*
 580         * Cancelled pdu are added back to the freelist
 581         * by flush request .
 582         */
 583        if (!pdu->cancelled) {
 584            QLIST_REMOVE(pdu, next);
 585            QLIST_INSERT_HEAD(&s->free_list, pdu, next);
 586        }
 587    }
 588}
 589
 590/*
 591 * We don't do error checking for pdu_marshal/unmarshal here
 592 * because we always expect to have enough space to encode
 593 * error details
 594 */
 595static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len)
 596{
 597    int8_t id = pdu->id + 1; /* Response */
 598
 599    if (len < 0) {
 600        int err = -len;
 601        len = 7;
 602
 603        if (s->proto_version != V9FS_PROTO_2000L) {
 604            V9fsString str;
 605
 606            str.data = strerror(err);
 607            str.size = strlen(str.data);
 608
 609            len += pdu_marshal(pdu, len, "s", &str);
 610            id = P9_RERROR;
 611        }
 612
 613        len += pdu_marshal(pdu, len, "d", err);
 614
 615        if (s->proto_version == V9FS_PROTO_2000L) {
 616            id = P9_RLERROR;
 617        }
 618        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
 619    }
 620
 621    /* fill out the header */
 622    pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag);
 623
 624    /* keep these in sync */
 625    pdu->size = len;
 626    pdu->id = id;
 627
 628    /* push onto queue and notify */
 629    virtqueue_push(s->vq, &pdu->elem, len);
 630
 631    /* FIXME: we should batch these completions */
 632    virtio_notify(VIRTIO_DEVICE(s), s->vq);
 633
 634    /* Now wakeup anybody waiting in flush for this request */
 635    qemu_co_queue_next(&pdu->complete);
 636
 637    free_pdu(s, pdu);
 638}
 639
 640static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
 641{
 642    mode_t ret;
 643
 644    ret = mode & 0777;
 645    if (mode & P9_STAT_MODE_DIR) {
 646        ret |= S_IFDIR;
 647    }
 648
 649    if (mode & P9_STAT_MODE_SYMLINK) {
 650        ret |= S_IFLNK;
 651    }
 652    if (mode & P9_STAT_MODE_SOCKET) {
 653        ret |= S_IFSOCK;
 654    }
 655    if (mode & P9_STAT_MODE_NAMED_PIPE) {
 656        ret |= S_IFIFO;
 657    }
 658    if (mode & P9_STAT_MODE_DEVICE) {
 659        if (extension->size && extension->data[0] == 'c') {
 660            ret |= S_IFCHR;
 661        } else {
 662            ret |= S_IFBLK;
 663        }
 664    }
 665
 666    if (!(ret&~0777)) {
 667        ret |= S_IFREG;
 668    }
 669
 670    if (mode & P9_STAT_MODE_SETUID) {
 671        ret |= S_ISUID;
 672    }
 673    if (mode & P9_STAT_MODE_SETGID) {
 674        ret |= S_ISGID;
 675    }
 676    if (mode & P9_STAT_MODE_SETVTX) {
 677        ret |= S_ISVTX;
 678    }
 679
 680    return ret;
 681}
 682
 683static int donttouch_stat(V9fsStat *stat)
 684{
 685    if (stat->type == -1 &&
 686        stat->dev == -1 &&
 687        stat->qid.type == -1 &&
 688        stat->qid.version == -1 &&
 689        stat->qid.path == -1 &&
 690        stat->mode == -1 &&
 691        stat->atime == -1 &&
 692        stat->mtime == -1 &&
 693        stat->length == -1 &&
 694        !stat->name.size &&
 695        !stat->uid.size &&
 696        !stat->gid.size &&
 697        !stat->muid.size &&
 698        stat->n_uid == -1 &&
 699        stat->n_gid == -1 &&
 700        stat->n_muid == -1) {
 701        return 1;
 702    }
 703
 704    return 0;
 705}
 706
 707static void v9fs_stat_init(V9fsStat *stat)
 708{
 709    v9fs_string_init(&stat->name);
 710    v9fs_string_init(&stat->uid);
 711    v9fs_string_init(&stat->gid);
 712    v9fs_string_init(&stat->muid);
 713    v9fs_string_init(&stat->extension);
 714}
 715
 716static void v9fs_stat_free(V9fsStat *stat)
 717{
 718    v9fs_string_free(&stat->name);
 719    v9fs_string_free(&stat->uid);
 720    v9fs_string_free(&stat->gid);
 721    v9fs_string_free(&stat->muid);
 722    v9fs_string_free(&stat->extension);
 723}
 724
 725static uint32_t stat_to_v9mode(const struct stat *stbuf)
 726{
 727    uint32_t mode;
 728
 729    mode = stbuf->st_mode & 0777;
 730    if (S_ISDIR(stbuf->st_mode)) {
 731        mode |= P9_STAT_MODE_DIR;
 732    }
 733
 734    if (S_ISLNK(stbuf->st_mode)) {
 735        mode |= P9_STAT_MODE_SYMLINK;
 736    }
 737
 738    if (S_ISSOCK(stbuf->st_mode)) {
 739        mode |= P9_STAT_MODE_SOCKET;
 740    }
 741
 742    if (S_ISFIFO(stbuf->st_mode)) {
 743        mode |= P9_STAT_MODE_NAMED_PIPE;
 744    }
 745
 746    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
 747        mode |= P9_STAT_MODE_DEVICE;
 748    }
 749
 750    if (stbuf->st_mode & S_ISUID) {
 751        mode |= P9_STAT_MODE_SETUID;
 752    }
 753
 754    if (stbuf->st_mode & S_ISGID) {
 755        mode |= P9_STAT_MODE_SETGID;
 756    }
 757
 758    if (stbuf->st_mode & S_ISVTX) {
 759        mode |= P9_STAT_MODE_SETVTX;
 760    }
 761
 762    return mode;
 763}
 764
 765static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name,
 766                            const struct stat *stbuf,
 767                            V9fsStat *v9stat)
 768{
 769    int err;
 770    const char *str;
 771
 772    memset(v9stat, 0, sizeof(*v9stat));
 773
 774    stat_to_qid(stbuf, &v9stat->qid);
 775    v9stat->mode = stat_to_v9mode(stbuf);
 776    v9stat->atime = stbuf->st_atime;
 777    v9stat->mtime = stbuf->st_mtime;
 778    v9stat->length = stbuf->st_size;
 779
 780    v9fs_string_null(&v9stat->uid);
 781    v9fs_string_null(&v9stat->gid);
 782    v9fs_string_null(&v9stat->muid);
 783
 784    v9stat->n_uid = stbuf->st_uid;
 785    v9stat->n_gid = stbuf->st_gid;
 786    v9stat->n_muid = 0;
 787
 788    v9fs_string_null(&v9stat->extension);
 789
 790    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
 791        err = v9fs_co_readlink(pdu, name, &v9stat->extension);
 792        if (err < 0) {
 793            return err;
 794        }
 795    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
 796        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
 797                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
 798                major(stbuf->st_rdev), minor(stbuf->st_rdev));
 799    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
 800        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
 801                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
 802    }
 803
 804    str = strrchr(name->data, '/');
 805    if (str) {
 806        str += 1;
 807    } else {
 808        str = name->data;
 809    }
 810
 811    v9fs_string_sprintf(&v9stat->name, "%s", str);
 812
 813    v9stat->size = 61 +
 814        v9fs_string_size(&v9stat->name) +
 815        v9fs_string_size(&v9stat->uid) +
 816        v9fs_string_size(&v9stat->gid) +
 817        v9fs_string_size(&v9stat->muid) +
 818        v9fs_string_size(&v9stat->extension);
 819    return 0;
 820}
 821
 822#define P9_STATS_MODE          0x00000001ULL
 823#define P9_STATS_NLINK         0x00000002ULL
 824#define P9_STATS_UID           0x00000004ULL
 825#define P9_STATS_GID           0x00000008ULL
 826#define P9_STATS_RDEV          0x00000010ULL
 827#define P9_STATS_ATIME         0x00000020ULL
 828#define P9_STATS_MTIME         0x00000040ULL
 829#define P9_STATS_CTIME         0x00000080ULL
 830#define P9_STATS_INO           0x00000100ULL
 831#define P9_STATS_SIZE          0x00000200ULL
 832#define P9_STATS_BLOCKS        0x00000400ULL
 833
 834#define P9_STATS_BTIME         0x00000800ULL
 835#define P9_STATS_GEN           0x00001000ULL
 836#define P9_STATS_DATA_VERSION  0x00002000ULL
 837
 838#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
 839#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
 840
 841
 842static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
 843                                V9fsStatDotl *v9lstat)
 844{
 845    memset(v9lstat, 0, sizeof(*v9lstat));
 846
 847    v9lstat->st_mode = stbuf->st_mode;
 848    v9lstat->st_nlink = stbuf->st_nlink;
 849    v9lstat->st_uid = stbuf->st_uid;
 850    v9lstat->st_gid = stbuf->st_gid;
 851    v9lstat->st_rdev = stbuf->st_rdev;
 852    v9lstat->st_size = stbuf->st_size;
 853    v9lstat->st_blksize = stbuf->st_blksize;
 854    v9lstat->st_blocks = stbuf->st_blocks;
 855    v9lstat->st_atime_sec = stbuf->st_atime;
 856    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
 857    v9lstat->st_mtime_sec = stbuf->st_mtime;
 858    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 859    v9lstat->st_ctime_sec = stbuf->st_ctime;
 860    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
 861    /* Currently we only support BASIC fields in stat */
 862    v9lstat->st_result_mask = P9_STATS_BASIC;
 863
 864    stat_to_qid(stbuf, &v9lstat->qid);
 865}
 866
 867static void print_sg(struct iovec *sg, int cnt)
 868{
 869    int i;
 870
 871    printf("sg[%d]: {", cnt);
 872    for (i = 0; i < cnt; i++) {
 873        if (i) {
 874            printf(", ");
 875        }
 876        printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len);
 877    }
 878    printf("}\n");
 879}
 880
 881/* Will call this only for path name based fid */
 882static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
 883{
 884    V9fsPath str;
 885    v9fs_path_init(&str);
 886    v9fs_path_copy(&str, dst);
 887    v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len);
 888    v9fs_path_free(&str);
 889    /* +1 to include terminating NULL */
 890    dst->size++;
 891}
 892
 893static inline bool is_ro_export(FsContext *ctx)
 894{
 895    return ctx->export_flags & V9FS_RDONLY;
 896}
 897
 898static void v9fs_version(void *opaque)
 899{
 900    ssize_t err;
 901    V9fsPDU *pdu = opaque;
 902    V9fsState *s = pdu->s;
 903    V9fsString version;
 904    size_t offset = 7;
 905
 906    v9fs_string_init(&version);
 907    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
 908    if (err < 0) {
 909        offset = err;
 910        goto out;
 911    }
 912    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
 913
 914    virtfs_reset(pdu);
 915
 916    if (!strcmp(version.data, "9P2000.u")) {
 917        s->proto_version = V9FS_PROTO_2000U;
 918    } else if (!strcmp(version.data, "9P2000.L")) {
 919        s->proto_version = V9FS_PROTO_2000L;
 920    } else {
 921        v9fs_string_sprintf(&version, "unknown");
 922    }
 923
 924    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
 925    if (err < 0) {
 926        offset = err;
 927        goto out;
 928    }
 929    offset += err;
 930    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
 931out:
 932    complete_pdu(s, pdu, offset);
 933    v9fs_string_free(&version);
 934}
 935
 936static void v9fs_attach(void *opaque)
 937{
 938    V9fsPDU *pdu = opaque;
 939    V9fsState *s = pdu->s;
 940    int32_t fid, afid, n_uname;
 941    V9fsString uname, aname;
 942    V9fsFidState *fidp;
 943    size_t offset = 7;
 944    V9fsQID qid;
 945    ssize_t err;
 946
 947    v9fs_string_init(&uname);
 948    v9fs_string_init(&aname);
 949    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
 950                        &afid, &uname, &aname, &n_uname);
 951    if (err < 0) {
 952        goto out_nofid;
 953    }
 954    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
 955
 956    fidp = alloc_fid(s, fid);
 957    if (fidp == NULL) {
 958        err = -EINVAL;
 959        goto out_nofid;
 960    }
 961    fidp->uid = n_uname;
 962    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
 963    if (err < 0) {
 964        err = -EINVAL;
 965        clunk_fid(s, fid);
 966        goto out;
 967    }
 968    err = fid_to_qid(pdu, fidp, &qid);
 969    if (err < 0) {
 970        err = -EINVAL;
 971        clunk_fid(s, fid);
 972        goto out;
 973    }
 974    err = pdu_marshal(pdu, offset, "Q", &qid);
 975    if (err < 0) {
 976        clunk_fid(s, fid);
 977        goto out;
 978    }
 979    err += offset;
 980    trace_v9fs_attach_return(pdu->tag, pdu->id,
 981                             qid.type, qid.version, qid.path);
 982    /*
 983     * disable migration if we haven't done already.
 984     * attach could get called multiple times for the same export.
 985     */
 986    if (!s->migration_blocker) {
 987        s->root_fid = fid;
 988        error_setg(&s->migration_blocker,
 989                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
 990                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
 991        migrate_add_blocker(s->migration_blocker);
 992    }
 993out:
 994    put_fid(pdu, fidp);
 995out_nofid:
 996    complete_pdu(s, pdu, err);
 997    v9fs_string_free(&uname);
 998    v9fs_string_free(&aname);
 999}
1000
1001static void v9fs_stat(void *opaque)
1002{
1003    int32_t fid;
1004    V9fsStat v9stat;
1005    ssize_t err = 0;
1006    size_t offset = 7;
1007    struct stat stbuf;
1008    V9fsFidState *fidp;
1009    V9fsPDU *pdu = opaque;
1010    V9fsState *s = pdu->s;
1011
1012    err = pdu_unmarshal(pdu, offset, "d", &fid);
1013    if (err < 0) {
1014        goto out_nofid;
1015    }
1016    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1017
1018    fidp = get_fid(pdu, fid);
1019    if (fidp == NULL) {
1020        err = -ENOENT;
1021        goto out_nofid;
1022    }
1023    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1024    if (err < 0) {
1025        goto out;
1026    }
1027    err = stat_to_v9stat(pdu, &fidp->path, &stbuf, &v9stat);
1028    if (err < 0) {
1029        goto out;
1030    }
1031    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1032    if (err < 0) {
1033        v9fs_stat_free(&v9stat);
1034        goto out;
1035    }
1036    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1037                           v9stat.atime, v9stat.mtime, v9stat.length);
1038    err += offset;
1039    v9fs_stat_free(&v9stat);
1040out:
1041    put_fid(pdu, fidp);
1042out_nofid:
1043    complete_pdu(s, pdu, err);
1044}
1045
1046static void v9fs_getattr(void *opaque)
1047{
1048    int32_t fid;
1049    size_t offset = 7;
1050    ssize_t retval = 0;
1051    struct stat stbuf;
1052    V9fsFidState *fidp;
1053    uint64_t request_mask;
1054    V9fsStatDotl v9stat_dotl;
1055    V9fsPDU *pdu = opaque;
1056    V9fsState *s = pdu->s;
1057
1058    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1059    if (retval < 0) {
1060        goto out_nofid;
1061    }
1062    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1063
1064    fidp = get_fid(pdu, fid);
1065    if (fidp == NULL) {
1066        retval = -ENOENT;
1067        goto out_nofid;
1068    }
1069    /*
1070     * Currently we only support BASIC fields in stat, so there is no
1071     * need to look at request_mask.
1072     */
1073    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1074    if (retval < 0) {
1075        goto out;
1076    }
1077    stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
1078
1079    /*  fill st_gen if requested and supported by underlying fs */
1080    if (request_mask & P9_STATS_GEN) {
1081        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1082        switch (retval) {
1083        case 0:
1084            /* we have valid st_gen: update result mask */
1085            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1086            break;
1087        case -EINTR:
1088            /* request cancelled, e.g. by Tflush */
1089            goto out;
1090        default:
1091            /* failed to get st_gen: not fatal, ignore */
1092            break;
1093        }
1094    }
1095    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1096    if (retval < 0) {
1097        goto out;
1098    }
1099    retval += offset;
1100    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1101                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1102                              v9stat_dotl.st_gid);
1103out:
1104    put_fid(pdu, fidp);
1105out_nofid:
1106    complete_pdu(s, pdu, retval);
1107}
1108
/* Attribute flags: bits tested against V9fsIattr.valid in v9fs_setattr() */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* The seven low flag bits, MODE through CTIME (numerically 127) */
#define P9_ATTR_MASK    (P9_ATTR_MODE | P9_ATTR_UID | P9_ATTR_GID | \
                         P9_ATTR_SIZE | P9_ATTR_ATIME | P9_ATTR_MTIME | \
                         P9_ATTR_CTIME)
1121
1122static void v9fs_setattr(void *opaque)
1123{
1124    int err = 0;
1125    int32_t fid;
1126    V9fsFidState *fidp;
1127    size_t offset = 7;
1128    V9fsIattr v9iattr;
1129    V9fsPDU *pdu = opaque;
1130    V9fsState *s = pdu->s;
1131
1132    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1133    if (err < 0) {
1134        goto out_nofid;
1135    }
1136
1137    fidp = get_fid(pdu, fid);
1138    if (fidp == NULL) {
1139        err = -EINVAL;
1140        goto out_nofid;
1141    }
1142    if (v9iattr.valid & P9_ATTR_MODE) {
1143        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1144        if (err < 0) {
1145            goto out;
1146        }
1147    }
1148    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1149        struct timespec times[2];
1150        if (v9iattr.valid & P9_ATTR_ATIME) {
1151            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1152                times[0].tv_sec = v9iattr.atime_sec;
1153                times[0].tv_nsec = v9iattr.atime_nsec;
1154            } else {
1155                times[0].tv_nsec = UTIME_NOW;
1156            }
1157        } else {
1158            times[0].tv_nsec = UTIME_OMIT;
1159        }
1160        if (v9iattr.valid & P9_ATTR_MTIME) {
1161            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1162                times[1].tv_sec = v9iattr.mtime_sec;
1163                times[1].tv_nsec = v9iattr.mtime_nsec;
1164            } else {
1165                times[1].tv_nsec = UTIME_NOW;
1166            }
1167        } else {
1168            times[1].tv_nsec = UTIME_OMIT;
1169        }
1170        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1171        if (err < 0) {
1172            goto out;
1173        }
1174    }
1175    /*
1176     * If the only valid entry in iattr is ctime we can call
1177     * chown(-1,-1) to update the ctime of the file
1178     */
1179    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1180        ((v9iattr.valid & P9_ATTR_CTIME)
1181         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1182        if (!(v9iattr.valid & P9_ATTR_UID)) {
1183            v9iattr.uid = -1;
1184        }
1185        if (!(v9iattr.valid & P9_ATTR_GID)) {
1186            v9iattr.gid = -1;
1187        }
1188        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1189                            v9iattr.gid);
1190        if (err < 0) {
1191            goto out;
1192        }
1193    }
1194    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1195        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1196        if (err < 0) {
1197            goto out;
1198        }
1199    }
1200    err = offset;
1201out:
1202    put_fid(pdu, fidp);
1203out_nofid:
1204    complete_pdu(s, pdu, err);
1205}
1206
1207static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1208{
1209    int i;
1210    ssize_t err;
1211    size_t offset = 7;
1212
1213    err = pdu_marshal(pdu, offset, "w", nwnames);
1214    if (err < 0) {
1215        return err;
1216    }
1217    offset += err;
1218    for (i = 0; i < nwnames; i++) {
1219        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1220        if (err < 0) {
1221            return err;
1222        }
1223        offset += err;
1224    }
1225    return offset;
1226}
1227
1228static void v9fs_walk(void *opaque)
1229{
1230    int name_idx;
1231    V9fsQID *qids = NULL;
1232    int i, err = 0;
1233    V9fsPath dpath, path;
1234    uint16_t nwnames;
1235    struct stat stbuf;
1236    size_t offset = 7;
1237    int32_t fid, newfid;
1238    V9fsString *wnames = NULL;
1239    V9fsFidState *fidp;
1240    V9fsFidState *newfidp = NULL;
1241    V9fsPDU *pdu = opaque;
1242    V9fsState *s = pdu->s;
1243
1244    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1245    if (err < 0) {
1246        complete_pdu(s, pdu, err);
1247        return ;
1248    }
1249    offset += err;
1250
1251    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1252
1253    if (nwnames && nwnames <= P9_MAXWELEM) {
1254        wnames = g_malloc0(sizeof(wnames[0]) * nwnames);
1255        qids   = g_malloc0(sizeof(qids[0]) * nwnames);
1256        for (i = 0; i < nwnames; i++) {
1257            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1258            if (err < 0) {
1259                goto out_nofid;
1260            }
1261            offset += err;
1262        }
1263    } else if (nwnames > P9_MAXWELEM) {
1264        err = -EINVAL;
1265        goto out_nofid;
1266    }
1267    fidp = get_fid(pdu, fid);
1268    if (fidp == NULL) {
1269        err = -ENOENT;
1270        goto out_nofid;
1271    }
1272    v9fs_path_init(&dpath);
1273    v9fs_path_init(&path);
1274    /*
1275     * Both dpath and path initially poin to fidp.
1276     * Needed to handle request with nwnames == 0
1277     */
1278    v9fs_path_copy(&dpath, &fidp->path);
1279    v9fs_path_copy(&path, &fidp->path);
1280    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1281        err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
1282        if (err < 0) {
1283            goto out;
1284        }
1285        err = v9fs_co_lstat(pdu, &path, &stbuf);
1286        if (err < 0) {
1287            goto out;
1288        }
1289        stat_to_qid(&stbuf, &qids[name_idx]);
1290        v9fs_path_copy(&dpath, &path);
1291    }
1292    if (fid == newfid) {
1293        BUG_ON(fidp->fid_type != P9_FID_NONE);
1294        v9fs_path_copy(&fidp->path, &path);
1295    } else {
1296        newfidp = alloc_fid(s, newfid);
1297        if (newfidp == NULL) {
1298            err = -EINVAL;
1299            goto out;
1300        }
1301        newfidp->uid = fidp->uid;
1302        v9fs_path_copy(&newfidp->path, &path);
1303    }
1304    err = v9fs_walk_marshal(pdu, nwnames, qids);
1305    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1306out:
1307    put_fid(pdu, fidp);
1308    if (newfidp) {
1309        put_fid(pdu, newfidp);
1310    }
1311    v9fs_path_free(&dpath);
1312    v9fs_path_free(&path);
1313out_nofid:
1314    complete_pdu(s, pdu, err);
1315    if (nwnames && nwnames <= P9_MAXWELEM) {
1316        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1317            v9fs_string_free(&wnames[name_idx]);
1318        }
1319        g_free(wnames);
1320        g_free(qids);
1321    }
1322}
1323
1324static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
1325{
1326    struct statfs stbuf;
1327    int32_t iounit = 0;
1328    V9fsState *s = pdu->s;
1329
1330    /*
1331     * iounit should be multiples of f_bsize (host filesystem block size
1332     * and as well as less than (client msize - P9_IOHDRSZ))
1333     */
1334    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1335        iounit = stbuf.f_bsize;
1336        iounit *= (s->msize - P9_IOHDRSZ)/stbuf.f_bsize;
1337    }
1338    if (!iounit) {
1339        iounit = s->msize - P9_IOHDRSZ;
1340    }
1341    return iounit;
1342}
1343
1344static void v9fs_open(void *opaque)
1345{
1346    int flags;
1347    int32_t fid;
1348    int32_t mode;
1349    V9fsQID qid;
1350    int iounit = 0;
1351    ssize_t err = 0;
1352    size_t offset = 7;
1353    struct stat stbuf;
1354    V9fsFidState *fidp;
1355    V9fsPDU *pdu = opaque;
1356    V9fsState *s = pdu->s;
1357
1358    if (s->proto_version == V9FS_PROTO_2000L) {
1359        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1360    } else {
1361        uint8_t modebyte;
1362        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1363        mode = modebyte;
1364    }
1365    if (err < 0) {
1366        goto out_nofid;
1367    }
1368    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1369
1370    fidp = get_fid(pdu, fid);
1371    if (fidp == NULL) {
1372        err = -ENOENT;
1373        goto out_nofid;
1374    }
1375    BUG_ON(fidp->fid_type != P9_FID_NONE);
1376
1377    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1378    if (err < 0) {
1379        goto out;
1380    }
1381    stat_to_qid(&stbuf, &qid);
1382    if (S_ISDIR(stbuf.st_mode)) {
1383        err = v9fs_co_opendir(pdu, fidp);
1384        if (err < 0) {
1385            goto out;
1386        }
1387        fidp->fid_type = P9_FID_DIR;
1388        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1389        if (err < 0) {
1390            goto out;
1391        }
1392        err += offset;
1393    } else {
1394        if (s->proto_version == V9FS_PROTO_2000L) {
1395            flags = get_dotl_openflags(s, mode);
1396        } else {
1397            flags = omode_to_uflags(mode);
1398        }
1399        if (is_ro_export(&s->ctx)) {
1400            if (mode & O_WRONLY || mode & O_RDWR ||
1401                mode & O_APPEND || mode & O_TRUNC) {
1402                err = -EROFS;
1403                goto out;
1404            }
1405        }
1406        err = v9fs_co_open(pdu, fidp, flags);
1407        if (err < 0) {
1408            goto out;
1409        }
1410        fidp->fid_type = P9_FID_FILE;
1411        fidp->open_flags = flags;
1412        if (flags & O_EXCL) {
1413            /*
1414             * We let the host file system do O_EXCL check
1415             * We should not reclaim such fd
1416             */
1417            fidp->flags |= FID_NON_RECLAIMABLE;
1418        }
1419        iounit = get_iounit(pdu, &fidp->path);
1420        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1421        if (err < 0) {
1422            goto out;
1423        }
1424        err += offset;
1425    }
1426    trace_v9fs_open_return(pdu->tag, pdu->id,
1427                           qid.type, qid.version, qid.path, iounit);
1428out:
1429    put_fid(pdu, fidp);
1430out_nofid:
1431    complete_pdu(s, pdu, err);
1432}
1433
1434static void v9fs_lcreate(void *opaque)
1435{
1436    int32_t dfid, flags, mode;
1437    gid_t gid;
1438    ssize_t err = 0;
1439    ssize_t offset = 7;
1440    V9fsString name;
1441    V9fsFidState *fidp;
1442    struct stat stbuf;
1443    V9fsQID qid;
1444    int32_t iounit;
1445    V9fsPDU *pdu = opaque;
1446
1447    v9fs_string_init(&name);
1448    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1449                        &name, &flags, &mode, &gid);
1450    if (err < 0) {
1451        goto out_nofid;
1452    }
1453    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1454
1455    fidp = get_fid(pdu, dfid);
1456    if (fidp == NULL) {
1457        err = -ENOENT;
1458        goto out_nofid;
1459    }
1460
1461    flags = get_dotl_openflags(pdu->s, flags);
1462    err = v9fs_co_open2(pdu, fidp, &name, gid,
1463                        flags | O_CREAT, mode, &stbuf);
1464    if (err < 0) {
1465        goto out;
1466    }
1467    fidp->fid_type = P9_FID_FILE;
1468    fidp->open_flags = flags;
1469    if (flags & O_EXCL) {
1470        /*
1471         * We let the host file system do O_EXCL check
1472         * We should not reclaim such fd
1473         */
1474        fidp->flags |= FID_NON_RECLAIMABLE;
1475    }
1476    iounit =  get_iounit(pdu, &fidp->path);
1477    stat_to_qid(&stbuf, &qid);
1478    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1479    if (err < 0) {
1480        goto out;
1481    }
1482    err += offset;
1483    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
1484                              qid.type, qid.version, qid.path, iounit);
1485out:
1486    put_fid(pdu, fidp);
1487out_nofid:
1488    complete_pdu(pdu->s, pdu, err);
1489    v9fs_string_free(&name);
1490}
1491
1492static void v9fs_fsync(void *opaque)
1493{
1494    int err;
1495    int32_t fid;
1496    int datasync;
1497    size_t offset = 7;
1498    V9fsFidState *fidp;
1499    V9fsPDU *pdu = opaque;
1500    V9fsState *s = pdu->s;
1501
1502    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
1503    if (err < 0) {
1504        goto out_nofid;
1505    }
1506    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
1507
1508    fidp = get_fid(pdu, fid);
1509    if (fidp == NULL) {
1510        err = -ENOENT;
1511        goto out_nofid;
1512    }
1513    err = v9fs_co_fsync(pdu, fidp, datasync);
1514    if (!err) {
1515        err = offset;
1516    }
1517    put_fid(pdu, fidp);
1518out_nofid:
1519    complete_pdu(s, pdu, err);
1520}
1521
1522static void v9fs_clunk(void *opaque)
1523{
1524    int err;
1525    int32_t fid;
1526    size_t offset = 7;
1527    V9fsFidState *fidp;
1528    V9fsPDU *pdu = opaque;
1529    V9fsState *s = pdu->s;
1530
1531    err = pdu_unmarshal(pdu, offset, "d", &fid);
1532    if (err < 0) {
1533        goto out_nofid;
1534    }
1535    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
1536
1537    fidp = clunk_fid(s, fid);
1538    if (fidp == NULL) {
1539        err = -ENOENT;
1540        goto out_nofid;
1541    }
1542    /*
1543     * Bump the ref so that put_fid will
1544     * free the fid.
1545     */
1546    fidp->ref++;
1547    err = put_fid(pdu, fidp);
1548    if (!err) {
1549        err = offset;
1550    }
1551out_nofid:
1552    complete_pdu(s, pdu, err);
1553}
1554
1555static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1556                           uint64_t off, uint32_t max_count)
1557{
1558    ssize_t err;
1559    size_t offset = 7;
1560    int read_count;
1561    int64_t xattr_len;
1562
1563    xattr_len = fidp->fs.xattr.len;
1564    read_count = xattr_len - off;
1565    if (read_count > max_count) {
1566        read_count = max_count;
1567    } else if (read_count < 0) {
1568        /*
1569         * read beyond XATTR value
1570         */
1571        read_count = 0;
1572    }
1573    err = pdu_marshal(pdu, offset, "d", read_count);
1574    if (err < 0) {
1575        return err;
1576    }
1577    offset += err;
1578    err = v9fs_pack(pdu->elem.in_sg, pdu->elem.in_num, offset,
1579                    ((char *)fidp->fs.xattr.value) + off,
1580                    read_count);
1581    if (err < 0) {
1582        return err;
1583    }
1584    offset += err;
1585    return offset;
1586}
1587
1588static int v9fs_do_readdir_with_stat(V9fsPDU *pdu,
1589                                     V9fsFidState *fidp, uint32_t max_count)
1590{
1591    V9fsPath path;
1592    V9fsStat v9stat;
1593    int len, err = 0;
1594    int32_t count = 0;
1595    struct stat stbuf;
1596    off_t saved_dir_pos;
1597    struct dirent *dent, *result;
1598
1599    /* save the directory position */
1600    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1601    if (saved_dir_pos < 0) {
1602        return saved_dir_pos;
1603    }
1604
1605    dent = g_malloc(sizeof(struct dirent));
1606
1607    while (1) {
1608        v9fs_path_init(&path);
1609        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1610        if (err || !result) {
1611            break;
1612        }
1613        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
1614        if (err < 0) {
1615            goto out;
1616        }
1617        err = v9fs_co_lstat(pdu, &path, &stbuf);
1618        if (err < 0) {
1619            goto out;
1620        }
1621        err = stat_to_v9stat(pdu, &path, &stbuf, &v9stat);
1622        if (err < 0) {
1623            goto out;
1624        }
1625        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1626        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
1627        if ((len != (v9stat.size + 2)) || ((count + len) > max_count)) {
1628            /* Ran out of buffer. Set dir back to old position and return */
1629            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1630            v9fs_stat_free(&v9stat);
1631            v9fs_path_free(&path);
1632            g_free(dent);
1633            return count;
1634        }
1635        count += len;
1636        v9fs_stat_free(&v9stat);
1637        v9fs_path_free(&path);
1638        saved_dir_pos = dent->d_off;
1639    }
1640out:
1641    g_free(dent);
1642    v9fs_path_free(&path);
1643    if (err < 0) {
1644        return err;
1645    }
1646    return count;
1647}
1648
1649/*
1650 * Create a QEMUIOVector for a sub-region of PDU iovecs
1651 *
1652 * @qiov:       uninitialized QEMUIOVector
1653 * @skip:       number of bytes to skip from beginning of PDU
1654 * @size:       number of bytes to include
1655 * @is_write:   true - write, false - read
1656 *
1657 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
1658 * with qemu_iovec_destroy().
1659 */
1660static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
1661                                    size_t skip, size_t size,
1662                                    bool is_write)
1663{
1664    QEMUIOVector elem;
1665    struct iovec *iov;
1666    unsigned int niov;
1667
1668    if (is_write) {
1669        iov = pdu->elem.out_sg;
1670        niov = pdu->elem.out_num;
1671    } else {
1672        iov = pdu->elem.in_sg;
1673        niov = pdu->elem.in_num;
1674    }
1675
1676    qemu_iovec_init_external(&elem, iov, niov);
1677    qemu_iovec_init(qiov, niov);
1678    qemu_iovec_concat(qiov, &elem, skip, size);
1679}
1680
1681static void v9fs_read(void *opaque)
1682{
1683    int32_t fid;
1684    uint64_t off;
1685    ssize_t err = 0;
1686    int32_t count = 0;
1687    size_t offset = 7;
1688    uint32_t max_count;
1689    V9fsFidState *fidp;
1690    V9fsPDU *pdu = opaque;
1691    V9fsState *s = pdu->s;
1692
1693    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
1694    if (err < 0) {
1695        goto out_nofid;
1696    }
1697    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
1698
1699    fidp = get_fid(pdu, fid);
1700    if (fidp == NULL) {
1701        err = -EINVAL;
1702        goto out_nofid;
1703    }
1704    if (fidp->fid_type == P9_FID_DIR) {
1705
1706        if (off == 0) {
1707            v9fs_co_rewinddir(pdu, fidp);
1708        }
1709        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
1710        if (count < 0) {
1711            err = count;
1712            goto out;
1713        }
1714        err = pdu_marshal(pdu, offset, "d", count);
1715        if (err < 0) {
1716            goto out;
1717        }
1718        err += offset + count;
1719    } else if (fidp->fid_type == P9_FID_FILE) {
1720        QEMUIOVector qiov_full;
1721        QEMUIOVector qiov;
1722        int32_t len;
1723
1724        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
1725        qemu_iovec_init(&qiov, qiov_full.niov);
1726        do {
1727            qemu_iovec_reset(&qiov);
1728            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
1729            if (0) {
1730                print_sg(qiov.iov, qiov.niov);
1731            }
1732            /* Loop in case of EINTR */
1733            do {
1734                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
1735                if (len >= 0) {
1736                    off   += len;
1737                    count += len;
1738                }
1739            } while (len == -EINTR && !pdu->cancelled);
1740            if (len < 0) {
1741                /* IO error return the error */
1742                err = len;
1743                goto out;
1744            }
1745        } while (count < max_count && len > 0);
1746        err = pdu_marshal(pdu, offset, "d", count);
1747        if (err < 0) {
1748            goto out;
1749        }
1750        err += offset + count;
1751        qemu_iovec_destroy(&qiov);
1752        qemu_iovec_destroy(&qiov_full);
1753    } else if (fidp->fid_type == P9_FID_XATTR) {
1754        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
1755    } else {
1756        err = -EINVAL;
1757    }
1758    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
1759out:
1760    put_fid(pdu, fidp);
1761out_nofid:
1762    complete_pdu(s, pdu, err);
1763}
1764
1765static size_t v9fs_readdir_data_size(V9fsString *name)
1766{
1767    /*
1768     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
1769     * size of type (1) + size of name.size (2) + strlen(name.data)
1770     */
1771    return 24 + v9fs_string_size(name);
1772}
1773
1774static int v9fs_do_readdir(V9fsPDU *pdu,
1775                           V9fsFidState *fidp, int32_t max_count)
1776{
1777    size_t size;
1778    V9fsQID qid;
1779    V9fsString name;
1780    int len, err = 0;
1781    int32_t count = 0;
1782    off_t saved_dir_pos;
1783    struct dirent *dent, *result;
1784
1785    /* save the directory position */
1786    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1787    if (saved_dir_pos < 0) {
1788        return saved_dir_pos;
1789    }
1790
1791    dent = g_malloc(sizeof(struct dirent));
1792
1793    while (1) {
1794        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1795        if (err || !result) {
1796            break;
1797        }
1798        v9fs_string_init(&name);
1799        v9fs_string_sprintf(&name, "%s", dent->d_name);
1800        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
1801            /* Ran out of buffer. Set dir back to old position and return */
1802            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1803            v9fs_string_free(&name);
1804            g_free(dent);
1805            return count;
1806        }
1807        /*
1808         * Fill up just the path field of qid because the client uses
1809         * only that. To fill the entire qid structure we will have
1810         * to stat each dirent found, which is expensive
1811         */
1812        size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
1813        memcpy(&qid.path, &dent->d_ino, size);
1814        /* Fill the other fields with dummy values */
1815        qid.type = 0;
1816        qid.version = 0;
1817
1818        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1819        len = pdu_marshal(pdu, 11 + count, "Qqbs",
1820                          &qid, dent->d_off,
1821                          dent->d_type, &name);
1822        if (len < 0) {
1823            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1824            v9fs_string_free(&name);
1825            g_free(dent);
1826            return len;
1827        }
1828        count += len;
1829        v9fs_string_free(&name);
1830        saved_dir_pos = dent->d_off;
1831    }
1832    g_free(dent);
1833    if (err < 0) {
1834        return err;
1835    }
1836    return count;
1837}
1838
1839static void v9fs_readdir(void *opaque)
1840{
1841    int32_t fid;
1842    V9fsFidState *fidp;
1843    ssize_t retval = 0;
1844    size_t offset = 7;
1845    uint64_t initial_offset;
1846    int32_t count;
1847    uint32_t max_count;
1848    V9fsPDU *pdu = opaque;
1849    V9fsState *s = pdu->s;
1850
1851    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
1852                           &initial_offset, &max_count);
1853    if (retval < 0) {
1854        goto out_nofid;
1855    }
1856    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
1857
1858    fidp = get_fid(pdu, fid);
1859    if (fidp == NULL) {
1860        retval = -EINVAL;
1861        goto out_nofid;
1862    }
1863    if (!fidp->fs.dir) {
1864        retval = -EINVAL;
1865        goto out;
1866    }
1867    if (initial_offset == 0) {
1868        v9fs_co_rewinddir(pdu, fidp);
1869    } else {
1870        v9fs_co_seekdir(pdu, fidp, initial_offset);
1871    }
1872    count = v9fs_do_readdir(pdu, fidp, max_count);
1873    if (count < 0) {
1874        retval = count;
1875        goto out;
1876    }
1877    retval = pdu_marshal(pdu, offset, "d", count);
1878    if (retval < 0) {
1879        goto out;
1880    }
1881    retval += count + offset;
1882    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
1883out:
1884    put_fid(pdu, fidp);
1885out_nofid:
1886    complete_pdu(s, pdu, retval);
1887}
1888
1889static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1890                            uint64_t off, uint32_t count,
1891                            struct iovec *sg, int cnt)
1892{
1893    int i, to_copy;
1894    ssize_t err = 0;
1895    int write_count;
1896    int64_t xattr_len;
1897    size_t offset = 7;
1898
1899
1900    xattr_len = fidp->fs.xattr.len;
1901    write_count = xattr_len - off;
1902    if (write_count > count) {
1903        write_count = count;
1904    } else if (write_count < 0) {
1905        /*
1906         * write beyond XATTR value len specified in
1907         * xattrcreate
1908         */
1909        err = -ENOSPC;
1910        goto out;
1911    }
1912    err = pdu_marshal(pdu, offset, "d", write_count);
1913    if (err < 0) {
1914        return err;
1915    }
1916    err += offset;
1917    fidp->fs.xattr.copied_len += write_count;
1918    /*
1919     * Now copy the content from sg list
1920     */
1921    for (i = 0; i < cnt; i++) {
1922        if (write_count > sg[i].iov_len) {
1923            to_copy = sg[i].iov_len;
1924        } else {
1925            to_copy = write_count;
1926        }
1927        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
1928        /* updating vs->off since we are not using below */
1929        off += to_copy;
1930        write_count -= to_copy;
1931    }
1932out:
1933    return err;
1934}
1935
1936static void v9fs_write(void *opaque)
1937{
1938    ssize_t err;
1939    int32_t fid;
1940    uint64_t off;
1941    uint32_t count;
1942    int32_t len = 0;
1943    int32_t total = 0;
1944    size_t offset = 7;
1945    V9fsFidState *fidp;
1946    V9fsPDU *pdu = opaque;
1947    V9fsState *s = pdu->s;
1948    QEMUIOVector qiov_full;
1949    QEMUIOVector qiov;
1950
1951    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
1952    if (err < 0) {
1953        complete_pdu(s, pdu, err);
1954        return;
1955    }
1956    offset += err;
1957    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
1958    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
1959
1960    fidp = get_fid(pdu, fid);
1961    if (fidp == NULL) {
1962        err = -EINVAL;
1963        goto out_nofid;
1964    }
1965    if (fidp->fid_type == P9_FID_FILE) {
1966        if (fidp->fs.fd == -1) {
1967            err = -EINVAL;
1968            goto out;
1969        }
1970    } else if (fidp->fid_type == P9_FID_XATTR) {
1971        /*
1972         * setxattr operation
1973         */
1974        err = v9fs_xattr_write(s, pdu, fidp, off, count,
1975                               qiov_full.iov, qiov_full.niov);
1976        goto out;
1977    } else {
1978        err = -EINVAL;
1979        goto out;
1980    }
1981    qemu_iovec_init(&qiov, qiov_full.niov);
1982    do {
1983        qemu_iovec_reset(&qiov);
1984        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
1985        if (0) {
1986            print_sg(qiov.iov, qiov.niov);
1987        }
1988        /* Loop in case of EINTR */
1989        do {
1990            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
1991            if (len >= 0) {
1992                off   += len;
1993                total += len;
1994            }
1995        } while (len == -EINTR && !pdu->cancelled);
1996        if (len < 0) {
1997            /* IO error return the error */
1998            err = len;
1999            goto out_qiov;
2000        }
2001    } while (total < count && len > 0);
2002
2003    offset = 7;
2004    err = pdu_marshal(pdu, offset, "d", total);
2005    if (err < 0) {
2006        goto out;
2007    }
2008    err += offset;
2009    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2010out_qiov:
2011    qemu_iovec_destroy(&qiov);
2012out:
2013    put_fid(pdu, fidp);
2014out_nofid:
2015    qemu_iovec_destroy(&qiov_full);
2016    complete_pdu(s, pdu, err);
2017}
2018
2019static void v9fs_create(void *opaque)
2020{
2021    int32_t fid;
2022    int err = 0;
2023    size_t offset = 7;
2024    V9fsFidState *fidp;
2025    V9fsQID qid;
2026    int32_t perm;
2027    int8_t mode;
2028    V9fsPath path;
2029    struct stat stbuf;
2030    V9fsString name;
2031    V9fsString extension;
2032    int iounit;
2033    V9fsPDU *pdu = opaque;
2034
2035    v9fs_path_init(&path);
2036    v9fs_string_init(&name);
2037    v9fs_string_init(&extension);
2038    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2039                        &perm, &mode, &extension);
2040    if (err < 0) {
2041        goto out_nofid;
2042    }
2043    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2044
2045    fidp = get_fid(pdu, fid);
2046    if (fidp == NULL) {
2047        err = -EINVAL;
2048        goto out_nofid;
2049    }
2050    if (perm & P9_STAT_MODE_DIR) {
2051        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2052                            fidp->uid, -1, &stbuf);
2053        if (err < 0) {
2054            goto out;
2055        }
2056        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2057        if (err < 0) {
2058            goto out;
2059        }
2060        v9fs_path_copy(&fidp->path, &path);
2061        err = v9fs_co_opendir(pdu, fidp);
2062        if (err < 0) {
2063            goto out;
2064        }
2065        fidp->fid_type = P9_FID_DIR;
2066    } else if (perm & P9_STAT_MODE_SYMLINK) {
2067        err = v9fs_co_symlink(pdu, fidp, &name,
2068                              extension.data, -1 , &stbuf);
2069        if (err < 0) {
2070            goto out;
2071        }
2072        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2073        if (err < 0) {
2074            goto out;
2075        }
2076        v9fs_path_copy(&fidp->path, &path);
2077    } else if (perm & P9_STAT_MODE_LINK) {
2078        int32_t ofid = atoi(extension.data);
2079        V9fsFidState *ofidp = get_fid(pdu, ofid);
2080        if (ofidp == NULL) {
2081            err = -EINVAL;
2082            goto out;
2083        }
2084        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2085        put_fid(pdu, ofidp);
2086        if (err < 0) {
2087            goto out;
2088        }
2089        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2090        if (err < 0) {
2091            fidp->fid_type = P9_FID_NONE;
2092            goto out;
2093        }
2094        v9fs_path_copy(&fidp->path, &path);
2095        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2096        if (err < 0) {
2097            fidp->fid_type = P9_FID_NONE;
2098            goto out;
2099        }
2100    } else if (perm & P9_STAT_MODE_DEVICE) {
2101        char ctype;
2102        uint32_t major, minor;
2103        mode_t nmode = 0;
2104
2105        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2106            err = -errno;
2107            goto out;
2108        }
2109
2110        switch (ctype) {
2111        case 'c':
2112            nmode = S_IFCHR;
2113            break;
2114        case 'b':
2115            nmode = S_IFBLK;
2116            break;
2117        default:
2118            err = -EIO;
2119            goto out;
2120        }
2121
2122        nmode |= perm & 0777;
2123        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2124                            makedev(major, minor), nmode, &stbuf);
2125        if (err < 0) {
2126            goto out;
2127        }
2128        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2129        if (err < 0) {
2130            goto out;
2131        }
2132        v9fs_path_copy(&fidp->path, &path);
2133    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2134        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2135                            0, S_IFIFO | (perm & 0777), &stbuf);
2136        if (err < 0) {
2137            goto out;
2138        }
2139        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2140        if (err < 0) {
2141            goto out;
2142        }
2143        v9fs_path_copy(&fidp->path, &path);
2144    } else if (perm & P9_STAT_MODE_SOCKET) {
2145        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2146                            0, S_IFSOCK | (perm & 0777), &stbuf);
2147        if (err < 0) {
2148            goto out;
2149        }
2150        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2151        if (err < 0) {
2152            goto out;
2153        }
2154        v9fs_path_copy(&fidp->path, &path);
2155    } else {
2156        err = v9fs_co_open2(pdu, fidp, &name, -1,
2157                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2158        if (err < 0) {
2159            goto out;
2160        }
2161        fidp->fid_type = P9_FID_FILE;
2162        fidp->open_flags = omode_to_uflags(mode);
2163        if (fidp->open_flags & O_EXCL) {
2164            /*
2165             * We let the host file system do O_EXCL check
2166             * We should not reclaim such fd
2167             */
2168            fidp->flags |= FID_NON_RECLAIMABLE;
2169        }
2170    }
2171    iounit = get_iounit(pdu, &fidp->path);
2172    stat_to_qid(&stbuf, &qid);
2173    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2174    if (err < 0) {
2175        goto out;
2176    }
2177    err += offset;
2178    trace_v9fs_create_return(pdu->tag, pdu->id,
2179                             qid.type, qid.version, qid.path, iounit);
2180out:
2181    put_fid(pdu, fidp);
2182out_nofid:
2183   complete_pdu(pdu->s, pdu, err);
2184   v9fs_string_free(&name);
2185   v9fs_string_free(&extension);
2186   v9fs_path_free(&path);
2187}
2188
2189static void v9fs_symlink(void *opaque)
2190{
2191    V9fsPDU *pdu = opaque;
2192    V9fsString name;
2193    V9fsString symname;
2194    V9fsFidState *dfidp;
2195    V9fsQID qid;
2196    struct stat stbuf;
2197    int32_t dfid;
2198    int err = 0;
2199    gid_t gid;
2200    size_t offset = 7;
2201
2202    v9fs_string_init(&name);
2203    v9fs_string_init(&symname);
2204    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2205    if (err < 0) {
2206        goto out_nofid;
2207    }
2208    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2209
2210    dfidp = get_fid(pdu, dfid);
2211    if (dfidp == NULL) {
2212        err = -EINVAL;
2213        goto out_nofid;
2214    }
2215    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2216    if (err < 0) {
2217        goto out;
2218    }
2219    stat_to_qid(&stbuf, &qid);
2220    err =  pdu_marshal(pdu, offset, "Q", &qid);
2221    if (err < 0) {
2222        goto out;
2223    }
2224    err += offset;
2225    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2226                              qid.type, qid.version, qid.path);
2227out:
2228    put_fid(pdu, dfidp);
2229out_nofid:
2230    complete_pdu(pdu->s, pdu, err);
2231    v9fs_string_free(&name);
2232    v9fs_string_free(&symname);
2233}
2234
2235static void v9fs_flush(void *opaque)
2236{
2237    ssize_t err;
2238    int16_t tag;
2239    size_t offset = 7;
2240    V9fsPDU *cancel_pdu;
2241    V9fsPDU *pdu = opaque;
2242    V9fsState *s = pdu->s;
2243
2244    err = pdu_unmarshal(pdu, offset, "w", &tag);
2245    if (err < 0) {
2246        complete_pdu(s, pdu, err);
2247        return;
2248    }
2249    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2250
2251    QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2252        if (cancel_pdu->tag == tag) {
2253            break;
2254        }
2255    }
2256    if (cancel_pdu) {
2257        cancel_pdu->cancelled = 1;
2258        /*
2259         * Wait for pdu to complete.
2260         */
2261        qemu_co_queue_wait(&cancel_pdu->complete);
2262        cancel_pdu->cancelled = 0;
2263        free_pdu(pdu->s, cancel_pdu);
2264    }
2265    complete_pdu(s, pdu, 7);
2266}
2267
2268static void v9fs_link(void *opaque)
2269{
2270    V9fsPDU *pdu = opaque;
2271    V9fsState *s = pdu->s;
2272    int32_t dfid, oldfid;
2273    V9fsFidState *dfidp, *oldfidp;
2274    V9fsString name;
2275    size_t offset = 7;
2276    int err = 0;
2277
2278    v9fs_string_init(&name);
2279    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2280    if (err < 0) {
2281        goto out_nofid;
2282    }
2283    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2284
2285    dfidp = get_fid(pdu, dfid);
2286    if (dfidp == NULL) {
2287        err = -ENOENT;
2288        goto out_nofid;
2289    }
2290
2291    oldfidp = get_fid(pdu, oldfid);
2292    if (oldfidp == NULL) {
2293        err = -ENOENT;
2294        goto out;
2295    }
2296    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2297    if (!err) {
2298        err = offset;
2299    }
2300out:
2301    put_fid(pdu, dfidp);
2302out_nofid:
2303    v9fs_string_free(&name);
2304    complete_pdu(s, pdu, err);
2305}
2306
2307/* Only works with path name based fid */
2308static void v9fs_remove(void *opaque)
2309{
2310    int32_t fid;
2311    int err = 0;
2312    size_t offset = 7;
2313    V9fsFidState *fidp;
2314    V9fsPDU *pdu = opaque;
2315
2316    err = pdu_unmarshal(pdu, offset, "d", &fid);
2317    if (err < 0) {
2318        goto out_nofid;
2319    }
2320    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2321
2322    fidp = get_fid(pdu, fid);
2323    if (fidp == NULL) {
2324        err = -EINVAL;
2325        goto out_nofid;
2326    }
2327    /* if fs driver is not path based, return EOPNOTSUPP */
2328    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2329        err = -EOPNOTSUPP;
2330        goto out_err;
2331    }
2332    /*
2333     * IF the file is unlinked, we cannot reopen
2334     * the file later. So don't reclaim fd
2335     */
2336    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2337    if (err < 0) {
2338        goto out_err;
2339    }
2340    err = v9fs_co_remove(pdu, &fidp->path);
2341    if (!err) {
2342        err = offset;
2343    }
2344out_err:
2345    /* For TREMOVE we need to clunk the fid even on failed remove */
2346    clunk_fid(pdu->s, fidp->fid);
2347    put_fid(pdu, fidp);
2348out_nofid:
2349    complete_pdu(pdu->s, pdu, err);
2350}
2351
2352static void v9fs_unlinkat(void *opaque)
2353{
2354    int err = 0;
2355    V9fsString name;
2356    int32_t dfid, flags;
2357    size_t offset = 7;
2358    V9fsPath path;
2359    V9fsFidState *dfidp;
2360    V9fsPDU *pdu = opaque;
2361
2362    v9fs_string_init(&name);
2363    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2364    if (err < 0) {
2365        goto out_nofid;
2366    }
2367    dfidp = get_fid(pdu, dfid);
2368    if (dfidp == NULL) {
2369        err = -EINVAL;
2370        goto out_nofid;
2371    }
2372    /*
2373     * IF the file is unlinked, we cannot reopen
2374     * the file later. So don't reclaim fd
2375     */
2376    v9fs_path_init(&path);
2377    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
2378    if (err < 0) {
2379        goto out_err;
2380    }
2381    err = v9fs_mark_fids_unreclaim(pdu, &path);
2382    if (err < 0) {
2383        goto out_err;
2384    }
2385    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags);
2386    if (!err) {
2387        err = offset;
2388    }
2389out_err:
2390    put_fid(pdu, dfidp);
2391    v9fs_path_free(&path);
2392out_nofid:
2393    complete_pdu(pdu->s, pdu, err);
2394    v9fs_string_free(&name);
2395}
2396
2397
2398/* Only works with path name based fid */
2399static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
2400                                int32_t newdirfid, V9fsString *name)
2401{
2402    char *end;
2403    int err = 0;
2404    V9fsPath new_path;
2405    V9fsFidState *tfidp;
2406    V9fsState *s = pdu->s;
2407    V9fsFidState *dirfidp = NULL;
2408    char *old_name, *new_name;
2409
2410    v9fs_path_init(&new_path);
2411    if (newdirfid != -1) {
2412        dirfidp = get_fid(pdu, newdirfid);
2413        if (dirfidp == NULL) {
2414            err = -ENOENT;
2415            goto out_nofid;
2416        }
2417        BUG_ON(dirfidp->fid_type != P9_FID_NONE);
2418        v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
2419    } else {
2420        old_name = fidp->path.data;
2421        end = strrchr(old_name, '/');
2422        if (end) {
2423            end++;
2424        } else {
2425            end = old_name;
2426        }
2427        new_name = g_malloc0(end - old_name + name->size + 1);
2428        strncat(new_name, old_name, end - old_name);
2429        strncat(new_name + (end - old_name), name->data, name->size);
2430        v9fs_co_name_to_path(pdu, NULL, new_name, &new_path);
2431        g_free(new_name);
2432    }
2433    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
2434    if (err < 0) {
2435        goto out;
2436    }
2437    /*
2438     * Fixup fid's pointing to the old name to
2439     * start pointing to the new name
2440     */
2441    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2442        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
2443            /* replace the name */
2444            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
2445        }
2446    }
2447out:
2448    if (dirfidp) {
2449        put_fid(pdu, dirfidp);
2450    }
2451    v9fs_path_free(&new_path);
2452out_nofid:
2453    return err;
2454}
2455
2456/* Only works with path name based fid */
2457static void v9fs_rename(void *opaque)
2458{
2459    int32_t fid;
2460    ssize_t err = 0;
2461    size_t offset = 7;
2462    V9fsString name;
2463    int32_t newdirfid;
2464    V9fsFidState *fidp;
2465    V9fsPDU *pdu = opaque;
2466    V9fsState *s = pdu->s;
2467
2468    v9fs_string_init(&name);
2469    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
2470    if (err < 0) {
2471        goto out_nofid;
2472    }
2473    fidp = get_fid(pdu, fid);
2474    if (fidp == NULL) {
2475        err = -ENOENT;
2476        goto out_nofid;
2477    }
2478    BUG_ON(fidp->fid_type != P9_FID_NONE);
2479    /* if fs driver is not path based, return EOPNOTSUPP */
2480    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2481        err = -EOPNOTSUPP;
2482        goto out;
2483    }
2484    v9fs_path_write_lock(s);
2485    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
2486    v9fs_path_unlock(s);
2487    if (!err) {
2488        err = offset;
2489    }
2490out:
2491    put_fid(pdu, fidp);
2492out_nofid:
2493    complete_pdu(s, pdu, err);
2494    v9fs_string_free(&name);
2495}
2496
2497static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
2498                               V9fsString *old_name, V9fsPath *newdir,
2499                               V9fsString *new_name)
2500{
2501    V9fsFidState *tfidp;
2502    V9fsPath oldpath, newpath;
2503    V9fsState *s = pdu->s;
2504
2505
2506    v9fs_path_init(&oldpath);
2507    v9fs_path_init(&newpath);
2508    v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
2509    v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
2510
2511    /*
2512     * Fixup fid's pointing to the old name to
2513     * start pointing to the new name
2514     */
2515    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2516        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
2517            /* replace the name */
2518            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
2519        }
2520    }
2521    v9fs_path_free(&oldpath);
2522    v9fs_path_free(&newpath);
2523}
2524
2525static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
2526                                  V9fsString *old_name, int32_t newdirfid,
2527                                  V9fsString *new_name)
2528{
2529    int err = 0;
2530    V9fsState *s = pdu->s;
2531    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
2532
2533    olddirfidp = get_fid(pdu, olddirfid);
2534    if (olddirfidp == NULL) {
2535        err = -ENOENT;
2536        goto out;
2537    }
2538    if (newdirfid != -1) {
2539        newdirfidp = get_fid(pdu, newdirfid);
2540        if (newdirfidp == NULL) {
2541            err = -ENOENT;
2542            goto out;
2543        }
2544    } else {
2545        newdirfidp = get_fid(pdu, olddirfid);
2546    }
2547
2548    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
2549                           &newdirfidp->path, new_name);
2550    if (err < 0) {
2551        goto out;
2552    }
2553    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
2554        /* Only for path based fid  we need to do the below fixup */
2555        v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
2556                           &newdirfidp->path, new_name);
2557    }
2558out:
2559    if (olddirfidp) {
2560        put_fid(pdu, olddirfidp);
2561    }
2562    if (newdirfidp) {
2563        put_fid(pdu, newdirfidp);
2564    }
2565    return err;
2566}
2567
2568static void v9fs_renameat(void *opaque)
2569{
2570    ssize_t err = 0;
2571    size_t offset = 7;
2572    V9fsPDU *pdu = opaque;
2573    V9fsState *s = pdu->s;
2574    int32_t olddirfid, newdirfid;
2575    V9fsString old_name, new_name;
2576
2577    v9fs_string_init(&old_name);
2578    v9fs_string_init(&new_name);
2579    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
2580                        &old_name, &newdirfid, &new_name);
2581    if (err < 0) {
2582        goto out_err;
2583    }
2584
2585    v9fs_path_write_lock(s);
2586    err = v9fs_complete_renameat(pdu, olddirfid,
2587                                 &old_name, newdirfid, &new_name);
2588    v9fs_path_unlock(s);
2589    if (!err) {
2590        err = offset;
2591    }
2592
2593out_err:
2594    complete_pdu(s, pdu, err);
2595    v9fs_string_free(&old_name);
2596    v9fs_string_free(&new_name);
2597}
2598
2599static void v9fs_wstat(void *opaque)
2600{
2601    int32_t fid;
2602    int err = 0;
2603    int16_t unused;
2604    V9fsStat v9stat;
2605    size_t offset = 7;
2606    struct stat stbuf;
2607    V9fsFidState *fidp;
2608    V9fsPDU *pdu = opaque;
2609    V9fsState *s = pdu->s;
2610
2611    v9fs_stat_init(&v9stat);
2612    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
2613    if (err < 0) {
2614        goto out_nofid;
2615    }
2616    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
2617                     v9stat.mode, v9stat.atime, v9stat.mtime);
2618
2619    fidp = get_fid(pdu, fid);
2620    if (fidp == NULL) {
2621        err = -EINVAL;
2622        goto out_nofid;
2623    }
2624    /* do we need to sync the file? */
2625    if (donttouch_stat(&v9stat)) {
2626        err = v9fs_co_fsync(pdu, fidp, 0);
2627        goto out;
2628    }
2629    if (v9stat.mode != -1) {
2630        uint32_t v9_mode;
2631        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2632        if (err < 0) {
2633            goto out;
2634        }
2635        v9_mode = stat_to_v9mode(&stbuf);
2636        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
2637            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
2638            /* Attempting to change the type */
2639            err = -EIO;
2640            goto out;
2641        }
2642        err = v9fs_co_chmod(pdu, &fidp->path,
2643                            v9mode_to_mode(v9stat.mode,
2644                                           &v9stat.extension));
2645        if (err < 0) {
2646            goto out;
2647        }
2648    }
2649    if (v9stat.mtime != -1 || v9stat.atime != -1) {
2650        struct timespec times[2];
2651        if (v9stat.atime != -1) {
2652            times[0].tv_sec = v9stat.atime;
2653            times[0].tv_nsec = 0;
2654        } else {
2655            times[0].tv_nsec = UTIME_OMIT;
2656        }
2657        if (v9stat.mtime != -1) {
2658            times[1].tv_sec = v9stat.mtime;
2659            times[1].tv_nsec = 0;
2660        } else {
2661            times[1].tv_nsec = UTIME_OMIT;
2662        }
2663        err = v9fs_co_utimensat(pdu, &fidp->path, times);
2664        if (err < 0) {
2665            goto out;
2666        }
2667    }
2668    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
2669        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
2670        if (err < 0) {
2671            goto out;
2672        }
2673    }
2674    if (v9stat.name.size != 0) {
2675        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
2676        if (err < 0) {
2677            goto out;
2678        }
2679    }
2680    if (v9stat.length != -1) {
2681        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
2682        if (err < 0) {
2683            goto out;
2684        }
2685    }
2686    err = offset;
2687out:
2688    put_fid(pdu, fidp);
2689out_nofid:
2690    v9fs_stat_free(&v9stat);
2691    complete_pdu(s, pdu, err);
2692}
2693
2694static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
2695{
2696    uint32_t f_type;
2697    uint32_t f_bsize;
2698    uint64_t f_blocks;
2699    uint64_t f_bfree;
2700    uint64_t f_bavail;
2701    uint64_t f_files;
2702    uint64_t f_ffree;
2703    uint64_t fsid_val;
2704    uint32_t f_namelen;
2705    size_t offset = 7;
2706    int32_t bsize_factor;
2707
2708    /*
2709     * compute bsize factor based on host file system block size
2710     * and client msize
2711     */
2712    bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
2713    if (!bsize_factor) {
2714        bsize_factor = 1;
2715    }
2716    f_type  = stbuf->f_type;
2717    f_bsize = stbuf->f_bsize;
2718    f_bsize *= bsize_factor;
2719    /*
2720     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
2721     * adjust(divide) the number of blocks, free blocks and available
2722     * blocks by bsize factor
2723     */
2724    f_blocks = stbuf->f_blocks/bsize_factor;
2725    f_bfree  = stbuf->f_bfree/bsize_factor;
2726    f_bavail = stbuf->f_bavail/bsize_factor;
2727    f_files  = stbuf->f_files;
2728    f_ffree  = stbuf->f_ffree;
2729    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
2730               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
2731    f_namelen = stbuf->f_namelen;
2732
2733    return pdu_marshal(pdu, offset, "ddqqqqqqd",
2734                       f_type, f_bsize, f_blocks, f_bfree,
2735                       f_bavail, f_files, f_ffree,
2736                       fsid_val, f_namelen);
2737}
2738
2739static void v9fs_statfs(void *opaque)
2740{
2741    int32_t fid;
2742    ssize_t retval = 0;
2743    size_t offset = 7;
2744    V9fsFidState *fidp;
2745    struct statfs stbuf;
2746    V9fsPDU *pdu = opaque;
2747    V9fsState *s = pdu->s;
2748
2749    retval = pdu_unmarshal(pdu, offset, "d", &fid);
2750    if (retval < 0) {
2751        goto out_nofid;
2752    }
2753    fidp = get_fid(pdu, fid);
2754    if (fidp == NULL) {
2755        retval = -ENOENT;
2756        goto out_nofid;
2757    }
2758    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
2759    if (retval < 0) {
2760        goto out;
2761    }
2762    retval = v9fs_fill_statfs(s, pdu, &stbuf);
2763    if (retval < 0) {
2764        goto out;
2765    }
2766    retval += offset;
2767out:
2768    put_fid(pdu, fidp);
2769out_nofid:
2770    complete_pdu(s, pdu, retval);
2771}
2772
2773static void v9fs_mknod(void *opaque)
2774{
2775
2776    int mode;
2777    gid_t gid;
2778    int32_t fid;
2779    V9fsQID qid;
2780    int err = 0;
2781    int major, minor;
2782    size_t offset = 7;
2783    V9fsString name;
2784    struct stat stbuf;
2785    V9fsFidState *fidp;
2786    V9fsPDU *pdu = opaque;
2787    V9fsState *s = pdu->s;
2788
2789    v9fs_string_init(&name);
2790    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
2791                        &major, &minor, &gid);
2792    if (err < 0) {
2793        goto out_nofid;
2794    }
2795    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
2796
2797    fidp = get_fid(pdu, fid);
2798    if (fidp == NULL) {
2799        err = -ENOENT;
2800        goto out_nofid;
2801    }
2802    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
2803                        makedev(major, minor), mode, &stbuf);
2804    if (err < 0) {
2805        goto out;
2806    }
2807    stat_to_qid(&stbuf, &qid);
2808    err = pdu_marshal(pdu, offset, "Q", &qid);
2809    if (err < 0) {
2810        goto out;
2811    }
2812    err += offset;
2813    trace_v9fs_mknod_return(pdu->tag, pdu->id,
2814                            qid.type, qid.version, qid.path);
2815out:
2816    put_fid(pdu, fidp);
2817out_nofid:
2818    complete_pdu(s, pdu, err);
2819    v9fs_string_free(&name);
2820}
2821
2822/*
2823 * Implement posix byte range locking code
2824 * Server side handling of locking code is very simple, because 9p server in
2825 * QEMU can handle only one client. And most of the lock handling
2826 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
2827 * do any thing in * qemu 9p server side lock code path.
2828 * So when a TLOCK request comes, always return success
2829 */
2830static void v9fs_lock(void *opaque)
2831{
2832    int8_t status;
2833    V9fsFlock flock;
2834    size_t offset = 7;
2835    struct stat stbuf;
2836    V9fsFidState *fidp;
2837    int32_t fid, err = 0;
2838    V9fsPDU *pdu = opaque;
2839    V9fsState *s = pdu->s;
2840
2841    status = P9_LOCK_ERROR;
2842    v9fs_string_init(&flock.client_id);
2843    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
2844                        &flock.flags, &flock.start, &flock.length,
2845                        &flock.proc_id, &flock.client_id);
2846    if (err < 0) {
2847        goto out_nofid;
2848    }
2849    trace_v9fs_lock(pdu->tag, pdu->id, fid,
2850                    flock.type, flock.start, flock.length);
2851
2852
2853    /* We support only block flag now (that too ignored currently) */
2854    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
2855        err = -EINVAL;
2856        goto out_nofid;
2857    }
2858    fidp = get_fid(pdu, fid);
2859    if (fidp == NULL) {
2860        err = -ENOENT;
2861        goto out_nofid;
2862    }
2863    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2864    if (err < 0) {
2865        goto out;
2866    }
2867    status = P9_LOCK_SUCCESS;
2868out:
2869    put_fid(pdu, fidp);
2870out_nofid:
2871    err = pdu_marshal(pdu, offset, "b", status);
2872    if (err > 0) {
2873        err += offset;
2874    }
2875    trace_v9fs_lock_return(pdu->tag, pdu->id, status);
2876    complete_pdu(s, pdu, err);
2877    v9fs_string_free(&flock.client_id);
2878}
2879
2880/*
2881 * When a TGETLOCK request comes, always return success because all lock
2882 * handling is done by client's VFS layer.
2883 */
2884static void v9fs_getlock(void *opaque)
2885{
2886    size_t offset = 7;
2887    struct stat stbuf;
2888    V9fsFidState *fidp;
2889    V9fsGetlock glock;
2890    int32_t fid, err = 0;
2891    V9fsPDU *pdu = opaque;
2892    V9fsState *s = pdu->s;
2893
2894    v9fs_string_init(&glock.client_id);
2895    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
2896                        &glock.start, &glock.length, &glock.proc_id,
2897                        &glock.client_id);
2898    if (err < 0) {
2899        goto out_nofid;
2900    }
2901    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
2902                       glock.type, glock.start, glock.length);
2903
2904    fidp = get_fid(pdu, fid);
2905    if (fidp == NULL) {
2906        err = -ENOENT;
2907        goto out_nofid;
2908    }
2909    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2910    if (err < 0) {
2911        goto out;
2912    }
2913    glock.type = P9_LOCK_TYPE_UNLCK;
2914    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
2915                          glock.start, glock.length, glock.proc_id,
2916                          &glock.client_id);
2917    if (err < 0) {
2918        goto out;
2919    }
2920    err += offset;
2921    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
2922                              glock.length, glock.proc_id);
2923out:
2924    put_fid(pdu, fidp);
2925out_nofid:
2926    complete_pdu(s, pdu, err);
2927    v9fs_string_free(&glock.client_id);
2928}
2929
2930static void v9fs_mkdir(void *opaque)
2931{
2932    V9fsPDU *pdu = opaque;
2933    size_t offset = 7;
2934    int32_t fid;
2935    struct stat stbuf;
2936    V9fsQID qid;
2937    V9fsString name;
2938    V9fsFidState *fidp;
2939    gid_t gid;
2940    int mode;
2941    int err = 0;
2942
2943    v9fs_string_init(&name);
2944    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
2945    if (err < 0) {
2946        goto out_nofid;
2947    }
2948    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
2949
2950    fidp = get_fid(pdu, fid);
2951    if (fidp == NULL) {
2952        err = -ENOENT;
2953        goto out_nofid;
2954    }
2955    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
2956    if (err < 0) {
2957        goto out;
2958    }
2959    stat_to_qid(&stbuf, &qid);
2960    err = pdu_marshal(pdu, offset, "Q", &qid);
2961    if (err < 0) {
2962        goto out;
2963    }
2964    err += offset;
2965    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
2966                            qid.type, qid.version, qid.path, err);
2967out:
2968    put_fid(pdu, fidp);
2969out_nofid:
2970    complete_pdu(pdu->s, pdu, err);
2971    v9fs_string_free(&name);
2972}
2973
2974static void v9fs_xattrwalk(void *opaque)
2975{
2976    int64_t size;
2977    V9fsString name;
2978    ssize_t err = 0;
2979    size_t offset = 7;
2980    int32_t fid, newfid;
2981    V9fsFidState *file_fidp;
2982    V9fsFidState *xattr_fidp = NULL;
2983    V9fsPDU *pdu = opaque;
2984    V9fsState *s = pdu->s;
2985
2986    v9fs_string_init(&name);
2987    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
2988    if (err < 0) {
2989        goto out_nofid;
2990    }
2991    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
2992
2993    file_fidp = get_fid(pdu, fid);
2994    if (file_fidp == NULL) {
2995        err = -ENOENT;
2996        goto out_nofid;
2997    }
2998    xattr_fidp = alloc_fid(s, newfid);
2999    if (xattr_fidp == NULL) {
3000        err = -EINVAL;
3001        goto out;
3002    }
3003    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3004    if (name.data == NULL) {
3005        /*
3006         * listxattr request. Get the size first
3007         */
3008        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3009        if (size < 0) {
3010            err = size;
3011            clunk_fid(s, xattr_fidp->fid);
3012            goto out;
3013        }
3014        /*
3015         * Read the xattr value
3016         */
3017        xattr_fidp->fs.xattr.len = size;
3018        xattr_fidp->fid_type = P9_FID_XATTR;
3019        xattr_fidp->fs.xattr.copied_len = -1;
3020        if (size) {
3021            xattr_fidp->fs.xattr.value = g_malloc(size);
3022            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3023                                     xattr_fidp->fs.xattr.value,
3024                                     xattr_fidp->fs.xattr.len);
3025            if (err < 0) {
3026                clunk_fid(s, xattr_fidp->fid);
3027                goto out;
3028            }
3029        }
3030        err = pdu_marshal(pdu, offset, "q", size);
3031        if (err < 0) {
3032            goto out;
3033        }
3034        err += offset;
3035    } else {
3036        /*
3037         * specific xattr fid. We check for xattr
3038         * presence also collect the xattr size
3039         */
3040        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3041                                 &name, NULL, 0);
3042        if (size < 0) {
3043            err = size;
3044            clunk_fid(s, xattr_fidp->fid);
3045            goto out;
3046        }
3047        /*
3048         * Read the xattr value
3049         */
3050        xattr_fidp->fs.xattr.len = size;
3051        xattr_fidp->fid_type = P9_FID_XATTR;
3052        xattr_fidp->fs.xattr.copied_len = -1;
3053        if (size) {
3054            xattr_fidp->fs.xattr.value = g_malloc(size);
3055            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3056                                    &name, xattr_fidp->fs.xattr.value,
3057                                    xattr_fidp->fs.xattr.len);
3058            if (err < 0) {
3059                clunk_fid(s, xattr_fidp->fid);
3060                goto out;
3061            }
3062        }
3063        err = pdu_marshal(pdu, offset, "q", size);
3064        if (err < 0) {
3065            goto out;
3066        }
3067        err += offset;
3068    }
3069    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3070out:
3071    put_fid(pdu, file_fidp);
3072    if (xattr_fidp) {
3073        put_fid(pdu, xattr_fidp);
3074    }
3075out_nofid:
3076    complete_pdu(s, pdu, err);
3077    v9fs_string_free(&name);
3078}
3079
3080static void v9fs_xattrcreate(void *opaque)
3081{
3082    int flags;
3083    int32_t fid;
3084    int64_t size;
3085    ssize_t err = 0;
3086    V9fsString name;
3087    size_t offset = 7;
3088    V9fsFidState *file_fidp;
3089    V9fsFidState *xattr_fidp;
3090    V9fsPDU *pdu = opaque;
3091    V9fsState *s = pdu->s;
3092
3093    v9fs_string_init(&name);
3094    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3095    if (err < 0) {
3096        goto out_nofid;
3097    }
3098    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3099
3100    file_fidp = get_fid(pdu, fid);
3101    if (file_fidp == NULL) {
3102        err = -EINVAL;
3103        goto out_nofid;
3104    }
3105    /* Make the file fid point to xattr */
3106    xattr_fidp = file_fidp;
3107    xattr_fidp->fid_type = P9_FID_XATTR;
3108    xattr_fidp->fs.xattr.copied_len = 0;
3109    xattr_fidp->fs.xattr.len = size;
3110    xattr_fidp->fs.xattr.flags = flags;
3111    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3112    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3113    xattr_fidp->fs.xattr.value = g_malloc(size);
3114    err = offset;
3115    put_fid(pdu, file_fidp);
3116out_nofid:
3117    complete_pdu(s, pdu, err);
3118    v9fs_string_free(&name);
3119}
3120
3121static void v9fs_readlink(void *opaque)
3122{
3123    V9fsPDU *pdu = opaque;
3124    size_t offset = 7;
3125    V9fsString target;
3126    int32_t fid;
3127    int err = 0;
3128    V9fsFidState *fidp;
3129
3130    err = pdu_unmarshal(pdu, offset, "d", &fid);
3131    if (err < 0) {
3132        goto out_nofid;
3133    }
3134    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3135    fidp = get_fid(pdu, fid);
3136    if (fidp == NULL) {
3137        err = -ENOENT;
3138        goto out_nofid;
3139    }
3140
3141    v9fs_string_init(&target);
3142    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3143    if (err < 0) {
3144        goto out;
3145    }
3146    err = pdu_marshal(pdu, offset, "s", &target);
3147    if (err < 0) {
3148        v9fs_string_free(&target);
3149        goto out;
3150    }
3151    err += offset;
3152    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3153    v9fs_string_free(&target);
3154out:
3155    put_fid(pdu, fidp);
3156out_nofid:
3157    complete_pdu(pdu->s, pdu, err);
3158}
3159
3160static CoroutineEntry *pdu_co_handlers[] = {
3161    [P9_TREADDIR] = v9fs_readdir,
3162    [P9_TSTATFS] = v9fs_statfs,
3163    [P9_TGETATTR] = v9fs_getattr,
3164    [P9_TSETATTR] = v9fs_setattr,
3165    [P9_TXATTRWALK] = v9fs_xattrwalk,
3166    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3167    [P9_TMKNOD] = v9fs_mknod,
3168    [P9_TRENAME] = v9fs_rename,
3169    [P9_TLOCK] = v9fs_lock,
3170    [P9_TGETLOCK] = v9fs_getlock,
3171    [P9_TRENAMEAT] = v9fs_renameat,
3172    [P9_TREADLINK] = v9fs_readlink,
3173    [P9_TUNLINKAT] = v9fs_unlinkat,
3174    [P9_TMKDIR] = v9fs_mkdir,
3175    [P9_TVERSION] = v9fs_version,
3176    [P9_TLOPEN] = v9fs_open,
3177    [P9_TATTACH] = v9fs_attach,
3178    [P9_TSTAT] = v9fs_stat,
3179    [P9_TWALK] = v9fs_walk,
3180    [P9_TCLUNK] = v9fs_clunk,
3181    [P9_TFSYNC] = v9fs_fsync,
3182    [P9_TOPEN] = v9fs_open,
3183    [P9_TREAD] = v9fs_read,
3184#if 0
3185    [P9_TAUTH] = v9fs_auth,
3186#endif
3187    [P9_TFLUSH] = v9fs_flush,
3188    [P9_TLINK] = v9fs_link,
3189    [P9_TSYMLINK] = v9fs_symlink,
3190    [P9_TCREATE] = v9fs_create,
3191    [P9_TLCREATE] = v9fs_lcreate,
3192    [P9_TWRITE] = v9fs_write,
3193    [P9_TWSTAT] = v9fs_wstat,
3194    [P9_TREMOVE] = v9fs_remove,
3195};
3196
3197static void v9fs_op_not_supp(void *opaque)
3198{
3199    V9fsPDU *pdu = opaque;
3200    complete_pdu(pdu->s, pdu, -EOPNOTSUPP);
3201}
3202
3203static void v9fs_fs_ro(void *opaque)
3204{
3205    V9fsPDU *pdu = opaque;
3206    complete_pdu(pdu->s, pdu, -EROFS);
3207}
3208
3209static inline bool is_read_only_op(V9fsPDU *pdu)
3210{
3211    switch (pdu->id) {
3212    case P9_TREADDIR:
3213    case P9_TSTATFS:
3214    case P9_TGETATTR:
3215    case P9_TXATTRWALK:
3216    case P9_TLOCK:
3217    case P9_TGETLOCK:
3218    case P9_TREADLINK:
3219    case P9_TVERSION:
3220    case P9_TLOPEN:
3221    case P9_TATTACH:
3222    case P9_TSTAT:
3223    case P9_TWALK:
3224    case P9_TCLUNK:
3225    case P9_TFSYNC:
3226    case P9_TOPEN:
3227    case P9_TREAD:
3228    case P9_TAUTH:
3229    case P9_TFLUSH:
3230        return 1;
3231    default:
3232        return 0;
3233    }
3234}
3235
3236static void submit_pdu(V9fsState *s, V9fsPDU *pdu)
3237{
3238    Coroutine *co;
3239    CoroutineEntry *handler;
3240
3241    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3242        (pdu_co_handlers[pdu->id] == NULL)) {
3243        handler = v9fs_op_not_supp;
3244    } else {
3245        handler = pdu_co_handlers[pdu->id];
3246    }
3247
3248    if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3249        handler = v9fs_fs_ro;
3250    }
3251    co = qemu_coroutine_create(handler);
3252    qemu_coroutine_enter(co, pdu);
3253}
3254
3255void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
3256{
3257    V9fsState *s = (V9fsState *)vdev;
3258    V9fsPDU *pdu;
3259    ssize_t len;
3260
3261    while ((pdu = alloc_pdu(s)) &&
3262            (len = virtqueue_pop(vq, &pdu->elem)) != 0) {
3263        uint8_t *ptr;
3264        pdu->s = s;
3265        BUG_ON(pdu->elem.out_num == 0 || pdu->elem.in_num == 0);
3266        BUG_ON(pdu->elem.out_sg[0].iov_len < 7);
3267
3268        ptr = pdu->elem.out_sg[0].iov_base;
3269
3270        pdu->size = le32_to_cpu(*(uint32_t *)ptr);
3271        pdu->id = ptr[4];
3272        pdu->tag = le16_to_cpu(*(uint16_t *)(ptr + 5));
3273        qemu_co_queue_init(&pdu->complete);
3274        submit_pdu(s, pdu);
3275    }
3276    free_pdu(s, pdu);
3277}
3278
3279static void __attribute__((__constructor__)) virtio_9p_set_fd_limit(void)
3280{
3281    struct rlimit rlim;
3282    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
3283        fprintf(stderr, "Failed to get the resource limit\n");
3284        exit(1);
3285    }
3286    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
3287    open_fd_rc = rlim.rlim_cur/2;
3288}
3289